mirror of
				https://github.com/signcl/docsearch-scraper-action.git
				synced 2025-10-31 08:41:47 +08:00 
			
		
		
		
	feat: add readme and metadata
This commit is contained in:
		
							
								
								
									
										20
									
								
								LICENSE
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								LICENSE
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,20 @@ | |||||||
|  | The MIT License (MIT) | ||||||
|  |  | ||||||
|  | Copyright (c) 2021 OpenBayes | ||||||
|  |  | ||||||
|  | Permission is hereby granted, free of charge, to any person obtaining a copy of | ||||||
|  | this software and associated documentation files (the "Software"), to deal in | ||||||
|  | the Software without restriction, including without limitation the rights to | ||||||
|  | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of | ||||||
|  | the Software, and to permit persons to whom the Software is furnished to do so, | ||||||
|  | subject to the following conditions: | ||||||
|  |  | ||||||
|  | The above copyright notice and this permission notice shall be included in all | ||||||
|  | copies or substantial portions of the Software. | ||||||
|  |  | ||||||
|  | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||||
|  | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS | ||||||
|  | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR | ||||||
|  | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER | ||||||
|  | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||||||
|  | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||||||
							
								
								
									
										36
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,36 @@ | |||||||
|  | # Algolia DocSearch Scraper in Docker for GitHub Actions | ||||||
|  |  | ||||||
|  | Run the self-hosted Algolia [DocSearch scraper](https://github.com/algolia/docsearch-scraper) in Docker with GitHub Actions. | ||||||
|  |  | ||||||
|  | - The base image makes GitHub Actions workflows faster (less than 1 minute of image build time) and keeps the scraper up to date automatically, thanks to Docker Hub's automatic builds of the base image | ||||||
|  | - You can get some config examples at [algolia/docsearch-configs](https://github.com/algolia/docsearch-configs) | ||||||
|  |  | ||||||
|  | ## Usage | ||||||
|  |  | ||||||
|  | Basic usage: | ||||||
|  |  | ||||||
|  | ```yaml | ||||||
|  | - name: Push indices to Algolia | ||||||
|  |   uses: signcl/docsearch-scraper-action@master | ||||||
|  |   env: | ||||||
|  |     APPLICATION_ID: ${{ secrets.ALGOLIA_APPLICATION_ID }} | ||||||
|  |     API_KEY: ${{ secrets.ALGOLIA_API_KEY }} | ||||||
|  |     CONFIG: '{"index_name": "docs","start_urls": ["https://example.com/"],"sitemap_urls": ["https://example.com/sitemap.xml"],"sitemap_alternate_links": true,"stop_urls": [],"selectors": {"lvl1": "header h1","lvl2": "article h2","lvl3": "article h3","lvl4": "article h4","lvl5": "article h5, article td:first-child","lvl6": "article h6","text": "article p, article li, article td:last-child"},"strip_chars": " .,;:#","custom_settings": {"separatorsToIndex": "_","attributesForFaceting": ["language","version","type","docusaurus_tag"],"attributesToRetrieve": ["hierarchy","content","anchor","url","url_without_anchor","type"]}}' | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | The tricky part is how to pass `CONFIG` to the scraper. The above example won't work if your configuration contains an XPath selector like `ul[contains(@class,'menu__list')]`. A more elegant way is to commit your config as `algolia.json` to the repository and check it out within the workflow: | ||||||
|  |  | ||||||
|  | ```yaml | ||||||
|  | - uses: actions/checkout@v2 | ||||||
|  |  | ||||||
|  | - name: Get the content of algolia.json as config | ||||||
|  |   id: algolia_config | ||||||
|  |   run: echo "::set-output name=config::$(cat algolia.json | jq -r tostring)" | ||||||
|  |  | ||||||
|  | - name: Push indices to Algolia | ||||||
|  |   uses: signcl/docsearch-scraper-action@master | ||||||
|  |   env: | ||||||
|  |     APPLICATION_ID: ${{ secrets.ALGOLIA_APPLICATION_ID }} | ||||||
|  |     API_KEY: ${{ secrets.ALGOLIA_API_KEY }} | ||||||
|  |     CONFIG: ${{ steps.algolia_config.outputs.config }} | ||||||
|  | ``` | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| name: 'DocSearch Scraper Action' | name: 'DocSearch Scraper Action' | ||||||
| description: 'DocSearch Scraper in Docker for Github Actions' | description: 'Algolia DocSearch Scraper in Docker for GitHub Actions' | ||||||
| runs: | runs: | ||||||
|   using: 'docker' |   using: 'docker' | ||||||
|   image: 'Dockerfile' |   image: 'Dockerfile' | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user