mirror of
https://github.com/signcl/docsearch-scraper-action.git
synced 2025-06-08 02:34:32 +08:00
Compare commits
10 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
816fdbb24b | ||
|
d333d45a58 | ||
|
29e00fe7dd | ||
|
504242b8a0 | ||
|
711df738f4 | ||
|
5770064c68 | ||
|
7bc83e6f4c | ||
|
77bb9c27f4 | ||
|
5d32f913bd | ||
|
0529121dcf |
@ -1,4 +1,5 @@
|
|||||||
FROM algolia/docsearch-scraper:latest
|
FROM openbayes/docsearch-scraper-action-base
|
||||||
|
|
||||||
LABEL maintainer="t@sparanoid.com"
|
LABEL maintainer="t@sparanoid.com"
|
||||||
|
|
||||||
COPY entrypoint.sh /entrypoint.sh
|
COPY entrypoint.sh /entrypoint.sh
|
||||||
|
10
Dockerfile.base
Normal file
10
Dockerfile.base
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
FROM algolia/docsearch-scraper:latest
|
||||||
|
|
||||||
|
LABEL maintainer="t@sparanoid.com"
|
||||||
|
|
||||||
|
# Get rid of /github/home
|
||||||
|
# https://stackoverflow.com/a/63144407/412385
|
||||||
|
ENV WORKON_HOME /root
|
||||||
|
ENV PIPENV_PIPFILE /root/Pipfile
|
||||||
|
|
||||||
|
RUN pipenv install
|
20
LICENSE
Normal file
20
LICENSE
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
The MIT License (MIT)
|
||||||
|
|
||||||
|
Copyright (c) 2021 OpenBayes
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
|
the Software without restriction, including without limitation the rights to
|
||||||
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
Makefile
Normal file
22
Makefile
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
all: build
|
||||||
|
|
||||||
|
build-base:
|
||||||
|
docker build -f Dockerfile.base -t openbayes/docsearch-scraper-action-base:latest .
|
||||||
|
|
||||||
|
build:
|
||||||
|
docker build -t openbayes/docsearch-scraper-action:latest .
|
||||||
|
|
||||||
|
run:
|
||||||
|
docker run --rm -it --name docsearch-scraper-action openbayes/docsearch-scraper-action:latest
|
||||||
|
|
||||||
|
push-base:
|
||||||
|
docker push openbayes/docsearch-scraper-action-base:latest
|
||||||
|
|
||||||
|
push:
|
||||||
|
docker push openbayes/docsearch-scraper-action:latest
|
||||||
|
|
||||||
|
stop:
|
||||||
|
docker rm -f docsearch-scraper-action
|
||||||
|
|
||||||
|
clean:
|
||||||
|
docker rmi openbayes/docsearch-scraper-action:latest
|
36
README.md
Normal file
36
README.md
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
# Algolia DocSearch Scraper in Docker for GitHub Actions
|
||||||
|
|
||||||
|
Run a self-hosted Algolia [DocSearch scraper](https://github.com/algolia/docsearch-scraper) in Docker with GitHub Actions
|
||||||
|
|
||||||
|
- The base image makes the GitHub Actions workflow faster (less than 1 minute of image build time) and keeps the scraper up to date automatically, thanks to Docker Hub automated builds of the base image
|
||||||
|
- You can get some config examples at [algolia/docsearch-configs](https://github.com/algolia/docsearch-configs)
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
Basic usage:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
- name: Push indices to Algolia
|
||||||
|
uses: signcl/docsearch-scraper-action@master
|
||||||
|
env:
|
||||||
|
APPLICATION_ID: ${{ secrets.ALGOLIA_APPLICATION_ID }}
|
||||||
|
API_KEY: ${{ secrets.ALGOLIA_API_KEY }}
|
||||||
|
CONFIG: '{"index_name": "docs","start_urls": ["https://example.com/"],"sitemap_urls": ["https://example.com/sitemap.xml"],"sitemap_alternate_links": true,"stop_urls": [],"selectors": {"lvl1": "header h1","lvl2": "article h2","lvl3": "article h3","lvl4": "article h4","lvl5": "article h5, article td:first-child","lvl6": "article h6","text": "article p, article li, article td:last-child"},"strip_chars": " .,;:#","custom_settings": {"separatorsToIndex": "_","attributesForFaceting": ["language","version","type","docusaurus_tag"],"attributesToRetrieve": ["hierarchy","content","anchor","url","url_without_anchor","type"]}}'
|
||||||
|
```
|
||||||
|
|
||||||
|
The tricky part is how to pass `CONFIG` to the scraper. The above example won't work if your configuration contains XPath selectors like `ul[contains(@class,'menu__list')]`. A more elegant way is to commit your config as `algolia.json` into the repository and check it out within the workflow:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- name: Get the content of algolia.json as config
|
||||||
|
id: algolia_config
|
||||||
|
run: echo "config=$(cat algolia.json | jq -r tostring)" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
|
- name: Push indices to Algolia
|
||||||
|
uses: signcl/docsearch-scraper-action@master
|
||||||
|
env:
|
||||||
|
APPLICATION_ID: ${{ secrets.ALGOLIA_APPLICATION_ID }}
|
||||||
|
API_KEY: ${{ secrets.ALGOLIA_API_KEY }}
|
||||||
|
CONFIG: ${{ steps.algolia_config.outputs.config }}
|
||||||
|
```
|
8
action.yml
Normal file
8
action.yml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
name: 'DocSearch Scraper Action'
|
||||||
|
description: 'Algolia DocSearch Scraper in Docker for GitHub Actions'
|
||||||
|
runs:
|
||||||
|
using: 'docker'
|
||||||
|
image: 'Dockerfile'
|
||||||
|
branding:
|
||||||
|
icon: 'search'
|
||||||
|
color: 'purple'
|
@ -1,6 +1,8 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
# https://docs.github.com/en/actions/creating-actions/dockerfile-support-for-github-actions
|
# https://docs.github.com/en/actions/creating-actions/dockerfile-support-for-github-actions
|
||||||
|
|
||||||
|
cd /root
|
||||||
|
|
||||||
# `$*` expands the `args` supplied in an `array` individually
|
# `$*` expands the `args` supplied in an `array` individually
|
||||||
# or splits `args` in a string separated by whitespace.
|
# or splits `args` in a string separated by whitespace.
|
||||||
sh -c "pipenv run python -m src.index $*"
|
sh -c "pipenv run python -m src.index $*"
|
||||||
|
6
renovate.json
Normal file
6
renovate.json
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
|
||||||
|
"extends": [
|
||||||
|
"config:base"
|
||||||
|
]
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user