-
Notifications
You must be signed in to change notification settings - Fork 0
116 lines (103 loc) · 3.95 KB
/
crawler.yml
File metadata and controls
116 lines (103 loc) · 3.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# Workflow that caches a remote file or Web page.
name: Download File or Web page

# NOTE: generic YAML 1.1 parsers read the bare key `on` as a boolean;
# GitHub's workflow loader treats it as the trigger map.
on:
  # Fires whenever a label is added to an issue.
  issues:
    types: [labeled]
  # Manual run with the target URL supplied by the caller.
  workflow_dispatch:
    inputs:
      url:
        description: "URL of the file or Web page to download"
        required: true
        type: string

env:
  # Mirrors the secret into the environment; step conditions in this workflow
  # test `env.R2_ACCOUNT_ID` to decide whether an R2 upload should run.
  R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }}
jobs:
  # Downloads the requested URL into the repository tree, optionally mirrors
  # it to Cloudflare R2, and pushes the result as a commit.
  #
  # Security convention used throughout this job: untrusted values (issue
  # title, URL, anything derived from them) are never pasted into a `run:`
  # script through an Actions expression. They are handed over as environment
  # variables and read with quoted shell expansions, so they cannot be parsed
  # as shell code.
  Fetch-and-Save:
    # Run for manual dispatches, or when an issue receives the `crawler` label.
    if: >-
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'issues' && github.event.label.name == 'crawler')
    runs-on: ubuntu-latest
    permissions:
      contents: write  # the final step pushes a commit
      issues: write  # the comment step posts on the triggering issue
    steps:
      - uses: pnpm/action-setup@v4
        with:
          version: 10

      - uses: actions/setup-node@v4
        with:
          node-version: 22

      - uses: actions/checkout@v4

      # Extracts the form fields of the issue; only the `url` field is used.
      - id: issue-parser
        if: github.event_name == 'issues'
        uses: stefanbuck/github-issue-parser@v3
        with:
          template-path: ".github/ISSUE_TEMPLATE/crawler.yml"

      # Publishes FILE_URL, FILE_PATH (URL path without host and query) and
      # MIME_TYPE (from the response headers) to the following steps.
      - name: Set URL & MIME variables
        env:
          URL: ${{ github.event_name == 'workflow_dispatch' && inputs.url || (github.event_name == 'issues' && steps.issue-parser.outputs.issueparser_url) }}
        run: |
          # Accept only a single-line http(s) URL: the value is written to
          # $GITHUB_ENV below, where an embedded newline would define extra
          # variables, and curl would otherwise honour schemes like file://.
          url_pattern='^https?://[^[:space:]/]+[^[:space:]]*$'
          if [[ ! "$URL" =~ $url_pattern ]]; then
            echo "::error::Expected a single http(s) URL to download"
            exit 1
          fi

          # Path component of the URL; empty when the URL has no path.
          FILE_PATH="$(printf '%s\n' "$URL" | grep -oP '^\w+://[^/]+/?\K([^?]*)' || true)"

          # The path is used as a location inside the checkout, so refuse
          # absolute paths, parent-directory segments and Git/Actions metadata.
          case "/$FILE_PATH/" in
            //* | */../* | /.git/* | /.github/*)
              echo "::error::Refusing unsafe target path derived from the URL"
              exit 1
              ;;
          esac

          # -L follows redirects, so several header blocks may be printed:
          # keep the Content-Type of the last response. The character class
          # covers types such as image/svg+xml or application/x-tar.
          MIME_TYPE="$(curl -sIL "$URL" | grep -oPi '^Content-Type:\s*\K[\w.+-]+/[\w.+-]+' | tail -n 1 || true)"
          if [ -z "$MIME_TYPE" ]; then
            echo "::error::Could not determine the Content-Type of the URL"
            exit 1
          fi

          {
            printf 'FILE_URL=%s\n' "$URL"
            printf 'FILE_PATH=%s\n' "$FILE_PATH"
            printf 'MIME_TYPE=%s\n' "$MIME_TYPE"
          } >> "$GITHUB_ENV"

      # Appends the extension matching MIME_TYPE to FILE_PATH, publishes the
      # containing folder as FOLDER_PATH and creates that folder.
      - name: Set Path variables
        run: |
          pnpm i mime -g
          EXTENSION="$(mime -r "$MIME_TYPE")"
          FILE_PATH="${FILE_PATH}.${EXTENSION}"
          FOLDER_PATH="$(dirname -- "$FILE_PATH")"
          {
            printf 'FILE_PATH=%s\n' "$FILE_PATH"
            printf 'FOLDER_PATH=%s\n' "$FOLDER_PATH"
          } >> "$GITHUB_ENV"
          mkdir -p -- "$FOLDER_PATH"

      - name: Comment Cache URLs
        if: github.event_name == 'issues'
        env:
          GH_TOKEN: ${{ github.token }}
          ISSUE_NUMBER: ${{ github.event.issue.number }}
        run: |
          # Written outside the workspace so the later `git add .` does not
          # commit the comment body into the repository.
          COMMENT_FILE="$RUNNER_TEMP/comment.md"
          cat > "$COMMENT_FILE" <<EOF
          - The GitHub action URL is: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}
          - JSDelivr URL: https://cdn.jsdelivr.net/gh/${GITHUB_REPOSITORY}@${GITHUB_REF_NAME}/${FILE_PATH}
          EOF
          gh issue comment "$ISSUE_NUMBER" --body-file "$COMMENT_FILE"

      # -f fails the step on an HTTP error instead of saving the error page;
      # -L follows redirects, matching the header probe above.
      - name: Download the File
        if: env.MIME_TYPE != 'text/html'
        run: curl -fsSL -o "$FILE_PATH" "$FILE_URL"

      - uses: browser-actions/setup-chrome@v1
        if: env.MIME_TYPE == 'text/html'

      - name: Download the Web page & its assets
        if: env.MIME_TYPE == 'text/html'
        run: |
          cd "$FOLDER_PATH"
          pnpx web-fetch "$FILE_URL"

      # NOTE(review): `@master` is a mutable ref of a third-party action that
      # receives the R2 credentials; consider pinning it to a commit SHA.
      # NOTE(review): `source-dir` is given a file path here; confirm the
      # action accepts a single file.
      - uses: ryand56/r2-upload-action@master
        if: env.R2_ACCOUNT_ID != ''
        with:
          r2-account-id: ${{ secrets.R2_ACCOUNT_ID }}
          r2-access-key-id: ${{ secrets.R2_ACCESS_KEY_ID }}
          r2-secret-access-key: ${{ secrets.R2_SECRET_ACCESS_KEY }}
          r2-bucket: ${{ secrets.R2_BUCKET }}
          source-dir: "${{ env.FILE_PATH }}"
          destination-dir: "${{ env.FOLDER_PATH }}"

      # Second upload for Web pages. It is skipped, like the first one, when
      # no R2 account is configured.
      # NOTE(review): FILE_PATH already ends in the MIME-derived extension, so
      # this resolves to `<path>.html.html`; presumably that matches what
      # web-fetch emits. TODO confirm.
      - uses: ryand56/r2-upload-action@master
        if: env.R2_ACCOUNT_ID != '' && env.MIME_TYPE == 'text/html'
        with:
          r2-account-id: ${{ secrets.R2_ACCOUNT_ID }}
          r2-access-key-id: ${{ secrets.R2_ACCESS_KEY_ID }}
          r2-secret-access-key: ${{ secrets.R2_SECRET_ACCESS_KEY }}
          r2-bucket: ${{ secrets.R2_BUCKET }}
          source-dir: "${{ env.FILE_PATH }}.html"
          destination-dir: "${{ env.FOLDER_PATH }}"

      - name: Git commit
        env:
          ISSUE_TITLE: ${{ github.event.issue.title }}
          ISSUE_NUMBER: ${{ github.event.issue.number }}
          DISPATCH_URL: ${{ inputs.url }}
        run: |
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          git config --global user.name "github-actions[bot]"
          git add .
          if [ "$GITHUB_EVENT_NAME" = "issues" ]; then
            git commit -m "[add] $ISSUE_TITLE (closes #$ISSUE_NUMBER)"
          else
            git commit -m "[add] Cache file from workflow_dispatch: $DISPATCH_URL"
          fi
          git fetch
          # Rebase onto the branch this run was started from (the same ref the
          # JSDelivr URL points at) rather than a hard-coded branch name.
          git rebase "origin/$GITHUB_REF_NAME"
          git push