Skip to content

Commit af57edf

Browse files
Merge pull request #5 from spiralhouse/lower-python-versions
ci: added python 3.9 and 3.10 to tests
2 parents c244bb1 + c0dfbdd commit af57edf

6 files changed

Lines changed: 124 additions & 15 deletions

File tree

.github/workflows/python-package.yml

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
strategy:
1616
fail-fast: false
1717
matrix:
18-
python-version: ["3.11", "3.12"]
18+
python-version: ["3.9", "3.10", "3.11", "3.12"]
1919

2020
steps:
2121
- uses: actions/checkout@v4
@@ -27,7 +27,7 @@ jobs:
2727
run: |
2828
python -m pip install --upgrade pip
2929
python -m pip install -r requirements-dev.txt
30-
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
30+
python -m pip install -r requirements.txt
3131
- name: Lint with flake8
3232
run: |
3333
# stop the build if there are Python syntax errors or undefined names
@@ -36,11 +36,16 @@ jobs:
3636
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
3737
- name: Test with pytest and coverage
3838
run: |
39-
pytest --cov=scraper --cov-report=xml --cov-report=term-missing --cov-fail-under=70
40-
- name: Upload coverage report to Codecov
39+
pytest --cov --junitxml=junit.xml --cov-report=term-missing --cov-fail-under=70
40+
- name: Upload coverage to Codecov
4141
uses: codecov/codecov-action@v5
4242
with:
43-
file: ./coverage.xml
44-
fail_ci_if_error: true
4543
token: ${{ secrets.CODECOV_TOKEN }}
44+
fail_ci_if_error: true
4645
verbose: true
46+
- name: Upload test results to Codecov
47+
if: ${{ !cancelled() }}
48+
uses: codecov/test-results-action@v1
49+
with:
50+
token: ${{ secrets.CODECOV_TOKEN }}
51+

README.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Scraper
22

3-
[![Python Tests](https://github.com/johnburbridge/scraper/actions/workflows/python-package.yml/badge.svg)](https://github.com/johnburbridge/scraper/actions/workflows/python-package.yml)
4-
[![Coverage](https://codecov.io/gh/johnburbridge/scraper/branch/main/graph/badge.svg)](https://codecov.io/gh/johnburbridge/scraper)
3+
[![Python Tests](https://github.com/spiralhouse/scraper/actions/workflows/python-package.yml/badge.svg)](https://github.com/spiralhouse/scraper/actions/workflows/python-package.yml)
4+
[![Coverage](https://codecov.io/gh/spiralhouse/scraper/branch/main/graph/badge.svg)](https://codecov.io/gh/spiralhouse/scraper)
55

66
A flexible web crawler that recursively crawls websites, respects robots.txt, and provides various output options.
77

@@ -15,7 +15,7 @@ A flexible web crawler that recursively crawls websites, respects robots.txt, an
1515

1616
1. Clone the repository:
1717
```bash
18-
git clone https://github.com/johnburbridge/scraper.git
18+
git clone https://github.com/spiralhouse/scraper.git
1919
cd scraper
2020
```
2121

@@ -30,6 +30,12 @@ source venv/bin/activate # On Windows: venv\Scripts\activate
3030
pip install -r requirements.txt
3131
```
3232

33+
## Requirements
34+
35+
- **Python**: Compatible with Python 3.9, 3.10, 3.11, and 3.12
36+
- All runtime dependencies are listed in `requirements.txt` and are installed by the `pip install -r requirements.txt` step shown in the installation instructions above.
37+
- Some optional dependencies are available for development in `requirements-dev.txt`.
38+
3339
## Basic Usage
3440

3541
To start crawling a website:

docs/develop.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ This guide provides instructions for setting up a development environment, runni
1616

1717
1. Clone the repository:
1818
```bash
19-
git clone https://github.com/johnburbridge/scraper.git
19+
git clone https://github.com/spiralhouse/scraper.git
2020
cd scraper
2121
```
2222

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ httpcore==1.0.7
2424
httpx==0.28.1
2525
idna==3.10
2626
ipykernel==6.29.5
27-
ipython==9.0.2
27+
ipython>=7.34.0; python_version < '3.11'
28+
ipython==9.0.2; python_version >= '3.11'
2829
ipython_pygments_lexers==1.1.1
2930
isoduration==20.11.0
3031
jedi==0.19.2

scripts/check_py_compat.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Script to verify Python version compatibility with project requirements.
4+
This helps identify packages that might not be compatible with specific Python versions.
5+
"""
6+
7+
import sys
8+
import subprocess
9+
import tempfile
10+
import os
11+
import platform
12+
from pathlib import Path
13+
14+
def check_requirements(requirements_file):
    """Test if all packages in the requirements file can be installed.

    Creates a throwaway virtual environment for the interpreter running this
    script, upgrades pip inside it, and then attempts
    ``pip install -r requirements_file`` in that environment.

    Args:
        requirements_file: Path (``str`` or ``os.PathLike``) to a pip
            requirements file.

    Returns:
        True if pip installed every requirement, False if the install failed
        (pip's captured stdout/stderr are printed in that case).

    Raises:
        subprocess.CalledProcessError: If creating the virtual environment or
            upgrading pip fails; only the requirements install is treated as
            a compatibility result.
    """
    py_version = platform.python_version()
    print(f"Checking compatibility of {requirements_file} with Python {py_version}")

    with tempfile.TemporaryDirectory() as tmpdir:
        # Create a virtual environment in the temp directory
        venv_dir = os.path.join(tmpdir, "venv")
        subprocess.run([sys.executable, "-m", "venv", venv_dir], check=True)

        # Drive pip through the venv's interpreter ("python -m pip") instead of
        # the pip executable: on Windows a running pip.exe cannot replace
        # itself, so "pip install --upgrade pip" fails there.
        if sys.platform.startswith('win'):
            venv_python = os.path.join(venv_dir, "Scripts", "python.exe")
        else:
            venv_python = os.path.join(venv_dir, "bin", "python")
        pip_cmd = [venv_python, "-m", "pip"]

        # Upgrade pip
        subprocess.run(pip_cmd + ["install", "--upgrade", "pip"], check=True)

        # Test installing the requirements
        try:
            subprocess.run(
                pip_cmd + ["install", "-r", os.fspath(requirements_file)],
                check=True,
                capture_output=True,
                text=True,
            )
        except subprocess.CalledProcessError as e:
            print(f"❌ Some packages in {requirements_file} are NOT compatible with Python {py_version}")
            print("Error details:")
            print(e.stdout)
            print(e.stderr)
            return False

        print(f"✅ All packages in {requirements_file} are compatible with Python {py_version}")
        return True
48+
49+
def main():
    """Check the project's requirements files and return a process exit code.

    Looks for ``requirements.txt`` and ``requirements-dev.txt`` in the
    directory above this script's directory. Files that do not exist are
    skipped.

    Returns:
        0 if every existing requirements file passed the check, 1 otherwise.
    """
    project_dir = Path(__file__).parent.parent
    candidates = (
        project_dir / "requirements.txt",
        project_dir / "requirements-dev.txt",
    )

    # Build the full list (not a lazy generator) so that every file is still
    # checked and reported even after an earlier one has failed.
    outcomes = [check_requirements(path) for path in candidates if path.exists()]

    return 0 if all(outcomes) else 1
66+
67+
if __name__ == "__main__":
68+
sys.exit(main())

tests/test_crawler.py

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,35 @@
1111

1212
def async_run(coro):
    """Run a coroutine to completion in tests and return its result.

    Reuses the current event loop when one is available and still open;
    otherwise creates a new loop and installs it as the current one. The loop
    is deliberately left open afterwards so later calls can reuse it.

    Args:
        coro: The coroutine (or other awaitable) to run.

    Returns:
        Whatever ``coro`` returns; exceptions raised by it propagate.
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # No current event loop is set for this thread
        loop = None

    if loop is None or loop.is_closed():
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    return loop.run_until_complete(coro)
2130

2231

2332
class TestCrawler(unittest.TestCase):
2433
"""Tests for the Crawler class."""
2534

2635
def setUp(self):
2736
"""Set up test fixtures."""
37+
# Create and set an event loop for Python 3.9 compatibility
38+
try:
39+
asyncio.get_event_loop()
40+
except RuntimeError:
41+
asyncio.set_event_loop(asyncio.new_event_loop())
42+
2843
self.crawler = Crawler(
2944
max_depth=2,
3045
concurrency_limit=5,
@@ -35,6 +50,20 @@ def setUp(self):
3550
def tearDown(self):
    """Clean up after tests.

    Closes the crawler, then disposes of the current event loop (if any) and
    clears it so the next test does not pick up a stale loop. The loop cleanup
    runs even if closing the crawler raises.
    """
    try:
        self.crawler.close()
    finally:
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            loop = None  # No event loop exists

        try:
            if loop is not None:
                if loop.is_running():
                    # A running loop cannot be closed (close() would raise
                    # RuntimeError); only request that it stops.
                    loop.stop()
                else:
                    loop.close()
        finally:
            # Reset to None to clean up
            asyncio.set_event_loop(None)
3867

3968
def test_is_allowed_domain_same_domain(self):
4069
"""Test that same domain is always allowed."""

0 commit comments

Comments
 (0)