Analyze Python Repositories #1179
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# .github/workflows/analyze_python_repos.yml
# Triggers: manual dispatch plus a recurring schedule.
name: Analyze Python Repositories

on:
  workflow_dispatch:  # Manual trigger
  schedule:
    # Cron fields are evaluated in UTC: minute 0 of every 6th hour
    # (00:00, 06:00, 12:00, 18:00), i.e. four runs per day.
    - cron: '0 */6 * * *'
jobs:
  # Single job: restore the previous data set, run the analysis scripts,
  # regenerate the statistics and README, then archive and commit the results.
  analyze:
    runs-on: ubuntu-latest
    timeout-minutes: 360  # 6 hour timeout (GitHub Actions limit)
    # The job may run as long as the 6-hour schedule interval, so a slow run
    # could overlap the next one and both would try to push. Queue runs
    # instead of letting them race (a queued run is not cancelled).
    concurrency:
      group: ${{ github.workflow }}
      cancel-in-progress: false
    # Least privilege for GITHUB_TOKEN: push results (contents) and
    # list/download artifacts (actions).
    permissions:
      contents: write
      actions: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'  # Quoted so YAML does not read it as 3.1

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests

      - name: Download previous data
        env:
          # Passed through the environment rather than interpolated into the
          # script text.
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          ARTIFACTS_URL: https://api.github.com/repos/${{ github.repository }}/actions/artifacts
        run: |
          # Create data directory if it doesn't exist
          mkdir -p data

          # Find the newest non-expired "data-archive" artifact with a single
          # API call. A failed request, unparsable response, or empty result
          # all leave ARTIFACT_ID empty.
          ARTIFACT_ID=$(
            curl -s -f -H "Authorization: token ${GH_TOKEN}" \
              "${ARTIFACTS_URL}?name=data-archive&per_page=100" |
              jq -r '[.artifacts[]
                      | select(.name == "data-archive" and (.expired | not))]
                     | sort_by(.created_at) | last | .id // empty'
          ) || ARTIFACT_ID=""

          if [ -n "${ARTIFACT_ID}" ]; then
            echo "Downloading previous data archive"
            # Download and extract artifact. -f turns an HTTP error into a
            # step failure instead of handing an error body to unzip.
            curl -sS -f -L -H "Authorization: token ${GH_TOKEN}" \
              -o artifact.zip \
              "${ARTIFACTS_URL}/${ARTIFACT_ID}/zip"
            unzip -o artifact.zip -d data/
            rm artifact.zip
          else
            echo "No previous data archive found"
          fi

      - name: Run analysis
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          python scripts/main.py \
            --repos-file data/repos.jsonl \
            --imports-file data/imports.jsonl \
            --processed-file data/processed_repos.txt \
            --repos-to-find 50 \
            --repos-to-process 50 \
            --min-stars 1 \
            --language python \
            --max-files 5

      - name: Generate library statistics
        run: |
          # Generate library usage statistics
          python scripts/count_libs.py data/imports.jsonl \
            -o data/library_counts.csv
          # Generate optimized dashboard data files
          python scripts/generate_dashboard_data.py \
            --csv data/library_counts.csv \
            --processed data/processed_repos.txt \
            --output data

      - name: Update README with top libraries
        run: |
          # Update README.md with top 10 libraries
          python scripts/update_readme.py \
            --csv data/library_counts.csv \
            --readme README.md \
            --top 10

      # Uploaded under the same name the "Download previous data" step
      # looks up on the next run.
      - name: Archive data
        uses: actions/upload-artifact@v4
        with:
          name: data-archive
          path: data/

      - name: Commit and push results
        run: |
          git config --global user.name "GitHub Actions Bot"
          # NOTE(review): the address in the source was a redaction
          # placeholder; this is the github-actions bot noreply address.
          # Confirm it is the identity you want on these commits.
          git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"

          # Stage first, then inspect the index: this only reacts to changes
          # under data/ and README.md, and never attempts an empty commit.
          git add data/ README.md
          if git diff --cached --quiet; then
            echo "No changes to commit"
          else
            git commit -m "Update repository analysis data and README [skip ci]"
            git push
          fi