WARNING: THIS SITE IS A MIRROR OF GITHUB.COM / YOU CANNOT LOG IN OR REGISTER ACCOUNTS HERE / THE CONTENTS ARE PROVIDED AS-IS / THIS SITE ASSUMES NO RESPONSIBILITY FOR ANY DISPLAYED CONTENT OR LINKS / IF YOU FIND SOMETHING THAT MAY NOT BE SUITABLE FOR EVERYONE, CONTACT THE ADMIN AT ilovescratch@foxmail.com
Skip to content

Analyze Python Repositories #1179

Analyze Python Repositories

Analyze Python Repositories #1179

Workflow file for this run

# .github/workflows/analyze_python_repos.yml
#
# Periodically runs the repository analysis scripts, regenerates the library
# statistics and README summary, archives the data directory as an artifact,
# and commits any changed results back to the repository.
name: Analyze Python Repositories

on:
  workflow_dispatch: # Manual trigger
  schedule:
    - cron: '0 */6 * * *' # Every 6 hours (00:00, 06:00, 12:00, 18:00 UTC)

# The job pushes a commit and reads artifacts through the REST API, so the
# token needs these scopes even when the repository default is read-only.
permissions:
  contents: write
  actions: read

jobs:
  analyze:
    runs-on: ubuntu-latest
    timeout-minutes: 360 # 6 hour timeout (GitHub Actions limit)
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10' # quoted so YAML does not read it as 3.1

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests

      - name: Download previous data
        env:
          # Passed through the environment instead of being templated into
          # the script text.
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          ARTIFACTS_URL: https://api.github.com/repos/${{ github.repository }}/actions/artifacts
        run: |
          # Create data directory if it doesn't exist
          mkdir -p data

          # Look up the first non-expired "data-archive" artifact in a single
          # API call. A failed lookup yields an empty ID rather than aborting.
          ARTIFACT_ID=$(curl -s -f -H "Authorization: token $GITHUB_TOKEN" \
            "$ARTIFACTS_URL?name=data-archive&per_page=100" | \
            jq -r '.artifacts[]? | select(.name == "data-archive" and .expired == false) | .id' | \
            head -n 1) || true

          if [ -n "$ARTIFACT_ID" ]; then
            echo "Downloading previous data archive"
            # -f makes curl fail on an HTTP error instead of saving the error
            # body as artifact.zip and handing it to unzip.
            if curl -s -S -f -L -H "Authorization: token $GITHUB_TOKEN" \
              -o artifact.zip \
              "$ARTIFACTS_URL/$ARTIFACT_ID/zip"; then
              unzip -o artifact.zip -d data/
            else
              echo "::warning::Could not download previous data archive $ARTIFACT_ID; continuing with checked-out data"
            fi
            rm -f artifact.zip
          else
            echo "No previous data archive found"
          fi

      - name: Run analysis
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          python scripts/main.py \
            --repos-file data/repos.jsonl \
            --imports-file data/imports.jsonl \
            --processed-file data/processed_repos.txt \
            --repos-to-find 50 \
            --repos-to-process 50 \
            --min-stars 1 \
            --language python \
            --max-files 5

      - name: Generate library statistics
        run: |
          # Generate library usage statistics
          python scripts/count_libs.py data/imports.jsonl -o data/library_counts.csv
          # Generate optimized dashboard data files
          python scripts/generate_dashboard_data.py \
            --csv data/library_counts.csv \
            --processed data/processed_repos.txt \
            --output data

      - name: Update README with top libraries
        run: |
          # Update README.md with top 10 libraries
          python scripts/update_readme.py --csv data/library_counts.csv --readme README.md --top 10

      - name: Archive data
        uses: actions/upload-artifact@v4
        with:
          name: data-archive
          path: data/

      - name: Commit and push results
        run: |
          git config --global user.name "GitHub Actions Bot"
          # No-reply address of the GitHub Actions bot account.
          git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
          # Stage first, then test the index: this only reacts to changes in
          # data/ and README.md, not to any path that merely contains "data/".
          git add data/ README.md
          if git diff --cached --quiet; then
            echo "No changes to commit"
          else
            git commit -m "Update repository analysis data and README [skip ci]"
            git push
          fi