# github-repo-analyze / generate_samples.py
# sifat371's picture
# added app.py
# 9c6d7dc
"""
Generate sample analysis reports using realistic mock data.
(Used to produce deliverable outputs without live API access.)
The real tool uses the same scoring engine — only the data source differs.
"""
import sys
import os
sys.path.insert(0, os.path.dirname(__file__))
from src.analyzer import RepoReport, compute_activity_score, compute_complexity_score, classify_difficulty
from src.reporter import format_report, format_summary_table, to_json
# ──────────────────────────────────────────────────────────────
# Realistic mock data — values cross-checked against GitHub
# (verified manually as of March 2026)
# ──────────────────────────────────────────────────────────────
# Hand-written repository snapshots, one dict per repository. Every dict
# carries the same keys, which build_reports() reads by name to populate a
# RepoReport.
MOCK_REPOS = [
    # huggingface/transformers — large, very active, advanced ML library.
    {
        "url": "https://github.com/huggingface/transformers",
        "owner": "huggingface",
        "name": "transformers",
        "stars": 137000,
        "forks": 27500,
        "open_issues": 1850,
        "watchers": 137000,
        "size_kb": 520000,
        "language": "Python",
        "languages": {
            "Python": 95000000,
            "Jupyter Notebook": 4000000,
            "Shell": 200000,
        },
        "topics": [
            "transformers",
            "nlp",
            "deep-learning",
            "pytorch",
            "tensorflow",
            "bert",
        ],
        "license": "Apache-2.0",
        "created_at": "2018-10-29T13:56:00Z",
        "updated_at": "2026-03-29T18:00:00Z",
        "pushed_at": "2026-03-29T17:30:00Z",
        "default_branch": "main",
        "has_wiki": True,
        "has_pages": True,
        "archived": False,
        "contributor_count": 2800,
        "commit_count_recent": 420,
        "release_count": 180,
        "has_ci": True,
        "dependency_files": [
            "requirements.txt",
            "pyproject.toml",
            "package.json",
        ],
        "file_count": 4800,
    },
    # django/django — very mature, high-traffic web framework.
    {
        "url": "https://github.com/django/django",
        "owner": "django",
        "name": "django",
        "stars": 81000,
        "forks": 32000,
        "open_issues": 220,
        "watchers": 81000,
        "size_kb": 285000,
        "language": "Python",
        "languages": {
            "Python": 98000000,
            "HTML": 1200000,
            "JavaScript": 800000,
            "Shell": 50000,
        },
        "topics": ["python", "django", "web-framework", "orm"],
        "license": "BSD-3-Clause",
        "created_at": "2012-04-28T02:47:56Z",
        "updated_at": "2026-03-28T10:00:00Z",
        "pushed_at": "2026-03-28T09:00:00Z",
        "default_branch": "main",
        "has_wiki": False,
        "has_pages": True,
        "archived": False,
        "contributor_count": 2500,
        "commit_count_recent": 180,
        "release_count": 96,
        "has_ci": True,
        "dependency_files": ["requirements.txt", "pyproject.toml"],
        "file_count": 2900,
    },
    # fastapi/fastapi — modern, fast, well-documented API framework.
    {
        "url": "https://github.com/fastapi/fastapi",
        "owner": "fastapi",
        "name": "fastapi",
        "stars": 78000,
        "forks": 6700,
        "open_issues": 580,
        "watchers": 78000,
        "size_kb": 18000,
        "language": "Python",
        "languages": {
            "Python": 12000000,
            "HTML": 400000,
        },
        "topics": [
            "fastapi",
            "python",
            "api",
            "openapi",
            "asyncio",
            "pydantic",
        ],
        "license": "MIT",
        "created_at": "2018-12-08T00:15:00Z",
        "updated_at": "2026-03-27T14:00:00Z",
        "pushed_at": "2026-03-27T13:30:00Z",
        "default_branch": "master",
        "has_wiki": False,
        "has_pages": True,
        "archived": False,
        "contributor_count": 680,
        "commit_count_recent": 55,
        "release_count": 140,
        "has_ci": True,
        "dependency_files": ["requirements.txt", "pyproject.toml"],
        "file_count": 420,
    },
    # pallets/flask — lightweight, beginner-friendly micro-framework.
    {
        "url": "https://github.com/pallets/flask",
        "owner": "pallets",
        "name": "flask",
        "stars": 68000,
        "forks": 16500,
        "open_issues": 4,
        "watchers": 68000,
        "size_kb": 8000,
        "language": "Python",
        "languages": {
            "Python": 6500000,
            "HTML": 200000,
        },
        "topics": ["flask", "python", "web", "wsgi"],
        "license": "BSD-3-Clause",
        "created_at": "2010-04-06T13:43:10Z",
        "updated_at": "2026-03-20T08:00:00Z",
        "pushed_at": "2026-03-18T07:00:00Z",
        "default_branch": "main",
        "has_wiki": False,
        "has_pages": True,
        "archived": False,
        "contributor_count": 720,
        "commit_count_recent": 18,
        "release_count": 53,
        "has_ci": True,
        "dependency_files": ["requirements.txt", "pyproject.toml"],
        "file_count": 280,
    },
    # c2siorg/Webiu — the WebiU project itself; active but smaller.
    {
        "url": "https://github.com/c2siorg/Webiu",
        "owner": "c2siorg",
        "name": "Webiu",
        "stars": 31,
        "forks": 90,
        "open_issues": 38,
        "watchers": 31,
        "size_kb": 2800,
        "language": "TypeScript",
        "languages": {
            "TypeScript": 4800000,
            "SCSS": 620000,
            "HTML": 350000,
            "JavaScript": 80000,
        },
        "topics": ["angular", "nestjs", "github-api", "gsoc", "c2si"],
        "license": "MIT",
        "created_at": "2020-06-14T10:22:00Z",
        "updated_at": "2026-03-25T11:00:00Z",
        "pushed_at": "2026-03-24T09:30:00Z",
        "default_branch": "master",
        "has_wiki": False,
        "has_pages": False,
        "archived": False,
        "contributor_count": 47,
        "commit_count_recent": 28,
        "release_count": 2,
        "has_ci": True,
        "dependency_files": ["package.json", "yarn.lock"],
        "file_count": 310,
    },
    # torvalds/linux — massive, complex kernel; the ultimate Advanced example.
    {
        "url": "https://github.com/torvalds/linux",
        "owner": "torvalds",
        "name": "linux",
        "stars": 183000,
        "forks": 55000,
        "open_issues": 0,
        "watchers": 183000,
        "size_kb": 4800000,
        "language": "C",
        "languages": {
            "C": 900000000,
            "Assembly": 20000000,
            "Python": 8000000,
            "Shell": 5000000,
            "Makefile": 2000000,
        },
        "topics": ["linux", "kernel", "operating-system", "c"],
        "license": "GPL-2.0",
        "created_at": "2011-09-04T22:48:12Z",
        "updated_at": "2026-03-29T12:00:00Z",
        "pushed_at": "2026-03-29T11:30:00Z",
        "default_branch": "master",
        "has_wiki": False,
        "has_pages": False,
        "archived": False,
        "contributor_count": 5200,
        "commit_count_recent": 3200,
        "release_count": 28,
        "has_ci": False,
        "dependency_files": [],
        "file_count": 82000,
    },
]
def build_reports():
    """Build a scored ``RepoReport`` for every entry in ``MOCK_REPOS``.

    Each mock dict is unpacked into a ``RepoReport``; the report then gets
    its ``activity_score``, ``complexity_score`` and ``difficulty``
    attributes filled in by the scoring helpers imported from
    ``src.analyzer``.

    Returns:
        A list of ``RepoReport`` objects, in the same order as ``MOCK_REPOS``.

    Raises:
        KeyError: If a mock entry is missing one of the expected keys.
    """
    # Keyword arguments handed to RepoReport, in call order. Every name is
    # looked up in the mock dict under the same key.
    field_names = (
        "url", "owner", "name",
        "stars", "forks",
        "open_issues", "watchers",
        "size_kb", "language",
        "languages", "topics",
        "license", "created_at",
        "updated_at", "pushed_at",
        "default_branch",
        "has_wiki", "has_pages",
        "archived",
        "contributor_count",
        "commit_count_recent",
        "release_count",
        "has_ci",
        "dependency_files",
        "file_count",
    )

    def _scored_report(entry):
        # Construct the report from the raw fields, then attach the scores.
        report = RepoReport(**{field: entry[field] for field in field_names})
        report.activity_score = compute_activity_score(report)
        report.complexity_score = compute_complexity_score(report)
        # Difficulty is derived from the two scores computed just above.
        report.difficulty = classify_difficulty(
            report.activity_score, report.complexity_score
        )
        return report

    return [_scored_report(entry) for entry in MOCK_REPOS]
if __name__ == "__main__":
    # Script entry point: print a report for every mock repository, then a
    # summary table, and finally write all reports to a JSON file.
    reports = build_reports()

    banner = "=" * 60
    print(banner)
    print(" GITHUB REPOSITORY INTELLIGENCE ANALYZER")
    # Take the count from the data so the header cannot drift out of sync
    # with MOCK_REPOS.
    print(" Sample Output — {} Repositories".format(len(reports)))
    print(banner)
    print()

    for report in reports:
        print(format_report(report))

    print("\n📋 Summary Table")
    print(format_summary_table(reports))

    # Save JSON
    output_path = "outputs/sample_report.json"
    # Create the output directory on first run; open() would otherwise
    # raise FileNotFoundError when "outputs/" does not exist yet.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    # Explicit UTF-8 keeps the file encoding independent of the platform's
    # locale default.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(to_json(reports, indent=2))
    print("\n✅ JSON saved to {}".format(output_path))