Spaces:
Running
Running
Update github_repo_analyzer.py
Browse files - github_repo_analyzer.py +43 -16
github_repo_analyzer.py
CHANGED
|
@@ -2,7 +2,7 @@ import os
|
|
| 2 |
import sys
|
| 3 |
import tempfile
|
| 4 |
import shutil
|
| 5 |
-
from urllib.parse import urlparse
|
| 6 |
import requests
|
| 7 |
from github import Github
|
| 8 |
from git import Repo
|
|
@@ -45,8 +45,28 @@ def get_repo_info(input_str):
|
|
| 45 |
|
| 46 |
def clone_repo(owner, repo_name, temp_dir):
|
| 47 |
repo_url = f"https://github.com/{owner}/{repo_name}.git"
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
def analyze_code(repo_path):
|
| 52 |
file_types = defaultdict(int)
|
|
@@ -519,18 +539,20 @@ def llm_synthesize_findings(client, code_analysis, issues_analysis, pr_analysis)
|
|
| 519 |
|
| 520 |
return call_llm(client, prompt, max_tokens=8192)
|
| 521 |
|
| 522 |
-
def generate_report(repo_info, code_analysis, issues_analysis, pr_analysis
|
| 523 |
repo_url = f"https://github.com/{repo_info['owner']}/{repo_info['repo_name']}"
|
| 524 |
-
|
|
|
|
| 525 |
report = f"""# LLM-Assisted Workflow Analysis for {repo_info['owner']}/{repo_info['repo_name']}
|
| 526 |
|
| 527 |
## Repository Overview
|
| 528 |
-
- Owner
|
| 529 |
-
- Repository
|
| 530 |
-
- URL
|
| 531 |
-
- File types
|
| 532 |
|
| 533 |
## Code Analysis
|
|
|
|
| 534 |
"""
|
| 535 |
|
| 536 |
if isinstance(code_analysis.get('llm_analysis'), dict):
|
|
@@ -541,12 +563,14 @@ def generate_report(repo_info, code_analysis, issues_analysis, pr_analysis, fina
|
|
| 541 |
report += f"- {pattern}\n"
|
| 542 |
|
| 543 |
report += "\n### Best Practices\n"
|
| 544 |
-
report += "
|
| 545 |
for practice in code_llm_analysis.get('best_practices', {}).get('followed', []):
|
| 546 |
report += f"- {practice}\n"
|
| 547 |
-
report += "\n
|
|
|
|
| 548 |
for practice in code_llm_analysis.get('best_practices', {}).get('missing', []):
|
| 549 |
report += f"- {practice}\n"
|
|
|
|
| 550 |
|
| 551 |
report += "\n### Areas for Improvement\n"
|
| 552 |
for area in code_llm_analysis.get('areas_for_improvement', []):
|
|
@@ -554,12 +578,16 @@ def generate_report(repo_info, code_analysis, issues_analysis, pr_analysis, fina
|
|
| 554 |
|
| 555 |
report += "\n### Potential Vulnerabilities\n"
|
| 556 |
for vuln in code_llm_analysis.get('potential_vulnerabilities', []):
|
| 557 |
-
|
|
|
|
|
|
|
| 558 |
|
| 559 |
report += "\n### LLM-Assisted Coding Opportunities\n"
|
| 560 |
for opp in code_llm_analysis.get('llm_opportunities', []):
|
|
|
|
|
|
|
| 561 |
report += f"- **Task:** {opp['task']}\n"
|
| 562 |
-
report += f" - **File:** `{
|
| 563 |
report += f" - **Improvement:** {opp['improvement']}\n\n"
|
| 564 |
else:
|
| 565 |
report += "No structured code analysis available.\n"
|
|
@@ -574,7 +602,7 @@ def generate_report(repo_info, code_analysis, issues_analysis, pr_analysis, fina
|
|
| 574 |
report += "\n### Promising LLM-Assisted Workflows for Issues\n"
|
| 575 |
for workflow in issues_analysis['summary'].get('promising_workflows', []):
|
| 576 |
report += f"- **Workflow:** {workflow['workflow']}\n"
|
| 577 |
-
report += f" - **Example Issue:** [{workflow['applicable_issue']}]({repo_url}/issues/{workflow['applicable_issue']})\n\n"
|
| 578 |
|
| 579 |
report += "### Overall Recommendations for Issue Management\n"
|
| 580 |
for rec in issues_analysis['summary'].get('overall_recommendations', ['No recommendations available.']):
|
|
@@ -592,7 +620,7 @@ def generate_report(repo_info, code_analysis, issues_analysis, pr_analysis, fina
|
|
| 592 |
report += "\n### Promising LLM-Assisted Workflows for Pull Requests\n"
|
| 593 |
for workflow in pr_analysis['summary'].get('promising_workflows', []):
|
| 594 |
report += f"- **Workflow:** {workflow['workflow']}\n"
|
| 595 |
-
report += f" - **Example PR:** [{workflow['applicable_pr']}]({repo_url}/pull/{workflow['applicable_pr']})\n\n"
|
| 596 |
|
| 597 |
report += "### Overall Recommendations for PR Process\n"
|
| 598 |
for rec in pr_analysis['summary'].get('overall_recommendations', ['No recommendations available.']):
|
|
@@ -600,5 +628,4 @@ def generate_report(repo_info, code_analysis, issues_analysis, pr_analysis, fina
|
|
| 600 |
else:
|
| 601 |
report += "No structured pull requests analysis available.\n"
|
| 602 |
|
| 603 |
-
report += f"\n## Synthesis and Recommendations\n{final_analysis}\n"
|
| 604 |
return report
|
|
|
|
| 2 |
import sys
|
| 3 |
import tempfile
|
| 4 |
import shutil
|
| 5 |
+
from urllib.parse import urlparse, quote
|
| 6 |
import requests
|
| 7 |
from github import Github
|
| 8 |
from git import Repo
|
|
|
|
| 45 |
|
| 46 |
def clone_repo(owner, repo_name, temp_dir):
|
| 47 |
repo_url = f"https://github.com/{owner}/{repo_name}.git"
|
| 48 |
+
|
| 49 |
+
# Clone the repository
|
| 50 |
+
repo = Repo.clone_from(repo_url, temp_dir)
|
| 51 |
+
|
| 52 |
+
# Get the default branch
|
| 53 |
+
default_branch = repo.active_branch.name
|
| 54 |
+
|
| 55 |
+
# Get the full name of the repository (owner/repo_name)
|
| 56 |
+
full_name = f"{owner}/{repo_name}"
|
| 57 |
+
|
| 58 |
+
# Create the repo_info object
|
| 59 |
+
repo_info = {
|
| 60 |
+
"owner": owner,
|
| 61 |
+
"repo_name": repo_name,
|
| 62 |
+
"full_name": full_name,
|
| 63 |
+
"repo_url": repo_url,
|
| 64 |
+
"local_path": temp_dir,
|
| 65 |
+
"default_branch": default_branch,
|
| 66 |
+
"git_repo": repo
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
return repo_info
|
| 70 |
|
| 71 |
def analyze_code(repo_path):
|
| 72 |
file_types = defaultdict(int)
|
|
|
|
| 539 |
|
| 540 |
return call_llm(client, prompt, max_tokens=8192)
|
| 541 |
|
| 542 |
+
def generate_report(repo_info, code_analysis, issues_analysis, pr_analysis):
|
| 543 |
repo_url = f"https://github.com/{repo_info['owner']}/{repo_info['repo_name']}"
|
| 544 |
+
default_branch = repo_info.get('default_branch', 'master') # Assume 'master' if not provided
|
| 545 |
+
|
| 546 |
report = f"""# LLM-Assisted Workflow Analysis for {repo_info['owner']}/{repo_info['repo_name']}
|
| 547 |
|
| 548 |
## Repository Overview
|
| 549 |
+
- **Owner:** {repo_info['owner']}
|
| 550 |
+
- **Repository:** {repo_info['repo_name']}
|
| 551 |
+
- **URL:** [{repo_url}]({repo_url})
|
| 552 |
+
- **File types:** {', '.join(f"{ext} ({count})" for ext, count in code_analysis.get('file_types', {}).items())}
|
| 553 |
|
| 554 |
## Code Analysis
|
| 555 |
+
|
| 556 |
"""
|
| 557 |
|
| 558 |
if isinstance(code_analysis.get('llm_analysis'), dict):
|
|
|
|
| 563 |
report += f"- {pattern}\n"
|
| 564 |
|
| 565 |
report += "\n### Best Practices\n"
|
| 566 |
+
report += "<details><summary>Followed</summary>\n\n"
|
| 567 |
for practice in code_llm_analysis.get('best_practices', {}).get('followed', []):
|
| 568 |
report += f"- {practice}\n"
|
| 569 |
+
report += "</details>\n\n"
|
| 570 |
+
report += "<details><summary>Missing</summary>\n\n"
|
| 571 |
for practice in code_llm_analysis.get('best_practices', {}).get('missing', []):
|
| 572 |
report += f"- {practice}\n"
|
| 573 |
+
report += "</details>\n"
|
| 574 |
|
| 575 |
report += "\n### Areas for Improvement\n"
|
| 576 |
for area in code_llm_analysis.get('areas_for_improvement', []):
|
|
|
|
| 578 |
|
| 579 |
report += "\n### Potential Vulnerabilities\n"
|
| 580 |
for vuln in code_llm_analysis.get('potential_vulnerabilities', []):
|
| 581 |
+
relative_path = os.path.relpath(vuln['file_path'], repo_info['local_path'])
|
| 582 |
+
file_url = f"{repo_url}/blob/{default_branch}/{quote(relative_path)}"
|
| 583 |
+
report += f"- [{vuln['description']}]({file_url}) (Severity: {vuln['severity']})\n"
|
| 584 |
|
| 585 |
report += "\n### LLM-Assisted Coding Opportunities\n"
|
| 586 |
for opp in code_llm_analysis.get('llm_opportunities', []):
|
| 587 |
+
relative_path = os.path.relpath(opp['file_path'], repo_info['local_path'])
|
| 588 |
+
file_url = f"{repo_url}/blob/{default_branch}/{quote(relative_path)}"
|
| 589 |
report += f"- **Task:** {opp['task']}\n"
|
| 590 |
+
report += f" - **File:** [`{relative_path}`]({file_url})\n"
|
| 591 |
report += f" - **Improvement:** {opp['improvement']}\n\n"
|
| 592 |
else:
|
| 593 |
report += "No structured code analysis available.\n"
|
|
|
|
| 602 |
report += "\n### Promising LLM-Assisted Workflows for Issues\n"
|
| 603 |
for workflow in issues_analysis['summary'].get('promising_workflows', []):
|
| 604 |
report += f"- **Workflow:** {workflow['workflow']}\n"
|
| 605 |
+
report += f" - **Example Issue:** [#{workflow['applicable_issue']}]({repo_url}/issues/{workflow['applicable_issue']})\n\n"
|
| 606 |
|
| 607 |
report += "### Overall Recommendations for Issue Management\n"
|
| 608 |
for rec in issues_analysis['summary'].get('overall_recommendations', ['No recommendations available.']):
|
|
|
|
| 620 |
report += "\n### Promising LLM-Assisted Workflows for Pull Requests\n"
|
| 621 |
for workflow in pr_analysis['summary'].get('promising_workflows', []):
|
| 622 |
report += f"- **Workflow:** {workflow['workflow']}\n"
|
| 623 |
+
report += f" - **Example PR:** [#{workflow['applicable_pr']}]({repo_url}/pull/{workflow['applicable_pr']})\n\n"
|
| 624 |
|
| 625 |
report += "### Overall Recommendations for PR Process\n"
|
| 626 |
for rec in pr_analysis['summary'].get('overall_recommendations', ['No recommendations available.']):
|
|
|
|
| 628 |
else:
|
| 629 |
report += "No structured pull requests analysis available.\n"
|
| 630 |
|
|
|
|
| 631 |
return report
|