Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import requests | |
| import time | |
| import json | |
| import google.generativeai as genai | |
| from langchain_core.prompts import PromptTemplate | |
| from langchain_google_genai import GoogleGenerativeAI | |
| from dotenv import load_dotenv | |
| # Configure page | |
| st.set_page_config( | |
| page_title="GitHub Repository Analyzer", | |
| page_icon="π", | |
| layout="wide" | |
| ) | |
| # Add custom CSS | |
| st.markdown(""" | |
| <style> | |
| .metric-card { | |
| background-color: #f0f2f6; | |
| padding: 20px; | |
| border-radius: 10px; | |
| margin: 10px 0; | |
| } | |
| .repo-card { | |
| background-color: white; | |
| padding: 20px; | |
| border-radius: 10px; | |
| margin: 10px 0; | |
| border: 1px solid #e6e6e6; | |
| } | |
| .stTextArea textarea { | |
| height: 200px; | |
| } | |
| </style> | |
| """, unsafe_allow_html=True) | |
| # Initialize session state | |
| if 'analysis_complete' not in st.session_state: | |
| st.session_state.analysis_complete = False | |
| if 'github_token' not in st.session_state: | |
| st.session_state.github_token = "" | |
| if 'gemini_key' not in st.session_state: | |
| st.session_state.gemini_key = "" | |
| def initialize_api(github_token, gemini_key): | |
| """Initialize API configurations""" | |
| try: | |
| headers = {"Authorization": f"token {github_token}", "Accept": "application/vnd.github.v3+json"} | |
| # Configure Gemini API | |
| genai.configure(api_key=gemini_key) | |
| llm = GoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.2, google_api_key=gemini_key) | |
| # Test the configuration | |
| test_response = llm.invoke("Test") | |
| return headers, llm | |
| except Exception as e: | |
| st.error(f"Error initializing APIs: {str(e)}") | |
| st.error("Please ensure your API keys are correct and try again.") | |
| return None, None | |
| def get_github_repos(username, headers): | |
| """Fetch repositories from a user's GitHub profile.""" | |
| url = f"https://api.github.com/users/{username}/repos" | |
| response = requests.get(url, headers=headers) | |
| if response.status_code == 200: | |
| return response.json() | |
| else: | |
| st.error(f"Failed to fetch repositories. Status code: {response.status_code}") | |
| return [] | |
| def get_repo_details(username, repo_name, headers): | |
| """Fetch README, latest commits, and repo structure.""" | |
| readme_url = f"https://api.github.com/repos/{username}/{repo_name}/readme" | |
| commits_url = f"https://api.github.com/repos/{username}/{repo_name}/commits" | |
| contents_url = f"https://api.github.com/repos/{username}/{repo_name}/contents" | |
| languages_url = f"https://api.github.com/repos/{username}/{repo_name}/languages" | |
| readme_content = "" | |
| commit_messages = [] | |
| file_structure = [] | |
| languages_used = [] | |
| with st.spinner(f"Fetching details for {repo_name}..."): | |
| # Fetch README | |
| readme_response = requests.get(readme_url, headers=headers) | |
| if readme_response.status_code == 200: | |
| readme_content = requests.get(readme_response.json()['download_url']).text | |
| # Fetch latest 5 commits | |
| commit_response = requests.get(commits_url, headers=headers) | |
| if commit_response.status_code == 200: | |
| commit_messages = [commit['commit']['message'] for commit in commit_response.json()[:5]] | |
| # Fetch file structure | |
| content_response = requests.get(contents_url, headers=headers) | |
| if content_response.status_code == 200: | |
| file_structure = [file['name'] for file in content_response.json()] | |
| # Fetch languages used | |
| lang_response = requests.get(languages_url, headers=headers) | |
| if lang_response.status_code == 200: | |
| languages_used = list(lang_response.json().keys()) | |
| return readme_content, commit_messages, file_structure, languages_used | |
| def analyze_repo_and_jd_match(readme, file_structure, commits, languages, jd, llm): | |
| """Use Gemini AI to analyze repository and match with JD.""" | |
| prompt_template = PromptTemplate( | |
| input_variables=["readme", "files", "commits", "languages", "jd"], | |
| template=""" | |
| You are an AI technical recruiter. Analyze the following GitHub project details and job description: | |
| Job Description: | |
| {jd} | |
| Repository Details: | |
| README: {readme} | |
| File Structure: {files} | |
| Commit Messages: {commits} | |
| Languages: {languages} | |
| Provide output as structured JSON: | |
| {{ | |
| "languages": ["list of languages"], | |
| "tech_stack": ["list of frameworks & libraries"], | |
| "algorithms": ["list of key algorithms used"], | |
| "complexity": "low/medium/high", | |
| "commit_activity": "active/moderate/inactive", | |
| "jd_match_score": "1-100", | |
| "jd_match_reasons": ["list of reasons why this repository matches or doesn't match the JD"] | |
| }} | |
| """ | |
| ) | |
| try: | |
| response = llm.invoke(prompt_template.format( | |
| readme=readme, | |
| files=", ".join(file_structure), | |
| commits=", ".join(commits), | |
| languages=", ".join(languages), | |
| jd=jd | |
| )) | |
| json_start = response.find("{") | |
| json_end = response.rfind("}") + 1 | |
| json_data = json.loads(response[json_start:json_end].strip()) | |
| return json_data | |
| except Exception as e: | |
| st.error(f"Error analyzing repository: {e}") | |
| return { | |
| "languages": [], | |
| "tech_stack": [], | |
| "algorithms": [], | |
| "complexity": "unknown", | |
| "commit_activity": "unknown", | |
| "jd_match_score": 0, | |
| "jd_match_reasons": [] | |
| } | |
| def calculate_repo_score(analysis_data): | |
| """Calculate a score for a repository based on its analysis and JD match.""" | |
| base_score = 0 | |
| # Score based on number of languages (max 10 points) | |
| base_score += min(len(analysis_data['languages']) * 2, 10) | |
| # Score based on tech stack (max 15 points) | |
| base_score += min(len(analysis_data['tech_stack']) * 3, 15) | |
| # Score based on algorithms (max 15 points) | |
| base_score += min(len(analysis_data['algorithms']) * 3, 15) | |
| # Score based on complexity (max 30 points) | |
| complexity_scores = {"low": 10, "medium": 20, "high": 30, "unknown": 0} | |
| base_score += complexity_scores.get(analysis_data['complexity'].lower(), 0) | |
| # Score based on commit activity (max 30 points) | |
| activity_scores = {"inactive": 10, "moderate": 20, "active": 30, "unknown": 0} | |
| base_score += activity_scores.get(analysis_data['commit_activity'].lower(), 0) | |
| # Include JD match score in final calculation | |
| jd_match_score = float(analysis_data.get('jd_match_score', 0)) | |
| # Final score is weighted average of base score and JD match score | |
| final_score = (base_score * 0.6) + (jd_match_score * 0.4) | |
| return round(final_score) | |
| def evaluate_candidate(total_score, num_repos): | |
| """Evaluate candidate suitability based on average repository score.""" | |
| if num_repos == 0: | |
| return "Unable to evaluate - no repositories found" | |
| avg_score = total_score / num_repos | |
| if avg_score >= 75: | |
| return "Highly Suitable" | |
| elif avg_score >= 50: | |
| return "Moderately Suitable" | |
| elif avg_score >= 25: | |
| return "Potentially Suitable" | |
| else: | |
| return "Not Suitable" | |
| def display_repo_analysis(repo_name, analysis_data, repo_score): | |
| """Display repository analysis in Streamlit.""" | |
| with st.expander(f"π {repo_name} - Score: {repo_score}/100", expanded=True): | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.markdown("### π Technical Details") | |
| st.write("**Languages:**", ", ".join(analysis_data['languages'])) | |
| st.write("**Tech Stack:**", ", ".join(analysis_data['tech_stack']) if analysis_data['tech_stack'] else "None detected") | |
| st.write("**Algorithms:**", ", ".join(analysis_data['algorithms']) if analysis_data['algorithms'] else "None detected") | |
| with col2: | |
| st.markdown("### π Metrics") | |
| st.write("**Complexity:**", analysis_data['complexity'].capitalize()) | |
| st.write("**Commit Activity:**", analysis_data['commit_activity'].capitalize()) | |
| st.write("**JD Match Score:**", f"{analysis_data.get('jd_match_score', 0)}/100") | |
| st.progress(repo_score/100) | |
| if analysis_data.get('jd_match_reasons'): | |
| st.markdown("### π― JD Match Analysis") | |
| for reason in analysis_data['jd_match_reasons']: | |
| st.write(f"- {reason}") | |
| def analyze_github_repos(username, headers, llm, jd): | |
| """Analyze GitHub projects and generate summaries.""" | |
| repos = get_github_repos(username, headers) | |
| if not repos: | |
| st.error("No repositories found or failed to fetch repositories.") | |
| return [] | |
| results = [] | |
| total_score = 0 | |
| progress_bar = st.progress(0) | |
| for idx, repo in enumerate(repos): | |
| repo_name = repo['name'] | |
| with st.spinner(f"Analyzing {repo_name}..."): | |
| readme, commits, file_structure, languages = get_repo_details(username, repo_name, headers) | |
| analysis_data = analyze_repo_and_jd_match(readme, file_structure, commits, languages, jd, llm) | |
| repo_score = calculate_repo_score(analysis_data) | |
| total_score += repo_score | |
| results.append((repo_name, analysis_data, repo_score)) | |
| progress_bar.progress((idx + 1) / len(repos)) | |
| time.sleep(1) | |
| progress_bar.empty() | |
| return results, total_score | |
| def main(): | |
| st.title("π GitHub Repository Analyzer") | |
| st.markdown(""" | |
| This tool analyzes GitHub repositories to evaluate technical capabilities and project quality. | |
| Please provide the required information below to begin the analysis. | |
| """) | |
| # API Keys input | |
| with st.expander("π API Configuration", expanded=True): | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| github_token = st.text_input("GitHub Token", type="password", | |
| value=st.session_state.get('github_token', '')) | |
| with col2: | |
| gemini_key = st.text_input("Google Gemini API Key", type="password", | |
| value=st.session_state.get('gemini_key', '')) | |
| # Job Description input | |
| st.subheader("π Job Description") | |
| jd = st.text_area("Paste the job description here", height=200) | |
| # GitHub username input | |
| username = st.text_input("π€ Enter GitHub Username") | |
| # Save API keys to session state | |
| if github_token: | |
| st.session_state.github_token = github_token | |
| if gemini_key: | |
| st.session_state.gemini_key = gemini_key | |
| if st.button("Analyze Repositories") and username and jd and github_token and gemini_key: | |
| headers, llm = initialize_api(github_token, gemini_key) | |
| if headers and llm: | |
| with st.spinner("Analyzing repositories..."): | |
| repo_analysis, total_score = analyze_github_repos(username, headers, llm, jd) | |
| if repo_analysis: | |
| num_repos = len(repo_analysis) | |
| # Display overall summary | |
| st.header("π Analysis Summary") | |
| col1, col2, col3 = st.columns(3) | |
| with col1: | |
| st.metric("Total Repositories", num_repos) | |
| with col2: | |
| avg_score = round(total_score / num_repos if num_repos > 0 else 0) | |
| st.metric("Average Repository Score", f"{avg_score}/100") | |
| with col3: | |
| suitability = evaluate_candidate(total_score, num_repos) | |
| st.metric("Candidate Suitability", suitability) | |
| # Display individual repository analysis | |
| st.header("π Repository Details") | |
| sorted_analysis = sorted(repo_analysis, key=lambda x: x[2], reverse=True) | |
| for repo_name, analysis_data, repo_score in sorted_analysis: | |
| display_repo_analysis(repo_name, analysis_data, repo_score) | |
| # Export option | |
| if st.button("Export Analysis"): | |
| export_data = { | |
| "username": username, | |
| "total_repos": num_repos, | |
| "average_score": avg_score, | |
| "suitability": suitability, | |
| "repositories": [ | |
| { | |
| "name": repo_name, | |
| "score": repo_score, | |
| "analysis": analysis_data | |
| } | |
| for repo_name, analysis_data, repo_score in sorted_analysis | |
| ] | |
| } | |
| st.download_button( | |
| "Download Analysis Report", | |
| data=json.dumps(export_data, indent=2), | |
| file_name=f"github_analysis_{username}.json", | |
| mime="application/json" | |
| ) | |
| else: | |
| st.error("No repositories found or analysis failed.") | |
| else: | |
| st.error("Failed to initialize APIs. Please check your API keys and try again.") | |
| if __name__ == "__main__": | |
| main() |