Spaces:

DreamStream-1
/

HR-2

Build error

App Files Files Community

DreamStream-1 commited on Oct 24, 2024

Commit

5019102

verified ·

1 Parent(s): 40f08cb

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -90

app.py CHANGED Viewed

@@ -1,97 +1,89 @@
 import os
 import gradio as gr
-from transformers import pipeline
-import torch
 import PyPDF2
-import io
-import re
-from datetime import datetime
-# Initialize sentiment analysis pipeline
-sentiment_analyzer = pipeline("sentiment-analysis")
 def extract_text_from_pdf(file):
-    """Extract text from uploaded PDF file"""
     if file is None:
         return ""
     try:
-        pdf_reader = PyPDF2.PdfReader(io.BytesIO(file))
         text = ""
         for page in pdf_reader.pages:
-            text += page.extract_text()
         return text
     except Exception as e:
         return f"Error extracting PDF text: {str(e)}"
 def extract_text_from_file(file):
-    """Extract text from uploaded file (PDF or TXT)"""
     if file is None:
         return ""
-    file_content = file.read()
     if file.name.endswith('.pdf'):
-        return extract_text_from_pdf(file_content)
     elif file.name.endswith('.txt'):
-        return file_content.decode('utf-8')
     else:
         return "Unsupported file format. Please upload PDF or TXT files only."
 def extract_skills(text):
-    """Extract skills from text using keyword matching"""
-    # Common programming languages and technologies
-    skills_keywords = [
-        'python', 'java', 'javascript', 'react', 'angular', 'vue', 'node.js',
-        'sql', 'mongodb', 'aws', 'docker', 'kubernetes', 'machine learning',
-        'artificial intelligence', 'data science', 'html', 'css', 'git'
-    ]
-    found_skills = []
-    for skill in skills_keywords:
-        if re.search(r'\b' + re.escape(skill) + r'\b', text.lower()):
-            found_skills.append(skill)
-    return found_skills
-def extract_education(text):
-    """Extract education information from text"""
-    education_patterns = [
-        r'\b(B\.?S\.?|B\.?A\.?|M\.?S\.?|M\.?A\.?|Ph\.?D\.?|Bachelor\'?s?|Master\'?s?|Doctorate)\b',
-        r'\b(Computer Science|Information Technology|Software Engineering|Information Systems)\b'
-    ]
-    education = []
-    for pattern in education_patterns:
-        matches = re.finditer(pattern, text, re.IGNORECASE)
-        education.extend(match.group() for match in matches)
-    return list(set(education))
-def extract_experience(text):
-    """Extract years of experience and job titles"""
-    experience_pattern = r'(\d+)\+?\s*(?:years?|yrs?)(?:\s+of)?\s+experience'
-    job_titles_pattern = r'\b(Software Engineer|Developer|Architect|Manager|Lead|Director)\b'
-    experience_matches = re.findall(experience_pattern, text, re.IGNORECASE)
-    years = [int(year) for year in experience_matches]
-    job_titles = re.findall(job_titles_pattern, text)
     return {
-        'years': max(years) if years else 0,
-        'titles': list(set(job_titles))
     }
 def calculate_match_percentage(resume_skills, job_skills):
-    """Calculate the match percentage between resume skills and job requirements"""
     if not job_skills:
         return 0
     matching_skills = set(resume_skills).intersection(set(job_skills))
     return (len(matching_skills) / len(job_skills)) * 100
 def analyze_resume_and_job(resume_file, job_desc_file):
-    """Main function to analyze resume and job description"""
     try:
         # Extract text from files
         resume_text = extract_text_from_file(resume_file)
@@ -104,26 +96,27 @@ def analyze_resume_and_job(resume_file, job_desc_file):
         # Extract information from resume
         resume_skills = extract_skills(resume_text)
-        resume_education = extract_education(resume_text)
-        resume_experience = extract_experience(resume_text)
         # Extract information from job description
         job_skills = extract_skills(job_desc_text)
-        job_education = extract_education(job_desc_text)
-        job_experience = extract_experience(job_desc_text)
         # Calculate match percentages
         skills_match = calculate_match_percentage(resume_skills, job_skills)
-        # Analyze sentiment of resume
-        sentiment_result = sentiment_analyzer(resume_text[:512])[0]
         # Prepare analysis results
         summary = f"""
 ### Summary Analysis
 - Overall Skills Match: {skills_match:.1f}%
-- Experience: {resume_experience['years']} years
-- Sentiment: {sentiment_result['label']} ({sentiment_result['score']:.2f})
         """
         skills = f"""
@@ -141,42 +134,32 @@ Missing Skills:
         qualifications = f"""
 ### Qualifications
 Education Found:
-{', '.join(resume_education)}
 Required Education:
-{', '.join(job_education)}
         """
-        experience = f"""
-### Experience Analysis
-- Years of Experience: {resume_experience['years']}
-- Recent Positions: {', '.join(resume_experience['titles'])}
-- Required Experience: {job_experience['years']} years
-        """
-        # Generate recommendation
-        if skills_match >= 70 and resume_experience['years'] >= job_experience['years']:
-            recommendation = "Strong Match - Recommended for interview"
         elif skills_match >= 50:
-            recommendation = "Moderate Match - Consider for interview with focus on missing skills"
         else:
-            recommendation = "Low Match - May not meet core requirements"
         recommendation = f"""
 ### Recommendation
 {recommendation}
-Key Strengths:
-- {'High' if skills_match >= 70 else 'Moderate' if skills_match >= 50 else 'Low'} skill match
-- {'Sufficient' if resume_experience['years'] >= job_experience['years'] else 'Insufficient'} experience
         """
         return {
             "summary": summary.strip(),
             "skills": skills.strip(),
             "qualifications": qualifications.strip(),
-            "experience": experience.strip(),
-            "recommendation": recommendation.strip()
         }
     except Exception as e:
@@ -203,10 +186,10 @@ def create_interface():
                 skills_output = gr.Markdown()
             with gr.TabItem("Qualifications"):
                 qualifications_output = gr.Markdown()
-            with gr.TabItem("Experience"):
-                experience_output = gr.Markdown()
             with gr.TabItem("Recommendation"):
                 recommendation_output = gr.Markdown()
         def analyze(resume_file, job_desc_file):
             if not resume_file or not job_desc_file:
@@ -221,15 +204,14 @@ def create_interface():
                 result["summary"],
                 result["skills"],
                 result["qualifications"],
-                result["experience"],
-                result["recommendation"]
             )
         analyze_button.click(
             analyze,
             inputs=[resume_input, job_desc_input],
-            outputs=[summary_output, skills_output, qualifications_output,
-                    experience_output, recommendation_output]
         )
     return demo

 import os
 import gradio as gr
+import requests
 import PyPDF2
+import spacy
+# Load spaCy for NER tasks
+nlp = spacy.load("en_core_web_sm")
+# Set up your Groq API endpoint and API key
+GROQ_API_URL = "https://api.groq.com/v1/llama"
+GROQ_API_KEY = "YOUR_API_KEY"  # Replace with your actual API key
 def extract_text_from_pdf(file):
+    """Extract text from uploaded PDF file."""
     if file is None:
         return ""
     try:
+        pdf_reader = PyPDF2.PdfReader(file)
         text = ""
         for page in pdf_reader.pages:
+            page_text = page.extract_text() or ""
+            text += page_text
         return text
     except Exception as e:
         return f"Error extracting PDF text: {str(e)}"
 def extract_text_from_file(file):
+    """Extract text from uploaded file (PDF or TXT)."""
     if file is None:
         return ""
     if file.name.endswith('.pdf'):
+        return extract_text_from_pdf(file)
     elif file.name.endswith('.txt'):
+        return file.read().decode('utf-8')
     else:
         return "Unsupported file format. Please upload PDF or TXT files only."
 def extract_skills(text):
+    """Extract skills from text using a pre-trained NER model."""
+    doc = nlp(text)
+    skills = [ent.text for ent in doc.ents if ent.label_ == "SKILL"]
+    return list(set(skills))
+def extract_education_and_experience(text):
+    """Extract education and experience information from text using NER."""
+    doc = nlp(text)
+    education = [ent.text for ent in doc.ents if ent.label_ in ["EDUCATION", "DEGREE"]]
+    experience = [ent.text for ent in doc.ents if ent.label_ == "EXPERIENCE"]
     return {
+        'education': list(set(education)),
+        'experience': list(set(experience))
     }
 def calculate_match_percentage(resume_skills, job_skills):
+    """Calculate the match percentage between resume skills and job requirements."""
     if not job_skills:
         return 0
     matching_skills = set(resume_skills).intersection(set(job_skills))
     return (len(matching_skills) / len(job_skills)) * 100
+def call_groq_api(prompt):
+    """Call the Groq API with the prompt and return the response."""
+    headers = {
+        "Authorization": f"Bearer {GROQ_API_KEY}",
+        "Content-Type": "application/json"
+    }
+    payload = {
+        "model": "llama3-8b-8192",  # Use the specified LLaMA model
+        "prompt": prompt,
+        "max_tokens": 150  # Adjust as needed
+    }
+    response = requests.post(GROQ_API_URL, headers=headers, json=payload)
+    if response.status_code == 200:
+        return response.json().get("output", "No output received.")
+    else:
+        return f"API call failed with status {response.status_code}: {response.text}"
 def analyze_resume_and_job(resume_file, job_desc_file):
+    """Main function to analyze resume and job description."""
     try:
         # Extract text from files
         resume_text = extract_text_from_file(resume_file)
         # Extract information from resume
         resume_skills = extract_skills(resume_text)
+        resume_info = extract_education_and_experience(resume_text)
         # Extract information from job description
         job_skills = extract_skills(job_desc_text)
+        job_info = extract_education_and_experience(job_desc_text)
         # Calculate match percentages
         skills_match = calculate_match_percentage(resume_skills, job_skills)
+        # Prepare input for LLaMA via Groq API
+        input_prompt = f"Analyze the following resume: {resume_text[:300]} and job description: {job_desc_text[:300]}."
+        # Call Groq API to analyze using LLaMA
+        llama_analysis = call_groq_api(input_prompt)
         # Prepare analysis results
         summary = f"""
 ### Summary Analysis
 - Overall Skills Match: {skills_match:.1f}%
+- Experience Found: {', '.join(resume_info['experience'])}
+- Education Found: {', '.join(resume_info['education'])}
         """
         skills = f"""
         qualifications = f"""
 ### Qualifications
 Education Found:
+{', '.join(resume_info['education'])}
 Required Education:
+{', '.join(job_info['education'])}
         """
+        # Generate recommendation based on skills match
+        recommendation = "Recommendation based on skills match and experience."
+        if skills_match >= 70:
+            recommendation = "Strong Match - Recommended for interview."
         elif skills_match >= 50:
+            recommendation = "Moderate Match - Consider for interview with focus on missing skills."
         else:
+            recommendation = "Low Match - May not meet core requirements."
         recommendation = f"""
 ### Recommendation
 {recommendation}
         """
         return {
             "summary": summary.strip(),
             "skills": skills.strip(),
             "qualifications": qualifications.strip(),
+            "recommendation": recommendation.strip(),
+            "llama_analysis": llama_analysis.strip()
         }
     except Exception as e:
                 skills_output = gr.Markdown()
             with gr.TabItem("Qualifications"):
                 qualifications_output = gr.Markdown()
             with gr.TabItem("Recommendation"):
                 recommendation_output = gr.Markdown()
+            with gr.TabItem("LLaMA Analysis"):
+                llama_output = gr.Markdown()
         def analyze(resume_file, job_desc_file):
             if not resume_file or not job_desc_file:
                 result["summary"],
                 result["skills"],
                 result["qualifications"],
+                result["recommendation"],
+                result["llama_analysis"]
             )
         analyze_button.click(
             analyze,
             inputs=[resume_input, job_desc_input],
+            outputs=[summary_output, skills_output, qualifications_output, recommendation_output, llama_output]
         )
     return demo