Spaces:

DreamStream-1
/

HR-Test

Sleeping

App Files Files Community

DreamStream-1 committed on Nov 15, 2024

Commit

36492c8

verified ·

1 Parent(s): 1cf7d5f

Update app.py

Browse files

Files changed (1) hide show

app.py +121 -105

app.py CHANGED Viewed

@@ -1,182 +1,198 @@
 import os
 import pandas as pd
 import google.generativeai as genai
-import PyPDF2
 import io
 import re
 import streamlit as st
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
-# Set API Key
-api_key = os.getenv("GOOGLE_API_KEY")
 if not api_key:
-    st.error("API key not found. Please set GOOGLE_API_KEY in your environment variables.")
-    st.stop()
-# Configure Generative AI client
 genai.configure(api_key=api_key)
-# Generate Response using Gemini Flash 1.5
-def generate_with_gemini(prompt, model="gemini-1p5", max_output_tokens=256):
-    """
-    Generate a response using the Gemini Flash 1.5 model.
-    Args:
-        prompt (str): Input prompt for the AI model.
-        model (str): Model to use (default: "gemini-1p5").
-        max_output_tokens (int): Limit for the generated output tokens.
-    Returns:
-        str: Generated text response from the model.
-    """
-    try:
-        response = genai.generate_text(
-            model=model,
-            prompt=prompt,
-            temperature=0.7,
-            max_output_tokens=max_output_tokens
-        )
-        return response.result  # Adjust this if response structure differs
-    except Exception as e:
-        return f"Error generating text: {str(e)}"
-# Extract Text from Uploaded PDF
-def extract_text_from_pdf(file):
     """
-    Extract text from uploaded PDF file.
     Args:
-        file (UploadedFile): PDF file uploaded via Streamlit.
     Returns:
-        str: Extracted text or error message.
     """
     try:
-        reader = PyPDF2.PdfReader(io.BytesIO(file.read()))
-        text = ''.join(page.extract_text() for page in reader.pages)
         return text.strip()
     except Exception as e:
-        st.error(f"Error extracting text from PDF: {str(e)}")
         return ""
-# Extract Contact Information
 def extract_contact_info(text):
     """
-    Extract email and phone number from text using regex.
     Args:
-        text (str): Input text.
     Returns:
-        tuple: Extracted email and phone number or "Not Available".
     """
     email = re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", text)
-    phone = re.search(r"\+?[\d\s().-]{7,15}", text)
-    return (email.group(0) if email else "Not Available",
             phone.group(0) if phone else "Not Available")
-# Extract Management Experience
-def extract_management_experience(text):
     """
-    Extract management and leadership keywords and years.
     Args:
-        text (str): Input resume text.
     Returns:
-        tuple: Total years of experience and matching keywords.
     """
-    keywords = ["manager", "team lead", "director", "executive", "supervisor", "leadership", "head"]
-    patterns = [
-        r"(\d+)\s?(years|yrs|year)\s?of\s?(management|leadership)",
-        r"(\d+)\s?(years|yrs|year)\s?experience\s?(managing|leading)"
-    ]
-    found_keywords = [kw for kw in keywords if kw in text.lower()]
-    years = sum(int(match[0]) for pattern in patterns for match in re.findall(pattern, text))
-    return years, ", ".join(found_keywords) if found_keywords else "Not Available"
-# Calculate Match Percentage
 def calculate_match_percentage(resume_text, job_description):
     """
-    Calculate similarity between resume and job description using TF-IDF.
     Args:
-        resume_text (str): Resume content.
-        job_description (str): Job description.
     Returns:
         float: Match percentage (0-100).
     """
     try:
-        vectorizer = TfidfVectorizer(stop_words='english')
-        tfidf_matrix = vectorizer.fit_transform([resume_text, job_description])
-        cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
-        return round(cosine_sim[0][0] * 100, 2)
     except Exception as e:
-        st.error(f"Error calculating match percentage: {str(e)}")
-        return 0.0
-# Streamlit User Interface
-st.title("Resume ATS Analysis Tool: Powered by Gemini Flash 1.5")
-st.markdown("### Upload a Resume PDF and Enter a Job Description")
-uploaded_file = st.file_uploader("Upload Resume PDF", type=["pdf"])
 job_description = st.text_area("Job Description", height=200)
 if uploaded_file and job_description.strip():
     if st.button("Analyze"):
-        # Extract resume text
-        resume_text = extract_text_from_pdf(uploaded_file)
         if not resume_text:
-            st.error("Failed to extract text from PDF. Ensure the file is valid.")
             st.stop()
-        # Extract contact information
         email, phone = extract_contact_info(resume_text)
-        # Extract management experience
-        management_years, management_keywords = extract_management_experience(resume_text)
-        # Calculate match percentage
         match_percentage = calculate_match_percentage(resume_text, job_description)
-        # Generate AI analysis
-        prompt = f"""
-        Analyze the resume with respect to the job description.
-        Resume Text: {resume_text}
-        Job Description: {job_description}
-        Provide details:
-        - Key Skills
-        - Education
-        - Management Experience (Years)
-        - Leadership Keywords
-        - Match Percentage
-        """
-        gemini_response = generate_with_gemini(prompt)
-        # Display results
         results = {
             "Email": email,
             "Contact": phone,
-            "Management Experience (Years)": management_years,
-            "Leadership Keywords": management_keywords,
             "Match Percentage": match_percentage,
-            "AI Summary": gemini_response
         }
         st.write(pd.DataFrame([results]))
-        # Allow CSV download
         csv = pd.DataFrame([results]).to_csv(index=False)
         st.download_button(
-            "Download Results",
             data=csv,
             file_name="resume_analysis_results.csv",
             mime="text/csv"
         )
 else:
-    st.info("Please upload a resume and enter a job description to proceed.")

 import os
 import pandas as pd
 import google.generativeai as genai
+import PyPDF2 as pdf
 import io
 import re
 import streamlit as st
+from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
+import torch
# Set API key for Google Generative AI
api_key = os.getenv('GOOGLE_API_KEY')
if not api_key:
    raise ValueError("API key not found. Please set GOOGLE_API_KEY as an environment variable.")

# Initialize the generative AI client
genai.configure(api_key=api_key)


# Streamlit re-executes this script from the top on every widget interaction.
# The loaders below are wrapped in st.cache_resource so each model is
# instantiated once per server process instead of on every rerun.
@st.cache_resource
def _load_ner_pipeline(model_id):
    """Return a cached token-classification pipeline for ``model_id``."""
    return pipeline("ner", model=model_id, aggregation_strategy="simple")


@st.cache_resource
def _load_sequence_classifier(model_id):
    """Return a cached ``(tokenizer, model)`` pair for ``model_id``."""
    return (
        AutoTokenizer.from_pretrained(model_id),
        AutoModelForSequenceClassification.from_pretrained(model_id),
    )


# Load Hugging Face pipelines and models
skill_extractor = _load_ner_pipeline("dslim/bert-base-NER")
education_extractor = _load_ner_pipeline("dbmdz/bert-large-cased-finetuned-conll03-english")

# Sentiment analysis using Hugging Face RoBERTa
# NOTE(review): "roberta-base" appears to be a base checkpoint rather than one
# fine-tuned for sentiment, and neither `tokenizer` nor `model` is referenced
# anywhere else in the visible file. The names are kept so nothing that may
# depend on them breaks -- confirm whether this load can be dropped.
task = "sentiment-analysis"
model_name = "roberta-base"
tokenizer, model = _load_sequence_classifier(model_name)
# Function to extract text from uploaded PDF
def extract_pdf_text(uploaded_file):
    """
    Extract text from the uploaded PDF file.

    Page texts are joined with a newline so the last word of one page is not
    fused with the first word of the next. Pages for which extraction yields
    nothing (``None`` or an empty string) are skipped.

    Args:
        uploaded_file: Streamlit uploaded file object.
    Returns:
        str: Extracted text content, stripped of surrounding whitespace, or
            "" when extraction fails (the error is reported via ``st.error``).
    """
    try:
        file_stream = io.BytesIO(uploaded_file.read())
        reader = pdf.PdfReader(file_stream)
        page_texts = (page.extract_text() for page in reader.pages)
        text = "\n".join(page_text for page_text in page_texts if page_text)
        return text.strip()
    except Exception as e:
        # Best-effort by design: any parsing failure is shown to the user and
        # reported to the caller as "no text".
        st.error(f"Error extracting text from PDF: {e}")
        return ""
# Function to extract email and phone numbers
def extract_contact_info(text):
    """
    Extract the first email address and phone number using regex.

    Args:
        text: Extracted text content from the resume. ``None`` or an empty
            string is accepted and yields "Not Available" for both fields.
    Returns:
        tuple: ``(email, phone)``; each element is the first match found in
            ``text`` or the string "Not Available".
    """
    if not text:
        # re.search raises TypeError on None; treat missing text as "no match".
        return "Not Available", "Not Available"

    email = re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", text)
    # The digit lookarounds stop the pattern from matching a slice of a longer
    # digit run (e.g. an ID or account number) and reporting it as a phone.
    phone = re.search(
        r"(?<!\d)\+?\(?\d{1,3}\)?[-.\s]?\(?\d{1,4}\)?[-.\s]?\d{3}[-.\s]?\d{4}(?!\d)",
        text,
    )
    return (email.group(0) if email else "Not Available",
            phone.group(0) if phone else "Not Available")
# Function to extract skills using NER
def extract_skills(text, labels=("SKILL",), chunk_words=200):
    """
    Extract skills from resume text using NER.

    The text is fed to ``skill_extractor`` in windows of ``chunk_words``
    whitespace-separated words rather than in one call, so the tail of a long
    resume is not lost to the model's input-length limit. The window size is a
    heuristic: sub-word tokenization can expand one word into several tokens.

    NOTE(review): the model configured for ``skill_extractor`` looks like a
    CoNLL-style NER checkpoint (PER/ORG/LOC/MISC). If so, it never emits a
    'SKILL' group and this function always returns "Not Available". Confirm
    against the model card, then either swap the model or pass the groups that
    should count as skills via ``labels``.

    Args:
        text: Resume text.
        labels: Entity groups to keep. Defaults to ``("SKILL",)``.
        chunk_words: Maximum number of words per NER call (minimum 1).
    Returns:
        str: Comma-separated skills (first-seen order, duplicates removed)
            or "Not Available".
    """
    if not text or not text.strip():
        return "Not Available"

    chunk_words = max(1, int(chunk_words))
    wanted = set(labels)
    words = text.split()

    found = []
    for start in range(0, len(words), chunk_words):
        chunk = " ".join(words[start:start + chunk_words])
        found.extend(
            entity['word']
            for entity in skill_extractor(chunk)
            if entity['entity_group'] in wanted
        )

    # dict.fromkeys removes duplicates while preserving first-seen order.
    skills = list(dict.fromkeys(found))
    return ", ".join(skills) if skills else "Not Available"
# Function to extract education details
def extract_education(text, chunk_words=200):
    """
    Extract education information using NER, falling back to regex.

    NER runs over windows of ``chunk_words`` whitespace-separated words so the
    tail of a long resume is not lost to the model's input-length limit (the
    window size is a heuristic). If NER yields no 'EDUCATION' entities, a set
    of regex patterns for degree names and "University of ..." is applied to
    the full text instead.

    NOTE(review): the model configured for ``education_extractor`` is a
    CoNLL-03 checkpoint and presumably never emits an 'EDUCATION' group, in
    which case the regex fallback is the only path that produces output --
    confirm against the model card.

    Args:
        text: Resume text.
        chunk_words: Maximum number of words per NER call (minimum 1).
    Returns:
        str: Comma-separated education details (first-seen order, duplicates
            removed) or "Not Available".
    """
    if not text or not text.strip():
        return "Not Available"

    chunk_words = max(1, int(chunk_words))
    words = text.split()

    education_entities = []
    for start in range(0, len(words), chunk_words):
        chunk = " ".join(words[start:start + chunk_words])
        education_entities.extend(
            entity['word']
            for entity in education_extractor(chunk)
            if entity['entity_group'] == 'EDUCATION'
        )
    if education_entities:
        return ", ".join(dict.fromkeys(education_entities))

    education_patterns = [
        # Dots in the abbreviations are escaped (an unescaped "B.A" also
        # matches e.g. "BSA"), and \b keeps abbreviations from matching
        # inside longer words.
        r"Bachelor of .+|Master of .+"
        r"|\b(?:PhD|BSc|MSc|MBA|B\.A|M\.A|B\.Tech|M\.Tech|Engineering|Data Science)\b",
        # Keep following capitalized words so multi-word names such as
        # "University of New York" are captured whole.
        r"University of [A-Za-z]+(?: [A-Z][A-Za-z]+)*",
    ]
    matches = []
    for pattern in education_patterns:
        matches.extend(match.strip() for match in re.findall(pattern, text))

    unique_matches = list(dict.fromkeys(matches))
    return ", ".join(unique_matches) if unique_matches else "Not Available"
# Function to calculate match percentage using TF-IDF
def calculate_match_percentage(resume_text, job_description):
    """
    Calculate the match percentage using TF-IDF and cosine similarity.

    Args:
        resume_text: Resume text.
        job_description: Job description.
    Returns:
        float: Match percentage (0-100), rounded to two decimals. Returns 0.0
            when no vocabulary can be built from the two texts (for example
            when both consist only of English stop words).
    """
    documents = [resume_text or "", job_description or ""]
    tfidf_vectorizer = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = tfidf_vectorizer.fit_transform(documents)
    except ValueError:
        # scikit-learn raises ValueError when the vocabulary is empty; with
        # nothing to compare, report no match instead of crashing the app.
        return 0.0
    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    # float() converts the NumPy scalar so callers get a plain Python float.
    return round(float(cosine_sim[0][0]) * 100, 2)
# Function to analyze resume with Gemini Flash 1.5
def analyze_with_gemini(resume_text, job_description):
    """
    Use Gemini Flash 1.5 to generate an ATS analysis.

    Gemini models are called through ``genai.GenerativeModel(...)
    .generate_content``; the legacy ``genai.generate_text`` entry point with
    the id "gemini-1p5" does not address a Gemini model, so the previous call
    always ended in the error branch.

    Args:
        resume_text: Text content of the resume.
        job_description: Job description content.
    Returns:
        str: AI-generated analysis, or a string starting with
            "Error generating analysis:" if the request fails or the response
            carries no text.
    """
    prompt = f"""
    Act as an advanced ATS. Analyze the resume and job description.
    Resume: {resume_text}
    Job Description: {job_description}
    Extract:
    - Candidate Name
    - Skills
    - Education
    - Leadership Experience (years)
    - Match Percentage
    Provide a summary of the candidate's strengths in bullet points.
    """
    try:
        gemini = genai.GenerativeModel("gemini-1.5-flash")
        response = gemini.generate_content(
            prompt,
            generation_config=genai.types.GenerationConfig(
                temperature=0.7,
                max_output_tokens=500,
            ),
        )
        # Accessing .text raises if the response has no usable candidate
        # (e.g. it was blocked); that is reported through the branch below.
        return response.text
    except Exception as e:
        return f"Error generating analysis: {e}"
# Streamlit Interface
st.title("Resume ATS Analysis Tool")
st.markdown("### Upload Resume PDF and Enter Job Description for Analysis")

uploaded_file = st.file_uploader("Upload Resume (PDF format)", type=["pdf"])
job_description = st.text_area("Job Description", height=200)

# Both inputs are required before the Analyze button is offered.
inputs_ready = uploaded_file and job_description.strip()

if not inputs_ready:
    st.info("Upload a resume and provide a job description to start the analysis.")
elif st.button("Analyze"):
    resume_text = extract_pdf_text(uploaded_file)
    if not resume_text:
        st.error("No text extracted from PDF. Please upload a valid file.")
        st.stop()

    # Gather every field that goes into the single-row report.
    email, phone = extract_contact_info(resume_text)
    skills = extract_skills(resume_text)
    education = extract_education(resume_text)
    match_percentage = calculate_match_percentage(resume_text, job_description)
    gemini_analysis = analyze_with_gemini(resume_text, job_description)

    results = {
        "Email": email,
        "Contact": phone,
        "Skills": skills,
        "Education": education,
        "Match Percentage": match_percentage,
        "Gemini Analysis": gemini_analysis
    }

    # One frame backs both the on-screen table and the CSV export.
    results_frame = pd.DataFrame([results])
    st.write(results_frame)

    csv = results_frame.to_csv(index=False)
    st.download_button(
        label="Download Results as CSV",
        data=csv,
        file_name="resume_analysis_results.csv",
        mime="text/csv"
    )