Spaces:

DreamStream-1
/

CDF-HR

Sleeping

App Files Community

DreamStream-1 committed on Nov 25, 2024

Commit

9a1feff

verified ·

1 Parent(s): f8a1025

Update app.py

Browse files

Files changed (1) hide show

app.py +105 -134

app.py CHANGED Viewed

@@ -1,42 +1,68 @@
 import streamlit as st
 import requests
-from PyPDF2 import PdfReader
-from docx import Document
 import pandas as pd
-# Set up API key for Google Generative Language
-API_KEY = st.secrets["GOOGLE_API_KEY"]
-def extract_text_from_pdf(pdf_file):
-    """Extract text from PDF file using PyPDF2."""
-    reader = PdfReader(pdf_file)
-    text = ""
-    for page in reader.pages:
-        text += page.extract_text()
-    return text
-def extract_text_from_docx(docx_file):
-    """Extract text from DOCX file."""
-    doc = Document(docx_file)
-    text = ""
-    for para in doc.paragraphs:
-        text += para.text + "\n"
-    return text
 def analyze_documents(resume_text, job_description):
     """Analyze resume text against the job description using Gemini 1.5 Flash."""
     custom_prompt = f"""
     Please analyze the following resume in the context of the job description provided.
-    Provide the following information:
-    1. The candidate's name, contact number, and email address extracted from the resume.
-    2. A numeric match percentage (0-100%) based on the alignment of the resume with the job description.
-    3. A list of missing keywords that are present in the job description but not in the resume.
-    4. Any recommendations to improve the resume for better alignment with the job description.
     Job Description: {job_description}
     Resume: {resume_text}
     """
     url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key={API_KEY}"
     headers = {'Content-Type': 'application/json'}
     data = {
@@ -47,93 +73,45 @@ def analyze_documents(resume_text, job_description):
     response = requests.post(url, headers=headers, json=data)
     return response.json()
-def display_resume(file, index):
-    """Display uploaded resume content."""
-    file_type = file.name.split('.')[-1].lower()
-    unique_key = f"{file.name}_{index}"  # Ensure the key is unique by appending an index
-    if file_type == 'pdf':
-        text = extract_text_from_pdf(file)
-        st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
-    elif file_type == 'docx':
-        text = extract_text_from_docx(file)
-        st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
-    else:
-        st.error(f"Unsupported file type for {file.name}. Please upload a PDF or DOCX file.")
-def analyze_multiple_resumes(resumes, job_description):
-    """Analyze multiple resumes and display the results."""
-    results = []
-    for index, resume in enumerate(resumes):
-        resume.seek(0)  # Reset file pointer
-        file_type = resume.name.split('.')[-1].lower()
-        # Extract resume text based on file type
-        if file_type == 'pdf':
-            resume_text = extract_text_from_pdf(resume)
-        elif file_type == 'docx':
-            resume_text = extract_text_from_docx(resume)
-        # Analyze the resume text using the model
-        analysis = analyze_documents(resume_text, job_description)
-        # Extract details from the model's response
-        name, email, phone, match_percentage = "N/A", "N/A", "N/A", 0
-        missing_keywords = []
-        if "candidates" in analysis:
-            for candidate in analysis["candidates"]:
-                if "content" in candidate and "parts" in candidate["content"]:
-                    for part in candidate["content"]["parts"]:
-                        response_text = part["text"]
-                        st.write(response_text)  # Optional: Display the response for debugging
-                        # Extract details based on patterns in the response
-                        lines = response_text.split("\n")
-                        for line in lines:
-                            line_lower = line.lower()
-                            if "name:" in line_lower:
-                                name = line.split(":")[-1].strip()
-                            elif "email:" in line_lower:
-                                email = line.split(":")[-1].strip()
-                            elif "contact:" in line_lower:
-                                phone = line.split(":")[-1].strip()
-                            elif "match percentage" in line_lower:
-                                # Extract numeric match percentage
-                                percentage_str = ''.join(filter(str.isdigit, line.split(":")[-1].strip()))
-                                if percentage_str:
-                                    try:
-                                        match_percentage = int(percentage_str)
-                                        if match_percentage > 100:
-                                            match_percentage = 100
-                                    except ValueError:
-                                        match_percentage = 0
-                            elif "missing keywords" in line_lower:
-                                missing_keywords = line.split(":")[-1].strip().split(", ")
-        # Append results for the table
-        results.append({
-            "Name": name,
-            "Contact": phone,
-            "Email": email,
-            "Match Percentage": match_percentage,
-            "Missing Keywords": ", ".join(missing_keywords)
-        })
-    # Create a DataFrame for the results
-    df = pd.DataFrame(results)
-    # Display the table
-    st.write("### Candidate Match Summary")
-    st.dataframe(df)
-    # Downloadable CSV
-    csv = df.to_csv(index=False)
-    st.download_button(
-        label="📥 Download Results as CSV",
-        data=csv,
-        file_name="resume_analysis_results.csv",
-        mime="text/csv",
-    )
 # Streamlit app configuration
 st.set_page_config(page_title="ATS Resume Evaluation System", layout="wide")
@@ -148,27 +126,20 @@ st.markdown(
     """, unsafe_allow_html=True
 )
 st.markdown('<div class="title">📝🔍🌟 ATS Resume Evaluation System</div>', unsafe_allow_html=True)
-st.markdown('<div class="subtitle">Upload up to 10 resumes and analyze them against the job description</div>', unsafe_allow_html=True)
-# Inputs: Job description and multiple resume file uploads
-st.sidebar.header("Upload Your Inputs")
-job_description = st.sidebar.text_area("Enter the Job Description:", height=250)
-resumes = st.sidebar.file_uploader("Upload Your Resumes (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)
-# Display uploaded resume content
-if resumes:
-    for index, resume in enumerate(resumes):
-        with st.expander(f"📄 Uploaded Resume Content - {resume.name}", expanded=True):
-            st.write("### Extracted Text from Resume")
-            display_resume(resume, index)
-# Analyze button
-if st.sidebar.button("Analyze Resumes"):
-    if job_description and resumes:
-        if len(resumes) <= 10:  # Limit to a maximum of 10 resumes
-            with st.spinner("Analyzing..."):
-                analyze_multiple_resumes(resumes, job_description)
-        else:
-            st.error("You can upload a maximum of 10 resumes.")
     else:
-        st.error("Please provide both a job description and at least one resume file.")

+import spacy
 import streamlit as st
+import nltk
+from nltk.tokenize import word_tokenize
+from nltk.corpus import stopwords
 import requests
+import re
 import pandas as pd
+# Download necessary NLTK data
+nltk.download('punkt')
+nltk.download('stopwords')
+# Load the SpaCy model
+nlp = spacy.load("en_core_web_sm")
+# Function to clean and normalize text
+def clean_and_normalize_text(text):
+    """Clean and normalize the resume/job description text."""
+    # Tokenization
+    tokens = word_tokenize(text)
+    # Lowercasing and removing non-alphabetical tokens
+    tokens = [word.lower() for word in tokens if word.isalpha()]
+    # Removing stopwords using NLTK
+    stop_words = set(stopwords.words("english"))
+    filtered_tokens = [word for word in tokens if word not in stop_words]
+    # Lemmatization using SpaCy
+    doc = nlp(' '.join(filtered_tokens))
+    lemmatized_tokens = [token.lemma_ for token in doc]
+    # Reconstruct the cleaned text
+    cleaned_text = ' '.join(lemmatized_tokens)
+    # Optionally, remove extra spaces or characters
+    cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()
+    return cleaned_text
+# Function for Named Entity Recognition (NER)
+def extract_named_entities(text):
+    """Extract named entities from text using SpaCy."""
+    doc = nlp(text)
+    # Extract named entities
+    entities = [(ent.text, ent.label_) for ent in doc.ents]
+    return entities
+# Function to analyze the resume and job description using Gemini 1.5 Flash model
 def analyze_documents(resume_text, job_description):
     """Analyze resume text against the job description using Gemini 1.5 Flash."""
     custom_prompt = f"""
     Please analyze the following resume in the context of the job description provided.
+    For the match percentage, please consider:
+    - The relevance of the hard skills mentioned.
+    - The match of experiences and achievements listed in the resume.
+    - Only return a 100% match if all critical skills, experiences, and keywords align well and meaningfully with the job description.
     Job Description: {job_description}
     Resume: {resume_text}
     """
     url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key={API_KEY}"
     headers = {'Content-Type': 'application/json'}
     data = {
     response = requests.post(url, headers=headers, json=data)
     return response.json()
+# Streamlit interface to handle text analysis
+def process_text(resume_text, job_description):
+    """Process and analyze resume and job description text."""
+    # Clean and normalize the text
+    cleaned_resume = clean_and_normalize_text(resume_text)
+    cleaned_job_description = clean_and_normalize_text(job_description)
+    # Perform Named Entity Recognition (NER)
+    resume_entities = extract_named_entities(cleaned_resume)
+    job_desc_entities = extract_named_entities(cleaned_job_description)
+    # Refine the prompt with cleaned data and extracted entities
+    custom_prompt = f"""
+    Please analyze the following resume in the context of the job description provided.
+    Here are the named entities found in the job description:
+    {job_desc_entities}
+    Here are the named entities found in the resume:
+    {resume_entities}
+    For the match percentage, please consider:
+    - The relevance of the hard skills mentioned.
+    - The match of experiences and achievements listed in the resume.
+    - Only return a 100% match if all critical skills, experiences, and keywords align well and meaningfully with the job description.
+    Job Description: {cleaned_job_description}
+    Resume: {cleaned_resume}
+    """
+    # Call the Gemini 1.5 model
+    analysis = analyze_documents(cleaned_resume, cleaned_job_description)
+    # Extract the results from the model's response
+    results = {
+        "Match Percentage": "Not Available",  # Placeholder, modify as needed
+        "Recommendations": "Not Available"    # Placeholder, modify as needed
+    }
+    # Logic to extract results from the model response can be added here.
+    return results
 # Streamlit app configuration
 st.set_page_config(page_title="ATS Resume Evaluation System", layout="wide")
     """, unsafe_allow_html=True
 )
 st.markdown('<div class="title">📝🔍🌟 ATS Resume Evaluation System</div>', unsafe_allow_html=True)
+st.markdown('<div class="subtitle">Upload your resume and job description for analysis</div>', unsafe_allow_html=True)
+# Inputs: Job description and resume file upload
+job_description = st.text_area("Enter the Job Description:", height=250)
+resume_file = st.file_uploader("Upload Resume (PDF or DOCX)", type=["pdf", "docx"])
+# Process the uploaded resume and job description
+if resume_file:
+    if job_description:
+        resume_text = resume_file.read().decode("utf-8")  # Assuming the resume is a text file
+        result = process_text(resume_text, job_description)
+        # Display the analysis results
+        st.write(f"**Match Percentage**: {result['Match Percentage']}")
+        st.write(f"**Recommendations**: {result['Recommendations']}")
     else:
+        st.warning("Please enter the job description to begin analysis.")