Spaces:

DreamStream-1
/

HR-Test

Sleeping

App Files Community

DreamStream-1 committed on Nov 15, 2024

Commit

fb835f9

verified ·

1 Parent(s): 36492c8

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -183

app.py CHANGED Viewed

@@ -1,198 +1,91 @@
-import os
-import pandas as pd
-import google.generativeai as genai
-import PyPDF2 as pdf
-import io
-import re
 import streamlit as st
-from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
-import torch
-# Set API key for Google Generative AI
-api_key = os.getenv('GOOGLE_API_KEY')
-if not api_key:
-    raise ValueError("API key not found. Please set GOOGLE_API_KEY as an environment variable.")
-# Initialize the generative AI client
-genai.configure(api_key=api_key)
-# Load Hugging Face pipelines and models
-skill_extractor = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")
-education_extractor = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")
-# Sentiment analysis using Hugging Face RoBERTa
-task = "sentiment-analysis"
-model_name = "roberta-base"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSequenceClassification.from_pretrained(model_name)
-# Function to extract text from uploaded PDF
-def extract_pdf_text(uploaded_file):
-    """
-    Extract text from the uploaded PDF file.
-    Args:
-        uploaded_file: Streamlit uploaded file object.
-    Returns:
-        str: Extracted text content.
-    """
-    try:
-        file_stream = io.BytesIO(uploaded_file.read())
-        reader = pdf.PdfReader(file_stream)
-        text = "".join([page.extract_text() for page in reader.pages])
-        return text.strip()
-    except Exception as e:
-        st.error(f"Error extracting text from PDF: {e}")
-        return ""
-# Function to extract email and phone numbers
-def extract_contact_info(text):
-    """
-    Extract email and phone number using regex.
-    Args:
-        text: Extracted text content from the resume.
-    Returns:
-        tuple: Extracted email and phone number.
-    """
-    email = re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", text)
-    phone = re.search(r"\+?\(?\d{1,3}\)?[-.\s]?\(?\d{1,4}\)?[-.\s]?\d{3}[-.\s]?\d{4}", text)
-    return (email.group(0) if email else "Not Available",
-            phone.group(0) if phone else "Not Available")
-# Function to extract skills using NER
-def extract_skills(text):
-    """
-    Extract skills from resume text using NER.
-    Args:
-        text: Resume text.
-    Returns:
-        str: Comma-separated skills or "Not Available".
-    """
-    ner_results = skill_extractor(text)
-    skills = [entity['word'] for entity in ner_results if entity['entity_group'] == 'SKILL']
-    return ", ".join(skills) if skills else "Not Available"
-# Function to extract education details
-def extract_education(text):
-    """
-    Extract education information using NER and regex.
-    Args:
-        text: Resume text.
-    Returns:
-        str: Extracted education details.
-    """
-    ner_results = education_extractor(text)
-    education_entities = [entity['word'] for entity in ner_results if entity['entity_group'] == 'EDUCATION']
-    if education_entities:
-        return ", ".join(education_entities)
-    else:
-        education_patterns = [
-            r"(Bachelor of .+|Master of .+|PhD|BSc|MSc|MBA|B.A|M.A|B.Tech|M.Tech|Engineering|Data Science)",
-            r"(University of [A-Za-z]+)"
-        ]
-        matches = []
-        for pattern in education_patterns:
-            matches.extend(re.findall(pattern, text))
-        return ", ".join(matches) if matches else "Not Available"
-# Function to calculate match percentage using TF-IDF
-def calculate_match_percentage(resume_text, job_description):
-    """
-    Calculate the match percentage using TF-IDF and cosine similarity.
-    Args:
-        resume_text: Resume text.
-        job_description: Job description.
-    Returns:
-        float: Match percentage (0-100).
-    """
-    documents = [resume_text, job_description]
-    tfidf_vectorizer = TfidfVectorizer(stop_words='english')
-    tfidf_matrix = tfidf_vectorizer.fit_transform(documents)
     cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
-    return round(cosine_sim[0][0] * 100, 2)
-# Function to analyze resume with Gemini Flash 1.5
-def analyze_with_gemini(resume_text, job_description):
-    """
-    Use Gemini Flash 1.5 to generate an ATS analysis.
-    Args:
-        resume_text: Text content of the resume.
-        job_description: Job description content.
-    Returns:
-        str: AI-generated analysis.
-    """
     prompt = f"""
-    Act as an advanced ATS. Analyze the resume and job description.
     Resume: {resume_text}
     Job Description: {job_description}
-    Extract:
-    - Candidate Name
-    - Skills
-    - Education
-    - Leadership Experience (years)
-    - Match Percentage
-    Provide a summary of the candidate's strengths in bullet points.
     """
-    try:
-        response = genai.generate_text(
-            model="gemini-1p5",
-            prompt=prompt,
-            temperature=0.7,
-            max_output_tokens=500
-        )
-        return response.result
-    except Exception as e:
-        return f"Error generating analysis: {e}"
-# Streamlit Interface
-st.title("Resume ATS Analysis Tool")
-st.markdown("### Upload Resume PDF and Enter Job Description for Analysis")
-uploaded_file = st.file_uploader("Upload Resume (PDF format)", type=["pdf"])
-job_description = st.text_area("Job Description", height=200)
-if uploaded_file and job_description.strip():
-    if st.button("Analyze"):
-        resume_text = extract_pdf_text(uploaded_file)
-        if not resume_text:
-            st.error("No text extracted from PDF. Please upload a valid file.")
-            st.stop()
-        # Extract candidate details
-        email, phone = extract_contact_info(resume_text)
-        skills = extract_skills(resume_text)
-        education = extract_education(resume_text)
-        match_percentage = calculate_match_percentage(resume_text, job_description)
-        gemini_analysis = analyze_with_gemini(resume_text, job_description)
-        # Prepare the results
-        results = {
-            "Email": email,
-            "Contact": phone,
-            "Skills": skills,
-            "Education": education,
-            "Match Percentage": match_percentage,
-            "Gemini Analysis": gemini_analysis
-        }
-        # Display results
-        st.write(pd.DataFrame([results]))
-        # Allow download as CSV
-        csv = pd.DataFrame([results]).to_csv(index=False)
-        st.download_button(
-            label="Download Results as CSV",
-            data=csv,
-            file_name="resume_analysis_results.csv",
-            mime="text/csv"
-        )
-else:
-    st.info("Upload a resume and provide a job description to start the analysis.")

+import spacy
 import streamlit as st
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
+import PyPDF2
+import nltk
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+from gemini_flash import GeminiFlash  # Assuming Gemini Flash is installed
+# Ensure that NLTK's tokenizer data (punkt) and stopwords corpus are available
+nltk.download('punkt')
+nltk.download('stopwords')
+# Load spaCy model for NER
+nlp = spacy.load("en_core_web_sm")
+# Initialize Gemini Flash for prompt engineering
+prompt_engineer = GeminiFlash()
+# Streamlit Interface
+st.title("AI Resume and Job Description Analyzer")
+# Step 1: Resume Upload
+uploaded_file = st.file_uploader("Upload Resume (PDF)", type="pdf")
+if uploaded_file is not None:
+    # Read the PDF file
+    pdf_reader = PyPDF2.PdfReader(uploaded_file)
+    resume_text = ""
+    for page in range(len(pdf_reader.pages)):
+        resume_text += pdf_reader.pages[page].extract_text()
+    # Display the resume text
+    st.text_area("Resume Text", resume_text, height=300)
+# Step 2: Job Description Input
+job_description = st.text_area("Enter Job Description")
+if job_description:
+    # Preprocess job description using NLTK
+    stop_words = set(stopwords.words("english"))
+    tokens = word_tokenize(job_description)
+    filtered_tokens = [word for word in tokens if word.lower() not in stop_words]
+    ps = nltk.PorterStemmer()
+    stemmed_tokens = [ps.stem(word) for word in filtered_tokens]
+    preprocessed_job_description = " ".join(stemmed_tokens)
+    # Display preprocessed job description
+    st.text_area("Processed Job Description", preprocessed_job_description)
+# Step 3: Named Entity Recognition (NER) on Resume
+if resume_text:
+    doc = nlp(resume_text)
+    entities = [(ent.text, ent.label_) for ent in doc.ents]
+    # Display extracted entities
+    st.subheader("Named Entities from Resume")
+    st.write(entities)
+# Step 4: Candidate-Job Relevance Using Cosine Similarity
+if resume_text and job_description:
+    vectorizer = TfidfVectorizer()
+    tfidf_matrix = vectorizer.fit_transform([job_description, resume_text])
     cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
+    st.write(f"Cosine Similarity (Relevance): {cosine_sim[0][0]:.2f}")
+# Step 5: Gemini Flash - Prompt Engineering (using Gemini Flash to craft relevant prompts for an LLM)
+if resume_text and job_description:
     prompt = f"""
+    Given the resume text and job description, evaluate how well the candidate's qualifications match the job requirements.
     Resume: {resume_text}
     Job Description: {job_description}
+    Based on the information provided, generate a detailed match score between the candidate and the job.
     """
+    # Use Gemini Flash to refine and enhance the prompt (assuming Gemini Flash enhances the prompt)
+    enhanced_prompt = prompt_engineer.refine_prompt(prompt)
+    # Display the enhanced prompt (for debugging or transparency)
+    st.subheader("Enhanced Prompt for LLM")
+    st.write(enhanced_prompt)
+    # Here you would typically pass the `enhanced_prompt` to a large language model (LLM) API or model for evaluation
+    # For demonstration purposes, assume a function `get_llm_response` exists that interacts with a model.
+    # response = get_llm_response(enhanced_prompt)
+    # st.write("LLM Response:", response)