Spaces:

akazmi
/

resume_scanner

Build error

App Files Files Community

akazmi committed on Jan 3, 2025

Commit

1d4d6a7

verified ·

1 Parent(s): d4f3e5d

Update app.py

Browse files

Files changed (1) hide show

app.py +118 -40

app.py CHANGED Viewed

@@ -1,44 +1,122 @@
-from transformers import pipeline
 import streamlit as st
-# Function to summarize text using Hugging Face Transformers
-def summarize_text(text, model_name="facebook/bart-large-cnn"):
-    summarizer = pipeline("summarization", model=model_name)
-    summary = summarizer(text, max_length=150, min_length=40, do_sample=False)
-    return summary[0]["summary_text"]
-# Streamlit UI additions
-if st.button("Analyze Resumes"):
-    if not uploaded_files:
-        st.error("Please upload at least one resume.")
     else:
-        # Extract text from resumes
-        resumes = [extract_text(file) for file in uploaded_files]
-        resumes = [resume for resume in resumes if resume.strip()]  # Filter out empty files
-        if not resumes:
-            st.error("No valid text extracted from resumes. Please check your files.")
         else:
-            # Combine job description and resumes for analysis
-            documents = [job_description] + resumes
-            # Extract keywords and calculate similarity
-            vectorizer, tfidf_matrix = extract_keywords(documents)
-            similarities = calculate_similarity(tfidf_matrix)
-            # Display match scores and summaries
-            st.subheader("Resume Analysis")
-            for i, file in enumerate(uploaded_files):
-                st.write(f"**Resume {i+1}: {file.name}**")
-                st.write(f"Match Score: {similarities[0][i + 1] * 100:.2f}%")
-                # Generate and display summary
-                resume_summary = summarize_text(resumes[i])
-                st.write("**Summary:**")
-                st.write(resume_summary)
-                # Display weightage basis (keywords match)
-                job_keywords = set(vectorizer.get_feature_names_out())
-                resume_keywords = set(resumes[i].lower().split())
-                matched_keywords = job_keywords.intersection(resume_keywords)
-                st.write("**Matched Keywords:**", ", ".join(matched_keywords))
-                st.write("---")

 import streamlit as st
+import os
+from groq import Groq
+import numpy as np
+import re
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+from docx import Document
+from PyPDF2 import PdfReader
+from transformers import pipeline
+# Streamlit re-executes this whole script on every widget interaction, so the
+# expensive objects below are created once via st.cache_resource and then
+# reused across reruns instead of being rebuilt each time.
+@st.cache_resource
+def _get_groq_client():
+    """Return the shared Groq client, configured from the GROQ_API_KEY env var."""
+    return Groq(
+        api_key=os.environ.get("GROQ_API_KEY"),
+    )
+@st.cache_resource
+def _get_summarizer():
+    """Return the shared HuggingFace summarization pipeline (library default model)."""
+    return pipeline("summarization")
+# Initialize Groq client
+client = _get_groq_client()
+# Initialize HuggingFace summarization pipeline
+summarizer = _get_summarizer()
+# Function to get Groq analysis of the job description
+def groq_chat_completion(prompt):
+    """Send ``prompt`` to Groq as a single user message and return the reply text.
+
+    The request goes through the module-level ``client`` using the
+    "llama3-8b-8192" model; the content of the first returned choice is
+    handed back to the caller.
+    """
+    user_message = {"role": "user", "content": prompt}
+    response = client.chat.completions.create(
+        messages=[user_message],
+        model="llama3-8b-8192",
+    )
+    first_choice = response.choices[0]
+    return first_choice.message.content
+# Function to extract text from uploaded files
+def extract_text(file):
+    """Return the text of an uploaded resume, or "" for an unsupported type.
+
+    Dispatches on ``file.type``: "text/plain" uploads are decoded as UTF-8,
+    PDFs are read page by page with ``PdfReader``, and .docx files are read
+    paragraph by paragraph with ``Document``.
+    """
+    mime_type = file.type
+    if mime_type == "text/plain":
+        # Undecodable bytes become U+FFFD instead of raising UnicodeDecodeError,
+        # so one oddly-encoded resume cannot abort the whole analysis.
+        return file.read().decode("utf-8", errors="replace")
+    if mime_type == "application/pdf":
+        # A page whose extraction result is falsy contributes an empty string.
+        return "".join(page.extract_text() or "" for page in PdfReader(file).pages)
+    if mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
+        # Every paragraph is followed by a newline, including the last one.
+        return "".join(para.text + "\n" for para in Document(file).paragraphs)
+    return ""
+# Function to extract keywords and calculate similarity
+def extract_keywords(documents):
+    """Fit a TF-IDF model (English stop words removed) on ``documents``.
+
+    Returns a ``(vectorizer, tfidf_matrix)`` pair: the fitted
+    ``TfidfVectorizer`` and the matrix produced by ``fit_transform``.
+    """
+    tfidf = TfidfVectorizer(stop_words="english")
+    matrix = tfidf.fit_transform(documents)
+    return tfidf, matrix
+def calculate_similarity(tfidf_matrix):
+    """Return the pairwise cosine-similarity matrix of the rows of ``tfidf_matrix``."""
+    return cosine_similarity(tfidf_matrix)
+# Function to generate summary for each resume
+def generate_summary(text):
+    """Return a summary of ``text``, or ``text`` itself when it is short.
+
+    Texts of 200 whitespace-separated words or fewer are returned unchanged.
+    Longer texts are passed to the module-level ``summarizer`` pipeline
+    (max_length=150, min_length=50, deterministic decoding) and the
+    "summary_text" of its first result is returned.
+    """
+    if len(text.split()) <= 200:
+        return text  # Short enough to show as-is
+    # truncation=True asks the pipeline to cut the input down to the model's
+    # maximum input length; without it a long resume can exceed that limit
+    # and make the summarizer fail.
+    summary = summarizer(
+        text,
+        max_length=150,
+        min_length=50,
+        do_sample=False,
+        truncation=True,
+    )
+    return summary[0]['summary_text']
+# Streamlit UI
+st.title("Detail Job Creator and Resume Scanner")
+st.write("Analyze resumes and match them with job descriptions.")
+# Upload job description and display Groq analysis first
+st.subheader("Job Description")
+job_description = st.text_area(
+    "Paste the job description here:",
+    height=150,
+)
+if job_description:
+    st.subheader("Groq Analysis")
+    groq_response = groq_chat_completion(job_description)
+    st.write("Groq's analysis of the job description:")
+    st.write(groq_response)
+    # Proceed with resume upload only if job description is provided
+    st.subheader("Upload Resumes")
+    uploaded_files = st.file_uploader(
+        "Upload resume files (Text, Word, or PDF):",
+        accept_multiple_files=True,
+        type=["txt", "docx", "pdf"]
+    )
+    if st.button("Analyze Resumes"):
+        if not uploaded_files:
+            st.error("Please upload at least one resume.")
+        else:
+            # Extract text from resumes, keeping each text paired with its
+            # file so that dropping an empty extraction cannot shift the
+            # remaining resumes onto the wrong file name or score.
+            extracted = [(file, extract_text(file)) for file in uploaded_files]
+            readable = [(file, text) for file, text in extracted if text.strip()]
+            skipped = [file.name for file, text in extracted if not text.strip()]
+            if not readable:
+                st.error("No valid text extracted from resumes. Please check your files.")
+            else:
+                if skipped:
+                    st.warning("No text could be extracted from: " + ", ".join(skipped))
+                resumes = [text for _, text in readable]
+                # Combine job description and resumes for analysis
+                documents = [job_description] + resumes
+                # Extract keywords and calculate similarity
+                vectorizer, tfidf_matrix = extract_keywords(documents)
+                similarities = calculate_similarity(tfidf_matrix)
+                # Display match scores and summaries
+                st.subheader("Resume Match Scores and Summaries")
+                for i, (file, resume_text) in enumerate(readable):
+                    st.write(f"**Resume {i+1}: {file.name}**")
+                    # Row 0 is the job description; resume i is column i + 1.
+                    st.write(f"Match Score: {similarities[0][i + 1] * 100:.2f}%")
+                    # Generate and display summary
+                    summary = generate_summary(resume_text)
+                    st.write("**Summary:**")
+                    st.write(summary)
+                    st.write("---")