Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import os | |
| from groq import Groq | |
| import numpy as np | |
| import re | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from docx import Document | |
| from PyPDF2 import PdfReader | |
| from transformers import pipeline | |
# Groq client for LLM chat completions. Reads the key from the
# GROQ_API_KEY environment variable (if unset, the SDK raises on the
# first request, not here).
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# HuggingFace summarization pipeline (default model), used to condense
# long resume texts before display.
summarizer = pipeline("summarization")
def groq_chat_completion(prompt):
    """Send *prompt* as a single user message to Groq's llama3-8b-8192
    model and return the assistant's reply text."""
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-8b-8192",
    )
    return response.choices[0].message.content
def extract_text(file):
    """Return the plain text of an uploaded file.

    Handles the three MIME types Streamlit's file_uploader produces for
    txt / pdf / docx uploads; any other type yields "".

    Fixes vs. original: plain-text uploads that are not valid UTF-8 no
    longer raise UnicodeDecodeError (bad bytes become U+FFFD), and page /
    paragraph text is assembled with str.join instead of repeated
    string concatenation.
    """
    if file.type == "text/plain":
        # errors="replace" keeps odd encodings from crashing the app.
        return file.read().decode("utf-8", errors="replace")
    elif file.type == "application/pdf":
        reader = PdfReader(file)
        # extract_text() can return None for image-only pages.
        return "".join(page.extract_text() or "" for page in reader.pages)
    elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        doc = Document(file)
        # One trailing newline per paragraph, matching the original output.
        return "".join(para.text + "\n" for para in doc.paragraphs)
    else:
        return ""
def extract_keywords(documents):
    """Fit a TF-IDF model (English stop words removed) over *documents*.

    Returns the fitted vectorizer together with the resulting
    document-term matrix.
    """
    tfidf = TfidfVectorizer(stop_words="english")
    matrix = tfidf.fit_transform(documents)
    return tfidf, matrix
def calculate_similarity(tfidf_matrix):
    """Return the pairwise cosine-similarity matrix for the rows of
    *tfidf_matrix* (row 0 is the job description in this app)."""
    return cosine_similarity(tfidf_matrix)
def generate_summary(text):
    """Condense *text* with the HF summarization pipeline when it runs
    past 200 words; shorter text is returned unchanged."""
    if len(text.split()) <= 200:
        # Too short to be worth summarizing.
        return text
    result = summarizer(text, max_length=150, min_length=50, do_sample=False)
    return result[0]['summary_text']
# ---------------- Streamlit UI ----------------
st.title("Detail Job Creator and Resume Scanner")
st.write("Analyze resumes and match them with job descriptions.")

# Job-description input; once text is entered, show Groq's take on it.
st.subheader("Job Description")
job_description = st.text_area("Paste the job description here:", height=150)

if job_description:
    st.subheader("Groq Analysis")
    analysis = groq_chat_completion(job_description)
    st.write("Groq's analysis of the job description:")
    st.write(analysis)
# ---- Resume upload and matching against the job description ----
st.subheader("Upload Resumes")
uploaded_files = st.file_uploader(
    "Upload resume files (Text, Word, or PDF):",
    accept_multiple_files=True,
    type=["txt", "docx", "pdf"],
)

if st.button("Analyze Resumes"):
    if not uploaded_files:
        st.error("Please upload at least one resume.")
    else:
        # Keep each file paired with its extracted text. The original
        # filtered empty extractions out of the resume list but then
        # indexed uploaded_files, resumes, and the similarity row with
        # the same i — one unreadable file misaligned every name/score/
        # summary (or raised IndexError). Filtering pairs fixes that.
        extracted = [(file, extract_text(file)) for file in uploaded_files]
        valid = [(file, text) for file, text in extracted if text.strip()]
        if not valid:
            st.error("No valid text extracted from resumes. Please check your files.")
        else:
            # Document 0 is the job description, so row 0 of the
            # similarity matrix scores each resume against it.
            documents = [job_description] + [text for _, text in valid]
            vectorizer, tfidf_matrix = extract_keywords(documents)
            similarities = calculate_similarity(tfidf_matrix)

            st.subheader("Resume Match Scores and Summaries")
            for i, (file, text) in enumerate(valid):
                st.write(f"**Resume {i+1}: {file.name}**")
                st.write(f"Match Score: {similarities[0][i + 1] * 100:.2f}%")
                summary = generate_summary(text)
                st.write("**Summary:**")
                st.write(summary)
                st.write("---")