Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import os | |
| from groq import Groq | |
| import numpy as np | |
| import re | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from docx import Document | |
| from PyPDF2 import PdfReader | |
| from transformers import pipeline | |
# Groq client for LLM chat completions. Reads the key from the
# GROQ_API_KEY environment variable (if unset, the SDK raises on the
# first request, not here).
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# HuggingFace summarization pipeline (default model), used to condense
# long resume texts before display.
summarizer = pipeline("summarization")
def groq_chat_completion(prompt):
    """Send *prompt* as a single user message to Groq's llama3-8b-8192
    model and return the assistant's reply text."""
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-8b-8192",
    )
    return response.choices[0].message.content
def extract_text(file):
    """Return the plain text of an uploaded file.

    Handles the three MIME types Streamlit's file_uploader produces for
    txt / pdf / docx uploads; any other type yields "".

    Fixes vs. original: plain-text uploads that are not valid UTF-8 no
    longer raise UnicodeDecodeError (bad bytes become U+FFFD), and page /
    paragraph text is assembled with str.join instead of repeated
    string concatenation.
    """
    if file.type == "text/plain":
        # errors="replace" keeps odd encodings from crashing the app.
        return file.read().decode("utf-8", errors="replace")
    elif file.type == "application/pdf":
        reader = PdfReader(file)
        # extract_text() can return None for image-only pages.
        return "".join(page.extract_text() or "" for page in reader.pages)
    elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        doc = Document(file)
        # One trailing newline per paragraph, matching the original output.
        return "".join(para.text + "\n" for para in doc.paragraphs)
    else:
        return ""
def extract_keywords(documents):
    """Fit a TF-IDF model (English stop words removed) over *documents*.

    Returns the fitted vectorizer together with the resulting
    document-term matrix.
    """
    tfidf = TfidfVectorizer(stop_words="english")
    matrix = tfidf.fit_transform(documents)
    return tfidf, matrix
def calculate_similarity(tfidf_matrix):
    """Return the pairwise cosine-similarity matrix for the rows of
    *tfidf_matrix* (row 0 is the job description in this app)."""
    return cosine_similarity(tfidf_matrix)
def generate_summary(text):
    """Condense *text* with the HF summarization pipeline when it runs
    past 200 words; shorter text is returned unchanged."""
    if len(text.split()) <= 200:
        # Too short to be worth summarizing.
        return text
    result = summarizer(text, max_length=150, min_length=50, do_sample=False)
    return result[0]['summary_text']
# ---------------- Streamlit UI ----------------
st.title("Detail Job Creator and Resume Scanner")
st.write("Analyze resumes and match them with job descriptions.")

# Job-description input; once text is entered, show Groq's take on it.
st.subheader("Job Description")
job_description = st.text_area("Paste the job description here:", height=150)

if job_description:
    st.subheader("Groq Analysis")
    analysis = groq_chat_completion(job_description)
    st.write("Groq's analysis of the job description:")
    st.write(analysis)
# ---- Resume upload and matching against the job description ----
st.subheader("Upload Resumes")
uploaded_files = st.file_uploader(
    "Upload resume files (Text, Word, or PDF):",
    accept_multiple_files=True,
    type=["txt", "docx", "pdf"],
)

if st.button("Analyze Resumes"):
    if not uploaded_files:
        st.error("Please upload at least one resume.")
    else:
        # Keep each file paired with its extracted text. The original
        # filtered empty extractions out of the resume list but then
        # indexed uploaded_files, resumes, and the similarity row with
        # the same i — one unreadable file misaligned every name/score/
        # summary (or raised IndexError). Filtering pairs fixes that.
        extracted = [(file, extract_text(file)) for file in uploaded_files]
        valid = [(file, text) for file, text in extracted if text.strip()]
        if not valid:
            st.error("No valid text extracted from resumes. Please check your files.")
        else:
            # Document 0 is the job description, so row 0 of the
            # similarity matrix scores each resume against it.
            documents = [job_description] + [text for _, text in valid]
            vectorizer, tfidf_matrix = extract_keywords(documents)
            similarities = calculate_similarity(tfidf_matrix)

            st.subheader("Resume Match Scores and Summaries")
            for i, (file, text) in enumerate(valid):
                st.write(f"**Resume {i+1}: {file.name}**")
                st.write(f"Match Score: {similarities[0][i + 1] * 100:.2f}%")
                summary = generate_summary(text)
                st.write("**Summary:**")
                st.write(summary)
                st.write("---")