# resume_parse / app.py
# Author: Akshayram1
# Last commit: "Update app.py" (2f91b19, verified)
import os
import json
import streamlit as st
from PyPDF2 import PdfReader
import docx
from langchain import LLMChain
from langchain.prompts import PromptTemplate
from langsmith import Client
from typing import List, Dict
import langchain
from pydantic import BaseModel
# Initialize LangSmith client for run monitoring.
# SECURITY: never hard-code API keys in source — the previously committed key
# is exposed in version history and should be rotated. Read from the
# environment instead (unset means anonymous/disabled monitoring).
client = Client(api_key=os.environ.get("LANGSMITH_API_KEY"))
# Load a local LLM (placeholder — swap in GPT4All or a HuggingFace model here).
def load_local_llm():
    """Return a callable standing in for a locally hosted LLM.

    The stand-in simply echoes the prompt back, which lets the whole
    pipeline be exercised without a real model installed.
    """
    def _dummy_model(prompt):
        return f"Dummy LLM response for: {prompt}"

    return _dummy_model


llm = load_local_llm()
# LangChain-style monitoring wrapper around the local LLM.
class LLMMonitor:
    """Wrap LLM invocations and log each prompt/response pair.

    Real LangSmith run logging is stubbed out with a console print.
    """

    def __init__(self, name: str):
        # Label identifying this agent's runs in the logs.
        self.name = name

    def call(self, prompt: str):
        """Run *prompt* through the module-level `llm` and log the exchange."""
        output = llm(prompt)  # Execute the LLM locally
        # Dummy logging to console in place of a real LangSmith run record.
        print(f"Logging to LangSmith: {self.name} - Input: {prompt} | Output: {output}")
        return output
# Resume Reader Agent
def read_resume(file_path: str) -> str:
    """Extract plain text from a PDF or DOCX resume.

    Args:
        file_path: Path to the resume file on disk.

    Returns:
        The concatenated text content of the document.

    Raises:
        ValueError: If the file is neither a .pdf nor a .docx.
    """
    # Case-insensitive extension check so ".PDF"/".DOCX" uploads also work.
    lower_path = file_path.lower()
    if lower_path.endswith('.pdf'):
        reader = PdfReader(file_path)
        # extract_text() may return None for image-only pages; coerce to ''
        # so the join does not raise TypeError.
        text = ''.join([(page.extract_text() or '') for page in reader.pages])
    elif lower_path.endswith('.docx'):
        doc = docx.Document(file_path)
        text = '\n'.join([para.text for para in doc.paragraphs])
    else:
        raise ValueError("Unsupported file type. Please upload a PDF or DOCX.")
    return text
# Entity Extraction Agent
class EntityExtractionAgent:
    """Pull structured entities (name, education, experience, skills)
    out of raw resume text via the monitored LLM."""

    def __init__(self):
        # Monitored LLM channel dedicated to extraction prompts.
        self.monitor = LLMMonitor("EntityExtractor")

    def extract_entities(self, resume_text: str) -> Dict:
        """Prompt the LLM for entities; currently returns fixed demo data."""
        prompt = f"Extract personal info, education, experience, and skills from this resume:\n\n{resume_text}"
        self.monitor.call(prompt)
        # Demo stub: a fixed structured record regardless of the LLM reply.
        return {
            "name": "John Doe",
            "education": "B.Sc in CS",
            "experience": "2 years at XYZ",
            "skills": ["Python", "SQL"],
        }
# Entity Validation Agent
class EntityValidationAgent:
    """Validate extracted entities and surface problems in the UI."""

    def __init__(self):
        # Monitored LLM channel reserved for validation (unused in the demo).
        self.monitor = LLMMonitor("EntityValidator")

    def validate_entities(self, entities: Dict) -> Dict:
        """Check that the required fields are present and non-empty.

        Args:
            entities: Mapping expected to carry "name" and "education" keys.

        Returns:
            A dict with "status" ("validated" or "validation_failed"),
            the entities, and a "feedback" message on failure.
        """
        # .get() avoids a KeyError when a required field is missing entirely,
        # not just empty — the original subscript access would crash instead
        # of reporting a validation failure.
        if not entities.get("name") or not entities.get("education"):
            feedback = "Invalid or incomplete entity data. Please correct."
            st.write(feedback)
            return {"status": "validation_failed", "entities": entities, "feedback": feedback}
        return {"status": "validated", "entities": entities}
# Streamlit Human Feedback Loop
def human_feedback(entities: Dict):
    """Render editable widgets for each entity field.

    Returns the corrected dict once the user presses Submit; until then
    the original entities are handed back unchanged.
    """
    st.write("Please review the extracted entities below:")
    edited_name = st.text_input("Name", entities["name"])
    edited_education = st.text_input("Education", entities["education"])
    edited_experience = st.text_area("Experience", entities["experience"])
    edited_skills = st.text_input("Skills (comma-separated)", ','.join(entities["skills"]))
    if st.button("Submit Feedback"):
        return {
            "name": edited_name,
            "education": edited_education,
            "experience": edited_experience,
            "skills": edited_skills.split(","),
        }
    # No submit yet on this rerun: keep the incoming values.
    return entities
# Human feedback loop after resume reading
def resume_feedback(resume_text: str):
    """Show the extracted resume text for review and return the edited copy."""
    st.write("Resume Text: Please review the resume text extracted:")
    edited_text = st.text_area("Resume Text", resume_text)
    if st.button("Submit Resume Feedback"):
        st.write("Resume feedback submitted!")
    # The text-area value is returned whether or not the button was pressed.
    return edited_text
# Human feedback loop after entity extraction
def entity_extraction_feedback(entities: Dict):
    """Wrap human_feedback with an explicit submit confirmation message."""
    st.write("Extracted Entities: Please review and update the extracted entities:")
    reviewed = human_feedback(entities)
    if st.button("Submit Extraction Feedback"):
        st.write("Extraction feedback submitted!")
    return reviewed
# Generate JSON Output
def generate_json_output(entities: Dict, output_path: str = "resume_entities.json") -> str:
    """Serialize entities to JSON, save to disk, and return the JSON string.

    Args:
        entities: The validated entity dictionary.
        output_path: Destination file; the default preserves the original
            hard-coded behavior for existing callers.

    Returns:
        The JSON-encoded string (suitable for st.download_button).
    """
    json_data = json.dumps(entities, indent=4)
    # Explicit encoding keeps output byte-stable across platforms.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(json_data)
    st.write(f"Output saved as {output_path}")
    return json_data  # Return the JSON data instead of a file path
# Main function to orchestrate workflow
def main():
    """Streamlit entry point: upload -> read -> extract -> validate -> export."""
    st.title("LLM-Powered Multi-Agent Resume Processor")
    uploaded_file = st.file_uploader("Upload Resume (PDF or DOCX)", type=["pdf", "docx"])
    if uploaded_file is None:
        return

    # Persist the upload so the file-based readers can open it.
    saved_path = os.path.join("/tmp", uploaded_file.name)
    with open(saved_path, "wb") as handle:
        handle.write(uploaded_file.getbuffer())

    # Step 1: read the raw text, then let the user correct it.
    resume_text = read_resume(saved_path)
    st.write("Resume successfully read!")
    resume_text = resume_feedback(resume_text)

    # Step 2: entity extraction plus human review.
    entities = EntityExtractionAgent().extract_entities(resume_text)
    st.write("Entities extracted:", entities)
    entities = entity_extraction_feedback(entities)

    # Step 3: validation, with a manual correction loop on failure.
    validation_result = EntityValidationAgent().validate_entities(entities)
    if validation_result["status"] == "validation_failed":
        st.write(validation_result["feedback"])
        entities = human_feedback(validation_result["entities"])

    st.write("Final entities:", entities)

    # Step 4: export to JSON and offer it for download.
    json_data = generate_json_output(entities)
    st.download_button("Download JSON Output", data=json_data, file_name="resume_entities.json")


# Streamlit app entry point
if __name__ == "__main__":
    main()