# resume_parse / app.py
# Author: Akshayram1
# Last commit: "Update app.py" (2f91b19, verified)
import os
import json
import streamlit as st
from PyPDF2 import PdfReader
import docx
from langchain import LLMChain
from langchain.prompts import PromptTemplate
from langsmith import Client
from typing import List, Dict
import langchain
from pydantic import BaseModel
# Initialize LangSmith client for run monitoring.
# SECURITY: never hard-code API keys in source — the previously committed key
# is exposed in version history and should be rotated. Read from the
# environment instead (unset means anonymous/disabled monitoring).
client = Client(api_key=os.environ.get("LANGSMITH_API_KEY"))
# Load a local LLM (placeholder — swap in GPT4All or a HuggingFace model here).
def load_local_llm():
    """Return a callable standing in for a locally hosted LLM.

    The stand-in simply echoes the prompt back, which lets the whole
    pipeline be exercised without a real model installed.
    """
    def _dummy_model(prompt):
        return f"Dummy LLM response for: {prompt}"

    return _dummy_model


llm = load_local_llm()
# LangChain-style monitoring wrapper around the local LLM.
class LLMMonitor:
    """Wrap LLM invocations and log each prompt/response pair.

    Real LangSmith run logging is stubbed out with a console print.
    """

    def __init__(self, name: str):
        # Label identifying this agent's runs in the logs.
        self.name = name

    def call(self, prompt: str):
        """Run *prompt* through the module-level `llm` and log the exchange."""
        output = llm(prompt)  # Execute the LLM locally
        # Dummy logging to console in place of a real LangSmith run record.
        print(f"Logging to LangSmith: {self.name} - Input: {prompt} | Output: {output}")
        return output
# Resume Reader Agent
def read_resume(file_path: str) -> str:
    """Extract plain text from a PDF or DOCX resume.

    Args:
        file_path: Path to the resume file on disk.

    Returns:
        The concatenated text content of the document.

    Raises:
        ValueError: If the file is neither a .pdf nor a .docx.
    """
    # Case-insensitive extension check so ".PDF"/".DOCX" uploads also work.
    lower_path = file_path.lower()
    if lower_path.endswith('.pdf'):
        reader = PdfReader(file_path)
        # extract_text() may return None for image-only pages; coerce to ''
        # so the join does not raise TypeError.
        text = ''.join([(page.extract_text() or '') for page in reader.pages])
    elif lower_path.endswith('.docx'):
        doc = docx.Document(file_path)
        text = '\n'.join([para.text for para in doc.paragraphs])
    else:
        raise ValueError("Unsupported file type. Please upload a PDF or DOCX.")
    return text
# Entity Extraction Agent
class EntityExtractionAgent:
    """Pull structured entities (name, education, experience, skills)
    out of raw resume text via the monitored LLM."""

    def __init__(self):
        # Monitored LLM channel dedicated to extraction prompts.
        self.monitor = LLMMonitor("EntityExtractor")

    def extract_entities(self, resume_text: str) -> Dict:
        """Prompt the LLM for entities; currently returns fixed demo data."""
        prompt = f"Extract personal info, education, experience, and skills from this resume:\n\n{resume_text}"
        self.monitor.call(prompt)
        # Demo stub: a fixed structured record regardless of the LLM reply.
        return {
            "name": "John Doe",
            "education": "B.Sc in CS",
            "experience": "2 years at XYZ",
            "skills": ["Python", "SQL"],
        }
# Entity Validation Agent
class EntityValidationAgent:
    """Validate extracted entities and surface problems in the UI."""

    def __init__(self):
        # Monitored LLM channel reserved for validation (unused in the demo).
        self.monitor = LLMMonitor("EntityValidator")

    def validate_entities(self, entities: Dict) -> Dict:
        """Check that the required fields are present and non-empty.

        Args:
            entities: Mapping expected to carry "name" and "education" keys.

        Returns:
            A dict with "status" ("validated" or "validation_failed"),
            the entities, and a "feedback" message on failure.
        """
        # .get() avoids a KeyError when a required field is missing entirely,
        # not just empty — the original subscript access would crash instead
        # of reporting a validation failure.
        if not entities.get("name") or not entities.get("education"):
            feedback = "Invalid or incomplete entity data. Please correct."
            st.write(feedback)
            return {"status": "validation_failed", "entities": entities, "feedback": feedback}
        return {"status": "validated", "entities": entities}
# Streamlit Human Feedback Loop
def human_feedback(entities: Dict):
    """Render editable widgets for each entity field.

    Returns the corrected dict once the user presses Submit; until then
    the original entities are handed back unchanged.
    """
    st.write("Please review the extracted entities below:")
    edited_name = st.text_input("Name", entities["name"])
    edited_education = st.text_input("Education", entities["education"])
    edited_experience = st.text_area("Experience", entities["experience"])
    edited_skills = st.text_input("Skills (comma-separated)", ','.join(entities["skills"]))
    if st.button("Submit Feedback"):
        return {
            "name": edited_name,
            "education": edited_education,
            "experience": edited_experience,
            "skills": edited_skills.split(","),
        }
    # No submit yet on this rerun: keep the incoming values.
    return entities
# Human feedback loop after resume reading
def resume_feedback(resume_text: str):
    """Show the extracted resume text for review and return the edited copy."""
    st.write("Resume Text: Please review the resume text extracted:")
    edited_text = st.text_area("Resume Text", resume_text)
    if st.button("Submit Resume Feedback"):
        st.write("Resume feedback submitted!")
    # The text-area value is returned whether or not the button was pressed.
    return edited_text
# Human feedback loop after entity extraction
def entity_extraction_feedback(entities: Dict):
    """Wrap human_feedback with an explicit submit confirmation message."""
    st.write("Extracted Entities: Please review and update the extracted entities:")
    reviewed = human_feedback(entities)
    if st.button("Submit Extraction Feedback"):
        st.write("Extraction feedback submitted!")
    return reviewed
# Generate JSON Output
def generate_json_output(entities: Dict, output_path: str = "resume_entities.json") -> str:
    """Serialize entities to JSON, save to disk, and return the JSON string.

    Args:
        entities: The validated entity dictionary.
        output_path: Destination file; the default preserves the original
            hard-coded behavior for existing callers.

    Returns:
        The JSON-encoded string (suitable for st.download_button).
    """
    json_data = json.dumps(entities, indent=4)
    # Explicit encoding keeps output byte-stable across platforms.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(json_data)
    st.write(f"Output saved as {output_path}")
    return json_data  # Return the JSON data instead of a file path
# Main function to orchestrate workflow
def main():
    """Streamlit entry point: upload -> read -> extract -> validate -> export."""
    st.title("LLM-Powered Multi-Agent Resume Processor")
    uploaded_file = st.file_uploader("Upload Resume (PDF or DOCX)", type=["pdf", "docx"])
    if uploaded_file is None:
        return

    # Persist the upload so the file-based readers can open it.
    saved_path = os.path.join("/tmp", uploaded_file.name)
    with open(saved_path, "wb") as handle:
        handle.write(uploaded_file.getbuffer())

    # Step 1: read the raw text, then let the user correct it.
    resume_text = read_resume(saved_path)
    st.write("Resume successfully read!")
    resume_text = resume_feedback(resume_text)

    # Step 2: entity extraction plus human review.
    entities = EntityExtractionAgent().extract_entities(resume_text)
    st.write("Entities extracted:", entities)
    entities = entity_extraction_feedback(entities)

    # Step 3: validation, with a manual correction loop on failure.
    validation_result = EntityValidationAgent().validate_entities(entities)
    if validation_result["status"] == "validation_failed":
        st.write(validation_result["feedback"])
        entities = human_feedback(validation_result["entities"])

    st.write("Final entities:", entities)

    # Step 4: export to JSON and offer it for download.
    json_data = generate_json_output(entities)
    st.download_button("Download JSON Output", data=json_data, file_name="resume_entities.json")


# Streamlit app entry point
if __name__ == "__main__":
    main()