File size: 5,908 Bytes
0a71b3d
 
 
 
 
2f91b19
 
a171a7d
2f91b19
 
 
0a71b3d
 
2f91b19
0a71b3d
2f91b19
 
 
 
0a71b3d
2f91b19
0a71b3d
 
 
2f91b19
0a71b3d
 
 
2f91b19
 
 
 
 
 
0a71b3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f91b19
0a71b3d
 
 
 
 
2f91b19
0a71b3d
 
 
 
2f91b19
0a71b3d
 
 
2f91b19
0a71b3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f91b19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a71b3d
 
 
6a143de
0a71b3d
6a143de
0a71b3d
6a143de
0a71b3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f91b19
 
 
0a71b3d
 
 
 
 
2f91b19
 
 
0a71b3d
 
 
 
 
 
 
 
 
 
 
 
6a143de
 
 
 
0a71b3d
 
2f91b19
dae31e3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import os
import json
import streamlit as st
from PyPDF2 import PdfReader
import docx
from langchain import LLMChain
from langchain.prompts import PromptTemplate
from langsmith import Client
from typing import List, Dict
import langchain
from pydantic import BaseModel

# Initialize LangSmith Client for monitoring.
# SECURITY FIX: the API key was previously hard-coded in source control.
# Read it from the environment instead; set LANGSMITH_API_KEY before running.
client = Client(api_key=os.environ.get("LANGSMITH_API_KEY", ""))

# Load Local LLM model (For demo, we'll use a dummy function)
def load_local_llm():
    """Return a stand-in callable that echoes the prompt.

    Swap this out for a real local model (GPT4All, a HuggingFace
    pipeline, ...) without changing any caller.
    """
    def _dummy_llm(prompt: str) -> str:
        return f"Dummy LLM response for: {prompt}"

    return _dummy_llm

llm = load_local_llm()

# LangChain setup for monitoring
class LLMMonitor:
    """Wraps local-LLM calls and logs each run for observability."""

    def __init__(self, name: str):
        # Label under which this monitor's runs are reported.
        self.name = name

    def call(self, prompt: str):
        """Run *prompt* through the module-level LLM and log input/output."""
        output = llm(prompt)  # Execute the LLM locally

        # Dummy LangSmith logging: echo to the console for now.
        print(f"Logging to LangSmith: {self.name} - Input: {prompt} | Output: {output}")

        return output

# Resume Reader Agent
def read_resume(file_path: str) -> str:
    """Extract plain text from a PDF or DOCX resume.

    Args:
        file_path: Path to the resume file on disk.

    Returns:
        The concatenated text content of the document.

    Raises:
        ValueError: If the file is neither a PDF nor a DOCX.
    """
    # Compare case-insensitively so ".PDF" / ".Docx" uploads are accepted too.
    lower_path = file_path.lower()
    if lower_path.endswith('.pdf'):
        reader = PdfReader(file_path)
        # extract_text() may return None for pages with no text layer;
        # substitute '' so join() doesn't raise TypeError.
        text = ''.join(page.extract_text() or '' for page in reader.pages)
    elif lower_path.endswith('.docx'):
        doc = docx.Document(file_path)
        text = '\n'.join(para.text for para in doc.paragraphs)
    else:
        raise ValueError("Unsupported file type. Please upload a PDF or DOCX.")
    return text

# Entity Extraction Agent
class EntityExtractionAgent:
    """Pulls structured candidate data out of raw resume text."""

    def __init__(self):
        self.monitor = LLMMonitor("EntityExtractor")

    def extract_entities(self, resume_text: str) -> Dict:
        """Ask the LLM for personal info, education, experience and skills."""
        prompt = (
            "Extract personal info, education, experience, and skills "
            f"from this resume:\n\n{resume_text}"
        )
        response = self.monitor.call(prompt)
        # Placeholder: a real implementation would parse `response` into
        # the structure below instead of returning canned data.
        return {"name": "John Doe", "education": "B.Sc in CS", "experience": "2 years at XYZ", "skills": ["Python", "SQL"]}

# Entity Validation Agent
class EntityValidationAgent:
    """Checks that extracted entities are complete before final output."""

    def __init__(self):
        self.monitor = LLMMonitor("EntityValidator")

    def validate_entities(self, entities: Dict) -> Dict:
        """Validate required fields and report a status dict.

        Returns:
            {"status": "validated", "entities": ...} on success, or
            {"status": "validation_failed", "entities": ..., "feedback": ...}
            when name or education is missing/empty.
        """
        # Use .get() so a missing key counts as invalid data instead of
        # raising KeyError (the original indexed with [] and could crash).
        if not entities.get("name") or not entities.get("education"):
            feedback = "Invalid or incomplete entity data. Please correct."
            st.write(feedback)
            return {"status": "validation_failed", "entities": entities, "feedback": feedback}
        else:
            return {"status": "validated", "entities": entities}

# Streamlit Human Feedback Loop
def human_feedback(entities: Dict):
    """Render editable fields for the entities and collect user edits.

    Returns the edited entities once "Submit Feedback" is pressed;
    otherwise returns the input unchanged.
    """
    st.write("Please review the extracted entities below:")
    edited = {
        "name": st.text_input("Name", entities["name"]),
        "education": st.text_input("Education", entities["education"]),
        "experience": st.text_area("Experience", entities["experience"]),
    }
    skills_raw = st.text_input("Skills (comma-separated)", ','.join(entities["skills"]))

    if st.button("Submit Feedback"):
        edited["skills"] = skills_raw.split(",")
        return edited
    return entities

# Human Feedback Loop after resume reading
def resume_feedback(resume_text: str):
    """Let the user review and correct the raw extracted resume text."""
    st.write("Resume Text: Please review the resume text extracted:")
    edited_text = st.text_area("Resume Text", resume_text)

    if st.button("Submit Resume Feedback"):
        st.write("Resume feedback submitted!")
    return edited_text

# Human Feedback Loop after entity extraction
def entity_extraction_feedback(entities: Dict):
    """Show the extracted entities for review; return the edited version."""
    st.write("Extracted Entities: Please review and update the extracted entities:")
    reviewed = human_feedback(entities)

    if st.button("Submit Extraction Feedback"):
        st.write("Extraction feedback submitted!")
    return reviewed

# Generate JSON Output
def generate_json_output(entities: Dict) -> str:
    """Serialize *entities* to JSON, save to disk, and return the JSON string.

    The file is written to ./resume_entities.json; the returned string is
    the same data so callers can offer it for download directly.
    """
    output_path = "resume_entities.json"
    json_data = json.dumps(entities, indent=4)  # Create JSON string from the entities dictionary
    # Explicit UTF-8 so non-ASCII names survive on platforms whose default
    # locale encoding is not UTF-8.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(json_data)
    st.write(f"Output saved as {output_path}")
    return json_data  # Return the JSON data instead of file path

# Main function to orchestrate workflow
def main():
    """Streamlit entry point: upload -> read -> extract -> validate -> export."""
    st.title("LLM-Powered Multi-Agent Resume Processor")

    uploaded = st.file_uploader("Upload Resume (PDF or DOCX)", type=["pdf", "docx"])
    if uploaded is None:
        return  # Nothing to do until a file is uploaded.

    # Persist the upload so the file-based readers can open it by path.
    saved_path = os.path.join("/tmp", uploaded.name)
    with open(saved_path, "wb") as fh:
        fh.write(uploaded.getbuffer())

    # Resume reading, then human review of the raw text.
    text = read_resume(saved_path)
    st.write("Resume successfully read!")
    text = resume_feedback(text)

    # Entity extraction, then human review of the entities.
    entities = EntityExtractionAgent().extract_entities(text)
    st.write("Entities extracted:", entities)
    entities = entity_extraction_feedback(entities)

    # Validation; on failure, fall back to manual correction.
    result = EntityValidationAgent().validate_entities(entities)
    if result["status"] == "validation_failed":
        st.write(result["feedback"])
        entities = human_feedback(result["entities"])

    # Final validated entities.
    st.write("Final entities:", entities)

    # Export to JSON and offer it as a download.
    payload = generate_json_output(entities)  # Use JSON data instead of file path
    st.download_button("Download JSON Output", data=payload, file_name="resume_entities.json")

# Streamlit app entry point (launch with: streamlit run <this file>)
if __name__ == "__main__":
    main()