Spaces:
Runtime error
Runtime error
| from dotenv import load_dotenv | |
| import io | |
| import streamlit as st | |
| import streamlit.components.v1 as components | |
| import base64 | |
| from langchain.prompts import PromptTemplate | |
| from langchain_core.output_parsers import PydanticOutputParser | |
| from langchain_anthropic import ChatAnthropic | |
| from langchain_openai import ChatOpenAI | |
| from langchain_groq import ChatGroq | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
| from langchain_core.exceptions import OutputParserException | |
| from pydantic import ValidationError | |
| from langchain_core.pydantic_v1 import BaseModel, Field | |
| from resume_template import Resume | |
| from json import JSONDecodeError | |
| import PyPDF2 | |
| import json | |
| import time | |
| import os | |
# Enable LangSmith tracing for all LangChain runs in this process.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
# Group traces under the 'Resume_Project' LangSmith project.
os.environ['LANGCHAIN_PROJECT'] = 'Resume_Project'
# Load environment variables from a local .env file — presumably the
# provider API keys the chat-model constructors below read; verify .env contents.
load_dotenv()
# Registry of selectable chat models, keyed by the display name shown in the UI.
# NOTE: every client is instantiated eagerly at import time, so each provider's
# credentials must already be present in the environment (see load_dotenv() above).
llm_dict = {
    "GPT 3.5 turbo": ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-0125"),
    "GPT 4o": ChatOpenAI(temperature=0, model_name="gpt-4o"),
    "Anthropic 3.5 Sonnet": ChatAnthropic(model="claude-3-5-sonnet-20240620"),
    "Llama 3 8b": ChatGroq(model_name="llama3-8b-8192"),
    "Llama 3 70b": ChatGroq(model_name="llama3-70b-8192"),
    "Gemma 7b": ChatGroq(model_name="gemma-7b-it"),
    "Mixtral 8x7b": ChatGroq(model_name="mixtral-8x7b-32768"),
    "Gemini 1.5 Pro": ChatGoogleGenerativeAI(model="gemini-1.5-pro"),
    "Gemini 1.5 Flash": ChatGoogleGenerativeAI(model="gemini-1.5-flash"),
}
def pdf_to_string(file):
    """
    Convert a PDF file to a string.

    Parameters:
    file (io.BytesIO): A file-like object representing the PDF file.
        The handle is closed before returning.

    Returns:
    str: The extracted text from all pages of the PDF, concatenated.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # extract_text() may return None for pages without a text layer;
    # coalesce to "" so concatenation never raises TypeError. Joining
    # once also avoids quadratic string += in the loop.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    # Preserve the original contract: this function closes the caller's handle.
    file.close()
    return text
class CustomOutputParserException(Exception):
    """Raised when LLM output cannot be parsed into the expected structure."""
def extract_resume_fields(full_text, model):
    """
    Analyze a resume text and extract structured information using a specified
    language model.

    Parameters:
    full_text (str): The text content of the resume.
    model (str): Display name of the language model to use (a key of
        llm_dict; unknown names fall back to an OpenAI model id).

    Returns:
    Resume: The structured resume object produced by the parser.

    Raises:
    CustomOutputParserException, OutputParserException, ValidationError,
    JSONDecodeError: re-raised when the final retry attempt also fails.
    """
    # The Resume object is imported from the local resume_template file
    with open("prompts/resume_extraction.prompt", "r") as f:
        template = f.read()

    parser = PydanticOutputParser(pydantic_object=Resume)

    prompt_template = PromptTemplate(
        template=template,
        input_variables=["resume"],
        partial_variables={"response_template": parser.get_format_instructions()},
    )
    # Unknown model names are treated as OpenAI model ids.
    llm = llm_dict.get(model, ChatOpenAI(temperature=0, model=model))
    chain = prompt_template | llm | parser

    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            output = chain.invoke(full_text)
            print(output)
            return output
        # Include OutputParserException and JSONDecodeError (imported at the
        # top of the file for exactly this purpose): malformed LLM output
        # raises these, not just ValidationError, and must also be retried.
        except (CustomOutputParserException, OutputParserException,
                ValidationError, JSONDecodeError) as e:
            if attempt == max_attempts:
                raise e
            print(f"Parsing error occurred. Retrying (attempt {attempt + 1}/{max_attempts})...")
    return None
def display_extracted_fields(obj, section_title=None, indent=0):
    """
    Recursively render the fields of a pydantic model in the Streamlit UI.

    Parameters:
    obj (BaseModel): Pydantic object to render; iterating it yields
        (field_name, field_value) pairs.
    section_title (str | None): Optional subheader shown above the fields.
    indent (int): Nesting depth, rendered as leading spaces.
    """
    # Set lookup instead of scanning an inline list for every field; these are
    # the top-level resume sections that get bold headings.
    section_fields = {
        "personal_details", "education", "work_experience", "projects",
        "skills", "certifications", "publications", "awards",
        "additional_sections",
    }
    if section_title:
        st.subheader(section_title)
    for field_name, field_value in obj:
        # Human-readable label, e.g. "work_experience" -> "Work Experience".
        label = field_name.replace('_', ' ').title()
        if field_name in section_fields:
            st.write(" " * indent + f"**{label}**:")
            if isinstance(field_value, BaseModel):
                display_extracted_fields(field_value, None, indent + 1)
            elif isinstance(field_value, list):
                for item in field_value:
                    if isinstance(item, BaseModel):
                        display_extracted_fields(item, None, indent + 1)
                    else:
                        st.write(" " * (indent + 1) + "- " + str(item))
            else:
                st.write(" " * (indent + 1) + str(field_value))
        else:
            st.write(" " * indent + f"{label}: " + str(field_value))
def get_json_download_link(json_str, download_name):
    """
    Build an HTML anchor that downloads the given JSON content as a file.

    Parameters:
    json_str (str): A JSON document as a string.
    download_name (str): Base name (without extension) of the downloaded file.

    Returns:
    str: An <a> element whose href is a base64 data URI of the JSON.
    """
    # Round-trip through json to normalize the document to 4-space indentation.
    pretty = json.dumps(json.loads(json_str), indent=4)
    encoded = base64.b64encode(pretty.encode()).decode()
    return (
        f'<a href="data:file/json;base64,{encoded}" '
        f'download="{download_name}.json">Click here to download the JSON file</a>'
    )