Spaces:
Build error
Build error
| import os | |
| import shutil | |
| from dedoc import DedocManager | |
| from langchain.chat_models import ChatOpenAI | |
| from langchain.prompts import PromptTemplate | |
| from langchain_core.output_parsers import JsonOutputParser | |
| from ResumeStructure import ResumeStructure | |
| from fastapi import UploadFile | |
| from prompt_template import template_format_instructions, template | |
| from typing import List | |
# Directory where uploaded files are written while they are being parsed.
# NOTE(review): this is an absolute path at the filesystem root, and the
# directory-creation code below is commented out -- presumably the directory
# is provisioned by the deployment environment; confirm.
TEMP_DIR = "/temp_files"
# if not os.path.exists(TEMP_DIR):
# os.makedirs(TEMP_DIR)
async def process_file_with_dedoc(file: UploadFile):
    """
    Process the file using Dedoc and return the output data.

    The upload is validated by extension first; only supported files are
    written to TEMP_DIR, parsed, and then removed again.

    Args:
    - file: The UploadFile object to be processed.

    Returns:
    - The parsed document as a plain dict (Dedoc's API schema dumped via
      ``model_dump()``), or None if the file extension is not supported.

    Raises:
    - Whatever ``open``/``shutil.copyfileobj``/``DedocManager.parse`` raise;
      the temporary file is removed in that case as well.
    """
    supported_formats = {'jpg', 'jpeg', 'png', 'docx', 'pdf', 'html', 'doc'}
    print(f"Processing file '{file.filename}'...")

    # The filename is client-supplied: keep only its final path component so a
    # name such as "../../etc/passwd" cannot escape TEMP_DIR. Backslashes are
    # normalised first so Windows-style separators are stripped as well.
    raw_name = file.filename or ""
    safe_name = os.path.basename(raw_name.replace("\\", "/"))

    # Extension without the leading dot, lower-cased.
    file_extension = os.path.splitext(safe_name)[1][1:].lower()

    # Reject unsupported formats before anything is written to disk, so no
    # stray temporary file is left behind for rejected uploads.
    if file_extension not in supported_formats:
        print(f"Cannot process file '{file.filename}'. Unsupported file format.")
        return None

    # Make sure the temporary directory exists before writing into it.
    os.makedirs(TEMP_DIR, exist_ok=True)
    file_path = os.path.join(TEMP_DIR, safe_name)

    try:
        # Save the uploaded file to the temporary directory.
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # Process the file using Dedoc.
        manager = DedocManager()
        output = manager.parse(file_path)
        return output.to_api_schema().model_dump()
    finally:
        # Always remove the temporary file, even when parsing fails. The file
        # may not exist if opening it for writing failed, which is fine.
        try:
            os.remove(file_path)
        except FileNotFoundError:
            pass
async def extract_text_from_all_levels(data):
    """
    Collect the text of every nested subparagraph in a parsed document.

    Args:
    - data: The JSON-like dict; ``data['content']['structure']`` is read.

    Returns:
    - The concatenated text of all subparagraph levels, or an empty string
      when the structure has no 'subparagraphs' key.
    """
    structure = data['content']['structure']

    # Nothing to walk: the document root carries no subparagraphs.
    if 'subparagraphs' not in structure:
        return ""

    return await extract_text_from_subparagraphs(structure['subparagraphs'])
async def extract_text_from_subparagraphs(subparagraphs):
    """
    Walk a list of subparagraphs depth-first and gather their text.

    Args:
    - subparagraphs: A list of dicts, each with a 'text' entry and optionally
      a nested 'subparagraphs' list.

    Returns:
    - A string with each paragraph's text followed by a newline, where a
      paragraph's nested subparagraphs come directly after it.
    """
    pieces = []
    for node in subparagraphs:
        # The node's own text comes first, terminated by a newline.
        pieces.append(node['text'] + "\n")
        # Then the text of its descendants, if it has any.
        if 'subparagraphs' in node:
            nested_text = await extract_text_from_subparagraphs(node['subparagraphs'])
            pieces.append(nested_text)
    return "".join(pieces)
def generate_formatted_resume(resume, chat_llm):
    """
    Send the resume text through the formatting prompt and return the reply.

    Args:
    - resume: The resume text substituted for the prompt's "text" variable.
    - chat_llm: The chat model the prompt is piped into.

    Returns:
    - The ``content`` attribute of the model's response.
    """
    formatting_prompt = PromptTemplate(template=template, input_variables=["text"])
    # Compose prompt -> model, then run it once on the resume text.
    response = (formatting_prompt | chat_llm).invoke({"text": resume})
    return response.content
def generate_json_structured_resume(resume, chat_llm):
    """
    Ask the model for a JSON rendering of the resume and return the parsed result.

    Args:
    - resume: The resume text substituted for the prompt's "text" variable.
    - chat_llm: The chat model the prompt is piped into.

    Returns:
    - The output of the JSON parser (configured with ResumeStructure) applied
      to the model's response.
    """
    json_parser = JsonOutputParser(pydantic_object=ResumeStructure)
    # The parser supplies the format instructions that are baked into the prompt.
    format_instructions = json_parser.get_format_instructions()
    structured_prompt = PromptTemplate(
        template=template_format_instructions,
        input_variables=["text"],
        partial_variables={"format_instructions": format_instructions},
    )
    # Compose prompt -> model -> parser and run it once on the resume text.
    pipeline = structured_prompt | chat_llm | json_parser
    return pipeline.invoke({"text": resume})
def delete_files_in_directory(directory):
    """
    Remove every regular file directly inside the given directory.

    Subdirectories (and anything else that is not a regular file) are left
    untouched. A message is printed for each deleted file, and also when the
    directory does not exist.

    Args:
        directory (str): The path to the directory whose files are deleted.

    Returns:
        None
    """
    # Nothing to do when the directory is missing; report and bail out.
    if not os.path.exists(directory):
        print(f"Directory '{directory}' does not exist.")
        return

    for entry_name in os.listdir(directory):
        candidate = os.path.join(directory, entry_name)
        # Skip subdirectories and other non-file entries.
        if not os.path.isfile(candidate):
            continue
        os.remove(candidate)
        print(f"Deleted file: {candidate}")