# Spaces: Sleeping
import json
import os
from uuid import uuid4

import aiofiles
import PyPDF2
import uvicorn
from docx import Document
from fastapi import FastAPI, File, UploadFile
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse, Response
from langchain.schema import HumanMessage
from langchain_openai import ChatOpenAI
UPLOAD_FOLDER = "uploads"
# Created at import time so the upload directory exists before any request.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

app = FastAPI()

# Enable CORS for every origin (origins can be restricted later).
# Credentials stay disabled while the origin list is a wildcard: the FastAPI
# CORS documentation states that allow_origins cannot be ["*"] when
# allow_credentials is True. Switch this back to True only together with an
# explicit list of origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Shared chat-model client; the API key is read from the environment.
llm = ChatOpenAI(
    model_name="gpt-4o-mini",  # Use a valid model name like "gpt-4o" or "gpt-4-turbo"
    temperature=0,
    openai_api_key=os.getenv("OPENAI_API_KEY"),
)
def _strip_code_fence(raw: str) -> str:
    """Return *raw* without a surrounding Markdown code fence, if one is present."""
    payload = raw.strip()
    if payload.startswith("```"):
        payload = payload[3:]
        # An optional language tag ("json") may follow the opening fence.
        if payload[:4].lower() == "json":
            payload = payload[4:]
        if payload.endswith("```"):
            payload = payload[:-3]
    return payload


def parse_resume_text(text: str) -> str:
    """Ask the chat model to turn raw resume text into structured JSON.

    Args:
        text: Plain text extracted from a resume.

    Returns:
        The model's answer re-serialized as a JSON string with 2-space
        indentation (a string, not a dict).

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON after
            any surrounding code fence has been removed.
    """
    # NOTE(review): the resume text is interpolated twice (under "cv_text" and
    # again under "Resume:"), which doubles the prompt size -- confirm intended.
    prompt = (
        "\n"
        "Extract structured information from this resume text and return the result in strict JSON format with the following keys:\n"
        "- basics: {first_name, last_name, gender, emails, phone_numbers, address, total_experience_in_years, profession, summary, skills, has_driving_license}\n"
        "- educations\n"
        "- professional_experiences\n"
        "- trainings_and_certifications\n"
        "- languages\n"
        "- awards\n"
        "- references\n"
        f"- cv_text: {text}\n"
        '- cv_language: "en"\n'
        "Resume:\n"
        f"{text}\n"
        "Return ONLY valid JSON, no text, no explanation.\n"
    )
    # ``invoke`` replaces calling the model object directly, which is deprecated.
    result = llm.invoke([HumanMessage(content=prompt)])
    # Round-trip through json to validate the reply and normalize its layout.
    final_data = json.loads(_strip_code_fence(str(result.content)))
    return json.dumps(final_data, indent=2)
# Save uploaded file asynchronously
async def save_file(file: UploadFile) -> str:
    """Write an uploaded file into ``UPLOAD_FOLDER`` and return its path.

    The stored name is ``<uuid4>_<client file name>`` so repeated uploads of
    the same file do not collide.

    Args:
        file: The upload received from the client.

    Returns:
        Path of the file written on disk.
    """
    # The file name is client-controlled: keep only its last path component so
    # separators in it cannot point outside (or below) the upload folder.
    client_name = (file.filename or "").replace("\\", "/")
    safe_name = os.path.basename(client_name)
    file_path = os.path.join(UPLOAD_FOLDER, f"{uuid4()}_{safe_name}")

    chunk_size = 1024 * 1024
    async with aiofiles.open(file_path, "wb") as out_file:
        # Copy in chunks so a large upload is never held in memory at once.
        while True:
            chunk = await file.read(chunk_size)
            if not chunk:
                break
            await out_file.write(chunk)
    return file_path
# Extract text from DOCX
def extract_text_from_docx(docx_path: str) -> str:
    """Return the paragraph text of a DOCX file, one paragraph per line.

    Args:
        docx_path: Path of the document to read.

    Returns:
        All paragraph texts joined with newlines, with surrounding
        whitespace stripped.

    Raises:
        RuntimeError: If the document cannot be opened or read. The message
            keeps the "Error extracting text from DOCX: ..." wording that was
            previously returned as if it were document text.
    """
    try:
        doc = Document(docx_path)
        return "\n".join(para.text for para in doc.paragraphs).strip()
    except Exception as e:
        # Raise instead of returning the message: a returned error string is
        # indistinguishable from real resume text to the caller.
        raise RuntimeError(f"Error extracting text from DOCX: {e}") from e
# Extract text from PDF using PyPDF2
def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the text of every page of a PDF, one page per line group.

    Args:
        pdf_path: Path of the PDF to read.

    Returns:
        The text of all pages that yielded any text, joined with newlines,
        with surrounding whitespace stripped.

    Raises:
        RuntimeError: If the file cannot be opened or parsed. The message
            keeps the "Error extracting text: ..." wording that was previously
            returned as if it were document text.
    """
    try:
        with open(pdf_path, "rb") as file:
            pdf_reader = PyPDF2.PdfReader(file)
            page_texts = [page.extract_text() for page in pdf_reader.pages]
    except Exception as e:
        # Raise instead of returning the message: a returned error string is
        # indistinguishable from real resume text to the caller.
        raise RuntimeError(f"Error extracting text: {e}") from e
    # Pages without extractable text are skipped.
    return "\n".join(page_text for page_text in page_texts if page_text).strip()
# NOTE(review): no route decorator (e.g. ``@app.post(...)``) is visible on this
# handler in the reviewed text -- confirm it is registered on ``app``.
async def parse_resume(file: UploadFile = File(...)):
    """Turn an uploaded resume (PDF/DOCX/DOC) into a downloadable JSON file.

    The upload is saved, its text is extracted according to the file
    extension, the text is sent to ``parse_resume_text``, and the resulting
    JSON is returned as an attachment named ``cleaned_resume.json``.

    Args:
        file: The uploaded resume.

    Returns:
        A JSON attachment on success, a 400 JSON error for an unsupported
        extension, or a 500 JSON error carrying the exception message.
    """
    path = None
    try:
        print("π Saving file...")
        path = await save_file(file)
        print(f"β File saved at {path}")

        print("π Extracting text...")
        ext = os.path.splitext(path)[-1].lower()
        if ext == ".pdf":
            extractor = extract_text_from_pdf
        elif ext in (".docx", ".doc"):
            extractor = extract_text_from_docx
        else:
            return JSONResponse(status_code=400, content={"error": "Unsupported file type"})

        # Extraction and the model call are blocking; run them in the thread
        # pool so they do not stall the event loop for other requests.
        text = await run_in_threadpool(extractor, path)
        print("β Text extracted.")
        json_result = await run_in_threadpool(parse_resume_text, text)

        # Send the JSON straight from memory. Writing it to one shared
        # "cleaned_resume.json" on disk let concurrent requests overwrite each
        # other's result before the response was sent.
        filename = "cleaned_resume.json"
        return Response(
            content=json_result,
            media_type="application/json",
            headers={"Content-Disposition": f'attachment; filename="{filename}"'},
        )
    except Exception as e:
        import traceback

        print("β Exception occurred:\n", traceback.format_exc())
        return JSONResponse(status_code=500, content={"error": str(e)})
    finally:
        # Always discard the upload, including when extraction or the model
        # call failed.
        if path is not None:
            try:
                os.remove(path)
                print("π§Ή File removed.")
            except OSError:
                # Best-effort cleanup: the file may never have been created.
                pass
# NOTE(review): no route decorator (e.g. ``@app.get("/")``) is visible on this
# handler in the reviewed text -- confirm it is registered on ``app``.
async def root():
    """Return a static payload reporting that the service is running."""
    status = {"message": "Resume PDF Text Extractor is running π―"}
    return status
if __name__ == "__main__":
    # Script entry point: serve the app given by the "main:app" import string
    # on 0.0.0.0:7860 with auto-reload turned on.
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=7860,
        reload=True,
    )