Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import json | |
| import pandas as pd | |
| from pathlib import Path | |
| from typing import List, Optional, Dict | |
| from fastapi import APIRouter, UploadFile, File, HTTPException | |
| from models.schemas import ( | |
| ResumeParseOutput, | |
| TechRequest, | |
| JobRecommenderOutput, | |
| JobTitlesResponse, | |
| JobTitleRequest, | |
| JobTitleTechStackResponse | |
| ) | |
# Shared router instance; the endpoint coroutines below are expected to be
# registered on it (route decorators are not visible in this chunk).
router = APIRouter()
async def resume_parser(file: UploadFile = File(...)):
    """Accept an uploaded resume (PDF) and optional techstack string (CSV). Parse resume and extract tech tokens.

    Returns parsed text and extracted skills.

    Raises:
        HTTPException: 500 when the local parser is unavailable or parsing fails.
    """
    import os
    import sys
    import tempfile

    # Debug logging for deployment troubleshooting
    print("=== Resume Parser Debug Info ===")
    print(f"Current working directory: {os.getcwd()}")
    print(f"Python path: {sys.path[:3]}...")  # Show first 3 entries
    print(f"Files in current dir: {list(Path('./').glob('*'))[:5]}")  # Show first 5 files

    # Check if src directory exists
    src_path = Path('./src')
    if src_path.exists():
        print(f"Files in src: {list(src_path.glob('*.py'))}")
    else:
        print("β src directory not found")

    # Import pipeline/extractor lazily so this module can be imported even if deps are missing.
    try:
        from src.extract_tech import extract_from_pdf
        print("β Successfully imported extract_from_pdf")
    except Exception as e:
        print(f"β Failed to import extract_from_pdf: {e}")
        extract_from_pdf = None  # type: ignore
    if extract_from_pdf is None:
        raise HTTPException(status_code=500, detail="Local parser not available. Ensure backend/src/Parse_resume.py and src/extract_tech.py are present.")

    content = await file.read()
    # Use a real temp file instead of "./uploaded_resume_<client filename>":
    # the client-controlled filename could contain path separators (traversal)
    # or collide between concurrent uploads. Keep the extension in case the
    # parser dispatches on it.
    suffix = Path(file.filename or "resume.pdf").suffix or ".pdf"
    fd, tmp_name = tempfile.mkstemp(prefix="uploaded_resume_", suffix=suffix)
    temp_path = Path(tmp_name)
    try:
        with os.fdopen(fd, "wb") as fh:
            fh.write(content)
        skills = extract_from_pdf(str(temp_path))
    except Exception as e:
        print(f"β Error in extract_from_pdf: {e}")
        raise HTTPException(status_code=500, detail=f"Error parsing uploaded resume: {e}")
    finally:
        # Always remove the temp file — the original only unlinked on the
        # success path, leaking the file whenever extract_from_pdf raised.
        try:
            temp_path.unlink()
        except Exception:
            pass
    return ResumeParseOutput(extracted_skills=skills)
async def debug_environment():
    """Debug endpoint to check environment setup"""
    import os
    import sys

    src_dir = Path('./src')
    debug_info = {
        "cwd": os.getcwd(),
        "python_version": sys.version,
        "python_path_count": len(sys.path),
        "python_path_first_5": sys.path[:5],
        "files_in_cwd": list(Path('./').glob('*'))[:10],
        "src_exists": src_dir.exists(),
        "src_files": list(src_dir.glob('*.py'))[:10] if src_dir.exists() else [],
    }

    # Each probe performs the same `from ... import ...` the real endpoints
    # rely on, so a failure here mirrors a failure there.
    def _probe_skills():
        from src.extract_tech import extract_skills_from_text  # noqa: F401

    def _probe_pdf():
        from src.extract_tech import extract_from_pdf  # noqa: F401

    def _probe_parse():
        from src.Parse_resume import parse_document_hybrid  # noqa: F401

    import_status = {}
    for label, probe in (
        ("extract_skills_from_text", _probe_skills),
        ("extract_from_pdf", _probe_pdf),
        ("parse_document_hybrid", _probe_parse),
    ):
        try:
            probe()
            import_status[label] = "β Success"
        except Exception as e:
            import_status[label] = f"β Failed: {e}"

    debug_info["import_status"] = import_status
    return debug_info
async def job_recommender(req: TechRequest):
    """Given a tech stack list like ['python','aws','docker'] return top-K job matches from FAISS."""
    # Lazy import keeps this module importable even when pipeline deps are missing.
    try:
        from src.pipeline_tech_match import pipeline_match_from_tech
    except Exception:
        pipeline_match_from_tech = None  # type: ignore
    if pipeline_match_from_tech is None:
        raise HTTPException(status_code=500, detail="Pipeline not available. Ensure backend/src/pipeline_tech_match.py is present.")
    try:
        # Drop empty/whitespace-only entries before building the query string.
        cleaned_tokens = [token.strip() for token in req.tech if token and token.strip()]
        result = pipeline_match_from_tech(", ".join(cleaned_tokens))
        return JobRecommenderOutput(**result)
    except FileNotFoundError as e:
        raise HTTPException(status_code=500, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error running pipeline: {e}")
async def list_job_titles():
    """List all available job titles from the FAISS metadata.

    Returns:
        JobTitlesResponse wrapping the parsed metadata JSON.

    Raises:
        HTTPException: 500 when the metadata file is missing or unreadable.
    """
    try:
        metadata_path = Path(__file__).resolve().parent.parent / "Vector_db" / "faiss_metadata.json"
        if not metadata_path.exists():
            raise HTTPException(status_code=500, detail="FAISS metadata file not found")
        with open(metadata_path, 'r', encoding='utf-8') as f:
            metadata = json.load(f)
        return JobTitlesResponse(titles=metadata)
    except HTTPException:
        # Bug fix: HTTPException is an Exception subclass, so the generic
        # handler below used to swallow the deliberate "file not found"
        # response and re-wrap its detail. Re-raise unchanged instead,
        # matching get_job_title_techstack's handling.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error reading job titles: {e}")
async def get_job_title_techstack(request: JobTitleRequest):
    """Get the tech stack for a specific job title.

    Returns:
        JobTitleTechStackResponse with the canonical title and its skill list.

    Raises:
        HTTPException: 404 when the title is unknown, 500 on data errors.
    """
    try:
        # Load the preprocessed dataset
        csv_path = Path(__file__).resolve().parent.parent / "Dataset" / "job_dataset_merged_preprocessed.csv"
        if not csv_path.exists():
            raise HTTPException(status_code=500, detail="Preprocessed dataset file not found")
        df = pd.read_csv(csv_path)
        # Find the job title (case-insensitive match)
        mask = df['Title'].str.lower() == request.title.lower()
        matches = df[mask]
        if matches.empty:
            raise HTTPException(status_code=404, detail=f"Job title not found: {request.title}")
        # Get the first matching row's tech stack
        row = matches.iloc[0]
        tech_stack = row['tech_stack']
        # Robustness fix: read_csv yields float NaN for an empty cell, and the
        # original crashed on NaN.split(',') (-> 500). Treat any non-string
        # value as "no skills recorded".
        if isinstance(tech_stack, str):
            tech_list = [skill.strip() for skill in tech_stack.split(',') if skill.strip()]
        else:
            tech_list = []
        return JobTitleTechStackResponse(
            title=row['Title'],
            tech_stack=tech_list
        )
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error retrieving tech stack: {e}")