# Rs_mini_project / recommender.py
# Uploaded by Harshilforworks ("Upload 3 files", commit 9863bc3, verified).
from __future__ import annotations
import json
import pandas as pd
from pathlib import Path
from typing import List, Optional, Dict
from fastapi import APIRouter, UploadFile, File, HTTPException
from models.schemas import (
ResumeParseOutput,
TechRequest,
JobRecommenderOutput,
JobTitlesResponse,
JobTitleRequest,
JobTitleTechStackResponse
)
# Single APIRouter instance holding all endpoints below; presumably mounted
# onto the FastAPI app in the application entry point — confirm against main app.
router = APIRouter()
@router.post("/resume_parser", response_model=ResumeParseOutput)
async def resume_parser(file: UploadFile = File(...)):
    """Accept an uploaded resume (PDF), parse it, and extract tech tokens.

    Returns:
        ResumeParseOutput carrying the list of extracted skills.

    Raises:
        HTTPException(500): if the local parser cannot be imported or parsing fails.
    """
    import sys
    import os
    import tempfile
    # Debug logging for deployment troubleshooting
    print("=== Resume Parser Debug Info ===")
    print(f"Current working directory: {os.getcwd()}")
    print(f"Python path: {sys.path[:3]}...")  # Show first 3 entries
    print(f"Files in current dir: {list(Path('./').glob('*'))[:5]}")  # Show first 5 files
    # Check if src directory exists
    src_path = Path('./src')
    if src_path.exists():
        print(f"Files in src: {list(src_path.glob('*.py'))}")
    else:
        print("❌ src directory not found")
    # import pipeline/extractor lazily so this module can be imported even if deps are missing
    try:
        from src.extract_tech import extract_from_pdf
        print("βœ… Successfully imported extract_from_pdf")
    except Exception as e:
        print(f"❌ Failed to import extract_from_pdf: {e}")
        extract_from_pdf = None  # type: ignore
    if extract_from_pdf is None:
        raise HTTPException(status_code=500, detail="Local parser not available. Ensure backend/src/Parse_resume.py and src/extract_tech.py are present.")
    content = await file.read()
    # Write the upload to a secure temp file instead of CWD: the original embedded
    # the user-controlled file.filename in a path (traversal/collision risk) and
    # leaked the file whenever extract_from_pdf raised.
    tmp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    try:
        tmp.write(content)
        tmp.close()
        skills = extract_from_pdf(tmp.name)
    except Exception as e:
        print(f"❌ Error in extract_from_pdf: {e}")
        raise HTTPException(status_code=500, detail=f"Error parsing uploaded resume: {e}")
    finally:
        # Always remove the temp file, even on the error path.
        try:
            os.unlink(tmp.name)
        except OSError:
            pass
    return ResumeParseOutput(extracted_skills=skills)
@router.get("/debug_environment")
async def debug_environment():
    """Debug endpoint to check environment setup"""
    import sys
    import os

    src_dir = Path('./src')
    # Snapshot of the runtime environment for deployment troubleshooting.
    env_report = {
        "cwd": os.getcwd(),
        "python_version": sys.version,
        "python_path_count": len(sys.path),
        "python_path_first_5": sys.path[:5],
        "files_in_cwd": list(Path('./').glob('*'))[:10],
        "src_exists": src_dir.exists(),
        "src_files": list(src_dir.glob('*.py'))[:10] if src_dir.exists() else [],
    }

    # Probe each parser import separately so one failure doesn't mask the others.
    statuses: Dict[str, str] = {}
    try:
        from src.extract_tech import extract_skills_from_text
    except Exception as exc:
        statuses["extract_skills_from_text"] = f"❌ Failed: {exc}"
    else:
        statuses["extract_skills_from_text"] = "βœ… Success"
    try:
        from src.extract_tech import extract_from_pdf
    except Exception as exc:
        statuses["extract_from_pdf"] = f"❌ Failed: {exc}"
    else:
        statuses["extract_from_pdf"] = "βœ… Success"
    try:
        from src.Parse_resume import parse_document_hybrid
    except Exception as exc:
        statuses["parse_document_hybrid"] = f"❌ Failed: {exc}"
    else:
        statuses["parse_document_hybrid"] = "βœ… Success"

    env_report["import_status"] = statuses
    return env_report
@router.post("/job_recommender", response_model=JobRecommenderOutput)
async def job_recommender(req: TechRequest):
    """Given a tech stack list like ['python','aws','docker'] return top-K job matches from FAISS.
    """
    # Lazy import: the matching pipeline may be absent in some deployments.
    try:
        from src.pipeline_tech_match import pipeline_match_from_tech
    except Exception:
        pipeline_match_from_tech = None  # type: ignore
    if pipeline_match_from_tech is None:
        raise HTTPException(status_code=500, detail="Pipeline not available. Ensure backend/src/pipeline_tech_match.py is present.")
    try:
        # Drop empty/blank tokens, then join into one comma-separated query string.
        cleaned_tokens = [token.strip() for token in req.tech if token and token.strip()]
        result = pipeline_match_from_tech(", ".join(cleaned_tokens))
        return JobRecommenderOutput(**result)
    except FileNotFoundError as err:
        raise HTTPException(status_code=500, detail=str(err))
    except Exception as err:
        raise HTTPException(status_code=500, detail=f"Error running pipeline: {err}")
@router.get("/list_job_titles", response_model=JobTitlesResponse)
async def list_job_titles():
    """List all available job titles from the FAISS metadata.

    Reads Vector_db/faiss_metadata.json (relative to the project root) and
    returns its parsed contents as the ``titles`` payload.

    Raises:
        HTTPException(500): if the metadata file is missing or unreadable.
    """
    try:
        metadata_path = Path(__file__).resolve().parent.parent / "Vector_db" / "faiss_metadata.json"
        if not metadata_path.exists():
            raise HTTPException(status_code=500, detail="FAISS metadata file not found")
        with open(metadata_path, 'r', encoding='utf-8') as f:
            metadata = json.load(f)
        return JobTitlesResponse(titles=metadata)
    except HTTPException:
        # Fix: re-raise the specific "file not found" HTTPException instead of
        # letting the broad handler below swallow and re-wrap it (this matches
        # the pattern used by get_job_title_techstack).
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error reading job titles: {e}")
@router.post("/job_title_techstack", response_model=JobTitleTechStackResponse)
async def get_job_title_techstack(request: JobTitleRequest):
    """Get the tech stack for a specific job title.

    Performs a case-insensitive exact match against the 'Title' column of the
    preprocessed dataset and splits the first matching row's comma-separated
    'tech_stack' cell into a list of skills.

    Raises:
        HTTPException(404): if no row matches the requested title.
        HTTPException(500): if the dataset file is missing or unreadable.
    """
    try:
        # Load the preprocessed dataset (re-read per request; consider caching
        # if this endpoint becomes hot).
        csv_path = Path(__file__).resolve().parent.parent / "Dataset" / "job_dataset_merged_preprocessed.csv"
        if not csv_path.exists():
            raise HTTPException(status_code=500, detail="Preprocessed dataset file not found")
        df = pd.read_csv(csv_path)
        # Find the job title (case-insensitive match)
        mask = df['Title'].str.lower() == request.title.lower()
        matches = df[mask]
        if matches.empty:
            raise HTTPException(status_code=404, detail=f"Job title not found: {request.title}")
        # Get the first matching row's tech stack
        row = matches.iloc[0]
        tech_stack = row['tech_stack']
        # Fix: an empty CSV cell comes back as NaN (a float), which would crash
        # .split() with an AttributeError; treat it as an empty tech stack.
        if pd.isna(tech_stack):
            tech_list: List[str] = []
        else:
            tech_list = [skill.strip() for skill in str(tech_stack).split(',') if skill.strip()]
        return JobTitleTechStackResponse(
            title=row['Title'],
            tech_stack=tech_list
        )
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error retrieving tech stack: {e}")