Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import json | |
| import pandas as pd | |
| from pathlib import Path | |
| from typing import List, Optional, Dict | |
| from fastapi import APIRouter, UploadFile, File, HTTPException | |
| from models.schemas import ( | |
| ResumeParseOutput, | |
| TechRequest, | |
| JobRecommenderOutput, | |
| JobTitlesResponse, | |
| JobTitleRequest, | |
| JobTitleTechStackResponse | |
| ) | |
# Shared router instance; the endpoint coroutines below are expected to be
# registered on it (route decorators are not visible in this chunk).
router = APIRouter()
async def resume_parser(file: UploadFile = File(...)):
    """Accept an uploaded resume (PDF) and optional techstack string (CSV). Parse resume and extract tech tokens.

    Returns parsed text and extracted skills.

    Raises:
        HTTPException: 500 when the local parser is unavailable or parsing fails.
    """
    import os
    import sys
    import tempfile

    # Debug logging for deployment troubleshooting
    print("=== Resume Parser Debug Info ===")
    print(f"Current working directory: {os.getcwd()}")
    print(f"Python path: {sys.path[:3]}...")  # Show first 3 entries
    print(f"Files in current dir: {list(Path('./').glob('*'))[:5]}")  # Show first 5 files

    # Check if src directory exists
    src_path = Path('./src')
    if src_path.exists():
        print(f"Files in src: {list(src_path.glob('*.py'))}")
    else:
        print("β src directory not found")

    # Import pipeline/extractor lazily so this module can be imported even if deps are missing.
    try:
        from src.extract_tech import extract_from_pdf
        print("β Successfully imported extract_from_pdf")
    except Exception as e:
        print(f"β Failed to import extract_from_pdf: {e}")
        extract_from_pdf = None  # type: ignore
    if extract_from_pdf is None:
        raise HTTPException(status_code=500, detail="Local parser not available. Ensure backend/src/Parse_resume.py and src/extract_tech.py are present.")

    content = await file.read()
    # Use a real temp file instead of "./uploaded_resume_<client filename>":
    # the client-controlled filename could contain path separators (traversal)
    # or collide between concurrent uploads. Keep the extension in case the
    # parser dispatches on it.
    suffix = Path(file.filename or "resume.pdf").suffix or ".pdf"
    fd, tmp_name = tempfile.mkstemp(prefix="uploaded_resume_", suffix=suffix)
    temp_path = Path(tmp_name)
    try:
        with os.fdopen(fd, "wb") as fh:
            fh.write(content)
        skills = extract_from_pdf(str(temp_path))
    except Exception as e:
        print(f"β Error in extract_from_pdf: {e}")
        raise HTTPException(status_code=500, detail=f"Error parsing uploaded resume: {e}")
    finally:
        # Always remove the temp file — the original only unlinked on the
        # success path, leaking the file whenever extract_from_pdf raised.
        try:
            temp_path.unlink()
        except Exception:
            pass
    return ResumeParseOutput(extracted_skills=skills)
async def debug_environment():
    """Debug endpoint to check environment setup"""
    import os
    import sys

    src_dir = Path('./src')
    debug_info = {
        "cwd": os.getcwd(),
        "python_version": sys.version,
        "python_path_count": len(sys.path),
        "python_path_first_5": sys.path[:5],
        "files_in_cwd": list(Path('./').glob('*'))[:10],
        "src_exists": src_dir.exists(),
        "src_files": list(src_dir.glob('*.py'))[:10] if src_dir.exists() else [],
    }

    # Each probe performs the same `from ... import ...` the real endpoints
    # rely on, so a failure here mirrors a failure there.
    def _probe_skills():
        from src.extract_tech import extract_skills_from_text  # noqa: F401

    def _probe_pdf():
        from src.extract_tech import extract_from_pdf  # noqa: F401

    def _probe_parse():
        from src.Parse_resume import parse_document_hybrid  # noqa: F401

    import_status = {}
    for label, probe in (
        ("extract_skills_from_text", _probe_skills),
        ("extract_from_pdf", _probe_pdf),
        ("parse_document_hybrid", _probe_parse),
    ):
        try:
            probe()
            import_status[label] = "β Success"
        except Exception as e:
            import_status[label] = f"β Failed: {e}"

    debug_info["import_status"] = import_status
    return debug_info
async def job_recommender(req: TechRequest):
    """Given a tech stack list like ['python','aws','docker'] return top-K job matches from FAISS."""
    # Lazy import keeps this module importable even when pipeline deps are missing.
    try:
        from src.pipeline_tech_match import pipeline_match_from_tech
    except Exception:
        pipeline_match_from_tech = None  # type: ignore
    if pipeline_match_from_tech is None:
        raise HTTPException(status_code=500, detail="Pipeline not available. Ensure backend/src/pipeline_tech_match.py is present.")
    try:
        # Drop empty/whitespace-only entries before building the query string.
        cleaned_tokens = [token.strip() for token in req.tech if token and token.strip()]
        result = pipeline_match_from_tech(", ".join(cleaned_tokens))
        return JobRecommenderOutput(**result)
    except FileNotFoundError as e:
        raise HTTPException(status_code=500, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error running pipeline: {e}")
async def list_job_titles():
    """List all available job titles from the FAISS metadata.

    Returns:
        JobTitlesResponse wrapping the parsed metadata JSON.

    Raises:
        HTTPException: 500 when the metadata file is missing or unreadable.
    """
    try:
        metadata_path = Path(__file__).resolve().parent.parent / "Vector_db" / "faiss_metadata.json"
        if not metadata_path.exists():
            raise HTTPException(status_code=500, detail="FAISS metadata file not found")
        with open(metadata_path, 'r', encoding='utf-8') as f:
            metadata = json.load(f)
        return JobTitlesResponse(titles=metadata)
    except HTTPException:
        # Bug fix: HTTPException is an Exception subclass, so the generic
        # handler below used to swallow the deliberate "file not found"
        # response and re-wrap its detail. Re-raise unchanged instead,
        # matching get_job_title_techstack's handling.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error reading job titles: {e}")
async def get_job_title_techstack(request: JobTitleRequest):
    """Get the tech stack for a specific job title.

    Returns:
        JobTitleTechStackResponse with the canonical title and its skill list.

    Raises:
        HTTPException: 404 when the title is unknown, 500 on data errors.
    """
    try:
        # Load the preprocessed dataset
        csv_path = Path(__file__).resolve().parent.parent / "Dataset" / "job_dataset_merged_preprocessed.csv"
        if not csv_path.exists():
            raise HTTPException(status_code=500, detail="Preprocessed dataset file not found")
        df = pd.read_csv(csv_path)
        # Find the job title (case-insensitive match)
        mask = df['Title'].str.lower() == request.title.lower()
        matches = df[mask]
        if matches.empty:
            raise HTTPException(status_code=404, detail=f"Job title not found: {request.title}")
        # Get the first matching row's tech stack
        row = matches.iloc[0]
        tech_stack = row['tech_stack']
        # Robustness fix: read_csv yields float NaN for an empty cell, and the
        # original crashed on NaN.split(',') (-> 500). Treat any non-string
        # value as "no skills recorded".
        if isinstance(tech_stack, str):
            tech_list = [skill.strip() for skill in tech_stack.split(',') if skill.strip()]
        else:
            tech_list = []
        return JobTitleTechStackResponse(
            title=row['Title'],
            tech_stack=tech_list
        )
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error retrieving tech stack: {e}")