# Portfolio / app.py
# (Removed Hugging Face Spaces page chrome that was pasted into the file:
#  "srivathsa96's picture / Update app.py / 4eab4d8 verified" — not Python.)
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import os
from dotenv import load_dotenv
import tempfile
import shutil
import logging
from typing import Optional

# Load .env BEFORE any os.getenv() reads below — the original called
# load_dotenv() after the LangSmith env vars were read, so values from
# .env were never picked up.
load_dotenv()

# LangChain imports (with error handling)
try:
    from langchain_groq import ChatGroq
    from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader, TextLoader
    from langchain_text_splitters import RecursiveCharacterTextSplitter
    from langchain_community.embeddings import HuggingFaceEmbeddings
    from langchain_community.vectorstores import FAISS
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.runnables import RunnablePassthrough
    from langsmith import traceable
    LANGCHAIN_AVAILABLE = True

    os.environ["LANGSMITH_TRACING"] = "true"
    # Only export vars that are actually set: os.environ[...] = None
    # raises TypeError, which would wrongly disable LangChain entirely.
    for _env_name, _dotenv_key in (
        ("LANGSMITH_ENDPOINT", "LANGSMITH_ENDPOINT"),
        ("LANGSMITH_API_KEY", "LANGSMITH_API"),
        ("LANGSMITH_PROJECT", "LANGSMITH_PROJECT"),
    ):
        _value = os.getenv(_dotenv_key)
        if _value is not None:
            os.environ[_env_name] = _value
except ImportError as e:
    logging.error(f"LangChain import error: {e}")
    LANGCHAIN_AVAILABLE = False

    # No-op fallback so the @traceable decorators below don't NameError
    # when langsmith is unavailable.
    def traceable(func):
        return func
# --- FastAPI app, CORS, logging, and per-process state ----------------------
app = FastAPI(title="Resume QA API")
app.add_middleware(
    CORSMiddleware,
    # NOTE(review): wildcard origins together with allow_credentials=True is
    # very permissive — confirm this is acceptable for the deployment.
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Global cache (session-based): one resume and one Q&A chain per process,
# populated by /upload-resume and consumed by /ask and /health.
qa_chain = None
resume_text_cache = None
class Question(BaseModel):
    """Request body for POST /ask: a single natural-language question."""
    question: str
def create_mock_qa():
    """Build a fallback mock Q&A callable for demo use.

    Returns:
        A function taking a question string and returning an answer string,
        mirroring the call shape of the real chain's invoke.
    """
    keywords = ("fabric", "databricks", "skill", "experience")

    def mock_invoke(question: str) -> str:
        q_lower = question.lower()
        # Bug fix: the original `if "fabric" or "databricks" or ... in q_lower`
        # was always truthy (non-empty string literal), and the miss path
        # returned None. Test each keyword for membership instead.
        if any(keyword in q_lower for keyword in keywords):
            return "testing"
        return "testing"

    return mock_invoke
def create_qa_chain(resume_content: str):
    """Build a retrieval-augmented Q&A chain over the supplied resume text.

    Splits the text, embeds the chunks into a FAISS index, and wires a
    retriever + prompt + Groq chat model into an LCEL pipeline. Falls back
    to the mock Q&A callable when LangChain is unavailable or any step of
    chain construction fails.
    """
    if not LANGCHAIN_AVAILABLE:
        logger.warning("LangChain not available, using mock")
        return create_mock_qa()

    try:
        # Chunk the resume and index it for similarity search.
        splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
        chunks = splitter.create_documents([resume_content])
        embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        index = FAISS.from_documents(chunks, embedder)

        chat_model = ChatGroq(
            api_key=os.getenv('GROQ_API_KEY'),
            model=os.getenv('GROQ_MODEL'),  # Fixed model name
            temperature=0.1,
        )
        doc_retriever = index.as_retriever(search_kwargs={"k": 4})

        template = """Use the following resume context to answer the question.
If you don't know the answer, say so. Answer concisely and accurately.
Your job:
- Answer the question only related to the resume
- if any question other than my resume or uploaded resume, please revoke to answer very strictly and politely
- if the answer lengthy, give a response in the form of points separate by bullets with proper indentation
- response should be well structured and straight forward
Context: {context}
Question: {question}
Answer:"""
        qa_prompt = ChatPromptTemplate.from_template(template)

        def join_pages(documents):
            # Flatten retrieved chunks into one prompt-ready context string.
            return "\n\n".join(d.page_content for d in documents)

        pipeline = (
            {"context": doc_retriever | join_pages, "question": RunnablePassthrough()}
            | qa_prompt
            | chat_model
            | StrOutputParser()
        )
        return pipeline
    except Exception as e:
        logger.error(f"Q&A chain creation failed: {e}")
        return create_mock_qa()
@app.post("/upload-resume")
@traceable  # must sit UNDER the route decorator so FastAPI registers the traced handler
async def upload_resume(file: UploadFile = File(...)):
    """Upload a resume (PDF/DOCX/TXT), extract its text, and build the Q&A chain.

    Returns a summary dict (status, filename, char count, preview) on success.
    Raises HTTPException 400 for unsupported file types, 500 on processing errors.
    """
    global qa_chain, resume_text_cache

    # Validate by extension: .txt files arrive as text/plain, so the old
    # content_type.startswith('application/') check wrongly rejected them.
    suffix = os.path.splitext(file.filename or "")[1].lower()
    if suffix not in ('.pdf', '.docx', '.doc', '.txt'):
        raise HTTPException(status_code=400, detail="Only PDF/DOCX/TXT supported")

    tmp_path = None
    try:
        # Keep the real extension (the original always used ".pdf"); some
        # loaders sniff the format from the file suffix.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            shutil.copyfileobj(file.file, tmp)
            tmp_path = tmp.name

        # Pick a loader by extension.
        if suffix == '.pdf':
            loader = PyPDFLoader(tmp_path)
        elif suffix in ('.docx', '.doc'):
            loader = UnstructuredWordDocumentLoader(tmp_path)
        else:  # '.txt'
            loader = TextLoader(tmp_path)

        docs = loader.load()
        resume_text_cache = "\n".join([doc.page_content for doc in docs])
        qa_chain = create_qa_chain(resume_text_cache)
        logger.info(f"Resume loaded: {len(resume_text_cache)} chars")
        return {
            "status": "success",
            "filename": file.filename,
            "chars": len(resume_text_cache),
            "preview": resume_text_cache[:200] + "..." if len(resume_text_cache) > 200 else resume_text_cache
        }
    except HTTPException:
        # Don't let the broad handler below convert deliberate 4xx into a 500.
        raise
    except Exception as e:
        logger.error(f"Upload error: {e}")
        raise HTTPException(status_code=500, detail="Failed to process resume")
    finally:
        if tmp_path and os.path.exists(tmp_path):
            os.unlink(tmp_path)
@app.post("/ask")
@traceable  # must sit UNDER the route decorator so FastAPI registers the traced handler
async def ask_resume(question: Question):
    """Answer a question against the most recently uploaded resume.

    Returns {"question", "answer"} on success, or {"error": ...} when no
    resume has been uploaded yet or the chain fails.
    """
    global qa_chain
    if qa_chain is None:
        return {"error": "Upload resume first using /upload-resume"}
    try:
        # Real LCEL chains expose .invoke(); the mock fallback is a plain
        # callable. Dispatching on .invoke is safer than callable(): a
        # Runnable that also happened to be callable would hit the right path.
        if hasattr(qa_chain, "invoke"):
            answer = qa_chain.invoke(question.question)
        else:
            answer = qa_chain(question.question)
        return {"question": question.question, "answer": str(answer)}
    except Exception as e:
        logger.error(f"Q&A error: {e}")
        return {"error": "Failed to process question"}
@app.get("/health")
@traceable  # must sit UNDER the route decorator so FastAPI registers the traced handler
async def health():
    """Liveness/diagnostics: chain state, cached resume size, and config flags."""
    return {
        "status": "healthy",
        "qa_loaded": qa_chain is not None,
        "resume_chars": len(resume_text_cache) if resume_text_cache else 0,
        # Report presence only — never echo the key itself.
        "groq_key": bool(os.getenv('GROQ_API_KEY')),
        "langchain_available": LANGCHAIN_AVAILABLE
    }
if __name__ == "__main__":
    import uvicorn
    # Bind all interfaces; 7860 is the conventional Hugging Face Spaces port.
    uvicorn.run(app, host="0.0.0.0", port=7860)