# Portfolio / app.py
# (Removed Hugging Face Spaces page chrome that was pasted into the file:
#  "srivathsa96's picture / Update app.py / 4eab4d8 verified" — not Python.)
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import os
from dotenv import load_dotenv
import tempfile
import shutil
import logging
from typing import Optional

# Load .env BEFORE any os.getenv() reads below — the original called
# load_dotenv() after the LangSmith env vars were read, so values from
# .env were never picked up.
load_dotenv()

# LangChain imports (with error handling)
try:
    from langchain_groq import ChatGroq
    from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader, TextLoader
    from langchain_text_splitters import RecursiveCharacterTextSplitter
    from langchain_community.embeddings import HuggingFaceEmbeddings
    from langchain_community.vectorstores import FAISS
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.runnables import RunnablePassthrough
    from langsmith import traceable
    LANGCHAIN_AVAILABLE = True

    os.environ["LANGSMITH_TRACING"] = "true"
    # Only export vars that are actually set: os.environ[...] = None
    # raises TypeError, which would wrongly disable LangChain entirely.
    for _env_name, _dotenv_key in (
        ("LANGSMITH_ENDPOINT", "LANGSMITH_ENDPOINT"),
        ("LANGSMITH_API_KEY", "LANGSMITH_API"),
        ("LANGSMITH_PROJECT", "LANGSMITH_PROJECT"),
    ):
        _value = os.getenv(_dotenv_key)
        if _value is not None:
            os.environ[_env_name] = _value
except ImportError as e:
    logging.error(f"LangChain import error: {e}")
    LANGCHAIN_AVAILABLE = False

    # No-op fallback so the @traceable decorators below don't NameError
    # when langsmith is unavailable.
    def traceable(func):
        return func
# --- FastAPI app, CORS, logging, and per-process state ----------------------
app = FastAPI(title="Resume QA API")
app.add_middleware(
    CORSMiddleware,
    # NOTE(review): wildcard origins together with allow_credentials=True is
    # very permissive — confirm this is acceptable for the deployment.
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Global cache (session-based): one resume and one Q&A chain per process,
# populated by /upload-resume and consumed by /ask and /health.
qa_chain = None
resume_text_cache = None
class Question(BaseModel):
    """Request body for POST /ask: a single natural-language question."""
    question: str
def create_mock_qa():
    """Build a fallback mock Q&A callable for demo use.

    Returns:
        A function taking a question string and returning an answer string,
        mirroring the call shape of the real chain's invoke.
    """
    keywords = ("fabric", "databricks", "skill", "experience")

    def mock_invoke(question: str) -> str:
        q_lower = question.lower()
        # Bug fix: the original `if "fabric" or "databricks" or ... in q_lower`
        # was always truthy (non-empty string literal), and the miss path
        # returned None. Test each keyword for membership instead.
        if any(keyword in q_lower for keyword in keywords):
            return "testing"
        return "testing"

    return mock_invoke
def create_qa_chain(resume_content: str):
    """Build a retrieval-augmented Q&A chain over the supplied resume text.

    Splits the text, embeds the chunks into a FAISS index, and wires a
    retriever + prompt + Groq chat model into an LCEL pipeline. Falls back
    to the mock Q&A callable when LangChain is unavailable or any step of
    chain construction fails.
    """
    if not LANGCHAIN_AVAILABLE:
        logger.warning("LangChain not available, using mock")
        return create_mock_qa()

    try:
        # Chunk the resume and index it for similarity search.
        splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
        chunks = splitter.create_documents([resume_content])
        embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        index = FAISS.from_documents(chunks, embedder)

        chat_model = ChatGroq(
            api_key=os.getenv('GROQ_API_KEY'),
            model=os.getenv('GROQ_MODEL'),  # Fixed model name
            temperature=0.1,
        )
        doc_retriever = index.as_retriever(search_kwargs={"k": 4})

        template = """Use the following resume context to answer the question.
If you don't know the answer, say so. Answer concisely and accurately.
Your job:
- Answer the question only related to the resume
- if any question other than my resume or uploaded resume, please revoke to answer very strictly and politely
- if the answer lengthy, give a response in the form of points separate by bullets with proper indentation
- response should be well structured and straight forward
Context: {context}
Question: {question}
Answer:"""
        qa_prompt = ChatPromptTemplate.from_template(template)

        def join_pages(documents):
            # Flatten retrieved chunks into one prompt-ready context string.
            return "\n\n".join(d.page_content for d in documents)

        pipeline = (
            {"context": doc_retriever | join_pages, "question": RunnablePassthrough()}
            | qa_prompt
            | chat_model
            | StrOutputParser()
        )
        return pipeline
    except Exception as e:
        logger.error(f"Q&A chain creation failed: {e}")
        return create_mock_qa()
@app.post("/upload-resume")
@traceable  # must sit UNDER the route decorator so FastAPI registers the traced handler
async def upload_resume(file: UploadFile = File(...)):
    """Upload a resume (PDF/DOCX/TXT), extract its text, and build the Q&A chain.

    Returns a summary dict (status, filename, char count, preview) on success.
    Raises HTTPException 400 for unsupported file types, 500 on processing errors.
    """
    global qa_chain, resume_text_cache

    # Validate by extension: .txt files arrive as text/plain, so the old
    # content_type.startswith('application/') check wrongly rejected them.
    suffix = os.path.splitext(file.filename or "")[1].lower()
    if suffix not in ('.pdf', '.docx', '.doc', '.txt'):
        raise HTTPException(status_code=400, detail="Only PDF/DOCX/TXT supported")

    tmp_path = None
    try:
        # Keep the real extension (the original always used ".pdf"); some
        # loaders sniff the format from the file suffix.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            shutil.copyfileobj(file.file, tmp)
            tmp_path = tmp.name

        # Pick a loader by extension.
        if suffix == '.pdf':
            loader = PyPDFLoader(tmp_path)
        elif suffix in ('.docx', '.doc'):
            loader = UnstructuredWordDocumentLoader(tmp_path)
        else:  # '.txt'
            loader = TextLoader(tmp_path)

        docs = loader.load()
        resume_text_cache = "\n".join([doc.page_content for doc in docs])
        qa_chain = create_qa_chain(resume_text_cache)
        logger.info(f"Resume loaded: {len(resume_text_cache)} chars")
        return {
            "status": "success",
            "filename": file.filename,
            "chars": len(resume_text_cache),
            "preview": resume_text_cache[:200] + "..." if len(resume_text_cache) > 200 else resume_text_cache
        }
    except HTTPException:
        # Don't let the broad handler below convert deliberate 4xx into a 500.
        raise
    except Exception as e:
        logger.error(f"Upload error: {e}")
        raise HTTPException(status_code=500, detail="Failed to process resume")
    finally:
        if tmp_path and os.path.exists(tmp_path):
            os.unlink(tmp_path)
@app.post("/ask")
@traceable  # must sit UNDER the route decorator so FastAPI registers the traced handler
async def ask_resume(question: Question):
    """Answer a question against the most recently uploaded resume.

    Returns {"question", "answer"} on success, or {"error": ...} when no
    resume has been uploaded yet or the chain fails.
    """
    global qa_chain
    if qa_chain is None:
        return {"error": "Upload resume first using /upload-resume"}
    try:
        # Real LCEL chains expose .invoke(); the mock fallback is a plain
        # callable. Dispatching on .invoke is safer than callable(): a
        # Runnable that also happened to be callable would hit the right path.
        if hasattr(qa_chain, "invoke"):
            answer = qa_chain.invoke(question.question)
        else:
            answer = qa_chain(question.question)
        return {"question": question.question, "answer": str(answer)}
    except Exception as e:
        logger.error(f"Q&A error: {e}")
        return {"error": "Failed to process question"}
@app.get("/health")
@traceable  # must sit UNDER the route decorator so FastAPI registers the traced handler
async def health():
    """Liveness/diagnostics: chain state, cached resume size, and config flags."""
    return {
        "status": "healthy",
        "qa_loaded": qa_chain is not None,
        "resume_chars": len(resume_text_cache) if resume_text_cache else 0,
        # Report presence only — never echo the key itself.
        "groq_key": bool(os.getenv('GROQ_API_KEY')),
        "langchain_available": LANGCHAIN_AVAILABLE
    }
if __name__ == "__main__":
    import uvicorn
    # Bind all interfaces; 7860 is the conventional Hugging Face Spaces port.
    uvicorn.run(app, host="0.0.0.0", port=7860)