"""FastAPI RAG service: answers Stranger Things questions using a FAISS index
built by ingest.py for retrieval and DeepSeek V3.2 (via the Hugging Face
router) for generation. Serves both a JSON API and an HTML form front-end."""

import os

import requests
from dotenv import load_dotenv
from fastapi import FastAPI, Form, Request
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings
from pydantic import BaseModel

load_dotenv()

app = FastAPI()

# Mount static assets and the Jinja2 templates used by the HTML front-end.
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")

# Load the FAISS index produced by ingest.py. If it is missing, the app still
# starts but every endpoint returns an initialization error instead of answers.
FAISS_PATH = "faiss_index"
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

if os.path.exists(FAISS_PATH):
    vector_db = FAISS.load_local(FAISS_PATH, embeddings, allow_dangerous_deserialization=True)
    retriever = vector_db.as_retriever(search_kwargs={"k": 3})
    print("DEBUG: FAISS index loaded.")
else:
    print("WARNING: FAISS index not found. Run ingest.py first.")
    retriever = None


def call_deepseek_v3(prompt_input):
    """Send the rendered prompt to DeepSeek V3.2 through the HF router.

    Args:
        prompt_input: A LangChain PromptValue (exposes ``to_string()``) when
            invoked from the chain, or anything coercible with ``str()``.

    Returns:
        The model's answer text, or a human-readable error string. Errors are
        returned rather than raised so the RAG chain never crashes a request.
    """
    # Chain invocations pass a PromptValue; direct calls may pass a plain str.
    if hasattr(prompt_input, "to_string"):
        prompt_text = prompt_input.to_string()
    else:
        prompt_text = str(prompt_input)

    api_url = "https://router.huggingface.co/v1/chat/completions"
    token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
    if not token:
        return "Error: No API Token found."

    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "deepseek-ai/DeepSeek-V3.2",
        "messages": [
            {"role": "system", "content": "You are a Stranger Things expert. Answer clearly."},
            {"role": "user", "content": prompt_text},
        ],
        "temperature": 0.3,
        "max_tokens": 700,
        "stream": False,
    }

    try:
        # FIX: requests.post() had no timeout — a stalled upstream would hang
        # the request handler indefinitely. 60s covers slow generations.
        response = requests.post(api_url, headers=headers, json=payload, timeout=60)
        if response.status_code != 200:
            return f"DeepSeek Error ({response.status_code}): {response.text}"
        return response.json()["choices"][0]["message"]["content"]
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
        # Network failure, non-JSON body, or an unexpected response shape.
        return f"Connection Error: {e}"


# Wrap the raw HTTP call so it composes in an LCEL pipeline like any LLM.
llm = RunnableLambda(call_deepseek_v3)

# Prompt template (user-requested wording — do not edit the instructions).
template = """
You are a Stranger Things expert assistant.
Answer the user's question using ONLY the provided context.

Important rules:
- Do NOT copy sentences directly from the context. Rewrite in your own words.
- If the context does NOT contain the answer, say: "I don’t have enough information in the provided context to answer that fully."
- Keep the tone natural, friendly, and engaging.
- Write at least 4–6 sentences unless the question is very simple.

If the question is about a CHARACTER, you MUST include:
1) Full name + who they are in the story
2) Key relationships (friends, family, love interest, major connections)
3) Role / occupation / passion (student, sheriff, journalist, Dungeon Master, etc.)
4) Iconic traits (personality, behavior, famous moments or skills)

If the question is about an EVENT / LOCATION / OBJECT, you MUST include:
1) What it is
2) Why it matters in the story
3) Who is involved
4) Any major consequences or impact

Context:
{context}

User Question:
{question}

Answer (detailed and structured):
"""

prompt = PromptTemplate.from_template(template)


def format_docs(docs):
    """Join retrieved documents' page contents into one context string."""
    return "\n\n".join(doc.page_content for doc in docs)


# LCEL pipeline: retrieve -> format context -> fill prompt -> call the LLM.
# Left as None when the FAISS index was not found at startup.
if retriever:
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
    )
else:
    rag_chain = None


class QueryRequest(BaseModel):
    """JSON request body for the /query endpoint."""

    query: str


@app.get("/", response_class=HTMLResponse)
async def read_root(request: Request):
    """Serve the chat front-end page."""
    return templates.TemplateResponse("index.html", {"request": request})


@app.post("/query")
async def query_endpoint(request: QueryRequest):
    """JSON API: run the RAG chain over the submitted query."""
    if not rag_chain:
        return {"answer": "System Error: RAG chain not initialized."}
    answer = rag_chain.invoke(request.query)
    return {"answer": answer}


@app.post("/get_response")
async def get_response(request: Request, query: str = Form(...)):
    """HTML form endpoint: run the chain and render the answer into index.html."""
    if not rag_chain:
        return templates.TemplateResponse(
            "index.html",
            {"request": request, "response": "System Error: RAG chain not initialized."},
        )
    result = rag_chain.invoke(query)
    return templates.TemplateResponse("index.html", {"request": request, "response": result})


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8001)