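# NOTE: the lines below appear to be file-viewer page text captured together
# with the source; they are kept as comments so they are not parsed as code.
# Portfolio-AI / app.py
# Aaravkumar's picture
# Update app.py
# adb38e8 verified
# Raw
# History Blame Contribute Delete
# 5.04 kB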
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from huggingface_hub import InferenceClient
import os, logging, time, threading
from loader import Loader
from chunker import Chunker
from embedder import Embedder
from vector import VectorStorage
from retriever import Retriever
# Logging is configured once for the whole process; every message in this
# module goes through the module-level `logger`.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The ASGI application object that the route decorators below attach to.
app = FastAPI()

# CORS is left fully open: any origin, any method and any header is accepted.
_CORS_OPTIONS = {
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)
# Hugging Face model ids available to the chat endpoint. The endpoint walks
# this list from top to bottom and uses the first model that produces output.
MODELS = [
    "Qwen/Qwen2.5-72B-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "HuggingFaceH4/zephyr-7b-beta",
]

# Maximum age of a session, in seconds (three hours).
_SECONDS_PER_HOUR = 60 * 60
SESSION_TIMEOUT = 3 * _SECONDS_PER_HOUR

# In-memory session table: session id -> {"history": [...], "created_at": <epoch seconds>}.
sessions: dict = dict()
def purge_expired_sessions(session_store, now, timeout):
    """Drop every session older than *timeout* seconds from *session_store*.

    Args:
        session_store: Mapping of session id -> session dict. A session's age
            is measured from its ``"created_at"`` value; a session without
            that key is treated as brand new and is never removed here.
        now: Current time in epoch seconds.
        timeout: Maximum allowed age in seconds.

    Returns:
        The number of sessions that were selected as expired.
    """
    # Iterate over a snapshot, not the live dict: request handlers insert new
    # sessions from other threads, and a dict that changes size while it is
    # being iterated raises RuntimeError. That error would terminate the
    # cleanup thread and no session would ever be freed again.
    expired = [
        sid
        for sid, data in list(session_store.items())
        if now - data.get("created_at", now) > timeout
    ]
    for sid in expired:
        # pop() with a default tolerates an entry that has already vanished.
        session_store.pop(sid, None)
    return len(expired)


def auto_cleanup():
    """Run forever, purging expired sessions once every SESSION_TIMEOUT seconds.

    Intended to be the target of a daemon thread; it never returns.
    """
    while True:
        time.sleep(SESSION_TIMEOUT)
        removed = purge_expired_sessions(sessions, time.time(), SESSION_TIMEOUT)
        if removed:
            logger.info(f"Auto-cleaned {removed} expired sessions to free RAM.")


# Daemon thread: it must not keep the process alive at shutdown.
threading.Thread(target=auto_cleanup, daemon=True).start()
# --- Global RAG Components ---
# Built once when the module is imported and shared by every request:
# load the portfolio document, split it into chunks, embed the chunks and
# put the resulting vectors (with their chunks) into the vector store.
_PORTFOLIO_PATH = "portfolio.pdf"
text = Loader(_PORTFOLIO_PATH).load()
chunks = Chunker().chunker(text)

embedder = Embedder()
vectors = embedder.embed(chunks)

# The store's dimension is taken from the length of the first vector, so an
# empty `vectors` fails right here at start-up.
_embedding_dim = len(vectors[0])
store = VectorStorage(dimension=_embedding_dim)
store.add(vectors, chunks)
class ChatRequest(BaseModel):
    """JSON body accepted by the chat endpoint."""

    # Client-chosen identifier; used as the key into the in-memory `sessions` table.
    session_id: str
    # The user's question for this turn.
    message: str
@app.post("/")
def chat(req: ChatRequest):
    """Answer one chat turn about the portfolio, streaming the reply as SSE.

    The question is matched against the vector store; the retrieved chunks are
    passed to the first model in ``MODELS`` that produces output, together
    with the most recent turns of this session.

    Args:
        req: Request body carrying the ``session_id`` and the user ``message``.

    Returns:
        A plain JSON dict with a fixed refusal when nothing relevant is
        retrieved; otherwise a ``StreamingResponse`` of ``text/event-stream``
        whose events are ``data: <text>`` chunks followed by ``data: [DONE]``.
    """
    # How many stored history messages are replayed to the model (and kept).
    history_window = 10

    # Create-or-fetch in a single step. A separate "check, insert, then index"
    # sequence can raise KeyError if the cleanup thread removes the session
    # between the steps.
    session = sessions.setdefault(
        req.session_id, {"history": [], "created_at": time.time()}
    )

    retriever = Retriever(store, embedder, k=3)
    context_chunks = retriever.retrieve(req.message)
    if not context_chunks:
        return {"response": "I only answer questions about Aarav and his work."}
    context_text = "\n\n".join(context_chunks)

    system_prompt = (
        "You are Aarav's AI assistant.\n"
        "Your name is Zooba\n"
        "Your job is to answer questions about Aarav Kumar Ranjan, his projects, skills, and interests using the provided context.\n"
        "Rules:\n"
        "- Only answer using the given context. Do not make up information.\n"
        "- If the answer is not in the context, say: I only answer questions about Aarav and his work.\n"
        "- Keep answers clear, simple, and confident.\n"
        "- Do not use complex jargon unless necessary.\n"
        "- Prefer explaining things in a way a beginner can understand.\n"
        "Style:\n"
        "- Speak in a calm, intelligent, and slightly friendly tone.\n"
        "- Be concise but informative.\n"
        "- When explaining projects, include:\n"
        " • what it does\n"
        " • how it works (simple explanation)\n"
        " • why it is useful\n"
        "Do not generate fake achievements, skills, or experiences.\n"
        "Do not pretend to be Aarav himself.\n"
        "If asked about projects, mention their names clearly.\n"
        "Make Aarav appear as a thoughtful, skilled, and curious machine learning enthusiast who focuses on understanding and building real systems.\n"
    )

    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(session["history"][-history_window:])
    messages.append(
        {
            "role": "user",
            "content": f"Context:\n{context_text}\n\nQuestion: {req.message}",
        }
    )

    def remember(answer):
        """Store this turn so later requests in the session can see it."""
        history = session["history"]
        # Only the bare question is stored; retrieved context is rebuilt on
        # every request, so repeating it in the history would waste tokens.
        history.append({"role": "user", "content": req.message})
        history.append({"role": "assistant", "content": answer})
        # Keep only what will ever be replayed, so a session cannot grow
        # without bound.
        del history[:-history_window]

    def token_stream():
        """Yield SSE events, falling back through MODELS until one answers."""
        for model in MODELS:
            # Must be set before the try block: client construction and the
            # HF_TOKEN lookup can raise, and the handler below reads `success`.
            success = False
            pieces = []
            try:
                client = InferenceClient(model, token=os.environ["HF_TOKEN"])
                logger.info(f"Streaming with: {model}")
                for token in client.chat_completion(messages, max_tokens=512, stream=True):
                    piece = token.choices[0].delta.content
                    if piece:
                        success = True
                        pieces.append(piece)
                        # NOTE(review): a piece containing "\n" is sent inside
                        # one `data:` line; the SSE format treats newlines as
                        # line terminators, so confirm the client handles it.
                        yield f"data: {piece}\n\n"
            except Exception as e:
                if not success:
                    # Nothing was sent for this model yet: try the next one.
                    logger.warning(f"Streaming failed for {model}: {e}")
                    continue
                # Part of an answer is already on the wire, so switching
                # models would splice two answers together. End the stream.
                logger.warning(f"Stream from {model} interrupted after partial output: {e}")

            if pieces:
                remember("".join(pieces))
            yield "data: [DONE]\n\n"
            return

        yield "data: Sorry, all models are currently unavailable. Try again later.\n\n"
        yield "data: [DONE]\n\n"

    return StreamingResponse(token_stream(), media_type="text/event-stream")
if __name__ == "__main__":
    # Direct-execution entry point: serve the app with uvicorn on every
    # network interface. uvicorn is imported here so that merely importing
    # this module does not require it.
    import uvicorn

    bind_host, bind_port = "0.0.0.0", 7600
    uvicorn.run(app, host=bind_host, port=bind_port)