Spaces:

Aaravkumar
/

Portfolio-AI

Sleeping

App Files Files Community

Portfolio-AI / app.py

Aaravkumar

Update app.py

adb38e8 verified 7 days ago

Raw

History Blame Contribute Delete

5.04 kB

	from fastapi import FastAPI
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import StreamingResponse
	from pydantic import BaseModel
	from huggingface_hub import InferenceClient
	import os, logging, time, threading

	from loader import Loader
	from chunker import Chunker
	from embedder import Embedder
	from vector import VectorStorage
	from retriever import Retriever

	# Logging is configured first so every later module-level step can report through `logger`.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	app = FastAPI()

	# CORS is left fully open: any origin, HTTP method and request header is accepted.
	app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])

	# Chat models that chat() tries in list order until one of them streams a reply.
	MODELS = [
	    "Qwen/Qwen2.5-72B-Instruct",
	    "meta-llama/Llama-3.2-3B-Instruct",
	    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
	    "mistralai/Mistral-7B-Instruct-v0.3",
	    "HuggingFaceH4/zephyr-7b-beta",
	]

	# Session lifetime in seconds (three hours); also used as the cleanup sweep interval.
	SESSION_TIMEOUT = 3 * 3600

	# In-memory session table: session id -> {"history": [...], "created_at": <epoch seconds>}.
	sessions: dict = dict()

	def auto_cleanup():
	    """Periodically drop sessions older than ``SESSION_TIMEOUT`` seconds.

	    Runs forever: sleeps ``SESSION_TIMEOUT`` seconds, then removes every entry
	    of the module-level ``sessions`` dict whose ``created_at`` timestamp is
	    more than ``SESSION_TIMEOUT`` seconds in the past. Entries without a
	    ``created_at`` key are treated as brand new and kept. Never returns.
	    """
	    while True:
	        time.sleep(SESSION_TIMEOUT)

	        try:
	            now = time.time()
	            # Iterate over a snapshot: request handlers may insert sessions while
	            # this sweep runs, and iterating the live dict would then raise
	            # "dictionary changed size during iteration".
	            expired = [
	                sid
	                for sid, data in list(sessions.items())
	                if now - data.get("created_at", now) > SESSION_TIMEOUT
	            ]
	            for sid in expired:
	                # pop() with a default tolerates an entry that is already gone.
	                sessions.pop(sid, None)

	            if expired:
	                logger.info("Auto-cleaned %d expired sessions to free RAM.", len(expired))
	        except Exception:
	            # An uncaught error would end this loop and silently stop all future
	            # cleanup, so log it and try again on the next cycle.
	            logger.exception("Session cleanup sweep failed; will retry next cycle.")


	# Start the session sweeper at import time; daemon=True means the thread does not keep the process alive at exit.
	threading.Thread(target=auto_cleanup, daemon=True).start()

	# --- Global RAG Components ---
	# Built once at import time and shared by every request; `store` and `embedder` are read by chat().
	# NOTE(review): presumably Loader extracts the text of portfolio.pdf and Chunker splits it into passages — confirm in loader.py / chunker.py.
	text = Loader("portfolio.pdf").load()
	chunks = Chunker().chunker(text)
	embedder = Embedder()
	vectors = embedder.embed(chunks)
	# The index dimension is taken from the first vector, so this line fails if `vectors` is empty
	# (e.g. the document produced no chunks) — TODO confirm how an empty document should be handled.
	store = VectorStorage(dimension=len(vectors[0]))
	store.add(vectors, chunks)

	# JSON body accepted by the POST "/" chat endpoint.
	class ChatRequest(BaseModel):
	    # Client-chosen key used to look up (or create) the entry in `sessions`.
	    session_id: str
	    # The user's question; used for retrieval and sent to the model.
	    message: str

	@app.post("/")
	def chat(req: ChatRequest):
	    """Answer a question about the portfolio, streaming the reply as SSE.

	    Args:
	        req: Request body carrying the ``session_id`` and the user ``message``.

	    Returns:
	        A plain ``{"response": ...}`` dict when retrieval finds no context.
	        Otherwise a ``StreamingResponse`` (``text/event-stream``) that emits
	        ``data: <text>`` events and finishes with ``data: [DONE]``.

	    The models in ``MODELS`` are tried in order. A model that fails before
	    producing any text is skipped; a model that fails after producing text
	    ends the stream with what was already sent.
	    """
	    # setdefault creates-or-fetches in a single dict operation, so the session
	    # cannot disappear (via the cleanup thread) between a membership test and
	    # the lookup.
	    session = sessions.setdefault(
	        req.session_id, {"history": [], "created_at": time.time()}
	    )

	    retriever = Retriever(store, embedder, k=3)
	    context_chunks = retriever.retrieve(req.message)

	    if not context_chunks:
	        return {"response": "I only answer questions about Aarav and his work."}

	    context_text = "\n\n".join(context_chunks)
	    system_prompt = (
	        "You are Aarav's AI assistant.\n"
	        "Your name is Zooba\n"
	        "Your job is to answer questions about Aarav Kumar Ranjan, his projects, skills, and interests using the provided context.\n"
	        "Rules:\n"
	        "- Only answer using the given context. Do not make up information.\n"
	        "- If the answer is not in the context, say: I only answer questions about Aarav and his work.\n"
	        "- Keep answers clear, simple, and confident.\n"
	        "- Do not use complex jargon unless necessary.\n"
	        "- Prefer explaining things in a way a beginner can understand.\n"
	        "Style:\n"
	        "- Speak in a calm, intelligent, and slightly friendly tone.\n"
	        "- Be concise but informative.\n"
	        "- When explaining projects, include:\n"
	        " • what it does\n"
	        " • how it works (simple explanation)\n"
	        " • why it is useful\n"
	        "Do not generate fake achievements, skills, or experiences.\n"
	        "Do not pretend to be Aarav himself.\n"
	        "If asked about projects, mention their names clearly.\n"
	        "Make Aarav appear as a thoughtful, skilled, and curious machine learning enthusiast who focuses on understanding and building real systems.\n"
	    )

	    # Prompt layout: system rules, the last ten stored turns, then the new
	    # question together with its retrieved context.
	    messages = [
	        {"role": "system", "content": system_prompt},
	        *session["history"][-10:],
	        {"role": "user", "content": f"Context:\n{context_text}\n\nQuestion: {req.message}"},
	    ]

	    def remember(answer):
	        """Store this exchange so later requests in the session can see it."""
	        history = session["history"]
	        # Only the bare question is stored, not the retrieved context, so the
	        # replayed history stays small.
	        history.append({"role": "user", "content": req.message})
	        history.append({"role": "assistant", "content": answer})
	        # Only the last ten entries are ever read back; drop anything older.
	        del history[:-10]

	    def token_stream():
	        """Yield SSE lines from the first model that manages to respond."""
	        for model in MODELS:
	            # Reset per model, *before* the try block, so the except branch can
	            # always tell whether anything was already sent to the client.
	            pieces = []
	            try:
	                client = InferenceClient(model, token=os.environ["HF_TOKEN"])
	                logger.info("Streaming with: %s", model)
	                for event in client.chat_completion(messages, max_tokens=512, stream=True):
	                    piece = event.choices[0].delta.content
	                    if piece:
	                        pieces.append(piece)
	                        # NOTE(review): a piece containing a newline ends the SSE
	                        # "data:" field early — confirm the client copes with that.
	                        yield f"data: {piece}\n\n"
	            except Exception as e:
	                if not pieces:
	                    # Nothing reached the client yet, so another model can take over.
	                    logger.warning("Streaming failed for %s: %s", model, e)
	                    continue
	                # Text was already sent; switching models now would splice two
	                # different answers together, so close the stream instead.
	                logger.warning("Stream from %s interrupted after partial output: %s", model, e)

	            if pieces:
	                remember("".join(pieces))
	            yield "data: [DONE]\n\n"
	            return

	        yield "data: Sorry, all models are currently unavailable. Try again later.\n\n"
	        yield "data: [DONE]\n\n"

	    return StreamingResponse(token_stream(), media_type="text/event-stream")


	# Direct-run entry point: serve the app with uvicorn on all interfaces, port 7600.
	if __name__ == "__main__":
	    # Imported here so uvicorn is only required when the file is run as a script.
	    import uvicorn

	    uvicorn.run(app, host="0.0.0.0", port=7600)