```python
# main.py
from fastapi import FastAPI, File, UploadFile, Form, Query
from fastapi.responses import StreamingResponse, JSONResponse
from pathlib import Path
import uuid  # <<< make sure this import is present and at the top -- /chat uses it below

# Import backend utilities
from backend.embed_utils import embed_and_store_chunks, search_knowledge_base
from backend.document_loader import extract_text  # uses your chunker internally
from backend.chat_history import store_chat_turn, retrieve_chat_context
from backend.llm_client import stream_llm_response
from backend.session_utils import delete_session_data  # for clearing session data

app = FastAPI()

# Root endpoint to eliminate 404 errors on the base URL
@app.get("/")
async def root():
    return {
        "message": "Mario's RAG Backend is running! 🍄",
        "status": "healthy",
        "endpoints": ["/upload_document", "/chat", "/end_session"],
    }
```
```python
@app.post("/upload_document")
async def upload_document(file: UploadFile = File(...), session_id: str = Form("global")):
    """
    Handles document uploads, extracts text, chunks it, and embeds it into the knowledge base.
    """
    try:
        content = await file.read()  # read file content asynchronously
        filename = file.filename or "unknown.txt"
        # Use document_loader to extract and chunk text into Document objects
        docs = extract_text(Path(filename), content)
        # Embed and store the chunks (Document objects) into Qdrant
        embed_and_store_chunks(docs, session_id=session_id)
        return {"status": "✅ Document uploaded and processed. Power-up!"}
    except Exception as e:
        print(f"Error during document upload: {e}")  # log the error for debugging
        return JSONResponse(
            status_code=500,
            content={"error": f"{type(e).__name__}: {str(e)}"},
        )
```
```python
@app.get("/chat")
async def chat(question: str, session_id: str = ""):
    """
    Chat endpoint that streams answers using chat history and knowledge base context.
    """
    # Ensure a session_id exists; create one if the frontend didn't provide it
    if not session_id:
        session_id = str(uuid.uuid4())  # <<< this is where 'uuid' is used!
        print(f"Warning: No session_id provided for chat. Generated new one: {session_id}")

    # --- Retrieve context ---
    # Search the knowledge base for documents relevant to this question and session
    kb_context = search_knowledge_base(question, session_id=session_id)
    # Retrieve previous chat turns for conversational context
    chat_context = retrieve_chat_context(session_id, question)

    # --- Construct the LLM prompt ---
    # The prompt structure guides the LLM on how to use the provided context
    prompt_for_llm = f"""
You are Mario, a super helpful, friendly, and engaging AI assistant!
You love to chat and make interactions fun, using Mario-esque phrases and tone.
You're an expert at finding answers, but *only* from the knowledge you have.

Here's the information I have for you:

Chat History:
{chat_context if chat_context else "No prior chat history for this session."}

Knowledge Base Context:
{kb_context if kb_context else "No relevant knowledge base context found for this question. If you want me to learn, upload a document!"}

User's Question:
{question}

Please provide a helpful and friendly response, using the provided context if relevant.
If the answer isn't in the provided context, please politely say so and encourage the user
to provide more information or upload a document, using Mario-themed language.
"""

    # --- Stream the LLM response ---
    async def stream_response():
        # Approximate the turn number: each completed turn contributes two lines
        # (one user, one assistant) to the formatted chat_context
        turn_number = (len(chat_context.split("\n")) // 2) + 1 if chat_context else 1
        # Store the user's question in chat history immediately
        store_chat_turn(session_id, "user", question, turn_number)

        full_response = ""
        # Call the LLM client and forward tokens as they arrive
        for chunk in stream_llm_response(prompt=prompt_for_llm):
            if chunk.choices:
                choice = chunk.choices[0]  # take the first choice
                if hasattr(choice, "delta") and hasattr(choice.delta, "content") and choice.delta.content:
                    token = choice.delta.content
                    full_response += token
                    yield token  # yield each token as it arrives

        # Store the full assistant response once streaming completes
        store_chat_turn(session_id, "assistant", full_response, turn_number)

    return StreamingResponse(stream_response(), media_type="text/plain")
```
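The streaming loop indexes `chunk.choices[0].delta.content`, which is the shape of an OpenAI-style streaming response. `backend/llm_client.py` isn't shown, but a minimal `stream_llm_response` compatible with that loop could look like this (the model name and client setup are assumptions; any OpenAI-compatible endpoint would work):

```python
# Hypothetical backend/llm_client.py -- any OpenAI-compatible endpoint yields
# chunks matching the choices[0].delta.content access pattern in main.py.
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

def stream_llm_response(prompt: str):
    """Return an iterator of streaming chunks for main.py to unpack."""
    return client.chat.completions.create(
        model="gpt-4o-mini",  # assumed model; swap in whatever the Space actually uses
        messages=[{"role": "user", "content": prompt}],
        stream=True,  # emit incremental deltas instead of one full response
    )
```

Finally, the cleanup endpoint the Streamlit frontend calls when a session ends: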
```python
@app.post("/end_session")  # POST assumed; adjust if your frontend calls it differently
async def end_session(session_id: str = Query(...)):
    """
    Endpoint to explicitly clear all data (chat history and knowledge base)
    associated with a specific session_id. Called by the Streamlit frontend.
    """
    success = delete_session_data(session_id)
    if success:
        return {"status": f"✅ Session '{session_id}' and all related data deleted. See ya!"}
    else:
        return JSONResponse(
            status_code=500,
            content={"error": "❌ Failed to delete session data. Bowser's at it again!"},
        )
```