Simple_RAG / app.py
fansa34's picture
Update app.py
2cbc55b verified
raw
history blame
2.13 kB
from fastapi import FastAPI, Query
from prepare_env import prepare_environment_and_retriever
from rag_pipeline import full_rag_pipeline
from langchain_google_genai import GoogleGenerativeAI
import os
from dotenv import load_dotenv
import psutil
import os
# Load environment variables from .env file
load_dotenv()
import time
# ASGI application object; the route decorators further down register their
# endpoints on it.
app = FastAPI()
# Build the retriever once, at import time, so every /ask request reuses the
# same instance instead of rebuilding it per call.
# NOTE(review): per the original comment this call also prepares the
# environment and loads the vector store; that logic lives in prepare_env and
# is not visible here -- confirm there.
expanding_retriever = prepare_environment_and_retriever()
@app.get("/ask")
def ask_question(
    question: str,
    with_citations: bool = Query(False, description="Include citations in the response"),
):
    """Answer ``question`` with the RAG pipeline and report basic metrics.

    Args:
        question: Required query-string parameter holding the user's question.
        with_citations: Optional query flag (defaults to ``False``). It is
            forwarded unchanged to the pipeline as ``clean_all_citations``.

    Returns:
        A dict with the echoed ``question``, the pipeline's ``answer`` and a
        ``metrics`` dict holding ``latency_seconds`` (rounded to 3 decimals)
        and ``ram_usage_delta_mb`` (rounded to 2 decimals).

    Note:
        The RAM figure is the difference in resident set size of the whole
        server process, so it can be negative and can include memory
        allocated by other requests running at the same time.
    """
    process = psutil.Process(os.getpid())

    # perf_counter() is monotonic, so the measured latency cannot be skewed
    # (or go negative) if the system wall clock is adjusted mid-request.
    start_time = time.perf_counter()
    start_mem = process.memory_info().rss  # resident set size, in bytes

    # NOTE(review): the endpoint flag is documented as "include citations" but
    # is passed as `clean_all_citations`; if that parameter strips citations,
    # the mapping is inverted. Left unchanged -- confirm against rag_pipeline.
    response = full_rag_pipeline(
        question,
        expanding_retriever,
        clean_all_citations=with_citations,
    )

    end_time = time.perf_counter()
    end_mem = process.memory_info().rss

    latency = end_time - start_time
    ram_used_mb = (end_mem - start_mem) / (1024 ** 2)  # bytes -> MiB

    return {
        "question": question,
        "answer": response,
        "metrics": {
            "latency_seconds": round(latency, 3),
            "ram_usage_delta_mb": round(ram_used_mb, 2),
        },
    }
@app.get("/generate_title")
def generate_title(first_question: str = Query(..., description="The first question to generate a title from")):
    """Generate a short conversation title from the user's first question.

    Args:
        first_question: Required query-string parameter; the text the title
            is derived from.

    Returns:
        A dict ``{"title": <str>}``. The title is the model's reply with
        surrounding whitespace removed, or ``"General Inquiry"`` when the
        question is blank or the model returns nothing usable.
    """
    # Same default the prompt instructs the model to use for unclear input.
    fallback_title = "General Inquiry"

    # A blank question carries nothing to summarise; skip the remote call.
    if not first_question.strip():
        return {"title": fallback_title}

    # Initialize the LLM. The original comment states this is the same model
    # as in prepare_env.py (not visible from this file).
    llm = GoogleGenerativeAI(
        model="gemini-2.0-flash",
        google_api_key=os.getenv("GOOGLE_API_KEY"),
    )

    prompt = f"""Analyze this question and generate a very short title (3-5 words max):
1. If it's medical/vaccine-related: Create a professional clinical title
2. If non-medical: Create a general topic title
3. If unclear or greeting: Use "General Inquiry"
Always return just the title text, nothing else.
Question: {first_question}
Title:"""

    title = llm.invoke(prompt).strip()

    # Never hand the client an empty title.
    return {"title": title or fallback_title}
if __name__ == "__main__":
    # Script entry point: only runs when this file is executed directly.
    # uvicorn is imported here, inside the guard, so merely importing this
    # module does not pull it in.
    from uvicorn import run as _serve

    # Listen on every interface, port 7860.
    _serve(app, host="0.0.0.0", port=7860)