File size: 2,125 Bytes
7f51074
 
 
 
 
 
07c2b95
 
7f51074
 
9b8ecff
7f51074
 
 
 
 
 
 
 
 
07c2b95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f51074
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Module setup: imports grouped stdlib / third-party / local (PEP 8);
# the duplicate `import os` present before has been removed.
import os
import time

import psutil
from dotenv import load_dotenv
from fastapi import FastAPI, Query
from langchain_google_genai import GoogleGenerativeAI

from prepare_env import prepare_environment_and_retriever
from rag_pipeline import full_rag_pipeline

# Load environment variables (e.g. GOOGLE_API_KEY) from a .env file.
load_dotenv()

app = FastAPI()

# Prepare the environment and load the vector store once at startup;
# the retriever is shared by every request handler below.
expanding_retriever = prepare_environment_and_retriever()


@app.get("/ask")
def ask_question(question: str, with_citations: bool = Query(False, description="Include citations in the response")):
    """Answer *question* through the RAG pipeline, reporting latency and RAM delta."""
    proc = psutil.Process(os.getpid())

    t0 = time.time()
    mem_before = proc.memory_info().rss  # resident set size in bytes

    # NOTE(review): with_citations is forwarded as clean_all_citations —
    # the names suggest opposite meanings; confirm the flag is not inverted
    # in full_rag_pipeline.
    answer = full_rag_pipeline(question, expanding_retriever, clean_all_citations=with_citations)

    t1 = time.time()
    mem_after = proc.memory_info().rss

    return {
        "question": question,
        "answer": answer,
        "metrics": {
            "latency_seconds": round(t1 - t0, 3),
            # RSS delta across the call, converted to MB; may be noisy or negative.
            "ram_usage_delta_mb": round((mem_after - mem_before) / (1024 ** 2), 2),
        },
    }
@app.get("/generate_title")
def generate_title(first_question: str = Query(..., description="The first question to generate a title from")):
    """Generate a short (3-5 word) conversation title from the user's first question."""
    # Same model as prepare_env.py; note a fresh client is built per request.
    llm = GoogleGenerativeAI(
        model="gemini-2.0-flash",
        google_api_key=os.getenv("GOOGLE_API_KEY"),
    )

    prompt = f"""Analyze this question and generate a very short title (3-5 words max):
    1. If it's medical/vaccine-related: Create a professional clinical title
    2. If non-medical: Create a general topic title
    3. If unclear or greeting: Use "General Inquiry"
    
    Always return just the title text, nothing else.
    
    Question: {first_question}
    
    Title:"""

    return {"title": llm.invoke(prompt).strip()}

if __name__ == "__main__":
    # Launch the API with uvicorn when this module is executed directly.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)