Spaces:
Sleeping
Sleeping
modify main (#2)
Browse files- modify main (85b34987f65ac2d0ae5e793d781a3dc4beb529ac)
Co-authored-by: Traikia Ilyes <fansa34@users.noreply.huggingface.co>
app.py
CHANGED
|
@@ -4,7 +4,8 @@ from rag_pipeline import full_rag_pipeline
|
|
| 4 |
from langchain_google_genai import GoogleGenerativeAI
|
| 5 |
import os
|
| 6 |
from dotenv import load_dotenv
|
| 7 |
-
|
|
|
|
| 8 |
# Load environment variables from .env file
|
| 9 |
load_dotenv()
|
| 10 |
|
|
@@ -17,8 +18,28 @@ expanding_retriever = prepare_environment_and_retriever()
|
|
| 17 |
|
| 18 |
@app.get("/ask")
|
| 19 |
def ask_question(question: str, with_citations: bool = Query(False, description="Include citations in the response")):
|
| 20 |
-
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
@app.get("/generate_title")
|
| 23 |
def generate_title(first_question: str = Query(..., description="The first question to generate a title from")):
|
| 24 |
# Initialize the LLM - using the same model as in prepare_env.py
|
|
|
|
| 4 |
from langchain_google_genai import GoogleGenerativeAI
|
| 5 |
import os
|
| 6 |
from dotenv import load_dotenv
|
| 7 |
+
import psutil
|
| 8 |
+
import os
|
| 9 |
# Load environment variables from .env file
|
| 10 |
load_dotenv()
|
| 11 |
|
|
|
|
| 18 |
|
| 19 |
@app.get("/ask")
def ask_question(question: str, with_citations: bool = Query(False, description="Include citations in the response")):
    """Answer a question via the RAG pipeline and report per-request metrics.

    Runs ``full_rag_pipeline`` against the module-level ``expanding_retriever``
    and returns the answer together with wall-time latency and the resident-set
    (RSS) memory delta observed across the call.

    Args:
        question: The user's question, passed straight to the RAG pipeline.
        with_citations: When True, the pipeline is asked to clean/include
            citations (forwarded as ``clean_all_citations``).

    Returns:
        dict with keys ``question``, ``answer`` and ``metrics``
        (``latency_seconds``, ``ram_usage_delta_mb``).
    """
    # NOTE(review): this handler uses `time` — the visible diff only adds
    # `import psutil` / `import os`; confirm `time` is imported at module top.
    process = psutil.Process(os.getpid())

    # perf_counter() is monotonic; time.time() is wall-clock and can jump
    # (NTP adjustments), yielding negative or skewed latencies.
    start_time = time.perf_counter()
    start_mem = process.memory_info().rss  # RAM in bytes

    # Run RAG
    response = full_rag_pipeline(question, expanding_retriever, clean_all_citations=with_citations)

    end_time = time.perf_counter()
    end_mem = process.memory_info().rss

    # Metrics — note the RSS delta is a rough signal: allocator caching and
    # other threads in the process can make it zero or even negative.
    latency = end_time - start_time
    ram_used_mb = (end_mem - start_mem) / (1024 ** 2)  # Convert to MB

    return {
        "question": question,
        "answer": response,
        "metrics": {
            "latency_seconds": round(latency, 3),
            "ram_usage_delta_mb": round(ram_used_mb, 2)
        }
    }
|
| 43 |
@app.get("/generate_title")
|
| 44 |
def generate_title(first_question: str = Query(..., description="The first question to generate a title from")):
|
| 45 |
# Initialize the LLM - using the same model as in prepare_env.py
|