Zeggai fansa34 committed on
Commit
07c2b95
·
verified ·
1 Parent(s): c23c6b4

modify main (#2)

Browse files

- modify main (85b34987f65ac2d0ae5e793d781a3dc4beb529ac)


Co-authored-by: Traikia Ilyes <fansa34@users.noreply.huggingface.co>

Files changed (1) hide show
  1. app.py +24 -3
app.py CHANGED
@@ -4,7 +4,8 @@ from rag_pipeline import full_rag_pipeline
4
  from langchain_google_genai import GoogleGenerativeAI
5
  import os
6
  from dotenv import load_dotenv
7
-
 
8
  # Load environment variables from .env file
9
  load_dotenv()
10
 
@@ -17,8 +18,28 @@ expanding_retriever = prepare_environment_and_retriever()
17
 
18
  @app.get("/ask")
19
  def ask_question(question: str, with_citations: bool = Query(False, description="Include citations in the response")):
20
- response = full_rag_pipeline(question, expanding_retriever,clean_all_citations=with_citations)
21
- return {"question": question, "answer": response}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  @app.get("/generate_title")
23
  def generate_title(first_question: str = Query(..., description="The first question to generate a title from")):
24
  # Initialize the LLM - using the same model as in prepare_env.py
 
4
  from langchain_google_genai import GoogleGenerativeAI
5
  import os
6
  from dotenv import load_dotenv
7
+ import psutil
8
+ import os
9
  # Load environment variables from .env file
10
  load_dotenv()
11
 
 
18
 
19
@app.get("/ask")
def ask_question(question: str, with_citations: bool = Query(False, description="Include citations in the response")):
    """Answer *question* via the RAG pipeline and attach runtime metrics.

    Returns a JSON-serializable dict with the original question, the
    pipeline's answer, and per-request metrics: wall latency in seconds
    and the process RSS delta in MB observed across the pipeline call.
    """
    # BUG FIX: the original used time.time() but the commit only added
    # `import psutil` / `import os` — `time` was never imported, so the
    # endpoint raised NameError on first request. Import locally to keep
    # this handler self-contained.
    import time

    process = psutil.Process(os.getpid())
    # perf_counter is monotonic and the correct clock for measuring an
    # interval; time.time() can jump if the wall clock is adjusted.
    start_time = time.perf_counter()
    start_mem = process.memory_info().rss  # resident set size, bytes

    # Run RAG
    response = full_rag_pipeline(question, expanding_retriever, clean_all_citations=with_citations)

    end_time = time.perf_counter()
    end_mem = process.memory_info().rss

    # Metrics — NOTE: the RSS delta can be negative (allocator/GC may
    # release memory during the call); it is a rough indicator only.
    latency = end_time - start_time
    ram_used_mb = (end_mem - start_mem) / (1024 ** 2)  # bytes -> MB

    return {
        "question": question,
        "answer": response,
        "metrics": {
            "latency_seconds": round(latency, 3),
            "ram_usage_delta_mb": round(ram_used_mb, 2),
        },
    }
43
  @app.get("/generate_title")
44
  def generate_title(first_question: str = Query(..., description="The first question to generate a title from")):
45
  # Initialize the LLM - using the same model as in prepare_env.py