Spaces:

aseelflihan
/

BioRAG

Running

App Files Community

aseelflihan committed 17 days ago

Commit

5cb4c11

1 Parent(s): a556c58

feat: add token usage tracking and display, update sample questions for demo scenarios

Browse files

Files changed (4) hide show

src/bio_rag/generator.py +12 -0
static/index.html +6 -6
static/js/app.js +2 -0
web_app.py +7 -0

src/bio_rag/generator.py CHANGED Viewed

@@ -10,6 +10,12 @@ from .retriever import RetrievedPassage
 logger = logging.getLogger(__name__)
 # Switch to use Groq API instead of local Models
 class BiomedicalAnswerGenerator:
     """Generates answers using a biomedical LLM via Groq API."""
@@ -19,6 +25,7 @@ class BiomedicalAnswerGenerator:
         self._is_seq2seq = False
         self.client = Groq(api_key=os.getenv("GROQ_API_KEY"))
         logger.info("Loaded Groq API Generator with model: %s", self.model_name)
     def generate(self, question: str, passages: Iterable[RetrievedPassage]) -> str:
         passage_list = list(passages)
@@ -43,6 +50,11 @@ class BiomedicalAnswerGenerator:
                 kwargs["response_format"] = {"type": "json_object"}
             response = self.client.chat.completions.create(**kwargs)
             return response.choices[0].message.content.strip()
         except Exception as e:
             logger.error("Error generating with Groq API: %s", e)

 logger = logging.getLogger(__name__)
+class TokenUsage:
+    def __init__(self, prompt_tokens=0, completion_tokens=0):
+        self.prompt_tokens = prompt_tokens
+        self.completion_tokens = completion_tokens
+        self.total_tokens = prompt_tokens + completion_tokens
 # Switch to use Groq API instead of local Models
 class BiomedicalAnswerGenerator:
     """Generates answers using a biomedical LLM via Groq API."""
         self._is_seq2seq = False
         self.client = Groq(api_key=os.getenv("GROQ_API_KEY"))
         logger.info("Loaded Groq API Generator with model: %s", self.model_name)
+        self.last_usage = TokenUsage()
     def generate(self, question: str, passages: Iterable[RetrievedPassage]) -> str:
         passage_list = list(passages)
                 kwargs["response_format"] = {"type": "json_object"}
             response = self.client.chat.completions.create(**kwargs)
+            if hasattr(response, 'usage') and response.usage:
+                self.last_usage = TokenUsage(
+                    prompt_tokens=response.usage.prompt_tokens or 0,
+                    completion_tokens=response.usage.completion_tokens or 0,
+                )
             return response.choices[0].message.content.strip()
         except Exception as e:
             logger.error("Error generating with Groq API: %s", e)

static/index.html CHANGED Viewed

@@ -90,13 +90,13 @@ Diabetes Domain Only
 <span class="suggestion-icon">💊</span>
 <span class="suggestion-text">Is metformin safe for patients with kidney disease?</span>
 </button>
-<button class="suggestion-card" data-question="How does insulin resistance develop in type 2 diabetes?">
-<span class="suggestion-icon">🧪</span>
-<span class="suggestion-text">How does insulin resistance develop in type 2 diabetes?</span>
 </button>
-<button class="suggestion-card" data-question="Can type 2 diabetes be prevented through lifestyle changes?">
-<span class="suggestion-icon">🏃</span>
-<span class="suggestion-text">Can type 2 diabetes be prevented through lifestyle changes?</span>
 </button>
 </div>
 </div>

 <span class="suggestion-icon">💊</span>
 <span class="suggestion-text">Is metformin safe for patients with kidney disease?</span>
 </button>
+<button class="suggestion-card" data-question="Is insulin dosage adjustment necessary for type 1 diabetic patients with severe renal impairment?">
+<span class="suggestion-icon">⚠️</span>
+<span class="suggestion-text">Insulin dosage for diabetics with renal impairment</span>
 </button>
+<button class="suggestion-card" data-question="Are arterial stiffness and central arterial wave reflection associated with serum uric acid in patients with coronary artery disease?">
+<span class="suggestion-icon">🚫</span>
+<span class="suggestion-text">Test: Non-diabetes question (should be rejected)</span>
 </button>
 </div>
 </div>

static/js/app.js CHANGED Viewed

@@ -531,6 +531,8 @@ ${data.processing_stats ? `
 <div>📄 <strong>Passages Retrieved:</strong> ${data.processing_stats.passages_retrieved} → Top ${Math.min(data.processing_stats.passages_retrieved, 10)} after RRF</div>
 <div>✂️ <strong>Claims Decomposed:</strong> ${data.processing_stats.claims_verified}</div>
 <div>🔬 <strong>Total Evidence Evaluated:</strong> ${data.processing_stats.total_evidence_evaluated} (${data.processing_stats.claims_verified} claims × ${data.processing_stats.evidence_per_claim} docs)</div>
 ${data.processing_stats.phase_times ? `
 <div style="margin-top:4px;">⏱️ <strong>Phase Times:</strong>
 Query Expansion: ${data.processing_stats.phase_times.query_expansion || 0}s •

 <div>📄 <strong>Passages Retrieved:</strong> ${data.processing_stats.passages_retrieved} → Top ${Math.min(data.processing_stats.passages_retrieved, 10)} after RRF</div>
 <div>✂️ <strong>Claims Decomposed:</strong> ${data.processing_stats.claims_verified}</div>
 <div>🔬 <strong>Total Evidence Evaluated:</strong> ${data.processing_stats.total_evidence_evaluated} (${data.processing_stats.claims_verified} claims × ${data.processing_stats.evidence_per_claim} docs)</div>
+${data.processing_stats.token_usage ? `
+<div>🪙 <strong>Tokens:</strong> Input: ${data.processing_stats.token_usage.prompt_tokens} • Output: ${data.processing_stats.token_usage.completion_tokens} • Total: ${data.processing_stats.token_usage.total_tokens}</div>` : ''}
 ${data.processing_stats.phase_times ? `
 <div style="margin-top:4px;">⏱️ <strong>Phase Times:</strong>
 Query Expansion: ${data.processing_stats.phase_times.query_expansion || 0}s •

web_app.py CHANGED Viewed

@@ -46,6 +46,7 @@ def ask_stream():
         try:
             _start_time = time.time()
             phase_times = {}
             yield f"data: {json_lib.dumps({'step': 0, 'status': 'active'})}\n\n"
             time.sleep(0.1)
             yield f"data: {json_lib.dumps({'step': 0, 'status': 'done'})}\n\n"
@@ -83,6 +84,11 @@ def ask_stream():
             _p3_start = time.time()
             original_answer = pipeline.generator.generate(question, passages)
             phase_times['generation'] = round(time.time() - _p3_start, 2)
             yield f"data: {json_lib.dumps({'step': 3, 'status': 'done'})}\n\n"
             time.sleep(0.1)
@@ -160,6 +166,7 @@ def ask_stream():
                     'evidence_per_claim': 10,
                     'total_evidence_evaluated': len(claims) * 10,
                     'phase_times': phase_times,
                 }
             }
             yield f"data: {json_lib.dumps({'complete': True, 'result': r})}\n\n"

         try:
             _start_time = time.time()
             phase_times = {}
+            token_stats = {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0}
             yield f"data: {json_lib.dumps({'step': 0, 'status': 'active'})}\n\n"
             time.sleep(0.1)
             yield f"data: {json_lib.dumps({'step': 0, 'status': 'done'})}\n\n"
             _p3_start = time.time()
             original_answer = pipeline.generator.generate(question, passages)
             phase_times['generation'] = round(time.time() - _p3_start, 2)
+            if hasattr(pipeline.generator, 'last_usage'):
+                u = pipeline.generator.last_usage
+                token_stats['prompt_tokens'] += u.prompt_tokens
+                token_stats['completion_tokens'] += u.completion_tokens
+                token_stats['total_tokens'] += u.total_tokens
             yield f"data: {json_lib.dumps({'step': 3, 'status': 'done'})}\n\n"
             time.sleep(0.1)
                     'evidence_per_claim': 10,
                     'total_evidence_evaluated': len(claims) * 10,
                     'phase_times': phase_times,
+                    'token_usage': token_stats,
                 }
             }
             yield f"data: {json_lib.dumps({'complete': True, 'result': r})}\n\n"