uumerrr684 committed on
Commit
aaeb58e
·
verified ·
1 Parent(s): ffea308

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -16
app.py CHANGED
@@ -330,14 +330,16 @@ class ProductionRAGSystem:
330
  # Always generate extracted answer
331
  extracted_answer = self.extract_direct_answer(query, best_result['content'])
332
 
333
- # Try AI answer with minimal tokens
334
  ai_answer = None
335
  openrouter_key = os.environ.get("OPENROUTER_API_KEY")
336
 
337
  if openrouter_key:
338
- # Use much shorter context and fewer tokens
339
- context = search_results[0]['content'][:200] # Reduced from 500
340
- prompt = f"Q: {query}\nA:" # Much shorter prompt
 
 
341
 
342
  try:
343
  response = requests.post(
@@ -351,14 +353,16 @@ class ProductionRAGSystem:
351
  json={
352
  "model": "openai/gpt-3.5-turbo",
353
  "messages": [{"role": "user", "content": prompt}],
354
- "max_tokens": 30, # Reduced from 100
355
  "temperature": 0.1
356
  },
357
  timeout=10
358
  )
359
 
360
  if response.status_code == 200:
361
- ai_answer = response.json()['choices'][0]['message']['content'].strip()
 
 
362
  elif response.status_code == 402:
363
  st.error("💳 OpenRouter credits exhausted. Using extracted answers only.")
364
  else:
@@ -366,14 +370,23 @@ class ProductionRAGSystem:
366
 
367
  except Exception as e:
368
  st.error(f"API Exception: {str(e)}")
369
-
370
- return {
371
- 'ai_answer': ai_answer,
372
- 'extracted_answer': extracted_answer,
373
- 'sources': sources,
374
- 'confidence': avg_confidence,
375
- 'has_both': ai_answer is not None
376
- }
 
 
 
 
 
 
 
 
 
377
 
378
  def get_general_ai_response(query):
379
  """Get AI response for general questions with minimal token usage"""
@@ -676,8 +689,8 @@ if prompt := st.chat_input("Ask questions about your documents..."):
676
  # Search documents first
677
  search_results = rag_system.search(prompt, n_results=3)
678
 
679
- # Check if we found relevant documents (confidence > 0.05)
680
- if search_results and search_results[0]['similarity'] > 0.05:
681
  # Generate document-based answer
682
  result = rag_system.generate_answer(prompt, search_results)
683
 
 
330
  # Always generate extracted answer
331
  extracted_answer = self.extract_direct_answer(query, best_result['content'])
332
 
333
+ # Try AI answer with minimal tokens - send only extracted answer, not full chunks
334
  ai_answer = None
335
  openrouter_key = os.environ.get("OPENROUTER_API_KEY")
336
 
337
  if openrouter_key:
338
+ # Use the extracted answer as context instead of raw chunks
339
+ extracted_answer = self.extract_direct_answer(query, best_result['content'])
340
+
341
+ # Super minimal prompt with just the extracted info
342
+ prompt = f"Improve: {extracted_answer}" # Very short prompt
343
 
344
  try:
345
  response = requests.post(
 
353
  json={
354
  "model": "openai/gpt-3.5-turbo",
355
  "messages": [{"role": "user", "content": prompt}],
356
+ "max_tokens": 25, # Very small
357
  "temperature": 0.1
358
  },
359
  timeout=10
360
  )
361
 
362
  if response.status_code == 200:
363
+ ai_response = response.json()['choices'][0]['message']['content'].strip()
364
+ # Use AI response if it's actually better, otherwise stick with extracted
365
+ ai_answer = ai_response if len(ai_response) > 10 else extracted_answer
366
  elif response.status_code == 402:
367
  st.error("💳 OpenRouter credits exhausted. Using extracted answers only.")
368
  else:
 
370
 
371
  except Exception as e:
372
  st.error(f"API Exception: {str(e)}")
373
+
374
+ return {
375
+ 'ai_answer': ai_answer,
376
+ 'extracted_answer': extracted_answer,
377
+ 'sources': sources,
378
+ 'confidence': avg_confidence,
379
+ 'has_both': ai_answer is not None
380
+ }
381
+ else:
382
+ # No API key - just return extracted answer
383
+ return {
384
+ 'ai_answer': None,
385
+ 'extracted_answer': extracted_answer,
386
+ 'sources': sources,
387
+ 'confidence': avg_confidence,
388
+ 'has_both': False
389
+ }
390
 
391
  def get_general_ai_response(query):
392
  """Get AI response for general questions with minimal token usage"""
 
689
  # Search documents first
690
  search_results = rag_system.search(prompt, n_results=3)
691
 
692
+ # Check if we found relevant documents (much lower threshold)
693
+ if search_results and search_results[0]['similarity'] > 0.01: # Very low threshold
694
  # Generate document-based answer
695
  result = rag_system.generate_answer(prompt, search_results)
696