uumerrr684 committed on
Commit
91099df
·
verified ·
1 Parent(s): aaeb58e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -72
app.py CHANGED
@@ -312,8 +312,8 @@ class ProductionRAGSystem:
312
 
313
  return content[:200] + "..."
314
 
315
- def generate_answer(self, query, search_results):
316
- """Generate both AI and extracted answers with minimal token usage"""
317
  if not search_results:
318
  return {
319
  'ai_answer': "No information found in documents.",
@@ -330,16 +330,32 @@ class ProductionRAGSystem:
330
  # Always generate extracted answer
331
  extracted_answer = self.extract_direct_answer(query, best_result['content'])
332
 
333
- # Try AI answer with minimal tokens - send only extracted answer, not full chunks
334
  ai_answer = None
335
  openrouter_key = os.environ.get("OPENROUTER_API_KEY")
336
 
337
- if openrouter_key:
338
- # Use the extracted answer as context instead of raw chunks
339
- extracted_answer = self.extract_direct_answer(query, best_result['content'])
 
340
 
341
- # Super minimal prompt with just the extracted info
342
- prompt = f"Improve: {extracted_answer}" # Very short prompt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
343
 
344
  try:
345
  response = requests.post(
@@ -353,51 +369,52 @@ class ProductionRAGSystem:
353
  json={
354
  "model": "openai/gpt-3.5-turbo",
355
  "messages": [{"role": "user", "content": prompt}],
356
- "max_tokens": 25, # Very small
357
- "temperature": 0.1
358
  },
359
- timeout=10
360
  )
361
 
362
  if response.status_code == 200:
363
  ai_response = response.json()['choices'][0]['message']['content'].strip()
364
- # Use AI response if it's actually better, otherwise stick with extracted
365
  ai_answer = ai_response if len(ai_response) > 10 else extracted_answer
366
  elif response.status_code == 402:
367
- st.error("πŸ’³ OpenRouter credits exhausted. Using extracted answers only.")
 
 
368
  else:
369
- st.error(f"API Error {response.status_code}")
370
 
 
 
371
  except Exception as e:
372
- st.error(f"API Exception: {str(e)}")
373
-
374
- return {
375
- 'ai_answer': ai_answer,
376
- 'extracted_answer': extracted_answer,
377
- 'sources': sources,
378
- 'confidence': avg_confidence,
379
- 'has_both': ai_answer is not None
380
- }
381
- else:
382
- # No API key - just return extracted answer
383
- return {
384
- 'ai_answer': None,
385
- 'extracted_answer': extracted_answer,
386
- 'sources': sources,
387
- 'confidence': avg_confidence,
388
- 'has_both': False
389
- }
390
 
391
- def get_general_ai_response(query):
392
- """Get AI response for general questions with minimal token usage"""
393
  openrouter_key = os.environ.get("OPENROUTER_API_KEY")
394
 
395
  if not openrouter_key:
396
  return "I can only answer questions about your uploaded documents. Please add an OpenRouter API key for general conversations."
397
 
398
  try:
399
- # Much shorter prompt to save tokens
400
- short_query = query[:50] # Limit input length
 
 
 
 
 
 
 
401
 
402
  response = requests.post(
403
  "https://openrouter.ai/api/v1/chat/completions",
@@ -409,9 +426,9 @@ def get_general_ai_response(query):
409
  },
410
  json={
411
  "model": "openai/gpt-3.5-turbo",
412
- "messages": [{"role": "user", "content": short_query}],
413
- "max_tokens": 25, # Reduced from 150
414
- "temperature": 0.7
415
  },
416
  timeout=15
417
  )
@@ -420,9 +437,13 @@ def get_general_ai_response(query):
420
  return response.json()['choices'][0]['message']['content'].strip()
421
  elif response.status_code == 402:
422
  return "Sorry, OpenRouter credits exhausted. Please add more credits or ask document-specific questions."
 
 
423
  else:
424
  return f"API error (Status: {response.status_code}). Try asking about documents instead."
425
 
 
 
426
  except Exception as e:
427
  return f"Error: {str(e)}"
428
 
@@ -606,6 +627,10 @@ with st.sidebar:
606
  )
607
  if test_response.status_code == 200:
608
  st.success("βœ… API working correctly!")
 
 
 
 
609
  else:
610
  st.error(f"❌ API Error: {test_response.status_code}")
611
  except Exception as e:
@@ -614,14 +639,20 @@ with st.sidebar:
614
  st.error("❌ No OpenRouter API Key")
615
  st.info("Add OPENROUTER_API_KEY in Hugging Face Space settings β†’ Variables and secrets")
616
 
617
- # RAG Settings
618
- use_ai_enhancement = st.checkbox("Use AI Enhancement", value=bool(openrouter_key))
619
- low_token_mode = st.checkbox("Low Token Mode (Save Credits)", value=True)
620
- show_sources = st.checkbox("Show Sources", value=True)
621
- show_confidence = st.checkbox("Show Confidence Scores", value=True)
 
 
 
622
 
623
- if low_token_mode:
624
- st.info("Using minimal tokens to conserve credits")
 
 
 
625
 
626
  st.divider()
627
 
@@ -656,10 +687,11 @@ for message in st.session_state.messages:
656
  rag_info = message["rag_info"]
657
 
658
  if show_sources and rag_info.get("sources"):
 
659
  st.markdown(f"""
660
  <div class="rag-attribution">
661
  <strong>πŸ“ Sources:</strong> {', '.join(rag_info['sources'])}<br>
662
- <strong>🎯 Confidence:</strong> {rag_info['confidence']*100:.1f}%
663
  </div>
664
  """, unsafe_allow_html=True)
665
 
@@ -692,22 +724,28 @@ if prompt := st.chat_input("Ask questions about your documents..."):
692
  # Check if we found relevant documents (much lower threshold)
693
  if search_results and search_results[0]['similarity'] > 0.01: # Very low threshold
694
  # Generate document-based answer
695
- result = rag_system.generate_answer(prompt, search_results)
 
 
 
 
 
696
 
697
  # Display AI answer or extracted answer
698
  if use_ai_enhancement and result['has_both']:
699
  answer_text = result['ai_answer']
700
- st.markdown(f"πŸ€– **AI Answer:** {answer_text}")
701
  else:
702
  answer_text = result['extracted_answer']
703
  st.markdown(f"πŸ“„ **Document Answer:** {answer_text}")
704
 
705
  # Show RAG info
706
  if show_sources and result['sources']:
 
707
  st.markdown(f"""
708
  <div class="rag-attribution">
709
  <strong>πŸ“ Sources:</strong> {', '.join(result['sources'])}<br>
710
- <strong>🎯 Confidence:</strong> {result['confidence']*100:.1f}%<br>
711
  <strong>πŸ“Š Found:</strong> {len(search_results)} relevant sections
712
  </div>
713
  """, unsafe_allow_html=True)
@@ -725,26 +763,16 @@ if prompt := st.chat_input("Ask questions about your documents..."):
725
  }
726
 
727
  else:
728
- # No relevant documents found - handle based on mode
729
- if low_token_mode:
730
- no_docs_msg = "No relevant information found in your documents. (General AI disabled in low-token mode to save credits)"
731
- st.warning(no_docs_msg)
732
- assistant_message = {
733
- "role": "assistant",
734
- "content": no_docs_msg,
735
- "rag_info": {"sources": [], "confidence": 0, "mode": "no_docs"}
736
- }
737
- else:
738
- # Use general AI only if not in low-token mode
739
- st.info("No relevant documents found. Switching to general AI mode...")
740
- general_response = get_general_ai_response(prompt)
741
- st.markdown(f"πŸ’¬ **General AI:** {general_response}")
742
-
743
- assistant_message = {
744
- "role": "assistant",
745
- "content": general_response,
746
- "rag_info": {"sources": [], "confidence": 0, "mode": "general"}
747
- }
748
 
749
  else:
750
  # RAG system not ready - use general AI
@@ -753,7 +781,7 @@ if prompt := st.chat_input("Ask questions about your documents..."):
753
  else:
754
  st.error("RAG system not ready. Using general AI mode...")
755
 
756
- general_response = get_general_ai_response(prompt)
757
  st.markdown(f"πŸ’¬ **General AI:** {general_response}")
758
 
759
  assistant_message = {
@@ -771,4 +799,5 @@ if prompt := st.chat_input("Ask questions about your documents..."):
771
  # Footer info
772
  if rag_system and rag_system.model:
773
  doc_count = rag_system.get_collection_count()
774
- st.caption(f"πŸ“š Knowledge Base: {doc_count} indexed chunks | πŸ” RAG System Active")
 
 
312
 
313
  return content[:200] + "..."
314
 
315
+ def generate_answer(self, query, search_results, use_ai_enhancement=True, unlimited_tokens=False):
316
+ """Generate both AI and extracted answers with proper token handling"""
317
  if not search_results:
318
  return {
319
  'ai_answer': "No information found in documents.",
 
330
  # Always generate extracted answer
331
  extracted_answer = self.extract_direct_answer(query, best_result['content'])
332
 
333
+ # Try AI answer if requested and API key available
334
  ai_answer = None
335
  openrouter_key = os.environ.get("OPENROUTER_API_KEY")
336
 
337
+ if use_ai_enhancement and openrouter_key:
338
+ # Build context from search results
339
+ context = "\n\n".join([f"Source: {r['metadata']['source_file']}\nContent: {r['content']}"
340
+ for r in search_results[:3]])
341
 
342
+ # Create comprehensive prompt for unlimited tokens
343
+ if unlimited_tokens:
344
+ prompt = f"""Based on the following document context, provide a comprehensive and detailed answer to the user's question.
345
+
346
+ Context from documents:
347
+ {context}
348
+
349
+ User Question: {query}
350
+
351
+ Please provide a thorough, well-structured answer that directly addresses the question using the information from the documents. If the documents contain specific details, include them in your response."""
352
+ max_tokens = 500 # Higher token limit for detailed responses
353
+ temperature = 0.3
354
+ else:
355
+ # Fallback to shorter prompt
356
+ prompt = f"Context: {extracted_answer}\n\nQuestion: {query}\n\nImprove the answer:"
357
+ max_tokens = 150
358
+ temperature = 0.1
359
 
360
  try:
361
  response = requests.post(
 
369
  json={
370
  "model": "openai/gpt-3.5-turbo",
371
  "messages": [{"role": "user", "content": prompt}],
372
+ "max_tokens": max_tokens,
373
+ "temperature": temperature
374
  },
375
+ timeout=15
376
  )
377
 
378
  if response.status_code == 200:
379
  ai_response = response.json()['choices'][0]['message']['content'].strip()
 
380
  ai_answer = ai_response if len(ai_response) > 10 else extracted_answer
381
  elif response.status_code == 402:
382
+ st.warning("πŸ’³ OpenRouter credits exhausted. Using extracted answers only.")
383
+ elif response.status_code == 429:
384
+ st.warning("⏱️ Rate limit reached. Using extracted answers only.")
385
  else:
386
+ st.warning(f"API Error {response.status_code}. Using extracted answers only.")
387
 
388
+ except requests.exceptions.Timeout:
389
+ st.warning("⏱️ API timeout. Using extracted answers only.")
390
  except Exception as e:
391
+ st.warning(f"API Exception: {str(e)}. Using extracted answers only.")
392
+
393
+ return {
394
+ 'ai_answer': ai_answer,
395
+ 'extracted_answer': extracted_answer,
396
+ 'sources': sources,
397
+ 'confidence': avg_confidence,
398
+ 'has_both': ai_answer is not None
399
+ }
 
 
 
 
 
 
 
 
 
400
 
401
+ def get_general_ai_response(query, unlimited_tokens=False):
402
+ """Get AI response for general questions with proper token handling"""
403
  openrouter_key = os.environ.get("OPENROUTER_API_KEY")
404
 
405
  if not openrouter_key:
406
  return "I can only answer questions about your uploaded documents. Please add an OpenRouter API key for general conversations."
407
 
408
  try:
409
+ # Adjust parameters based on token availability
410
+ if unlimited_tokens:
411
+ max_tokens = 500
412
+ temperature = 0.7
413
+ prompt = f"Please provide a helpful and detailed answer to this question: {query}"
414
+ else:
415
+ max_tokens = 150
416
+ temperature = 0.7
417
+ prompt = query[:200] # Limit input length for token conservation
418
 
419
  response = requests.post(
420
  "https://openrouter.ai/api/v1/chat/completions",
 
426
  },
427
  json={
428
  "model": "openai/gpt-3.5-turbo",
429
+ "messages": [{"role": "user", "content": prompt}],
430
+ "max_tokens": max_tokens,
431
+ "temperature": temperature
432
  },
433
  timeout=15
434
  )
 
437
  return response.json()['choices'][0]['message']['content'].strip()
438
  elif response.status_code == 402:
439
  return "Sorry, OpenRouter credits exhausted. Please add more credits or ask document-specific questions."
440
+ elif response.status_code == 429:
441
+ return "Rate limit reached. Please try again in a moment."
442
  else:
443
  return f"API error (Status: {response.status_code}). Try asking about documents instead."
444
 
445
+ except requests.exceptions.Timeout:
446
+ return "Request timeout. Please try again."
447
  except Exception as e:
448
  return f"Error: {str(e)}"
449
 
 
627
  )
628
  if test_response.status_code == 200:
629
  st.success("βœ… API working correctly!")
630
+ elif test_response.status_code == 402:
631
+ st.error("❌ Credits exhausted")
632
+ elif test_response.status_code == 429:
633
+ st.warning("⏱️ Rate limited")
634
  else:
635
  st.error(f"❌ API Error: {test_response.status_code}")
636
  except Exception as e:
 
639
  st.error("❌ No OpenRouter API Key")
640
  st.info("Add OPENROUTER_API_KEY in Hugging Face Space settings β†’ Variables and secrets")
641
 
642
+ # Enhanced Settings
643
+ st.subheader("πŸš€ Token Settings")
644
+ unlimited_tokens = st.checkbox("πŸ”₯ Unlimited Tokens Mode", value=True, help="Use higher token limits for detailed responses")
645
+ use_ai_enhancement = st.checkbox("πŸ€– AI Enhancement", value=bool(openrouter_key), help="Enhance answers with AI when documents are found")
646
+
647
+ st.subheader("πŸŽ›οΈ Display Settings")
648
+ show_sources = st.checkbox("πŸ“ Show Sources", value=True)
649
+ show_confidence = st.checkbox("🎯 Show Confidence Scores", value=True)
650
 
651
+ # Token mode indicator
652
+ if unlimited_tokens:
653
+ st.success("πŸ”₯ Unlimited mode: Detailed responses enabled")
654
+ else:
655
+ st.info("πŸ’° Conservative mode: Limited tokens to save credits")
656
 
657
  st.divider()
658
 
 
687
  rag_info = message["rag_info"]
688
 
689
  if show_sources and rag_info.get("sources"):
690
+ confidence_text = f"{rag_info['confidence']*100:.1f}%" if show_confidence else ""
691
  st.markdown(f"""
692
  <div class="rag-attribution">
693
  <strong>πŸ“ Sources:</strong> {', '.join(rag_info['sources'])}<br>
694
+ <strong>🎯 Confidence:</strong> {confidence_text}
695
  </div>
696
  """, unsafe_allow_html=True)
697
 
 
724
  # Check if we found relevant documents (much lower threshold)
725
  if search_results and search_results[0]['similarity'] > 0.01: # Very low threshold
726
  # Generate document-based answer
727
+ result = rag_system.generate_answer(
728
+ prompt,
729
+ search_results,
730
+ use_ai_enhancement=use_ai_enhancement,
731
+ unlimited_tokens=unlimited_tokens
732
+ )
733
 
734
  # Display AI answer or extracted answer
735
  if use_ai_enhancement and result['has_both']:
736
  answer_text = result['ai_answer']
737
+ st.markdown(f"πŸ€– **AI Enhanced Answer:** {answer_text}")
738
  else:
739
  answer_text = result['extracted_answer']
740
  st.markdown(f"πŸ“„ **Document Answer:** {answer_text}")
741
 
742
  # Show RAG info
743
  if show_sources and result['sources']:
744
+ confidence_text = f"{result['confidence']*100:.1f}%" if show_confidence else ""
745
  st.markdown(f"""
746
  <div class="rag-attribution">
747
  <strong>πŸ“ Sources:</strong> {', '.join(result['sources'])}<br>
748
+ <strong>🎯 Confidence:</strong> {confidence_text}<br>
749
  <strong>πŸ“Š Found:</strong> {len(search_results)} relevant sections
750
  </div>
751
  """, unsafe_allow_html=True)
 
763
  }
764
 
765
  else:
766
+ # No relevant documents found - use general AI
767
+ st.info("No relevant documents found. Using general AI mode...")
768
+ general_response = get_general_ai_response(prompt, unlimited_tokens=unlimited_tokens)
769
+ st.markdown(f"πŸ’¬ **General AI:** {general_response}")
770
+
771
+ assistant_message = {
772
+ "role": "assistant",
773
+ "content": general_response,
774
+ "rag_info": {"sources": [], "confidence": 0, "mode": "general"}
775
+ }
 
 
 
 
 
 
 
 
 
 
776
 
777
  else:
778
  # RAG system not ready - use general AI
 
781
  else:
782
  st.error("RAG system not ready. Using general AI mode...")
783
 
784
+ general_response = get_general_ai_response(prompt, unlimited_tokens=unlimited_tokens)
785
  st.markdown(f"πŸ’¬ **General AI:** {general_response}")
786
 
787
  assistant_message = {
 
799
  # Footer info
800
  if rag_system and rag_system.model:
801
  doc_count = rag_system.get_collection_count()
802
+ token_mode = "πŸ”₯ Unlimited" if unlimited_tokens else "πŸ’° Conservative"
803
+ st.caption(f"πŸ“š Knowledge Base: {doc_count} indexed chunks | πŸ” RAG System Active | {token_mode} Token Mode")