Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -330,14 +330,16 @@ class ProductionRAGSystem:
|
|
| 330 |
# Always generate extracted answer
|
| 331 |
extracted_answer = self.extract_direct_answer(query, best_result['content'])
|
| 332 |
|
| 333 |
-
# Try AI answer with minimal tokens
|
| 334 |
ai_answer = None
|
| 335 |
openrouter_key = os.environ.get("OPENROUTER_API_KEY")
|
| 336 |
|
| 337 |
if openrouter_key:
|
| 338 |
-
# Use
|
| 339 |
-
|
| 340 |
-
|
|
|
|
|
|
|
| 341 |
|
| 342 |
try:
|
| 343 |
response = requests.post(
|
|
@@ -351,14 +353,16 @@ class ProductionRAGSystem:
|
|
| 351 |
json={
|
| 352 |
"model": "openai/gpt-3.5-turbo",
|
| 353 |
"messages": [{"role": "user", "content": prompt}],
|
| 354 |
-
"max_tokens":
|
| 355 |
"temperature": 0.1
|
| 356 |
},
|
| 357 |
timeout=10
|
| 358 |
)
|
| 359 |
|
| 360 |
if response.status_code == 200:
|
| 361 |
-
|
|
|
|
|
|
|
| 362 |
elif response.status_code == 402:
|
| 363 |
st.error("💳 OpenRouter credits exhausted. Using extracted answers only.")
|
| 364 |
else:
|
|
@@ -366,14 +370,23 @@ class ProductionRAGSystem:
|
|
| 366 |
|
| 367 |
except Exception as e:
|
| 368 |
st.error(f"API Exception: {str(e)}")
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 377 |
|
| 378 |
def get_general_ai_response(query):
|
| 379 |
"""Get AI response for general questions with minimal token usage"""
|
|
@@ -676,8 +689,8 @@ if prompt := st.chat_input("Ask questions about your documents..."):
|
|
| 676 |
# Search documents first
|
| 677 |
search_results = rag_system.search(prompt, n_results=3)
|
| 678 |
|
| 679 |
-
# Check if we found relevant documents (
|
| 680 |
-
if search_results and search_results[0]['similarity'] > 0.
|
| 681 |
# Generate document-based answer
|
| 682 |
result = rag_system.generate_answer(prompt, search_results)
|
| 683 |
|
|
|
|
| 330 |
# Always generate extracted answer
|
| 331 |
extracted_answer = self.extract_direct_answer(query, best_result['content'])
|
| 332 |
|
| 333 |
+
# Try AI answer with minimal tokens - send only extracted answer, not full chunks
|
| 334 |
ai_answer = None
|
| 335 |
openrouter_key = os.environ.get("OPENROUTER_API_KEY")
|
| 336 |
|
| 337 |
if openrouter_key:
|
| 338 |
+
# Use the extracted answer as context instead of raw chunks
|
| 339 |
+
extracted_answer = self.extract_direct_answer(query, best_result['content'])
|
| 340 |
+
|
| 341 |
+
# Super minimal prompt with just the extracted info
|
| 342 |
+
prompt = f"Improve: {extracted_answer}" # Very short prompt
|
| 343 |
|
| 344 |
try:
|
| 345 |
response = requests.post(
|
|
|
|
| 353 |
json={
|
| 354 |
"model": "openai/gpt-3.5-turbo",
|
| 355 |
"messages": [{"role": "user", "content": prompt}],
|
| 356 |
+
"max_tokens": 25, # Very small
|
| 357 |
"temperature": 0.1
|
| 358 |
},
|
| 359 |
timeout=10
|
| 360 |
)
|
| 361 |
|
| 362 |
if response.status_code == 200:
|
| 363 |
+
ai_response = response.json()['choices'][0]['message']['content'].strip()
|
| 364 |
+
# Use AI response if it's actually better, otherwise stick with extracted
|
| 365 |
+
ai_answer = ai_response if len(ai_response) > 10 else extracted_answer
|
| 366 |
elif response.status_code == 402:
|
| 367 |
st.error("💳 OpenRouter credits exhausted. Using extracted answers only.")
|
| 368 |
else:
|
|
|
|
| 370 |
|
| 371 |
except Exception as e:
|
| 372 |
st.error(f"API Exception: {str(e)}")
|
| 373 |
+
|
| 374 |
+
return {
|
| 375 |
+
'ai_answer': ai_answer,
|
| 376 |
+
'extracted_answer': extracted_answer,
|
| 377 |
+
'sources': sources,
|
| 378 |
+
'confidence': avg_confidence,
|
| 379 |
+
'has_both': ai_answer is not None
|
| 380 |
+
}
|
| 381 |
+
else:
|
| 382 |
+
# No API key - just return extracted answer
|
| 383 |
+
return {
|
| 384 |
+
'ai_answer': None,
|
| 385 |
+
'extracted_answer': extracted_answer,
|
| 386 |
+
'sources': sources,
|
| 387 |
+
'confidence': avg_confidence,
|
| 388 |
+
'has_both': False
|
| 389 |
+
}
|
| 390 |
|
| 391 |
def get_general_ai_response(query):
|
| 392 |
"""Get AI response for general questions with minimal token usage"""
|
|
|
|
| 689 |
# Search documents first
|
| 690 |
search_results = rag_system.search(prompt, n_results=3)
|
| 691 |
|
| 692 |
+
# Check if we found relevant documents (much lower threshold)
|
| 693 |
+
if search_results and search_results[0]['similarity'] > 0.01: # Very low threshold
|
| 694 |
# Generate document-based answer
|
| 695 |
result = rag_system.generate_answer(prompt, search_results)
|
| 696 |
|