Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -312,8 +312,8 @@ class ProductionRAGSystem:
|
|
| 312 |
|
| 313 |
return content[:200] + "..."
|
| 314 |
|
| 315 |
-
def generate_answer(self, query, search_results):
|
| 316 |
-
"""Generate both AI and extracted answers with
|
| 317 |
if not search_results:
|
| 318 |
return {
|
| 319 |
'ai_answer': "No information found in documents.",
|
|
@@ -330,16 +330,32 @@ class ProductionRAGSystem:
|
|
| 330 |
# Always generate extracted answer
|
| 331 |
extracted_answer = self.extract_direct_answer(query, best_result['content'])
|
| 332 |
|
| 333 |
-
# Try AI answer
|
| 334 |
ai_answer = None
|
| 335 |
openrouter_key = os.environ.get("OPENROUTER_API_KEY")
|
| 336 |
|
| 337 |
-
if openrouter_key:
|
| 338 |
-
#
|
| 339 |
-
|
|
|
|
| 340 |
|
| 341 |
-
#
|
| 342 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 343 |
|
| 344 |
try:
|
| 345 |
response = requests.post(
|
|
@@ -353,51 +369,52 @@ class ProductionRAGSystem:
|
|
| 353 |
json={
|
| 354 |
"model": "openai/gpt-3.5-turbo",
|
| 355 |
"messages": [{"role": "user", "content": prompt}],
|
| 356 |
-
"max_tokens":
|
| 357 |
-
"temperature":
|
| 358 |
},
|
| 359 |
-
timeout=
|
| 360 |
)
|
| 361 |
|
| 362 |
if response.status_code == 200:
|
| 363 |
ai_response = response.json()['choices'][0]['message']['content'].strip()
|
| 364 |
-
# Use AI response if it's actually better, otherwise stick with extracted
|
| 365 |
ai_answer = ai_response if len(ai_response) > 10 else extracted_answer
|
| 366 |
elif response.status_code == 402:
|
| 367 |
-
st.
|
|
|
|
|
|
|
| 368 |
else:
|
| 369 |
-
st.
|
| 370 |
|
|
|
|
|
|
|
| 371 |
except Exception as e:
|
| 372 |
-
st.
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
else:
|
| 382 |
-
# No API key - just return extracted answer
|
| 383 |
-
return {
|
| 384 |
-
'ai_answer': None,
|
| 385 |
-
'extracted_answer': extracted_answer,
|
| 386 |
-
'sources': sources,
|
| 387 |
-
'confidence': avg_confidence,
|
| 388 |
-
'has_both': False
|
| 389 |
-
}
|
| 390 |
|
| 391 |
-
def get_general_ai_response(query):
|
| 392 |
-
"""Get AI response for general questions with
|
| 393 |
openrouter_key = os.environ.get("OPENROUTER_API_KEY")
|
| 394 |
|
| 395 |
if not openrouter_key:
|
| 396 |
return "I can only answer questions about your uploaded documents. Please add an OpenRouter API key for general conversations."
|
| 397 |
|
| 398 |
try:
|
| 399 |
-
#
|
| 400 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
|
| 402 |
response = requests.post(
|
| 403 |
"https://openrouter.ai/api/v1/chat/completions",
|
|
@@ -409,9 +426,9 @@ def get_general_ai_response(query):
|
|
| 409 |
},
|
| 410 |
json={
|
| 411 |
"model": "openai/gpt-3.5-turbo",
|
| 412 |
-
"messages": [{"role": "user", "content":
|
| 413 |
-
"max_tokens":
|
| 414 |
-
"temperature":
|
| 415 |
},
|
| 416 |
timeout=15
|
| 417 |
)
|
|
@@ -420,9 +437,13 @@ def get_general_ai_response(query):
|
|
| 420 |
return response.json()['choices'][0]['message']['content'].strip()
|
| 421 |
elif response.status_code == 402:
|
| 422 |
return "Sorry, OpenRouter credits exhausted. Please add more credits or ask document-specific questions."
|
|
|
|
|
|
|
| 423 |
else:
|
| 424 |
return f"API error (Status: {response.status_code}). Try asking about documents instead."
|
| 425 |
|
|
|
|
|
|
|
| 426 |
except Exception as e:
|
| 427 |
return f"Error: {str(e)}"
|
| 428 |
|
|
@@ -606,6 +627,10 @@ with st.sidebar:
|
|
| 606 |
)
|
| 607 |
if test_response.status_code == 200:
|
| 608 |
st.success("✅ API working correctly!")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 609 |
else:
|
| 610 |
st.error(f"❌ API Error: {test_response.status_code}")
|
| 611 |
except Exception as e:
|
|
@@ -614,14 +639,20 @@ with st.sidebar:
|
|
| 614 |
st.error("❌ No OpenRouter API Key")
|
| 615 |
st.info("Add OPENROUTER_API_KEY in Hugging Face Space settings → Variables and secrets")
|
| 616 |
|
| 617 |
-
#
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
|
|
|
|
|
|
|
|
|
| 622 |
|
| 623 |
-
|
| 624 |
-
|
|
|
|
|
|
|
|
|
|
| 625 |
|
| 626 |
st.divider()
|
| 627 |
|
|
@@ -656,10 +687,11 @@ for message in st.session_state.messages:
|
|
| 656 |
rag_info = message["rag_info"]
|
| 657 |
|
| 658 |
if show_sources and rag_info.get("sources"):
|
|
|
|
| 659 |
st.markdown(f"""
|
| 660 |
<div class="rag-attribution">
|
| 661 |
<strong>π Sources:</strong> {', '.join(rag_info['sources'])}<br>
|
| 662 |
-
<strong>🎯 Confidence:</strong> {
|
| 663 |
</div>
|
| 664 |
""", unsafe_allow_html=True)
|
| 665 |
|
|
@@ -692,22 +724,28 @@ if prompt := st.chat_input("Ask questions about your documents..."):
|
|
| 692 |
# Check if we found relevant documents (much lower threshold)
|
| 693 |
if search_results and search_results[0]['similarity'] > 0.01: # Very low threshold
|
| 694 |
# Generate document-based answer
|
| 695 |
-
result = rag_system.generate_answer(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 696 |
|
| 697 |
# Display AI answer or extracted answer
|
| 698 |
if use_ai_enhancement and result['has_both']:
|
| 699 |
answer_text = result['ai_answer']
|
| 700 |
-
st.markdown(f"🤖 **AI Answer:** {answer_text}")
|
| 701 |
else:
|
| 702 |
answer_text = result['extracted_answer']
|
| 703 |
st.markdown(f"π **Document Answer:** {answer_text}")
|
| 704 |
|
| 705 |
# Show RAG info
|
| 706 |
if show_sources and result['sources']:
|
|
|
|
| 707 |
st.markdown(f"""
|
| 708 |
<div class="rag-attribution">
|
| 709 |
<strong>π Sources:</strong> {', '.join(result['sources'])}<br>
|
| 710 |
-
<strong>🎯 Confidence:</strong> {
|
| 711 |
<strong>π Found:</strong> {len(search_results)} relevant sections
|
| 712 |
</div>
|
| 713 |
""", unsafe_allow_html=True)
|
|
@@ -725,26 +763,16 @@ if prompt := st.chat_input("Ask questions about your documents..."):
|
|
| 725 |
}
|
| 726 |
|
| 727 |
else:
|
| 728 |
-
# No relevant documents found -
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
}
|
| 737 |
-
|
| 738 |
-
# Use general AI only if not in low-token mode
|
| 739 |
-
st.info("No relevant documents found. Switching to general AI mode...")
|
| 740 |
-
general_response = get_general_ai_response(prompt)
|
| 741 |
-
st.markdown(f"💬 **General AI:** {general_response}")
|
| 742 |
-
|
| 743 |
-
assistant_message = {
|
| 744 |
-
"role": "assistant",
|
| 745 |
-
"content": general_response,
|
| 746 |
-
"rag_info": {"sources": [], "confidence": 0, "mode": "general"}
|
| 747 |
-
}
|
| 748 |
|
| 749 |
else:
|
| 750 |
# RAG system not ready - use general AI
|
|
@@ -753,7 +781,7 @@ if prompt := st.chat_input("Ask questions about your documents..."):
|
|
| 753 |
else:
|
| 754 |
st.error("RAG system not ready. Using general AI mode...")
|
| 755 |
|
| 756 |
-
general_response = get_general_ai_response(prompt)
|
| 757 |
st.markdown(f"💬 **General AI:** {general_response}")
|
| 758 |
|
| 759 |
assistant_message = {
|
|
@@ -771,4 +799,5 @@ if prompt := st.chat_input("Ask questions about your documents..."):
|
|
| 771 |
# Footer info
|
| 772 |
if rag_system and rag_system.model:
|
| 773 |
doc_count = rag_system.get_collection_count()
|
| 774 |
-
|
|
|
|
|
|
| 312 |
|
| 313 |
return content[:200] + "..."
|
| 314 |
|
| 315 |
+
def generate_answer(self, query, search_results, use_ai_enhancement=True, unlimited_tokens=False):
|
| 316 |
+
"""Generate both AI and extracted answers with proper token handling"""
|
| 317 |
if not search_results:
|
| 318 |
return {
|
| 319 |
'ai_answer': "No information found in documents.",
|
|
|
|
| 330 |
# Always generate extracted answer
|
| 331 |
extracted_answer = self.extract_direct_answer(query, best_result['content'])
|
| 332 |
|
| 333 |
+
# Try AI answer if requested and API key available
|
| 334 |
ai_answer = None
|
| 335 |
openrouter_key = os.environ.get("OPENROUTER_API_KEY")
|
| 336 |
|
| 337 |
+
if use_ai_enhancement and openrouter_key:
|
| 338 |
+
# Build context from search results
|
| 339 |
+
context = "\n\n".join([f"Source: {r['metadata']['source_file']}\nContent: {r['content']}"
|
| 340 |
+
for r in search_results[:3]])
|
| 341 |
|
| 342 |
+
# Create comprehensive prompt for unlimited tokens
|
| 343 |
+
if unlimited_tokens:
|
| 344 |
+
prompt = f"""Based on the following document context, provide a comprehensive and detailed answer to the user's question.
|
| 345 |
+
|
| 346 |
+
Context from documents:
|
| 347 |
+
{context}
|
| 348 |
+
|
| 349 |
+
User Question: {query}
|
| 350 |
+
|
| 351 |
+
Please provide a thorough, well-structured answer that directly addresses the question using the information from the documents. If the documents contain specific details, include them in your response."""
|
| 352 |
+
max_tokens = 500 # Higher token limit for detailed responses
|
| 353 |
+
temperature = 0.3
|
| 354 |
+
else:
|
| 355 |
+
# Fallback to shorter prompt
|
| 356 |
+
prompt = f"Context: {extracted_answer}\n\nQuestion: {query}\n\nImprove the answer:"
|
| 357 |
+
max_tokens = 150
|
| 358 |
+
temperature = 0.1
|
| 359 |
|
| 360 |
try:
|
| 361 |
response = requests.post(
|
|
|
|
| 369 |
json={
|
| 370 |
"model": "openai/gpt-3.5-turbo",
|
| 371 |
"messages": [{"role": "user", "content": prompt}],
|
| 372 |
+
"max_tokens": max_tokens,
|
| 373 |
+
"temperature": temperature
|
| 374 |
},
|
| 375 |
+
timeout=15
|
| 376 |
)
|
| 377 |
|
| 378 |
if response.status_code == 200:
|
| 379 |
ai_response = response.json()['choices'][0]['message']['content'].strip()
|
|
|
|
| 380 |
ai_answer = ai_response if len(ai_response) > 10 else extracted_answer
|
| 381 |
elif response.status_code == 402:
|
| 382 |
+
st.warning("💳 OpenRouter credits exhausted. Using extracted answers only.")
|
| 383 |
+
elif response.status_code == 429:
|
| 384 |
+
st.warning("⏱️ Rate limit reached. Using extracted answers only.")
|
| 385 |
else:
|
| 386 |
+
st.warning(f"API Error {response.status_code}. Using extracted answers only.")
|
| 387 |
|
| 388 |
+
except requests.exceptions.Timeout:
|
| 389 |
+
st.warning("⏱️ API timeout. Using extracted answers only.")
|
| 390 |
except Exception as e:
|
| 391 |
+
st.warning(f"API Exception: {str(e)}. Using extracted answers only.")
|
| 392 |
+
|
| 393 |
+
return {
|
| 394 |
+
'ai_answer': ai_answer,
|
| 395 |
+
'extracted_answer': extracted_answer,
|
| 396 |
+
'sources': sources,
|
| 397 |
+
'confidence': avg_confidence,
|
| 398 |
+
'has_both': ai_answer is not None
|
| 399 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 400 |
|
| 401 |
+
def get_general_ai_response(query, unlimited_tokens=False):
|
| 402 |
+
"""Get AI response for general questions with proper token handling"""
|
| 403 |
openrouter_key = os.environ.get("OPENROUTER_API_KEY")
|
| 404 |
|
| 405 |
if not openrouter_key:
|
| 406 |
return "I can only answer questions about your uploaded documents. Please add an OpenRouter API key for general conversations."
|
| 407 |
|
| 408 |
try:
|
| 409 |
+
# Adjust parameters based on token availability
|
| 410 |
+
if unlimited_tokens:
|
| 411 |
+
max_tokens = 500
|
| 412 |
+
temperature = 0.7
|
| 413 |
+
prompt = f"Please provide a helpful and detailed answer to this question: {query}"
|
| 414 |
+
else:
|
| 415 |
+
max_tokens = 150
|
| 416 |
+
temperature = 0.7
|
| 417 |
+
prompt = query[:200] # Limit input length for token conservation
|
| 418 |
|
| 419 |
response = requests.post(
|
| 420 |
"https://openrouter.ai/api/v1/chat/completions",
|
|
|
|
| 426 |
},
|
| 427 |
json={
|
| 428 |
"model": "openai/gpt-3.5-turbo",
|
| 429 |
+
"messages": [{"role": "user", "content": prompt}],
|
| 430 |
+
"max_tokens": max_tokens,
|
| 431 |
+
"temperature": temperature
|
| 432 |
},
|
| 433 |
timeout=15
|
| 434 |
)
|
|
|
|
| 437 |
return response.json()['choices'][0]['message']['content'].strip()
|
| 438 |
elif response.status_code == 402:
|
| 439 |
return "Sorry, OpenRouter credits exhausted. Please add more credits or ask document-specific questions."
|
| 440 |
+
elif response.status_code == 429:
|
| 441 |
+
return "Rate limit reached. Please try again in a moment."
|
| 442 |
else:
|
| 443 |
return f"API error (Status: {response.status_code}). Try asking about documents instead."
|
| 444 |
|
| 445 |
+
except requests.exceptions.Timeout:
|
| 446 |
+
return "Request timeout. Please try again."
|
| 447 |
except Exception as e:
|
| 448 |
return f"Error: {str(e)}"
|
| 449 |
|
|
|
|
| 627 |
)
|
| 628 |
if test_response.status_code == 200:
|
| 629 |
st.success("✅ API working correctly!")
|
| 630 |
+
elif test_response.status_code == 402:
|
| 631 |
+
st.error("❌ Credits exhausted")
|
| 632 |
+
elif test_response.status_code == 429:
|
| 633 |
+
st.warning("⏱️ Rate limited")
|
| 634 |
else:
|
| 635 |
st.error(f"❌ API Error: {test_response.status_code}")
|
| 636 |
except Exception as e:
|
|
|
|
| 639 |
st.error("❌ No OpenRouter API Key")
|
| 640 |
st.info("Add OPENROUTER_API_KEY in Hugging Face Space settings → Variables and secrets")
|
| 641 |
|
| 642 |
+
# Enhanced Settings
|
| 643 |
+
st.subheader("π Token Settings")
|
| 644 |
+
unlimited_tokens = st.checkbox("🔥 Unlimited Tokens Mode", value=True, help="Use higher token limits for detailed responses")
|
| 645 |
+
use_ai_enhancement = st.checkbox("🤖 AI Enhancement", value=bool(openrouter_key), help="Enhance answers with AI when documents are found")
|
| 646 |
+
|
| 647 |
+
st.subheader("ποΈ Display Settings")
|
| 648 |
+
show_sources = st.checkbox("π Show Sources", value=True)
|
| 649 |
+
show_confidence = st.checkbox("🎯 Show Confidence Scores", value=True)
|
| 650 |
|
| 651 |
+
# Token mode indicator
|
| 652 |
+
if unlimited_tokens:
|
| 653 |
+
st.success("🔥 Unlimited mode: Detailed responses enabled")
|
| 654 |
+
else:
|
| 655 |
+
st.info("💰 Conservative mode: Limited tokens to save credits")
|
| 656 |
|
| 657 |
st.divider()
|
| 658 |
|
|
|
|
| 687 |
rag_info = message["rag_info"]
|
| 688 |
|
| 689 |
if show_sources and rag_info.get("sources"):
|
| 690 |
+
confidence_text = f"{rag_info['confidence']*100:.1f}%" if show_confidence else ""
|
| 691 |
st.markdown(f"""
|
| 692 |
<div class="rag-attribution">
|
| 693 |
<strong>π Sources:</strong> {', '.join(rag_info['sources'])}<br>
|
| 694 |
+
<strong>🎯 Confidence:</strong> {confidence_text}
|
| 695 |
</div>
|
| 696 |
""", unsafe_allow_html=True)
|
| 697 |
|
|
|
|
| 724 |
# Check if we found relevant documents (much lower threshold)
|
| 725 |
if search_results and search_results[0]['similarity'] > 0.01: # Very low threshold
|
| 726 |
# Generate document-based answer
|
| 727 |
+
result = rag_system.generate_answer(
|
| 728 |
+
prompt,
|
| 729 |
+
search_results,
|
| 730 |
+
use_ai_enhancement=use_ai_enhancement,
|
| 731 |
+
unlimited_tokens=unlimited_tokens
|
| 732 |
+
)
|
| 733 |
|
| 734 |
# Display AI answer or extracted answer
|
| 735 |
if use_ai_enhancement and result['has_both']:
|
| 736 |
answer_text = result['ai_answer']
|
| 737 |
+
st.markdown(f"🤖 **AI Enhanced Answer:** {answer_text}")
|
| 738 |
else:
|
| 739 |
answer_text = result['extracted_answer']
|
| 740 |
st.markdown(f"π **Document Answer:** {answer_text}")
|
| 741 |
|
| 742 |
# Show RAG info
|
| 743 |
if show_sources and result['sources']:
|
| 744 |
+
confidence_text = f"{result['confidence']*100:.1f}%" if show_confidence else ""
|
| 745 |
st.markdown(f"""
|
| 746 |
<div class="rag-attribution">
|
| 747 |
<strong>π Sources:</strong> {', '.join(result['sources'])}<br>
|
| 748 |
+
<strong>🎯 Confidence:</strong> {confidence_text}<br>
|
| 749 |
<strong>π Found:</strong> {len(search_results)} relevant sections
|
| 750 |
</div>
|
| 751 |
""", unsafe_allow_html=True)
|
|
|
|
| 763 |
}
|
| 764 |
|
| 765 |
else:
|
| 766 |
+
# No relevant documents found - use general AI
|
| 767 |
+
st.info("No relevant documents found. Using general AI mode...")
|
| 768 |
+
general_response = get_general_ai_response(prompt, unlimited_tokens=unlimited_tokens)
|
| 769 |
+
st.markdown(f"💬 **General AI:** {general_response}")
|
| 770 |
+
|
| 771 |
+
assistant_message = {
|
| 772 |
+
"role": "assistant",
|
| 773 |
+
"content": general_response,
|
| 774 |
+
"rag_info": {"sources": [], "confidence": 0, "mode": "general"}
|
| 775 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 776 |
|
| 777 |
else:
|
| 778 |
# RAG system not ready - use general AI
|
|
|
|
| 781 |
else:
|
| 782 |
st.error("RAG system not ready. Using general AI mode...")
|
| 783 |
|
| 784 |
+
general_response = get_general_ai_response(prompt, unlimited_tokens=unlimited_tokens)
|
| 785 |
st.markdown(f"💬 **General AI:** {general_response}")
|
| 786 |
|
| 787 |
assistant_message = {
|
|
|
|
| 799 |
# Footer info
|
| 800 |
if rag_system and rag_system.model:
|
| 801 |
doc_count = rag_system.get_collection_count()
|
| 802 |
+
token_mode = "🔥 Unlimited" if unlimited_tokens else "💰 Conservative"
|
| 803 |
+
st.caption(f"π Knowledge Base: {doc_count} indexed chunks | π RAG System Active | {token_mode} Token Mode")
|