Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -354,39 +354,82 @@ class ProductionRAGSystem:
|
|
| 354 |
status_text.empty()
|
| 355 |
return False
|
| 356 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
def search(self, query, n_results=5):
|
| 358 |
-
"""Search for relevant chunks"""
|
| 359 |
if not self.model or not self.collection:
|
| 360 |
return None
|
| 361 |
|
| 362 |
try:
|
| 363 |
-
|
|
|
|
|
|
|
| 364 |
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
|
| 370 |
-
if not
|
| 371 |
return None
|
| 372 |
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
|
| 386 |
# Debug: Show search results for troubleshooting
|
| 387 |
-
print(f"Search for '{query}' found {len(search_results)} results")
|
| 388 |
for i, result in enumerate(search_results[:3]):
|
| 389 |
-
print(f" {i+1}. Similarity: {result['similarity']:.3f} | Source: {result['metadata']['source_file']}")
|
| 390 |
print(f" Content preview: {result['content'][:100]}...")
|
| 391 |
|
| 392 |
return search_results
|
|
@@ -462,25 +505,38 @@ class ProductionRAGSystem:
|
|
| 462 |
context = "\n\n".join([f"Source: {r['metadata']['source_file']}\nContent: {r['content']}"
|
| 463 |
for r in search_results[:3]])
|
| 464 |
|
| 465 |
-
# Create focused prompt for
|
| 466 |
if unlimited_tokens:
|
| 467 |
-
prompt = f"""
|
| 468 |
-
|
|
|
|
| 469 |
{context}
|
| 470 |
-
|
|
|
|
|
|
|
| 471 |
Instructions:
|
| 472 |
-
-
|
| 473 |
-
-
|
| 474 |
-
-
|
| 475 |
-
-
|
| 476 |
-
-
|
| 477 |
-
|
| 478 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 479 |
else:
|
| 480 |
-
# Shorter prompt for conservative mode
|
| 481 |
-
prompt = f"
|
| 482 |
-
|
| 483 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 484 |
|
| 485 |
try:
|
| 486 |
response = requests.post(
|
|
@@ -536,27 +592,31 @@ Instructions:
|
|
| 536 |
}
|
| 537 |
|
| 538 |
def get_general_ai_response(query, unlimited_tokens=False):
|
| 539 |
-
"""Get AI response for general questions with
|
| 540 |
openrouter_key = os.environ.get("OPENROUTER_API_KEY")
|
| 541 |
|
| 542 |
if not openrouter_key:
|
| 543 |
-
return "I can only answer questions about your uploaded documents. Please add an OpenRouter API key for general conversations."
|
| 544 |
|
| 545 |
try:
|
| 546 |
# Adjust parameters based on token availability
|
| 547 |
if unlimited_tokens:
|
| 548 |
-
max_tokens =
|
| 549 |
-
temperature = 0.7
|
| 550 |
-
prompt = f"""Act as a knowledgeable assistant. Provide a helpful, clear, and concise answer to this question: {query}
|
| 551 |
-
Keep your response:
|
| 552 |
-
- Direct and actionable
|
| 553 |
-
- Professional but conversational
|
| 554 |
-
- Focused on the main points
|
| 555 |
-
- Under 300 tokens"""
|
| 556 |
-
else:
|
| 557 |
-
max_tokens = 50 # Minimum for conservative mode
|
| 558 |
temperature = 0.5
|
| 559 |
-
prompt = f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 560 |
|
| 561 |
response = requests.post(
|
| 562 |
"https://openrouter.ai/api/v1/chat/completions",
|
|
@@ -750,13 +810,18 @@ with st.sidebar:
|
|
| 750 |
else:
|
| 751 |
st.error("Failed to index documents. Check your documents folder.")
|
| 752 |
|
| 753 |
-
# Show current documents
|
| 754 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 755 |
txt_files = [f for f in os.listdir("documents") if f.endswith('.txt')]
|
| 756 |
if txt_files:
|
| 757 |
-
st.
|
| 758 |
-
for file in txt_files:
|
| 759 |
-
st.text(f"β’ {file}")
|
| 760 |
|
| 761 |
# Manual upload interface (fallback)
|
| 762 |
st.subheader("π€ Manual Upload")
|
|
|
|
| 354 |
status_text.empty()
|
| 355 |
return False
|
| 356 |
|
| 357 |
+
def expand_query_with_family_terms(self, query):
    """Expand a query with family-relationship synonyms.

    For every relationship word found in the query (matched as a whole
    word, case-insensitively), a lowercase variant of the query is added
    with that word swapped for each of its synonyms. The original query
    is always the first element, so callers that only need the raw query
    can take index 0.

    Args:
        query: The user's search query string.

    Returns:
        list[str]: The original query followed by unique expanded
        variants (possibly just ``[query]`` if no relationship word
        matches).
    """
    import re  # local import keeps this helper self-contained

    family_mappings = {
        'mother': ['mama', 'mom', 'ammi'],
        'mama': ['mother', 'mom', 'ammi'],
        'father': ['papa', 'dad', 'abbu'],
        'papa': ['father', 'dad', 'abbu'],
        'brother': ['bhai', 'bro'],
        'bhai': ['brother', 'bro'],
        'sister': ['behn', 'sis'],
        'behn': ['sister', 'sis'],
    }

    expanded_terms = [query]
    query_lower = query.lower()

    for key, synonyms in family_mappings.items():
        # Whole-word match only: plain substring matching wrongly fired on
        # embedded words (e.g. 'mother' inside "grandmother" produced the
        # bogus expansion "grandmama").
        pattern = re.compile(r'\b' + re.escape(key) + r'\b')
        if not pattern.search(query_lower):
            continue
        for synonym in synonyms:
            variant = pattern.sub(synonym, query_lower)
            # Skip duplicates so downstream embedding/search isn't run
            # twice for the same expanded query.
            if variant not in expanded_terms:
                expanded_terms.append(variant)

    return expanded_terms
|
| 379 |
+
|
| 380 |
def search(self, query, n_results=5):
|
| 381 |
+
"""Search for relevant chunks with family relationship mapping"""
|
| 382 |
if not self.model or not self.collection:
|
| 383 |
return None
|
| 384 |
|
| 385 |
try:
|
| 386 |
+
# Expand query with family terms
|
| 387 |
+
expanded_queries = self.expand_query_with_family_terms(query)
|
| 388 |
+
all_results = []
|
| 389 |
|
| 390 |
+
# Search with all expanded terms
|
| 391 |
+
for search_query in expanded_queries:
|
| 392 |
+
query_embedding = self.model.encode([search_query])[0].tolist()
|
| 393 |
+
|
| 394 |
+
results = self.collection.query(
|
| 395 |
+
query_embeddings=[query_embedding],
|
| 396 |
+
n_results=n_results
|
| 397 |
+
)
|
| 398 |
+
|
| 399 |
+
if results['documents'][0]:
|
| 400 |
+
for chunk, distance, metadata in zip(
|
| 401 |
+
results['documents'][0],
|
| 402 |
+
results['distances'][0],
|
| 403 |
+
results['metadatas'][0]
|
| 404 |
+
):
|
| 405 |
+
similarity = max(0, 1 - distance)
|
| 406 |
+
all_results.append({
|
| 407 |
+
'content': chunk,
|
| 408 |
+
'metadata': metadata,
|
| 409 |
+
'similarity': similarity,
|
| 410 |
+
'query_used': search_query
|
| 411 |
+
})
|
| 412 |
|
| 413 |
+
if not all_results:
|
| 414 |
return None
|
| 415 |
|
| 416 |
+
# Remove duplicates and sort by similarity
|
| 417 |
+
seen_chunks = set()
|
| 418 |
+
unique_results = []
|
| 419 |
+
for result in all_results:
|
| 420 |
+
chunk_id = f"{result['metadata']['source_file']}_{result['content'][:50]}"
|
| 421 |
+
if chunk_id not in seen_chunks:
|
| 422 |
+
seen_chunks.add(chunk_id)
|
| 423 |
+
unique_results.append(result)
|
| 424 |
+
|
| 425 |
+
# Sort by similarity and take top results
|
| 426 |
+
unique_results.sort(key=lambda x: x['similarity'], reverse=True)
|
| 427 |
+
search_results = unique_results[:n_results]
|
| 428 |
|
| 429 |
# Debug: Show search results for troubleshooting
|
| 430 |
+
print(f"Search for '{query}' (expanded to {len(expanded_queries)} terms) found {len(search_results)} results")
|
| 431 |
for i, result in enumerate(search_results[:3]):
|
| 432 |
+
print(f" {i+1}. Similarity: {result['similarity']:.3f} | Source: {result['metadata']['source_file']} | Query: {result['query_used']}")
|
| 433 |
print(f" Content preview: {result['content'][:100]}...")
|
| 434 |
|
| 435 |
return search_results
|
|
|
|
| 505 |
context = "\n\n".join([f"Source: {r['metadata']['source_file']}\nContent: {r['content']}"
|
| 506 |
for r in search_results[:3]])
|
| 507 |
|
| 508 |
+
# Create focused prompt for rich, engaging family responses
|
| 509 |
if unlimited_tokens:
|
| 510 |
+
prompt = f"""You are a warm, caring family assistant who knows everyone well. Based on the family information below, provide a rich, detailed, and engaging response.
|
| 511 |
+
|
| 512 |
+
Family Document Context:
|
| 513 |
{context}
|
| 514 |
+
|
| 515 |
+
Question: {query}
|
| 516 |
+
|
| 517 |
Instructions:
|
| 518 |
+
- Use the document information as your foundation
|
| 519 |
+
- Expand with logical personality traits and qualities someone like this would have
|
| 520 |
+
- Add 3-4 additional lines of thoughtful insights about their character
|
| 521 |
+
- Use relevant emojis to make the response warm and engaging
|
| 522 |
+
- Write in a caring, family-friend tone
|
| 523 |
+
- If someone asks about relationships (like "mother" = "mama"), make those connections
|
| 524 |
+
- Make the response feel personal and detailed, not just a basic fact
|
| 525 |
+
- Include both strengths and endearing qualities
|
| 526 |
+
- Keep it warm but informative (4-6 sentences total)
|
| 527 |
+
|
| 528 |
+
Remember: You're helping someone learn about their family members in a meaningful way! π"""
|
| 529 |
+
max_tokens = 400 # Increased for richer responses
|
| 530 |
+
temperature = 0.3 # Slightly more creative
|
| 531 |
else:
|
| 532 |
+
# Shorter but still enhanced prompt for conservative mode
|
| 533 |
+
prompt = f"""Based on this family info: {extracted_answer}
|
| 534 |
+
|
| 535 |
+
Question: {query}
|
| 536 |
+
|
| 537 |
+
Give a warm, detailed answer with emojis. Add 2-3 more qualities this person likely has. Make it caring and personal! π"""
|
| 538 |
+
max_tokens = 150 # Better than 50 for family context
|
| 539 |
+
temperature = 0.2
|
| 540 |
|
| 541 |
try:
|
| 542 |
response = requests.post(
|
|
|
|
| 592 |
}
|
| 593 |
|
| 594 |
def get_general_ai_response(query, unlimited_tokens=False):
|
| 595 |
+
"""Get AI response for general questions with family-friendly enhancement"""
|
| 596 |
openrouter_key = os.environ.get("OPENROUTER_API_KEY")
|
| 597 |
|
| 598 |
if not openrouter_key:
|
| 599 |
+
return "I can only answer questions about your family members from the uploaded documents. Please add an OpenRouter API key for general conversations. π"
|
| 600 |
|
| 601 |
try:
|
| 602 |
# Adjust parameters based on token availability
|
| 603 |
if unlimited_tokens:
|
| 604 |
+
max_tokens = 350 # Good limit for detailed family responses
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 605 |
temperature = 0.5
|
| 606 |
+
prompt = f"""You are a caring family assistant. Someone is asking about their family but I couldn't find specific information in their family documents.
|
| 607 |
+
|
| 608 |
+
Question: {query}
|
| 609 |
+
|
| 610 |
+
Please provide a warm, helpful response that:
|
| 611 |
+
- Acknowledges I don't have specific information about their family member
|
| 612 |
+
- Suggests they might want to add more details to their family profiles
|
| 613 |
+
- Offers to help in other ways
|
| 614 |
+
- Uses a caring, family-friendly tone with appropriate emojis
|
| 615 |
+
- Keep it supportive and understanding π"""
|
| 616 |
+
else:
|
| 617 |
+
max_tokens = 100 # Reasonable for conservative mode
|
| 618 |
+
temperature = 0.4
|
| 619 |
+
prompt = f"Family question: {query[:100]} - I don't have info about this family member. Give a caring, helpful response with emojis π"
|
| 620 |
|
| 621 |
response = requests.post(
|
| 622 |
"https://openrouter.ai/api/v1/chat/completions",
|
|
|
|
| 810 |
else:
|
| 811 |
st.error("Failed to index documents. Check your documents folder.")
|
| 812 |
|
| 813 |
+
# Show current documents (optional - can be hidden for privacy)
|
| 814 |
+
if st.checkbox("π Show Document List", value=False, help="Show/hide document filenames"):
|
| 815 |
+
if os.path.exists("documents"):
|
| 816 |
+
txt_files = [f for f in os.listdir("documents") if f.endswith('.txt')]
|
| 817 |
+
if txt_files:
|
| 818 |
+
st.subheader("π Current Documents")
|
| 819 |
+
for file in txt_files:
|
| 820 |
+
st.text(f"β’ {file}")
|
| 821 |
+
elif os.path.exists("documents"):
|
| 822 |
txt_files = [f for f in os.listdir("documents") if f.endswith('.txt')]
|
| 823 |
if txt_files:
|
| 824 |
+
st.info(f"π {len(txt_files)} family profile documents loaded (hidden for privacy)")
|
|
|
|
|
|
|
| 825 |
|
| 826 |
# Manual upload interface (fallback)
|
| 827 |
st.subheader("π€ Manual Upload")
|