uumerrr684 committed on
Commit
622f9de
·
verified ·
1 Parent(s): 98a0731

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -54
app.py CHANGED
@@ -354,39 +354,82 @@ class ProductionRAGSystem:
354
  status_text.empty()
355
  return False
356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  def search(self, query, n_results=5):
358
- """Search for relevant chunks"""
359
  if not self.model or not self.collection:
360
  return None
361
 
362
  try:
363
- query_embedding = self.model.encode([query])[0].tolist()
 
 
364
 
365
- results = self.collection.query(
366
- query_embeddings=[query_embedding],
367
- n_results=n_results
368
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
 
370
- if not results['documents'][0]:
371
  return None
372
 
373
- search_results = []
374
- for chunk, distance, metadata in zip(
375
- results['documents'][0],
376
- results['distances'][0],
377
- results['metadatas'][0]
378
- ):
379
- similarity = max(0, 1 - distance)
380
- search_results.append({
381
- 'content': chunk,
382
- 'metadata': metadata,
383
- 'similarity': similarity
384
- })
385
 
386
  # Debug: Show search results for troubleshooting
387
- print(f"Search for '{query}' found {len(search_results)} results")
388
  for i, result in enumerate(search_results[:3]):
389
- print(f" {i+1}. Similarity: {result['similarity']:.3f} | Source: {result['metadata']['source_file']}")
390
  print(f" Content preview: {result['content'][:100]}...")
391
 
392
  return search_results
@@ -462,25 +505,38 @@ class ProductionRAGSystem:
462
  context = "\n\n".join([f"Source: {r['metadata']['source_file']}\nContent: {r['content']}"
463
  for r in search_results[:3]])
464
 
465
- # Create focused prompt for concise, professional answers
466
  if unlimited_tokens:
467
- prompt = f"""Act as a helpful HR assistant for this company. Based on the document context below, provide a clear, concise, and professional answer to the employee's question.
468
- Document Context:
 
469
  {context}
470
- Employee Question: {query}
 
 
471
  Instructions:
472
- - Give a direct, actionable answer
473
- - Use specific details from the documents
474
- - Keep it professional but friendly
475
- - Be concise and to the point
476
- - If policies have conditions or exceptions, mention them clearly"""
477
- max_tokens = 300 # Balanced token limit
478
- temperature = 0.2
 
 
 
 
 
 
479
  else:
480
- # Shorter prompt for conservative mode
481
- prompt = f"Act as an HR assistant. Based on this info: {extracted_answer}\n\nEmployee asks: {query}\n\nGive a clear, helpful answer:"
482
- max_tokens = 50 # Minimum token limit
483
- temperature = 0.1
 
 
 
 
484
 
485
  try:
486
  response = requests.post(
@@ -536,27 +592,31 @@ Instructions:
536
  }
537
 
538
  def get_general_ai_response(query, unlimited_tokens=False):
539
- """Get AI response for general questions with proper token handling"""
540
  openrouter_key = os.environ.get("OPENROUTER_API_KEY")
541
 
542
  if not openrouter_key:
543
- return "I can only answer questions about your uploaded documents. Please add an OpenRouter API key for general conversations."
544
 
545
  try:
546
  # Adjust parameters based on token availability
547
  if unlimited_tokens:
548
- max_tokens = 300 # Balanced limit for good answers
549
- temperature = 0.7
550
- prompt = f"""Act as a knowledgeable assistant. Provide a helpful, clear, and concise answer to this question: {query}
551
- Keep your response:
552
- - Direct and actionable
553
- - Professional but conversational
554
- - Focused on the main points
555
- - Under 300 tokens"""
556
- else:
557
- max_tokens = 50 # Minimum for conservative mode
558
  temperature = 0.5
559
- prompt = f"Briefly answer: {query[:100]}" # Limit input length for token conservation
 
 
 
 
 
 
 
 
 
 
 
 
 
560
 
561
  response = requests.post(
562
  "https://openrouter.ai/api/v1/chat/completions",
@@ -750,13 +810,18 @@ with st.sidebar:
750
  else:
751
  st.error("Failed to index documents. Check your documents folder.")
752
 
753
- # Show current documents
754
- if os.path.exists("documents"):
 
 
 
 
 
 
 
755
  txt_files = [f for f in os.listdir("documents") if f.endswith('.txt')]
756
  if txt_files:
757
- st.subheader("πŸ“„ Current Documents")
758
- for file in txt_files:
759
- st.text(f"β€’ {file}")
760
 
761
  # Manual upload interface (fallback)
762
  st.subheader("πŸ“€ Manual Upload")
 
354
  status_text.empty()
355
  return False
356
 
357
def expand_query_with_family_terms(self, query):
    """Expand a query into variants using family-relationship synonyms.

    The indexed documents may refer to a relative by a different term than
    the user typed (e.g. the profile says "mama" but the user asks about
    "mother"), so every relationship word found in the query yields extra
    lower-cased query variants with its synonyms substituted.

    Args:
        query: Raw user query string.

    Returns:
        list[str]: The original query first, then unique lower-cased
        substituted variants in discovery order.
    """
    import re  # local import keeps this fix self-contained

    # Bidirectional English <-> Urdu relationship synonyms.
    family_mappings = {
        'mother': ['mama', 'mom', 'ammi'],
        'mama': ['mother', 'mom', 'ammi'],
        'father': ['papa', 'dad', 'abbu'],
        'papa': ['father', 'dad', 'abbu'],
        'brother': ['bhai', 'bro'],
        'bhai': ['brother', 'bro'],
        'sister': ['behn', 'sis'],
        'behn': ['sister', 'sis'],
    }

    expanded_terms = [query]
    # Track emitted variants: each duplicate would cost a redundant
    # embedding + vector search downstream in search().
    seen = {query}
    query_lower = query.lower()

    for key, synonyms in family_mappings.items():
        # Match whole words only, so e.g. "papaya" does not trigger the
        # "papa" mapping and produce nonsense variants like "fatherya".
        pattern = re.compile(r'\b' + re.escape(key) + r'\b')
        if pattern.search(query_lower):
            for synonym in synonyms:
                variant = pattern.sub(synonym, query_lower)
                if variant not in seen:
                    seen.add(variant)
                    expanded_terms.append(variant)

    return expanded_terms
379
+
380
  def search(self, query, n_results=5):
381
+ """Search for relevant chunks with family relationship mapping"""
382
  if not self.model or not self.collection:
383
  return None
384
 
385
  try:
386
+ # Expand query with family terms
387
+ expanded_queries = self.expand_query_with_family_terms(query)
388
+ all_results = []
389
 
390
+ # Search with all expanded terms
391
+ for search_query in expanded_queries:
392
+ query_embedding = self.model.encode([search_query])[0].tolist()
393
+
394
+ results = self.collection.query(
395
+ query_embeddings=[query_embedding],
396
+ n_results=n_results
397
+ )
398
+
399
+ if results['documents'][0]:
400
+ for chunk, distance, metadata in zip(
401
+ results['documents'][0],
402
+ results['distances'][0],
403
+ results['metadatas'][0]
404
+ ):
405
+ similarity = max(0, 1 - distance)
406
+ all_results.append({
407
+ 'content': chunk,
408
+ 'metadata': metadata,
409
+ 'similarity': similarity,
410
+ 'query_used': search_query
411
+ })
412
 
413
+ if not all_results:
414
  return None
415
 
416
+ # Remove duplicates and sort by similarity
417
+ seen_chunks = set()
418
+ unique_results = []
419
+ for result in all_results:
420
+ chunk_id = f"{result['metadata']['source_file']}_{result['content'][:50]}"
421
+ if chunk_id not in seen_chunks:
422
+ seen_chunks.add(chunk_id)
423
+ unique_results.append(result)
424
+
425
+ # Sort by similarity and take top results
426
+ unique_results.sort(key=lambda x: x['similarity'], reverse=True)
427
+ search_results = unique_results[:n_results]
428
 
429
  # Debug: Show search results for troubleshooting
430
+ print(f"Search for '{query}' (expanded to {len(expanded_queries)} terms) found {len(search_results)} results")
431
  for i, result in enumerate(search_results[:3]):
432
+ print(f" {i+1}. Similarity: {result['similarity']:.3f} | Source: {result['metadata']['source_file']} | Query: {result['query_used']}")
433
  print(f" Content preview: {result['content'][:100]}...")
434
 
435
  return search_results
 
505
  context = "\n\n".join([f"Source: {r['metadata']['source_file']}\nContent: {r['content']}"
506
  for r in search_results[:3]])
507
 
508
+ # Create focused prompt for rich, engaging family responses
509
  if unlimited_tokens:
510
+ prompt = f"""You are a warm, caring family assistant who knows everyone well. Based on the family information below, provide a rich, detailed, and engaging response.
511
+
512
+ Family Document Context:
513
  {context}
514
+
515
+ Question: {query}
516
+
517
  Instructions:
518
+ - Use the document information as your foundation
519
+ - Expand with logical personality traits and qualities someone like this would have
520
+ - Add 3-4 additional lines of thoughtful insights about their character
521
+ - Use relevant emojis to make the response warm and engaging
522
+ - Write in a caring, family-friend tone
523
+ - If someone asks about relationships (like "mother" = "mama"), make those connections
524
+ - Make the response feel personal and detailed, not just a basic fact
525
+ - Include both strengths and endearing qualities
526
+ - Keep it warm but informative (4-6 sentences total)
527
+
528
+ Remember: You're helping someone learn about their family members in a meaningful way! πŸ’"""
529
+ max_tokens = 400 # Increased for richer responses
530
+ temperature = 0.3 # Slightly more creative
531
  else:
532
+ # Shorter but still enhanced prompt for conservative mode
533
+ prompt = f"""Based on this family info: {extracted_answer}
534
+
535
+ Question: {query}
536
+
537
+ Give a warm, detailed answer with emojis. Add 2-3 more qualities this person likely has. Make it caring and personal! πŸ’"""
538
+ max_tokens = 150 # Better than 50 for family context
539
+ temperature = 0.2
540
 
541
  try:
542
  response = requests.post(
 
592
  }
593
 
594
  def get_general_ai_response(query, unlimited_tokens=False):
595
+ """Get AI response for general questions with family-friendly enhancement"""
596
  openrouter_key = os.environ.get("OPENROUTER_API_KEY")
597
 
598
  if not openrouter_key:
599
+ return "I can only answer questions about your family members from the uploaded documents. Please add an OpenRouter API key for general conversations. πŸ’"
600
 
601
  try:
602
  # Adjust parameters based on token availability
603
  if unlimited_tokens:
604
+ max_tokens = 350 # Good limit for detailed family responses
 
 
 
 
 
 
 
 
 
605
  temperature = 0.5
606
+ prompt = f"""You are a caring family assistant. Someone is asking about their family but I couldn't find specific information in their family documents.
607
+
608
+ Question: {query}
609
+
610
+ Please provide a warm, helpful response that:
611
+ - Acknowledges I don't have specific information about their family member
612
+ - Suggests they might want to add more details to their family profiles
613
+ - Offers to help in other ways
614
+ - Uses a caring, family-friendly tone with appropriate emojis
615
+ - Keep it supportive and understanding πŸ’"""
616
+ else:
617
+ max_tokens = 100 # Reasonable for conservative mode
618
+ temperature = 0.4
619
+ prompt = f"Family question: {query[:100]} - I don't have info about this family member. Give a caring, helpful response with emojis πŸ’"
620
 
621
  response = requests.post(
622
  "https://openrouter.ai/api/v1/chat/completions",
 
810
  else:
811
  st.error("Failed to index documents. Check your documents folder.")
812
 
813
+ # Show current documents (optional - can be hidden for privacy)
814
+ if st.checkbox("πŸ“„ Show Document List", value=False, help="Show/hide document filenames"):
815
+ if os.path.exists("documents"):
816
+ txt_files = [f for f in os.listdir("documents") if f.endswith('.txt')]
817
+ if txt_files:
818
+ st.subheader("πŸ“„ Current Documents")
819
+ for file in txt_files:
820
+ st.text(f"β€’ {file}")
821
+ elif os.path.exists("documents"):
822
  txt_files = [f for f in os.listdir("documents") if f.endswith('.txt')]
823
  if txt_files:
824
+ st.info(f"πŸ“„ {len(txt_files)} family profile documents loaded (hidden for privacy)")
 
 
825
 
826
  # Manual upload interface (fallback)
827
  st.subheader("πŸ“€ Manual Upload")