Spaces:

uumerrr684
/

RAG_Chat_Flow

Sleeping

App Files Community

uumerrr684 committed on Aug 20, 2025

Commit

25a8945

verified ·

1 Parent(s): 3d66778

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -15

app.py CHANGED Viewed

@@ -345,22 +345,27 @@ class ProductionRAGSystem:
             context = "\n\n".join([f"Source: {r['metadata']['source_file']}\nContent: {r['content']}"
                                  for r in search_results[:3]])
-            # Create comprehensive prompt for unlimited tokens
             if unlimited_tokens:
-                prompt = f"""Based on the following document context, provide a comprehensive and detailed answer to the user's question.
-Context from documents:
 {context}
-User Question: {query}
-Please provide a thorough, well-structured answer that directly addresses the question using the information from the documents. If the documents contain specific details, include them in your response."""
-                max_tokens = 500  # Higher token limit for detailed responses
-                temperature = 0.3
             else:
-                # Fallback to shorter prompt
-                prompt = f"Context: {extracted_answer}\n\nQuestion: {query}\n\nImprove the answer:"
-                max_tokens = 150
                 temperature = 0.1
             try:
@@ -429,13 +434,19 @@ def get_general_ai_response(query, unlimited_tokens=False):
     try:
         # Adjust parameters based on token availability
         if unlimited_tokens:
-            max_tokens = 500
             temperature = 0.7
-            prompt = f"Please provide a helpful and detailed answer to this question: {query}"
         else:
-            max_tokens = 150
-            temperature = 0.7
-            prompt = query[:200]  # Limit input length for token conservation
         response = requests.post(
             "https://openrouter.ai/api/v1/chat/completions",

             context = "\n\n".join([f"Source: {r['metadata']['source_file']}\nContent: {r['content']}"
                                  for r in search_results[:3]])
+            # Create focused prompt for concise, professional answers
             if unlimited_tokens:
+                prompt = f"""Act as a helpful HR assistant for this company. Based on the document context below, provide a clear, concise, and professional answer to the employee's question.
+Document Context:
 {context}
+Employee Question: {query}
+Instructions:
+- Give a direct, actionable answer
+- Use specific details from the documents
+- Keep it professional but friendly
+- Be concise and to the point
+- If policies have conditions or exceptions, mention them clearly"""
+                max_tokens = 300  # Balanced token limit
+                temperature = 0.2
             else:
+                # Shorter prompt for conservative mode
+                prompt = f"Act as an HR assistant. Based on this info: {extracted_answer}\n\nEmployee asks: {query}\n\nGive a clear, helpful answer:"
+                max_tokens = 50  # Minimum token limit
                 temperature = 0.1
             try:
     try:
         # Adjust parameters based on token availability
         if unlimited_tokens:
+            max_tokens = 300  # Balanced limit for good answers
             temperature = 0.7
+            prompt = f"""Act as a knowledgeable assistant. Provide a helpful, clear, and concise answer to this question: {query}
+Keep your response:
+- Direct and actionable
+- Professional but conversational
+- Focused on the main points
+- Under 300 tokens"""
         else:
+            max_tokens = 50  # Minimum for conservative mode
+            temperature = 0.5
+            prompt = f"Briefly answer: {query[:100]}"  # Limit input length for token conservation
         response = requests.post(
             "https://openrouter.ai/api/v1/chat/completions",