alx-d committed
Commit 8c804eb · verified · 1 Parent(s): ded9c47

Upload folder using huggingface_hub

Files changed (1)
  1. advanced_rag.py +53 -41
advanced_rag.py CHANGED
@@ -196,20 +196,19 @@ class ElevatedRagChain:
     class MistralLLM(LLM):
         temperature: float = 0.7
         top_p: float = 0.95
-        _client: Any = PrivateAttr()  # Remove the default=None here
+        client: Any = None  # Changed from _client PrivateAttr
 
         def __init__(self, api_key: str, temperature: float = 0.7, top_p: float = 0.95, **kwargs: Any):
-            # Initialize the private attributes before calling super().__init__
-            self._client = Mistral(api_key=api_key)
-            # Now call super().__init__
             super().__init__(temperature=temperature, top_p=top_p, **kwargs)
+            # Initialize the client as a regular attribute instead of PrivateAttr
+            self.client = Mistral(api_key=api_key)
 
         @property
         def _llm_type(self) -> str:
             return "mistral_llm"
 
         def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
-            response = self._client.chat.complete(
+            response = self.client.chat.complete(  # Use self.client instead of self._client
                 model="mistral-small-latest",
                 messages=[{"role": "user", "content": prompt}],
                 temperature=self.temperature,
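The first hunk swaps the Pydantic PrivateAttr for a plain declared field (client: Any = None), so the Mistral client can be assigned after super().__init__() like any other model field; the old version assigned self._client before the base constructor ran, which typically fails on LangChain's Pydantic-based LLM class because the model is not initialized yet. A minimal usage sketch of the revised class, assuming the mistralai and langchain packages are installed; the import path and environment variable below are illustrative, not part of this commit:

    import os
    from advanced_rag import ElevatedRagChain  # module uploaded in this commit

    # MistralLLM is nested inside ElevatedRagChain, so it is reached via the outer class.
    llm = ElevatedRagChain.MistralLLM(api_key=os.environ["MISTRAL_API_KEY"], temperature=0.2)

    # LangChain LLM subclasses are Runnables, so invoke() accepts a plain prompt string.
    print(llm.invoke("Reply with one word: ping"))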
@@ -421,42 +420,55 @@ def submit_query_updated(query):
     debug_print("Inside submit_query function.")
     if not query:
         debug_print("Please enter a non-empty query")
-        return "Please enter a non-empty query", "Word count: 0", f"Model used: {rag_chain.llm_choice}", ""
-    if hasattr(rag_chain, 'elevated_rag_chain'):
-        try:
-            history_text = "\n".join([f"Q: {conv['query']}\nA: {conv['response']}" for conv in rag_chain.conversation_history]) if rag_chain.conversation_history else ""
-            prompt_variables = {
-                "conversation_history": history_text,
-                "context": rag_chain.context,
-                "question": query
-            }
-            if "llama" in rag_chain.llm_choice.lower():
-                prompt_variables["context"] = truncate_prompt(prompt_variables["context"], max_tokens=4092)
-            response = rag_chain.elevated_rag_chain.invoke(prompt_variables)
-            rag_chain.conversation_history.append({"query": query, "response": response})
-            input_token_count = count_tokens(query)
-            output_token_count = count_tokens(response)
-            return (
-                response,
-                rag_chain.get_current_context(),
-                f"Input tokens: {input_token_count}",
-                f"Output tokens: {output_token_count}"
-            )
-        except Exception as e:
-            error_msg = traceback.format_exc()
-            debug_print("LLM error. Error: " + error_msg)
-            return (
-                "Query error: " + str(e),
-                "",
-                "Input tokens: 0",
-                "Output tokens: 0"
-            )
-    return (
-        "Please load files first.",
-        "",
-        "Input tokens: 0",
-        "Output tokens: 0"
-    )
+        return "Please enter a non-empty query", "", "Input tokens: 0", "Output tokens: 0"
+
+    if not hasattr(rag_chain, 'elevated_rag_chain'):
+        return "Please load files first.", "", "Input tokens: 0", "Output tokens: 0"
+
+    try:
+        # Collect and truncate conversation history if needed
+        history_text = ""
+        if rag_chain.conversation_history:
+            # Only keep the last 3 conversations to limit context size
+            recent_history = rag_chain.conversation_history[-3:]
+            history_text = "\n".join([f"Q: {conv['query']}\nA: {conv['response']}"
+                                      for conv in recent_history])
+
+        # Determine max context size based on model
+        max_context_tokens = 32000 if "mistral" in rag_chain.llm_choice.lower() else 4096
+        # Reserve 1000 tokens for the question and generation
+        max_context_tokens -= 1000
+
+        # Truncate context if needed
+        context = truncate_prompt(rag_chain.context, max_tokens=max_context_tokens)
+
+        prompt_variables = {
+            "conversation_history": history_text,
+            "context": context,
+            "question": query
+        }
+
+        response = rag_chain.elevated_rag_chain.invoke({"question": query})
+        rag_chain.conversation_history.append({"query": query, "response": response})
+
+        input_token_count = count_tokens(query)
+        output_token_count = count_tokens(response)
+
+        return (
+            response,
+            rag_chain.get_current_context(),
+            f"Input tokens: {input_token_count}",
+            f"Output tokens: {output_token_count}"
+        )
+    except Exception as e:
+        error_msg = traceback.format_exc()
+        debug_print("LLM error. Error: " + error_msg)
+        return (
+            f"Query error: {str(e)}\n\nTry using a smaller document or simplifying your query.",
+            "",
+            "Input tokens: 0",
+            "Output tokens: 0"
+        )
 
 def reset_app_updated():
     global rag_chain
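The rewritten submit_query_updated trims conversation history to the last three turns and budgets the context window explicitly: 32000 tokens for Mistral models, 4096 otherwise, minus a 1000-token reserve for the question and generation. Note that the assembled prompt_variables dict is no longer what reaches invoke(); the chain now receives only {"question": query}, so history and context are presumably injected inside elevated_rag_chain itself. The hunk calls count_tokens and truncate_prompt without showing them; below is a hypothetical sketch of helpers with matching signatures, using tiktoken as a stand-in tokenizer (an assumption; the repo may count tokens differently):

    import tiktoken

    def count_tokens(text: str, encoding_name: str = "cl100k_base") -> int:
        # Approximate token count with a generic BPE encoding.
        return len(tiktoken.get_encoding(encoding_name).encode(text))

    def truncate_prompt(text: str, max_tokens: int, encoding_name: str = "cl100k_base") -> str:
        # Keep the first max_tokens tokens and decode back to text
        # (the real helper may truncate from the other end).
        enc = tiktoken.get_encoding(encoding_name)
        tokens = enc.encode(text)
        return enc.decode(tokens[:max_tokens]) if len(tokens) > max_tokens else text

Under this budget a Mistral query keeps up to 32000 - 1000 = 31000 context tokens, while the non-Mistral fallback keeps 4096 - 1000 = 3096.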
 
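Both the error and success paths now return the same four-element shape (response text, current context, input-token label, output-token label), so whatever UI consumes this function never unpacks a mismatched tuple. A hypothetical Gradio wiring consistent with that shape; every component name below is illustrative, and the actual app layout is not shown in this commit:

    import gradio as gr
    from advanced_rag import submit_query_updated  # assumed import

    with gr.Blocks() as demo:
        query_box = gr.Textbox(label="Query")
        answer = gr.Textbox(label="Response")
        context_view = gr.Textbox(label="Current context")
        in_tokens = gr.Textbox(label="Input tokens")
        out_tokens = gr.Textbox(label="Output tokens")
        # One input, four outputs, matching the function's return tuple.
        query_box.submit(submit_query_updated, inputs=query_box,
                         outputs=[answer, context_view, in_tokens, out_tokens])

    demo.launch()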