Spaces:

rdz-falcon
/

testing

Build error

App Files Files Community

rdz-falcon committed on May 10, 2025

Commit

a440252

verified ·

1 Parent(s): 9180644

Update rag.py

Browse files

Files changed (1) hide show

rag.py +37 -28

rag.py CHANGED Viewed

@@ -75,14 +75,20 @@ def load_emotion_classifier(api_base_url="http://127.0.0.1:1234/v1"):
     """
     print(f"=== CONFIGURING LLM CLIENT FOR API: {api_base_url} ===")
-    llm = ChatOpenAI(
-        openai_api_base=api_base_url,
-        openai_api_key="dummy-key", # Required by LangChain, but not used by LM Studio
-        temperature=0.7,
-        max_tokens=128,
     )
-    return llm
     # --- The following code was commented out or unreachable in the original notebook ---
     # Example code (replace with appropriate code for your model):
@@ -227,32 +233,35 @@ class AACAssistant:
         print("AAC Assistant initialized and ready!")
-    def get_emotion_analysis(self, situation):
         """
         Gets emotion analysis from the configured emotion LLM API.
         """
         # Define the prompt structure for the emotion analysis model
         # (Adjust this based on how you prompted your model in LM Studio)
-        input_emotion = "excited" # Or determine this dynamically if needed
-        user_content = f"Emotion: {input_emotion}\nSituation: {situation}\nGenerate a brief analysis of the user's likely feeling based on the situation."
-        messages = [
-            # {"role": "system", "content": "You are an empathetic assistant analyzing emotions."},
-            {"role": "user", "content": user_content},
-        ]
-        print(f"Sending to emotion API: {messages}")
-        try:
-            # Use the invoke method for ChatOpenAI
-            response = self.emotion_llm.invoke(messages)
-            # The response object has a 'content' attribute
-            analysis = response.content.strip()
-            print(f"Received from emotion API: {analysis}")
-            return analysis
-        except Exception as e:
-            print(f"\nAn error occurred during emotion analysis API call: {e}")
-            # Fallback or default analysis
-            return f"Could not determine emotion (API error: {e})"
     def process_query(self, user_query):
@@ -267,7 +276,7 @@ class AACAssistant:
         """
         # Step 1: Get emotion analysis from the LM Studio API via the emotion_llm client
         print(f"Getting emotion analysis for query: '{user_query}'")
-        emotion_analysis = self.get_emotion_analysis(user_query)
         print(f"Emotion Analysis Result: {emotion_analysis}")
         # Step 2: Run the RAG + LLM chain (using the main generation model)

     """
     print(f"=== CONFIGURING LLM CLIENT FOR API: {api_base_url} ===")
+    from llama_cpp import Llama
+    llm = Llama.from_pretrained(
+    	repo_id="rdz-falcon/llma_fine-tuned",
+    	filename="unsloth.F16.gguf",
     )
+    # llm = ChatOpenAI(
+    #     openai_api_base=api_base_url,
+    #     openai_api_key="dummy-key", # Required by LangChain, but not used by LM Studio
+    #     temperature=0.7,
+    #     max_tokens=128,
+    # )
+    # return llm
     # --- The following code was commented out or unreachable in the original notebook ---
     # Example code (replace with appropriate code for your model):
         print("AAC Assistant initialized and ready!")
+    def get_emotion_analysis(self,llm, situation):
         """
         Gets emotion analysis from the configured emotion LLM API.
         """
         # Define the prompt structure for the emotion analysis model
         # (Adjust this based on how you prompted your model in LM Studio)
+        text = situation
+        response = llm.create_chat_completion(
+        messages=[{"role": "user", "content": text}],
+        max_tokens=128,       # Max length of the generated response (adjust as needed)
+        temperature=0.7,    # Controls randomness (adjust)
+            # top_p=0.9,          # Optional: Nucleus sampling
+            # top_k=40,           # Optional: Top-k sampling
+        stop=["<|eot_id|>"], # Crucial: Stop generation when the model outputs the end-of-turn token
+            stream=False,        # Set to True to get token-by-token output (like TextStreamer)
+        )
+                # --- 4. Extract and print the response ---
+        if response and 'choices' in response and len(response['choices']) > 0:
+            assistant_message = response['choices'][0]['message']['content']
+            print("\nAssistant Response:")
+            print(assistant_message.strip())
+            print("returning:", assistant_message.strip())
+            return assistant_message.strip()
+        else:
+            print("\nNo response generated or unexpected format.")
+            print("Full response:", response)
+        return ""
     def process_query(self, user_query):
         """
         # Step 1: Get emotion analysis from the LM Studio API via the emotion_llm client
         print(f"Getting emotion analysis for query: '{user_query}'")
+        emotion_analysis = self.get_emotion_analysis(self.emotion_llm, user_query)
         print(f"Emotion Analysis Result: {emotion_analysis}")
         # Step 2: Run the RAG + LLM chain (using the main generation model)