rdz-falcon committed on
Commit
a440252
·
verified ·
1 Parent(s): 9180644

Update rag.py

Browse files
Files changed (1) hide show
  1. rag.py +37 -28
rag.py CHANGED
@@ -75,14 +75,20 @@ def load_emotion_classifier(api_base_url="http://127.0.0.1:1234/v1"):
75
  """
76
  print(f"=== CONFIGURING LLM CLIENT FOR API: {api_base_url} ===")
77
 
 
78
 
79
- llm = ChatOpenAI(
80
- openai_api_base=api_base_url,
81
- openai_api_key="dummy-key", # Required by LangChain, but not used by LM Studio
82
- temperature=0.7,
83
- max_tokens=128,
84
  )
85
- return llm
 
 
 
 
 
 
 
86
 
87
  # --- The following code was commented out or unreachable in the original notebook ---
88
  # Example code (replace with appropriate code for your model):
@@ -227,32 +233,35 @@ class AACAssistant:
227
 
228
  print("AAC Assistant initialized and ready!")
229
 
230
- def get_emotion_analysis(self, situation):
231
  """
232
  Gets emotion analysis from the configured emotion LLM API.
233
  """
234
  # Define the prompt structure for the emotion analysis model
235
  # (Adjust this based on how you prompted your model in LM Studio)
236
- input_emotion = "excited" # Or determine this dynamically if needed
237
- user_content = f"Emotion: {input_emotion}\nSituation: {situation}\nGenerate a brief analysis of the user's likely feeling based on the situation."
238
-
239
- messages = [
240
- # {"role": "system", "content": "You are an empathetic assistant analyzing emotions."},
241
- {"role": "user", "content": user_content},
242
- ]
243
-
244
- print(f"Sending to emotion API: {messages}")
245
- try:
246
- # Use the invoke method for ChatOpenAI
247
- response = self.emotion_llm.invoke(messages)
248
- # The response object has a 'content' attribute
249
- analysis = response.content.strip()
250
- print(f"Received from emotion API: {analysis}")
251
- return analysis
252
- except Exception as e:
253
- print(f"\nAn error occurred during emotion analysis API call: {e}")
254
- # Fallback or default analysis
255
- return f"Could not determine emotion (API error: {e})"
 
 
 
256
 
257
 
258
  def process_query(self, user_query):
@@ -267,7 +276,7 @@ class AACAssistant:
267
  """
268
  # Step 1: Get emotion analysis from the LM Studio API via the emotion_llm client
269
  print(f"Getting emotion analysis for query: '{user_query}'")
270
- emotion_analysis = self.get_emotion_analysis(user_query)
271
  print(f"Emotion Analysis Result: {emotion_analysis}")
272
 
273
  # Step 2: Run the RAG + LLM chain (using the main generation model)
 
75
  """
76
  print(f"=== CONFIGURING LLM CLIENT FOR API: {api_base_url} ===")
77
 
78
+ from llama_cpp import Llama
79
 
80
+ llm = Llama.from_pretrained(
81
+ repo_id="rdz-falcon/llma_fine-tuned",
82
+ filename="unsloth.F16.gguf",
 
 
83
  )
84
+
85
+ # llm = ChatOpenAI(
86
+ # openai_api_base=api_base_url,
87
+ # openai_api_key="dummy-key", # Required by LangChain, but not used by LM Studio
88
+ # temperature=0.7,
89
+ # max_tokens=128,
90
+ # )
91
+ # return llm
92
 
93
  # --- The following code was commented out or unreachable in the original notebook ---
94
  # Example code (replace with appropriate code for your model):
 
233
 
234
  print("AAC Assistant initialized and ready!")
235
 
236
+ def get_emotion_analysis(self,llm, situation):
237
  """
238
  Gets emotion analysis from the configured emotion LLM API.
239
  """
240
  # Define the prompt structure for the emotion analysis model
241
  # (Adjust this based on how you prompted your model in LM Studio)
242
+ text = situation
243
+ response = llm.create_chat_completion(
244
+ messages=[{"role": "user", "content": text}],
245
+ max_tokens=128, # Max length of the generated response (adjust as needed)
246
+ temperature=0.7, # Controls randomness (adjust)
247
+ # top_p=0.9, # Optional: Nucleus sampling
248
+ # top_k=40, # Optional: Top-k sampling
249
+ stop=["<|eot_id|>"], # Crucial: Stop generation when the model outputs the end-of-turn token
250
+ stream=False, # Set to True to get token-by-token output (like TextStreamer)
251
+ )
252
+
253
+ # --- 4. Extract and print the response ---
254
+ if response and 'choices' in response and len(response['choices']) > 0:
255
+ assistant_message = response['choices'][0]['message']['content']
256
+ print("\nAssistant Response:")
257
+ print(assistant_message.strip())
258
+ print("returning:", assistant_message.strip())
259
+ return assistant_message.strip()
260
+ else:
261
+ print("\nNo response generated or unexpected format.")
262
+ print("Full response:", response)
263
+
264
+ return ""
265
 
266
 
267
  def process_query(self, user_query):
 
276
  """
277
  # Step 1: Get emotion analysis from the LM Studio API via the emotion_llm client
278
  print(f"Getting emotion analysis for query: '{user_query}'")
279
+ emotion_analysis = self.get_emotion_analysis(self.emotion_llm, user_query)
280
  print(f"Emotion Analysis Result: {emotion_analysis}")
281
 
282
  # Step 2: Run the RAG + LLM chain (using the main generation model)