Spaces:
Build error
Build error
Update rag.py
Browse files
rag.py
CHANGED
|
@@ -75,14 +75,20 @@ def load_emotion_classifier(api_base_url="http://127.0.0.1:1234/v1"):
|
|
| 75 |
"""
|
| 76 |
print(f"=== CONFIGURING LLM CLIENT FOR API: {api_base_url} ===")
|
| 77 |
|
|
|
|
| 78 |
|
| 79 |
-
llm =
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
temperature=0.7,
|
| 83 |
-
max_tokens=128,
|
| 84 |
)
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
# --- The following code was commented out or unreachable in the original notebook ---
|
| 88 |
# Example code (replace with appropriate code for your model):
|
|
@@ -227,32 +233,35 @@ class AACAssistant:
|
|
| 227 |
|
| 228 |
print("AAC Assistant initialized and ready!")
|
| 229 |
|
| 230 |
-
def get_emotion_analysis(self, situation):
|
| 231 |
"""
|
| 232 |
Gets emotion analysis from the configured emotion LLM API.
|
| 233 |
"""
|
| 234 |
# Define the prompt structure for the emotion analysis model
|
| 235 |
# (Adjust this based on how you prompted your model in LM Studio)
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
print(
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
|
|
|
|
|
|
|
|
|
| 256 |
|
| 257 |
|
| 258 |
def process_query(self, user_query):
|
|
@@ -267,7 +276,7 @@ class AACAssistant:
|
|
| 267 |
"""
|
| 268 |
# Step 1: Get emotion analysis from the LM Studio API via the emotion_llm client
|
| 269 |
print(f"Getting emotion analysis for query: '{user_query}'")
|
| 270 |
-
emotion_analysis = self.get_emotion_analysis(user_query)
|
| 271 |
print(f"Emotion Analysis Result: {emotion_analysis}")
|
| 272 |
|
| 273 |
# Step 2: Run the RAG + LLM chain (using the main generation model)
|
|
|
|
| 75 |
"""
|
| 76 |
print(f"=== CONFIGURING LLM CLIENT FOR API: {api_base_url} ===")
|
| 77 |
|
| 78 |
+
from llama_cpp import Llama
|
| 79 |
|
| 80 |
+
llm = Llama.from_pretrained(
|
| 81 |
+
repo_id="rdz-falcon/llma_fine-tuned",
|
| 82 |
+
filename="unsloth.F16.gguf",
|
|
|
|
|
|
|
| 83 |
)
|
| 84 |
+
|
| 85 |
+
# llm = ChatOpenAI(
|
| 86 |
+
# openai_api_base=api_base_url,
|
| 87 |
+
# openai_api_key="dummy-key", # Required by LangChain, but not used by LM Studio
|
| 88 |
+
# temperature=0.7,
|
| 89 |
+
# max_tokens=128,
|
| 90 |
+
# )
|
| 91 |
+
# return llm
|
| 92 |
|
| 93 |
# --- The following code was commented out or unreachable in the original notebook ---
|
| 94 |
# Example code (replace with appropriate code for your model):
|
|
|
|
| 233 |
|
| 234 |
print("AAC Assistant initialized and ready!")
|
| 235 |
|
| 236 |
+
def get_emotion_analysis(self, llm, situation):
    """
    Ask the emotion model to analyse ``situation`` and return its reply.

    Args:
        llm: Object exposing ``create_chat_completion(...)`` that returns an
            OpenAI-style chat completion mapping
            (``{"choices": [{"message": {"content": ...}}]}``).
            Presumably a ``llama_cpp.Llama`` instance -- verify against the
            caller.
        situation: Text sent to the model as a single user message.

    Returns:
        The assistant's reply with surrounding whitespace stripped, or ``""``
        when the model produced no usable text.
    """
    response = llm.create_chat_completion(
        messages=[{"role": "user", "content": situation}],
        max_tokens=128,  # Max length of the generated response (adjust as needed)
        temperature=0.7,  # Controls randomness (adjust)
        # top_p=0.9,  # Optional: Nucleus sampling
        # top_k=40,  # Optional: Top-k sampling
        stop=["<|eot_id|>"],  # Crucial: stop when the model emits the end-of-turn token
        stream=False,  # Set to True to get token-by-token output
    )

    # Walk the whole response path in one guarded step so that every malformed
    # shape (None response, empty "choices", missing "message"/"content")
    # takes the same fallback instead of raising half-way through.
    try:
        content = response["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        content = None

    # ``content`` can legitimately be None (no text generated); treat that
    # like any other unusable response rather than calling .strip() on it.
    if not isinstance(content, str):
        print("\nNo response generated or unexpected format.")
        print("Full response:", response)
        return ""

    answer = content.strip()
    print("\nAssistant Response:")
    print(answer)
    print("returning:", answer)
    return answer
|
| 265 |
|
| 266 |
|
| 267 |
def process_query(self, user_query):
|
|
|
|
| 276 |
"""
|
| 277 |
# Step 1: Get emotion analysis from the LM Studio API via the emotion_llm client
|
| 278 |
print(f"Getting emotion analysis for query: '{user_query}'")
|
| 279 |
+
emotion_analysis = self.get_emotion_analysis(self.emotion_llm, user_query)
|
| 280 |
print(f"Emotion Analysis Result: {emotion_analysis}")
|
| 281 |
|
| 282 |
# Step 2: Run the RAG + LLM chain (using the main generation model)
|