Spaces:
Sleeping
Sleeping
Nikita Makarov commited on
Commit Β·
27ca72c
1
Parent(s): 0df1705
v1.1
Browse files- requirements.txt +4 -1
- src/app.py +23 -10
- src/radio_agent.py +116 -13
- src/rag_system.py +455 -38
requirements.txt
CHANGED
|
@@ -1,8 +1,11 @@
|
|
| 1 |
-
gradio==4.44.
|
| 2 |
openai>=1.0.0
|
| 3 |
elevenlabs==1.10.0
|
| 4 |
llama-index==0.11.20
|
| 5 |
llama-index-llms-openai>=0.1.5
|
|
|
|
|
|
|
|
|
|
| 6 |
requests==2.32.3
|
| 7 |
python-dotenv==1.0.1
|
| 8 |
pydantic==2.9.2
|
|
|
|
| 1 |
+
gradio==4.44.1
|
| 2 |
openai>=1.0.0
|
| 3 |
elevenlabs==1.10.0
|
| 4 |
llama-index==0.11.20
|
| 5 |
llama-index-llms-openai>=0.1.5
|
| 6 |
+
llama-index-embeddings-openai>=0.1.5
|
| 7 |
+
llama-index-embeddings-huggingface>=0.1.0
|
| 8 |
+
sentence-transformers>=2.2.0
|
| 9 |
requests==2.32.3
|
| 10 |
python-dotenv==1.0.1
|
| 11 |
pydantic==2.9.2
|
src/app.py
CHANGED
|
@@ -80,12 +80,15 @@ def save_preferences(name: str, favorite_genres: List[str], interests: List[str]
|
|
| 80 |
radio_state["content_filter"] = preferences["content_filter"]
|
| 81 |
|
| 82 |
radio_state["user_preferences"] = preferences
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
# Save to user memory for persistence
|
| 86 |
-
if
|
| 87 |
-
user_memory.save_user_preferences(
|
| 88 |
-
print(f"πΎ Preferences saved for user {
|
| 89 |
|
| 90 |
return f"β
Preferences saved! Welcome, {preferences['name']}! Your personalized radio is ready."
|
| 91 |
|
|
@@ -151,7 +154,9 @@ def start_radio_stream():
|
|
| 151 |
return "π» Radio is already playing!", None, None, None, ""
|
| 152 |
|
| 153 |
# Plan the show with content filter
|
|
|
|
| 154 |
show_plan = agent.plan_radio_show(
|
|
|
|
| 155 |
user_preferences=radio_state["user_preferences"],
|
| 156 |
duration_minutes=30,
|
| 157 |
content_filter=radio_state["content_filter"]
|
|
@@ -227,7 +232,9 @@ def start_and_play_first_segment():
|
|
| 227 |
|
| 228 |
# Step 1: Plan skeleton show (YouTube searches happen here)
|
| 229 |
print(" [1/3] Planning show (searching YouTube)...")
|
|
|
|
| 230 |
show_plan = agent.plan_radio_show(
|
|
|
|
| 231 |
user_preferences=radio_state["user_preferences"],
|
| 232 |
duration_minutes=30,
|
| 233 |
content_filter=radio_state["content_filter"]
|
|
@@ -315,11 +322,15 @@ def play_next_segment():
|
|
| 315 |
radio_state["current_segment_index"] += 1
|
| 316 |
|
| 317 |
# Lazily generate LLM content for this segment
|
| 318 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
radio_state["planned_show"][segment_index] = segment
|
| 320 |
|
| 321 |
# Execute segment (log to RAG)
|
| 322 |
-
agent.execute_segment(segment)
|
| 323 |
radio_state["last_segment"] = segment
|
| 324 |
|
| 325 |
# Generate content display and underlying script text
|
|
@@ -404,7 +415,7 @@ def play_next_segment():
|
|
| 404 |
youtube_id = track["url"].split("v=")[-1].split("&")[0]
|
| 405 |
|
| 406 |
if youtube_id:
|
| 407 |
-
# YouTube embed with
|
| 408 |
music_player_html = f"""
|
| 409 |
<div style="padding: 1rem; background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 12px; margin: 1rem 0; box-shadow: 0 4px 15px rgba(0,0,0,0.3);">
|
| 410 |
<h4 style="margin: 0 0 0.75rem 0; color: #fff; font-size: 1.1em;">π΅ {track.get('title', 'Unknown')}</h4>
|
|
@@ -1197,14 +1208,16 @@ with gr.Blocks(css=custom_css, title="AI Radio π΅", theme=gr.themes.Soft()) as
|
|
| 1197 |
dislike_btn = gr.Button("π Don't Play Again", variant="secondary", size="sm")
|
| 1198 |
like_status = gr.Textbox(label="Rating Status", value="Rate the current song!", interactive=False, scale=2)
|
| 1199 |
|
| 1200 |
-
progress_text = gr.Textbox(label="Progress", value="Ready to start", interactive=False)
|
| 1201 |
|
| 1202 |
segment_info = gr.Markdown("**Welcome!** Set your preferences and start the radio.", elem_id="segment-display")
|
|
|
|
| 1203 |
llm_script = gr.Textbox(
|
| 1204 |
label="π§ Model Script (LLM answer)",
|
| 1205 |
value="Model-generated script will appear here for each segment.",
|
| 1206 |
interactive=False,
|
| 1207 |
-
lines=6
|
|
|
|
| 1208 |
)
|
| 1209 |
|
| 1210 |
gr.Markdown("**π‘ Tip:** Host speech plays first, then music/podcasts will stream automatically!")
|
|
@@ -1212,7 +1225,7 @@ with gr.Blocks(css=custom_css, title="AI Radio π΅", theme=gr.themes.Soft()) as
|
|
| 1212 |
audio_output = gr.Audio(label="π Host Speech", autoplay=True, type="filepath", elem_id="host_audio")
|
| 1213 |
music_player = gr.HTML(label="π΅ Music/Podcast Player (streaming)")
|
| 1214 |
|
| 1215 |
-
status_text = gr.Textbox(label="Status", value="Ready", interactive=False)
|
| 1216 |
|
| 1217 |
# Connect buttons
|
| 1218 |
start_btn.click(
|
|
|
|
| 80 |
radio_state["content_filter"] = preferences["content_filter"]
|
| 81 |
|
| 82 |
radio_state["user_preferences"] = preferences
|
| 83 |
+
|
| 84 |
+
# Save to RAG system with user_id
|
| 85 |
+
user_id = radio_state.get("user_id")
|
| 86 |
+
agent.rag_system.store_user_preferences(preferences, user_id=user_id)
|
| 87 |
|
| 88 |
# Save to user memory for persistence
|
| 89 |
+
if user_id:
|
| 90 |
+
user_memory.save_user_preferences(user_id, preferences)
|
| 91 |
+
print(f"πΎ Preferences saved for user {user_id}")
|
| 92 |
|
| 93 |
return f"β
Preferences saved! Welcome, {preferences['name']}! Your personalized radio is ready."
|
| 94 |
|
|
|
|
| 154 |
return "π» Radio is already playing!", None, None, None, ""
|
| 155 |
|
| 156 |
# Plan the show with content filter
|
| 157 |
+
user_id = radio_state.get("user_id")
|
| 158 |
show_plan = agent.plan_radio_show(
|
| 159 |
+
user_id=user_id,
|
| 160 |
user_preferences=radio_state["user_preferences"],
|
| 161 |
duration_minutes=30,
|
| 162 |
content_filter=radio_state["content_filter"]
|
|
|
|
| 232 |
|
| 233 |
# Step 1: Plan skeleton show (YouTube searches happen here)
|
| 234 |
print(" [1/3] Planning show (searching YouTube)...")
|
| 235 |
+
user_id = radio_state.get("user_id")
|
| 236 |
show_plan = agent.plan_radio_show(
|
| 237 |
+
user_id=user_id,
|
| 238 |
user_preferences=radio_state["user_preferences"],
|
| 239 |
duration_minutes=30,
|
| 240 |
content_filter=radio_state["content_filter"]
|
|
|
|
| 322 |
radio_state["current_segment_index"] += 1
|
| 323 |
|
| 324 |
# Lazily generate LLM content for this segment
|
| 325 |
+
user_prefs = radio_state["user_preferences"].copy()
|
| 326 |
+
user_id = radio_state.get("user_id")
|
| 327 |
+
if user_id:
|
| 328 |
+
user_prefs["_user_id"] = user_id # Pass user_id in preferences for RAG queries
|
| 329 |
+
segment = agent.enrich_segment(segment, user_prefs)
|
| 330 |
radio_state["planned_show"][segment_index] = segment
|
| 331 |
|
| 332 |
# Execute segment (log to RAG)
|
| 333 |
+
agent.execute_segment(segment, user_id=user_id)
|
| 334 |
radio_state["last_segment"] = segment
|
| 335 |
|
| 336 |
# Generate content display and underlying script text
|
|
|
|
| 415 |
youtube_id = track["url"].split("v=")[-1].split("&")[0]
|
| 416 |
|
| 417 |
if youtube_id:
|
| 418 |
+
# Simple YouTube iframe embed with autoplay
|
| 419 |
music_player_html = f"""
|
| 420 |
<div style="padding: 1rem; background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 12px; margin: 1rem 0; box-shadow: 0 4px 15px rgba(0,0,0,0.3);">
|
| 421 |
<h4 style="margin: 0 0 0.75rem 0; color: #fff; font-size: 1.1em;">π΅ {track.get('title', 'Unknown')}</h4>
|
|
|
|
| 1208 |
dislike_btn = gr.Button("π Don't Play Again", variant="secondary", size="sm")
|
| 1209 |
like_status = gr.Textbox(label="Rating Status", value="Rate the current song!", interactive=False, scale=2)
|
| 1210 |
|
| 1211 |
+
progress_text = gr.Textbox(label="Progress", value="Ready to start", interactive=False, visible=False)
|
| 1212 |
|
| 1213 |
segment_info = gr.Markdown("**Welcome!** Set your preferences and start the radio.", elem_id="segment-display")
|
| 1214 |
+
|
| 1215 |
llm_script = gr.Textbox(
|
| 1216 |
label="π§ Model Script (LLM answer)",
|
| 1217 |
value="Model-generated script will appear here for each segment.",
|
| 1218 |
interactive=False,
|
| 1219 |
+
lines=6,
|
| 1220 |
+
visible=False
|
| 1221 |
)
|
| 1222 |
|
| 1223 |
gr.Markdown("**π‘ Tip:** Host speech plays first, then music/podcasts will stream automatically!")
|
|
|
|
| 1225 |
audio_output = gr.Audio(label="π Host Speech", autoplay=True, type="filepath", elem_id="host_audio")
|
| 1226 |
music_player = gr.HTML(label="π΅ Music/Podcast Player (streaming)")
|
| 1227 |
|
| 1228 |
+
status_text = gr.Textbox(label="Status", value="Ready", interactive=False, visible=False)
|
| 1229 |
|
| 1230 |
# Connect buttons
|
| 1231 |
start_btn.click(
|
src/radio_agent.py
CHANGED
|
@@ -61,9 +61,10 @@ class RadioAgent:
|
|
| 61 |
self.is_streaming = False
|
| 62 |
self.current_segment = None
|
| 63 |
self.segment_history = []
|
|
|
|
| 64 |
|
| 65 |
def plan_radio_show(self, user_preferences: Dict[str, Any], duration_minutes: int = 30,
|
| 66 |
-
content_filter: Dict[str, bool] = None) -> List[Dict[str, Any]]:
|
| 67 |
"""
|
| 68 |
Plan a personalized radio show based on user preferences
|
| 69 |
This demonstrates autonomous planning behavior
|
|
@@ -72,10 +73,14 @@ class RadioAgent:
|
|
| 72 |
user_preferences: User's preferences and mood
|
| 73 |
duration_minutes: Total duration of the show
|
| 74 |
content_filter: Dictionary with content type filters (music, news, podcasts, stories)
|
|
|
|
| 75 |
|
| 76 |
Returns:
|
| 77 |
List of planned segments
|
| 78 |
"""
|
|
|
|
|
|
|
|
|
|
| 79 |
segments = []
|
| 80 |
|
| 81 |
# Default filter - all enabled
|
|
@@ -87,8 +92,8 @@ class RadioAgent:
|
|
| 87 |
"stories": True
|
| 88 |
}
|
| 89 |
|
| 90 |
-
# Get user preferences from RAG
|
| 91 |
-
stored_prefs = self.rag_system.get_user_preferences()
|
| 92 |
merged_prefs = {**stored_prefs, **user_preferences}
|
| 93 |
|
| 94 |
# Calculate segment distribution based on filter
|
|
@@ -320,7 +325,7 @@ class RadioAgent:
|
|
| 320 |
'duration': 2
|
| 321 |
}
|
| 322 |
|
| 323 |
-
def _generate_music_commentary(self, track: Dict[str, Any], preferences: Dict[str, Any]) -> str:
|
| 324 |
"""Generate longer, more engaging commentary for music track with jokes/facts"""
|
| 325 |
if not track or not self.client:
|
| 326 |
return f"Here's a great track for you!"
|
|
@@ -328,12 +333,31 @@ class RadioAgent:
|
|
| 328 |
mood = preferences.get('mood', 'happy')
|
| 329 |
interests = preferences.get('interests', [])
|
| 330 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
try:
|
| 332 |
prompt = f"""You are an energetic, entertaining radio DJ named Lera. This is a middle of a show. Introduce this song in a fun, engaging way:
|
| 333 |
Title: {track['title']}
|
| 334 |
Artist: {track['artist']}
|
| 335 |
Genre: {track['genre']}
|
| 336 |
-
Listener mood: {mood}
|
| 337 |
|
| 338 |
Make it:
|
| 339 |
- About 10-15 seconds of speech, max_tokens = 400
|
|
@@ -341,7 +365,11 @@ class RadioAgent:
|
|
| 341 |
- Add a light joke or witty comment
|
| 342 |
- Be enthusiastic and engaging
|
| 343 |
- Sound natural, like a real radio host
|
| 344 |
-
- Connect it to the listener's mood if possible
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
|
| 346 |
response = self.client.chat.completions.create(
|
| 347 |
model=self.config.nebius_model,
|
|
@@ -350,10 +378,31 @@ class RadioAgent:
|
|
| 350 |
max_tokens=400 #400
|
| 351 |
)
|
| 352 |
text = response.choices[0].message.content.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
llm_logger.info(
|
| 354 |
-
"MUSIC | model=%s |
|
| 355 |
self.config.nebius_model,
|
| 356 |
-
|
|
|
|
| 357 |
text[:1000],
|
| 358 |
)
|
| 359 |
return text
|
|
@@ -528,15 +577,20 @@ Don't use emojis. Just speak naturally like a real DJ."""
|
|
| 528 |
else:
|
| 529 |
return "evening"
|
| 530 |
|
| 531 |
-
def execute_segment(self, segment: Dict[str, Any]) -> Dict[str, Any]:
|
| 532 |
"""Execute a planned segment and log to RAG"""
|
| 533 |
self.current_segment = segment
|
| 534 |
self.segment_history.append(segment)
|
| 535 |
|
| 536 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 537 |
self.rag_system.store_listening_history(
|
| 538 |
item_type=segment['type'],
|
| 539 |
-
item_data=segment
|
|
|
|
| 540 |
)
|
| 541 |
|
| 542 |
return segment
|
|
@@ -567,10 +621,42 @@ Don't use emojis. Just speak naturally like a real DJ."""
|
|
| 567 |
elif seg_type == "music":
|
| 568 |
# Lazy fetch track if not yet fetched
|
| 569 |
if not segment.get("track"):
|
|
|
|
| 570 |
genres = prefs.get('favorite_genres', ['pop'])
|
| 571 |
mood = prefs.get('mood', 'happy')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 572 |
tracks = self.music_server.search_free_music(
|
| 573 |
-
genre=
|
| 574 |
mood=mood,
|
| 575 |
limit=1
|
| 576 |
)
|
|
@@ -578,7 +664,24 @@ Don't use emojis. Just speak naturally like a real DJ."""
|
|
| 578 |
|
| 579 |
track = segment.get("track")
|
| 580 |
if track and not segment.get("commentary"):
|
| 581 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 582 |
|
| 583 |
elif seg_type == "news":
|
| 584 |
# Lazy fetch news if not yet fetched
|
|
|
|
| 61 |
self.is_streaming = False
|
| 62 |
self.current_segment = None
|
| 63 |
self.segment_history = []
|
| 64 |
+
self.current_user_id = None # Track current user for RAG queries
|
| 65 |
|
| 66 |
def plan_radio_show(self, user_preferences: Dict[str, Any], duration_minutes: int = 30,
|
| 67 |
+
content_filter: Dict[str, bool] = None, user_id: str = None) -> List[Dict[str, Any]]:
|
| 68 |
"""
|
| 69 |
Plan a personalized radio show based on user preferences
|
| 70 |
This demonstrates autonomous planning behavior
|
|
|
|
| 73 |
user_preferences: User's preferences and mood
|
| 74 |
duration_minutes: Total duration of the show
|
| 75 |
content_filter: Dictionary with content type filters (music, news, podcasts, stories)
|
| 76 |
+
user_id: User ID for RAG queries
|
| 77 |
|
| 78 |
Returns:
|
| 79 |
List of planned segments
|
| 80 |
"""
|
| 81 |
+
# Store user_id for later use
|
| 82 |
+
self.current_user_id = user_id
|
| 83 |
+
|
| 84 |
segments = []
|
| 85 |
|
| 86 |
# Default filter - all enabled
|
|
|
|
| 92 |
"stories": True
|
| 93 |
}
|
| 94 |
|
| 95 |
+
# Get user preferences from RAG (filtered by user_id)
|
| 96 |
+
stored_prefs = self.rag_system.get_user_preferences(user_id=user_id)
|
| 97 |
merged_prefs = {**stored_prefs, **user_preferences}
|
| 98 |
|
| 99 |
# Calculate segment distribution based on filter
|
|
|
|
| 325 |
'duration': 2
|
| 326 |
}
|
| 327 |
|
| 328 |
+
def _generate_music_commentary(self, track: Dict[str, Any], preferences: Dict[str, Any], rag_context: List[Dict[str, Any]] = None) -> str:
|
| 329 |
"""Generate longer, more engaging commentary for music track with jokes/facts"""
|
| 330 |
if not track or not self.client:
|
| 331 |
return f"Here's a great track for you!"
|
|
|
|
| 333 |
mood = preferences.get('mood', 'happy')
|
| 334 |
interests = preferences.get('interests', [])
|
| 335 |
|
| 336 |
+
# Add RAG context to prompt if available
|
| 337 |
+
rag_context_text = ""
|
| 338 |
+
if rag_context:
|
| 339 |
+
rag_context_text = "\n\nUser's listening history context (use this to personalize your commentary):\n"
|
| 340 |
+
for i, ctx in enumerate(rag_context[:2]): # Use top 2 most relevant
|
| 341 |
+
ctx_text = ctx.get('text', '')[:200]
|
| 342 |
+
score = ctx.get('score', 0)
|
| 343 |
+
rag_context_text += f"- Context #{i+1} (relevance: {score:.4f}): {ctx_text}\n"
|
| 344 |
+
|
| 345 |
+
llm_logger.info(
|
| 346 |
+
"RAG_CONTEXT | track=%s | context_docs=%d | context_preview=%s",
|
| 347 |
+
track.get('title', 'Unknown'),
|
| 348 |
+
len(rag_context),
|
| 349 |
+
rag_context_text[:300]
|
| 350 |
+
)
|
| 351 |
+
print(f" π [RAG] Added {len(rag_context)} context documents to LLM prompt")
|
| 352 |
+
else:
|
| 353 |
+
print(f" βΉοΈ [RAG] No context available - using standard prompt")
|
| 354 |
+
|
| 355 |
try:
|
| 356 |
prompt = f"""You are an energetic, entertaining radio DJ named Lera. This is a middle of a show. Introduce this song in a fun, engaging way:
|
| 357 |
Title: {track['title']}
|
| 358 |
Artist: {track['artist']}
|
| 359 |
Genre: {track['genre']}
|
| 360 |
+
Listener mood: {mood}{rag_context_text}
|
| 361 |
|
| 362 |
Make it:
|
| 363 |
- About 10-15 seconds of speech, max_tokens = 400
|
|
|
|
| 365 |
- Add a light joke or witty comment
|
| 366 |
- Be enthusiastic and engaging
|
| 367 |
- Sound natural, like a real radio host
|
| 368 |
+
- Connect it to the listener's mood if possible
|
| 369 |
+
- Reference their listening history if relevant (from context above)"""
|
| 370 |
+
|
| 371 |
+
if rag_context:
|
| 372 |
+
print(f" β
[RAG] LLM prompt includes RAG context - model will use personalized history")
|
| 373 |
|
| 374 |
response = self.client.chat.completions.create(
|
| 375 |
model=self.config.nebius_model,
|
|
|
|
| 378 |
max_tokens=400 #400
|
| 379 |
)
|
| 380 |
text = response.choices[0].message.content.strip()
|
| 381 |
+
|
| 382 |
+
# Check if response references RAG context
|
| 383 |
+
rag_used = False
|
| 384 |
+
if rag_context:
|
| 385 |
+
# Check if response mentions anything from context
|
| 386 |
+
text_lower = text.lower()
|
| 387 |
+
for ctx in rag_context:
|
| 388 |
+
ctx_text = ctx.get('text', '').lower()
|
| 389 |
+
# Look for key terms from context in response
|
| 390 |
+
key_terms = ['listened', 'enjoyed', 'prefer', 'favorite', 'history', 'before', 'previous', 'similar']
|
| 391 |
+
if any(term in ctx_text and term in text_lower for term in key_terms):
|
| 392 |
+
rag_used = True
|
| 393 |
+
break
|
| 394 |
+
|
| 395 |
+
if rag_used:
|
| 396 |
+
print(f" β
[RAG] LLM response appears to reference RAG context!")
|
| 397 |
+
rag_logger.info(f"β
RAG CONTEXT USED in commentary for {track.get('title')}")
|
| 398 |
+
else:
|
| 399 |
+
print(f" βΉοΈ [RAG] LLM response doesn't explicitly reference context (may still be influenced)")
|
| 400 |
+
|
| 401 |
llm_logger.info(
|
| 402 |
+
"MUSIC | model=%s | rag_used=%s | prompt_length=%d | response=%r",
|
| 403 |
self.config.nebius_model,
|
| 404 |
+
rag_used,
|
| 405 |
+
len(prompt),
|
| 406 |
text[:1000],
|
| 407 |
)
|
| 408 |
return text
|
|
|
|
| 577 |
else:
|
| 578 |
return "evening"
|
| 579 |
|
| 580 |
+
def execute_segment(self, segment: Dict[str, Any], user_id: str = None) -> Dict[str, Any]:
|
| 581 |
"""Execute a planned segment and log to RAG"""
|
| 582 |
self.current_segment = segment
|
| 583 |
self.segment_history.append(segment)
|
| 584 |
|
| 585 |
+
# Use stored user_id if not provided
|
| 586 |
+
if not user_id:
|
| 587 |
+
user_id = self.current_user_id
|
| 588 |
+
|
| 589 |
+
# Store in RAG system with user_id
|
| 590 |
self.rag_system.store_listening_history(
|
| 591 |
item_type=segment['type'],
|
| 592 |
+
item_data=segment,
|
| 593 |
+
user_id=user_id
|
| 594 |
)
|
| 595 |
|
| 596 |
return segment
|
|
|
|
| 621 |
elif seg_type == "music":
|
| 622 |
# Lazy fetch track if not yet fetched
|
| 623 |
if not segment.get("track"):
|
| 624 |
+
# Use RAG to get personalized recommendations
|
| 625 |
genres = prefs.get('favorite_genres', ['pop'])
|
| 626 |
mood = prefs.get('mood', 'happy')
|
| 627 |
+
|
| 628 |
+
# Use stored user_id if available
|
| 629 |
+
user_id = prefs.get('_user_id') or self.current_user_id
|
| 630 |
+
|
| 631 |
+
# Query RAG for music preferences based on listening history
|
| 632 |
+
rag_query = f"What music genres and styles does the user prefer? Favorite genres: {', '.join(genres)}, Mood: {mood}"
|
| 633 |
+
print(f"\nπ΅ [RAG] Querying for genre recommendations for user {user_id}...")
|
| 634 |
+
rag_context = self.rag_system.query_user_context(rag_query, user_id=user_id, top_k=3)
|
| 635 |
+
|
| 636 |
+
original_genres = genres.copy()
|
| 637 |
+
# Extract genre suggestions from RAG if available
|
| 638 |
+
if rag_context:
|
| 639 |
+
print(f"π΅ [RAG] Analyzing {len(rag_context)} context documents for genre suggestions...")
|
| 640 |
+
for i, ctx in enumerate(rag_context):
|
| 641 |
+
ctx_text = ctx.get('text', '').lower()
|
| 642 |
+
score = ctx.get('score', 0)
|
| 643 |
+
# Look for genre mentions in context
|
| 644 |
+
for genre in ['pop', 'rock', 'jazz', 'classical', 'electronic', 'hip-hop', 'country', 'indie', 'rap', 'blues', 'folk', 'metal', 'reggae', 'soul', 'r&b']:
|
| 645 |
+
if genre in ctx_text and genre not in genres:
|
| 646 |
+
genres.append(genre)
|
| 647 |
+
print(f" β
[RAG] Suggested genre from context #{i+1} (score: {score:.4f}): {genre}")
|
| 648 |
+
else:
|
| 649 |
+
print(f" βΉοΈ [RAG] No context found, using original genres: {original_genres}")
|
| 650 |
+
|
| 651 |
+
# Use RAG-enhanced genre selection
|
| 652 |
+
selected_genre = random.choice(genres) if genres else 'pop'
|
| 653 |
+
if selected_genre not in original_genres:
|
| 654 |
+
print(f"π΅ [RAG] Using RAG-suggested genre: {selected_genre} (was: {original_genres})")
|
| 655 |
+
else:
|
| 656 |
+
print(f"π΅ [RAG] Using genre: {selected_genre} (from preferences)")
|
| 657 |
+
|
| 658 |
tracks = self.music_server.search_free_music(
|
| 659 |
+
genre=selected_genre,
|
| 660 |
mood=mood,
|
| 661 |
limit=1
|
| 662 |
)
|
|
|
|
| 664 |
|
| 665 |
track = segment.get("track")
|
| 666 |
if track and not segment.get("commentary"):
|
| 667 |
+
# Use stored user_id if available
|
| 668 |
+
user_id = prefs.get('_user_id') or self.current_user_id
|
| 669 |
+
|
| 670 |
+
# Use RAG context for more personalized commentary
|
| 671 |
+
rag_query = f"User's music listening history and preferences for {track.get('title', 'this track')} by {track.get('artist', 'this artist')}"
|
| 672 |
+
print(f"\nπ€ [RAG] Querying for commentary context for user {user_id}: {track.get('title')} by {track.get('artist')}")
|
| 673 |
+
rag_context = self.rag_system.query_user_context(rag_query, user_id=user_id, top_k=2)
|
| 674 |
+
|
| 675 |
+
if rag_context:
|
| 676 |
+
print(f"π€ [RAG] Using {len(rag_context)} context documents for personalized commentary")
|
| 677 |
+
for i, ctx in enumerate(rag_context):
|
| 678 |
+
score = ctx.get('score', 0)
|
| 679 |
+
preview = ctx.get('text', '')[:80].replace('\n', ' ')
|
| 680 |
+
print(f" π Context #{i+1} (score: {score:.4f}): {preview}...")
|
| 681 |
+
else:
|
| 682 |
+
print(f" βΉοΈ [RAG] No context found, generating standard commentary")
|
| 683 |
+
|
| 684 |
+
segment["commentary"] = self._generate_music_commentary(track, prefs, rag_context=rag_context)
|
| 685 |
|
| 686 |
elif seg_type == "news":
|
| 687 |
# Lazy fetch news if not yet fetched
|
src/rag_system.py
CHANGED
|
@@ -1,15 +1,29 @@
|
|
| 1 |
"""RAG System for User Preferences and History using LlamaIndex"""
|
| 2 |
import json
|
| 3 |
import os
|
|
|
|
| 4 |
from typing import Dict, Any, List
|
| 5 |
from datetime import datetime
|
| 6 |
from llama_index.core import VectorStoreIndex, Document, Settings
|
| 7 |
from llama_index.core.storage.storage_context import StorageContext
|
| 8 |
from llama_index.core.vector_stores import SimpleVectorStore
|
|
|
|
| 9 |
from llama_index.llms.openai import OpenAI as LlamaOpenAI
|
|
|
|
| 10 |
|
| 11 |
# Get project root directory (parent of src/)
|
| 12 |
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
class RadioRAGSystem:
|
| 15 |
"""RAG system for storing and retrieving user preferences and listening history"""
|
|
@@ -32,19 +46,88 @@ class RadioRAGSystem:
|
|
| 32 |
temperature=0.7
|
| 33 |
)
|
| 34 |
self.llm_available = True
|
|
|
|
| 35 |
except Exception as e:
|
| 36 |
print(f"Warning: Could not initialize Nebius/OpenAI LLM: {e}")
|
| 37 |
print("RAG system will work in fallback mode without LLM features")
|
| 38 |
self.llm_available = False
|
| 39 |
|
| 40 |
-
#
|
| 41 |
-
#
|
| 42 |
-
self.embedding_available = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
# Initialize vector store
|
| 45 |
self.vector_store = SimpleVectorStore()
|
| 46 |
self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store)
|
| 47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
# Load existing index or create new one
|
| 49 |
self.index = None
|
| 50 |
self.documents = []
|
|
@@ -53,44 +136,188 @@ class RadioRAGSystem:
|
|
| 53 |
self._load_user_data()
|
| 54 |
|
| 55 |
def _load_user_data(self):
|
| 56 |
-
"""Load user data from file"""
|
| 57 |
if os.path.exists(self.user_data_file):
|
| 58 |
try:
|
| 59 |
with open(self.user_data_file, 'r') as f:
|
| 60 |
data = json.load(f)
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
)
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
except Exception as e:
|
|
|
|
| 72 |
print(f"Error loading user data: {e}")
|
| 73 |
|
| 74 |
def _save_user_data(self):
|
| 75 |
"""Save user data to file"""
|
| 76 |
try:
|
| 77 |
-
data = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
with open(self.user_data_file, 'w') as f:
|
| 79 |
json.dump(data, f, indent=2)
|
|
|
|
| 80 |
except Exception as e:
|
|
|
|
|
|
|
|
|
|
| 81 |
print(f"Error saving user data: {e}")
|
| 82 |
|
| 83 |
-
def store_user_preferences(self, preferences: Dict[str, Any]):
|
| 84 |
-
"""Store user preferences in RAG system"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
pref_doc = {
|
| 86 |
"type": "preferences",
|
|
|
|
| 87 |
"timestamp": datetime.now().isoformat(),
|
| 88 |
"data": preferences
|
| 89 |
}
|
| 90 |
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
self.documents.append(doc)
|
| 93 |
|
|
|
|
|
|
|
|
|
|
| 94 |
# Rebuild index if embeddings are available
|
| 95 |
if self.embedding_available:
|
| 96 |
try:
|
|
@@ -98,68 +325,258 @@ class RadioRAGSystem:
|
|
| 98 |
self.documents,
|
| 99 |
storage_context=self.storage_context
|
| 100 |
)
|
|
|
|
|
|
|
| 101 |
except Exception as e:
|
|
|
|
| 102 |
print(f"Warning: Could not rebuild index: {e}")
|
| 103 |
self.index = None
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
self._save_user_data()
|
| 106 |
|
| 107 |
-
def store_listening_history(self, item_type: str, item_data: Dict[str, Any], user_feedback: str = None):
|
| 108 |
-
"""Store listening history with optional feedback"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
history_doc = {
|
| 110 |
"type": "history",
|
|
|
|
| 111 |
"item_type": item_type, # music, news, podcast, story
|
| 112 |
"timestamp": datetime.now().isoformat(),
|
| 113 |
"data": item_data,
|
| 114 |
"feedback": user_feedback
|
| 115 |
}
|
| 116 |
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
self.documents.append(doc)
|
| 119 |
|
| 120 |
-
# Rebuild index if embeddings are available
|
| 121 |
-
if
|
|
|
|
| 122 |
try:
|
| 123 |
self.index = VectorStoreIndex.from_documents(
|
| 124 |
self.documents,
|
| 125 |
storage_context=self.storage_context
|
| 126 |
)
|
|
|
|
|
|
|
|
|
|
| 127 |
except Exception as e:
|
|
|
|
| 128 |
print(f"Warning: Could not rebuild index: {e}")
|
| 129 |
self.index = None
|
| 130 |
|
| 131 |
self._save_user_data()
|
| 132 |
|
| 133 |
-
def get_user_preferences(self) -> Dict[str, Any]:
|
| 134 |
-
"""Retrieve latest user preferences"""
|
| 135 |
preferences = {}
|
| 136 |
for doc in reversed(self.documents):
|
| 137 |
try:
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
continue
|
| 144 |
|
|
|
|
|
|
|
|
|
|
| 145 |
return preferences
|
| 146 |
|
| 147 |
-
def get_recommendations(self, query: str) -> Dict[str, Any]:
|
| 148 |
-
"""Get personalized recommendations based on user history and preferences"""
|
| 149 |
if not self.index or not self.llm_available:
|
| 150 |
-
|
|
|
|
|
|
|
| 151 |
|
| 152 |
try:
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
response = query_engine.query(query)
|
| 155 |
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
except Exception as e:
|
| 161 |
-
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
| 163 |
|
| 164 |
def _get_default_recommendations(self) -> Dict[str, Any]:
|
| 165 |
"""Return default recommendations when RAG is not available"""
|
|
|
|
| 1 |
"""RAG System for User Preferences and History using LlamaIndex"""
|
| 2 |
import json
|
| 3 |
import os
|
| 4 |
+
import logging
|
| 5 |
from typing import Dict, Any, List
|
| 6 |
from datetime import datetime
|
| 7 |
from llama_index.core import VectorStoreIndex, Document, Settings
|
| 8 |
from llama_index.core.storage.storage_context import StorageContext
|
| 9 |
from llama_index.core.vector_stores import SimpleVectorStore
|
| 10 |
+
from llama_index.core.node_parser import SimpleNodeParser
|
| 11 |
from llama_index.llms.openai import OpenAI as LlamaOpenAI
|
| 12 |
+
from llama_index.embeddings.openai import OpenAIEmbedding
|
| 13 |
|
| 14 |
# Get project root directory (parent of src/)
|
| 15 |
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
| 16 |
+
LOG_DIR = os.path.join(PROJECT_ROOT, "logs")
|
| 17 |
+
os.makedirs(LOG_DIR, exist_ok=True)
|
| 18 |
+
|
| 19 |
+
# Setup RAG logger
|
| 20 |
+
rag_logger = logging.getLogger("ai_radio.rag")
|
| 21 |
+
if not rag_logger.handlers:
|
| 22 |
+
rag_logger.setLevel(logging.INFO)
|
| 23 |
+
_fh = logging.FileHandler(os.path.join(LOG_DIR, "rag_system.log"), encoding="utf-8")
|
| 24 |
+
_fmt = logging.Formatter("%(asctime)s [RAG] %(levelname)s: %(message)s")
|
| 25 |
+
_fh.setFormatter(_fmt)
|
| 26 |
+
rag_logger.addHandler(_fh)
|
| 27 |
|
| 28 |
class RadioRAGSystem:
|
| 29 |
"""RAG system for storing and retrieving user preferences and listening history"""
|
|
|
|
| 46 |
temperature=0.7
|
| 47 |
)
|
| 48 |
self.llm_available = True
|
| 49 |
+
print("β
RAG LLM initialized (Nebius GPT-OSS-120B)")
|
| 50 |
except Exception as e:
|
| 51 |
print(f"Warning: Could not initialize Nebius/OpenAI LLM: {e}")
|
| 52 |
print("RAG system will work in fallback mode without LLM features")
|
| 53 |
self.llm_available = False
|
| 54 |
|
| 55 |
+
# Enable embeddings - try local model first, then OpenAI
|
| 56 |
+
# Note: Nebius doesn't support embeddings, so we use local or OpenAI
|
| 57 |
+
self.embedding_available = False
|
| 58 |
+
try:
|
| 59 |
+
# First, try local sentence-transformers (no API key needed)
|
| 60 |
+
try:
|
| 61 |
+
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
| 62 |
+
Settings.embed_model = HuggingFaceEmbedding(
|
| 63 |
+
model_name="sentence-transformers/all-MiniLM-L6-v2"
|
| 64 |
+
)
|
| 65 |
+
self.embedding_available = True
|
| 66 |
+
print("β
RAG Embeddings enabled (local sentence-transformers/all-MiniLM-L6-v2)")
|
| 67 |
+
rag_logger.info("β
Using local HuggingFace embeddings: sentence-transformers/all-MiniLM-L6-v2")
|
| 68 |
+
except ImportError:
|
| 69 |
+
# Fallback: Try OpenAI embeddings endpoint (requires OpenAI API key)
|
| 70 |
+
openai_key = os.environ.get("OPENAI_API_KEY")
|
| 71 |
+
if openai_key:
|
| 72 |
+
try:
|
| 73 |
+
Settings.embed_model = OpenAIEmbedding(
|
| 74 |
+
api_key=openai_key,
|
| 75 |
+
api_base="https://api.openai.com/v1",
|
| 76 |
+
model="text-embedding-3-small"
|
| 77 |
+
)
|
| 78 |
+
self.embedding_available = True
|
| 79 |
+
print("β
RAG Embeddings enabled (OpenAI text-embedding-3-small via OPENAI_API_KEY)")
|
| 80 |
+
rag_logger.info("β
Using OpenAI embeddings: text-embedding-3-small")
|
| 81 |
+
except Exception as e:
|
| 82 |
+
print(f"β οΈ OpenAI embeddings failed: {e}")
|
| 83 |
+
print("βΉοΈ Embeddings disabled. RAG will use fallback mode.")
|
| 84 |
+
rag_logger.warning(f"β οΈ OpenAI embeddings failed: {e}")
|
| 85 |
+
self.embedding_available = False
|
| 86 |
+
else:
|
| 87 |
+
print("βΉοΈ Embeddings disabled: No local model or OPENAI_API_KEY found.")
|
| 88 |
+
print(" Install: pip install sentence-transformers")
|
| 89 |
+
print(" Or set OPENAI_API_KEY environment variable.")
|
| 90 |
+
rag_logger.warning("β οΈ No embeddings available - install sentence-transformers or set OPENAI_API_KEY")
|
| 91 |
+
self.embedding_available = False
|
| 92 |
+
except Exception as e:
|
| 93 |
+
print(f"β οΈ Local embeddings failed: {e}")
|
| 94 |
+
# Try OpenAI as last resort
|
| 95 |
+
openai_key = os.environ.get("OPENAI_API_KEY")
|
| 96 |
+
if openai_key:
|
| 97 |
+
try:
|
| 98 |
+
Settings.embed_model = OpenAIEmbedding(
|
| 99 |
+
api_key=openai_key,
|
| 100 |
+
api_base="https://api.openai.com/v1",
|
| 101 |
+
model="text-embedding-ada-002" # Try older model
|
| 102 |
+
)
|
| 103 |
+
self.embedding_available = True
|
| 104 |
+
print("β
RAG Embeddings enabled (OpenAI text-embedding-ada-002)")
|
| 105 |
+
rag_logger.info("β
Using OpenAI embeddings: text-embedding-ada-002")
|
| 106 |
+
except:
|
| 107 |
+
print("βΉοΈ All embedding options failed. RAG will use fallback mode.")
|
| 108 |
+
rag_logger.warning("β οΈ All embedding options failed")
|
| 109 |
+
self.embedding_available = False
|
| 110 |
+
else:
|
| 111 |
+
print("βΉοΈ Embeddings disabled. RAG will use fallback mode.")
|
| 112 |
+
rag_logger.warning("β οΈ Embeddings disabled - no fallback available")
|
| 113 |
+
self.embedding_available = False
|
| 114 |
+
except Exception as e:
|
| 115 |
+
print(f"Warning: Could not initialize embeddings: {e}")
|
| 116 |
+
print("RAG will work without vector search (fallback mode)")
|
| 117 |
+
rag_logger.error(f"β Embedding initialization error: {e}")
|
| 118 |
+
self.embedding_available = False
|
| 119 |
|
| 120 |
# Initialize vector store
|
| 121 |
self.vector_store = SimpleVectorStore()
|
| 122 |
self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store)
|
| 123 |
|
| 124 |
+
# Configure chunk size to handle larger metadata
|
| 125 |
+
# Increase chunk size to 4096 to accommodate metadata (metadata can be up to 1438 chars)
|
| 126 |
+
Settings.chunk_size = 4096
|
| 127 |
+
Settings.chunk_overlap = 400
|
| 128 |
+
rag_logger.info(f"π RAG chunk size set to {Settings.chunk_size} (overlap: {Settings.chunk_overlap})")
|
| 129 |
+
print(f"π [RAG] Chunk size: {Settings.chunk_size}, Overlap: {Settings.chunk_overlap}")
|
| 130 |
+
|
| 131 |
# Load existing index or create new one
|
| 132 |
self.index = None
|
| 133 |
self.documents = []
|
|
|
|
| 136 |
self._load_user_data()
|
| 137 |
|
| 138 |
    def _load_user_data(self) -> None:
        """Load persisted user records from ``self.user_data_file`` and build the vector index.

        Each JSON record previously written by ``_save_user_data`` is converted into a
        llama-index ``Document`` whose *text* is a human-readable summary (embeds better
        than raw JSON). If embeddings are available, a ``VectorStoreIndex`` is built over
        all loaded documents; otherwise the documents are kept only for fallback lookups.

        NOTE(review): documents created here deliberately omit ``raw_data`` from metadata
        (see inline comments), but ``_save_user_data`` can only serialize documents that
        carry ``raw_data`` or JSON text — so a load→save round-trip appears to drop these
        records. Confirm whether that is intended.
        """
        if os.path.exists(self.user_data_file):
            try:
                with open(self.user_data_file, 'r') as f:
                    data = json.load(f)

                # Convert old format (JSON in text) to new format (descriptive text + raw_data in metadata)
                self.documents = []
                for d in data:
                    try:
                        # Create descriptive text for RAG retrieval
                        if d.get("type") == "preferences":
                            prefs = d.get("data", {})
                            # Multi-line summary; content lines stay at column 0 inside the f-string.
                            pref_text = f"""User Preferences:
Name: {prefs.get('name', 'Unknown')}
Favorite Genres: {', '.join(prefs.get('favorite_genres', []))}
Interests: {', '.join(prefs.get('interests', []))}
Podcast Interests: {', '.join(prefs.get('podcast_interests', []))}
Mood: {prefs.get('mood', 'neutral')}
Content Filter: Music={prefs.get('content_filter', {}).get('music', True)}, News={prefs.get('content_filter', {}).get('news', True)}, Podcasts={prefs.get('content_filter', {}).get('podcasts', True)}, Stories={prefs.get('content_filter', {}).get('stories', True)}
"""
                            doc = Document(
                                text=pref_text,
                                metadata={
                                    "type": "preferences",
                                    "user_id": d.get("user_id"),
                                    "timestamp": d.get("timestamp", datetime.now().isoformat()),
                                    # Removed raw_data to reduce metadata size - essential fields stored separately
                                }
                            )
                        elif d.get("type") == "history":
                            item_type = d.get("item_type", "")
                            item_data = d.get("data", {})
                            user_feedback = d.get("feedback")

                            # Each history kind gets its own summary template.
                            if item_type == "music":
                                track = item_data.get("track", {})
                                history_text = f"""Music Listening History:
Title: {track.get('title', 'Unknown')}
Artist: {track.get('artist', 'Unknown')}
Genre: {track.get('genre', 'Unknown')}
Source: {track.get('source', 'Unknown')}
Feedback: {user_feedback or 'No feedback'}
"""
                            elif item_type == "news":
                                history_text = f"""News Listening History:
Items: {len(item_data.get('news_items', []))} news items
Topics: {', '.join([item.get('category', '') for item in item_data.get('news_items', [])[:3]])}
Feedback: {user_feedback or 'No feedback'}
"""
                            elif item_type == "podcast":
                                podcast = item_data.get("podcast", {})
                                history_text = f"""Podcast Listening History:
Title: {podcast.get('title', 'Unknown')}
Host: {podcast.get('host', 'Unknown')}
Category: {podcast.get('category', 'Unknown')}
Feedback: {user_feedback or 'No feedback'}
"""
                            else:
                                # Any other item_type falls back to a story-style summary.
                                history_text = f"""Story Listening History:
Type: {item_type}
Feedback: {user_feedback or 'No feedback'}
"""

                            # Store minimal metadata to avoid chunk size issues
                            doc = Document(
                                text=history_text,
                                metadata={
                                    "type": "history",
                                    "user_id": d.get("user_id"),
                                    "item_type": item_type,
                                    "timestamp": d.get("timestamp", datetime.now().isoformat()),
                                    # Store only essential fields, not full raw_data
                                    "feedback": user_feedback or ""
                                }
                            )
                        else:
                            # Unknown type, create basic document
                            doc = Document(
                                text=json.dumps(d),
                                metadata={
                                    "type": d.get("type", "unknown"),
                                    "user_id": d.get("user_id"),
                                    "timestamp": d.get("timestamp", datetime.now().isoformat()),
                                    # Removed raw_data to reduce metadata size - essential fields stored separately
                                }
                            )

                        self.documents.append(doc)
                    except Exception as e:
                        # One malformed record must not abort the whole load.
                        rag_logger.warning(f"⚠️ Skipping invalid document during load: {e}")
                        continue

                rag_logger.info(f"📂 Loaded {len(self.documents)} documents from RAG storage")
                print(f"📂 [RAG] Loaded {len(self.documents)} documents from storage")

                # Build vector index if embeddings are available
                if self.documents and self.embedding_available:
                    try:
                        rag_logger.info(f"🔨 Building vector index from {len(self.documents)} documents...")
                        print(f"🔨 [RAG] Building vector index from {len(self.documents)} documents...")
                        self.index = VectorStoreIndex.from_documents(
                            self.documents,
                            storage_context=self.storage_context
                        )
                        rag_logger.info(f"✅ Vector index built successfully with {len(self.documents)} documents")
                        print(f"✅ [RAG] Vector index built with {len(self.documents)} documents")
                    except Exception as e:
                        # Index build failure degrades to fallback mode (index stays None).
                        rag_logger.error(f"❌ Failed to build vector index: {e}")
                        print(f"Warning: Could not build vector index: {e}")
                        self.index = None
                elif self.documents:
                    rag_logger.info(f"ℹ️ {len(self.documents)} documents loaded but embeddings disabled - using fallback mode")
                    print(f"ℹ️ [RAG] {len(self.documents)} documents loaded but embeddings disabled - using fallback mode")
            except Exception as e:
                rag_logger.error(f"❌ Error loading user data: {e}")
                print(f"Error loading user data: {e}")
|
| 256 |
|
| 257 |
def _save_user_data(self):
|
| 258 |
"""Save user data to file"""
|
| 259 |
try:
|
| 260 |
+
data = []
|
| 261 |
+
for doc in self.documents:
|
| 262 |
+
try:
|
| 263 |
+
# Try to get raw_data from metadata first (new format)
|
| 264 |
+
raw_data = doc.metadata.get("raw_data")
|
| 265 |
+
if raw_data:
|
| 266 |
+
data.append(json.loads(raw_data))
|
| 267 |
+
else:
|
| 268 |
+
# Fallback: try to parse doc.text as JSON (old format)
|
| 269 |
+
data.append(json.loads(doc.text))
|
| 270 |
+
except (json.JSONDecodeError, KeyError) as e:
|
| 271 |
+
# Skip documents that can't be parsed
|
| 272 |
+
rag_logger.warning(f"β οΈ Skipping document that couldn't be parsed: {e}")
|
| 273 |
+
continue
|
| 274 |
+
|
| 275 |
with open(self.user_data_file, 'w') as f:
|
| 276 |
json.dump(data, f, indent=2)
|
| 277 |
+
rag_logger.info(f"πΎ Saved {len(data)} documents to {self.user_data_file}")
|
| 278 |
except Exception as e:
|
| 279 |
+
rag_logger.error(f"β Error saving user data: {e}")
|
| 280 |
+
import traceback
|
| 281 |
+
rag_logger.error(traceback.format_exc())
|
| 282 |
print(f"Error saving user data: {e}")
|
| 283 |
|
| 284 |
+
def store_user_preferences(self, preferences: Dict[str, Any], user_id: str = None):
|
| 285 |
+
"""Store user preferences in RAG system with user ID"""
|
| 286 |
+
if not user_id:
|
| 287 |
+
rag_logger.warning("β οΈ Storing preferences without user_id - data will not be user-specific")
|
| 288 |
+
print("β οΈ [RAG] Warning: Storing preferences without user_id")
|
| 289 |
+
|
| 290 |
pref_doc = {
|
| 291 |
"type": "preferences",
|
| 292 |
+
"user_id": user_id,
|
| 293 |
"timestamp": datetime.now().isoformat(),
|
| 294 |
"data": preferences
|
| 295 |
}
|
| 296 |
|
| 297 |
+
# Create a more descriptive document for better RAG retrieval
|
| 298 |
+
pref_text = f"""User Preferences:
|
| 299 |
+
Name: {preferences.get('name', 'Unknown')}
|
| 300 |
+
Favorite Genres: {', '.join(preferences.get('favorite_genres', []))}
|
| 301 |
+
Interests: {', '.join(preferences.get('interests', []))}
|
| 302 |
+
Podcast Interests: {', '.join(preferences.get('podcast_interests', []))}
|
| 303 |
+
Mood: {preferences.get('mood', 'neutral')}
|
| 304 |
+
Content Filter: Music={preferences.get('content_filter', {}).get('music', True)}, News={preferences.get('content_filter', {}).get('news', True)}, Podcasts={preferences.get('content_filter', {}).get('podcasts', True)}, Stories={preferences.get('content_filter', {}).get('stories', True)}
|
| 305 |
+
"""
|
| 306 |
+
|
| 307 |
+
doc = Document(
|
| 308 |
+
text=pref_text,
|
| 309 |
+
metadata={
|
| 310 |
+
"type": "preferences",
|
| 311 |
+
"user_id": user_id,
|
| 312 |
+
"timestamp": datetime.now().isoformat(),
|
| 313 |
+
"raw_data": json.dumps(pref_doc)
|
| 314 |
+
}
|
| 315 |
+
)
|
| 316 |
self.documents.append(doc)
|
| 317 |
|
| 318 |
+
rag_logger.info(f"π STORING PREFERENCES: user_id={user_id}, Name={preferences.get('name')}, Genres={preferences.get('favorite_genres')}, Mood={preferences.get('mood')}")
|
| 319 |
+
print(f"π [RAG] Storing preferences for user {user_id} ({preferences.get('name', 'user')})")
|
| 320 |
+
|
| 321 |
# Rebuild index if embeddings are available
|
| 322 |
if self.embedding_available:
|
| 323 |
try:
|
|
|
|
| 325 |
self.documents,
|
| 326 |
storage_context=self.storage_context
|
| 327 |
)
|
| 328 |
+
rag_logger.info(f"β
Vector index rebuilt with {len(self.documents)} documents (embeddings enabled)")
|
| 329 |
+
print(f"β
[RAG] Index updated with {len(self.documents)} documents")
|
| 330 |
except Exception as e:
|
| 331 |
+
rag_logger.error(f"β Failed to rebuild index: {e}")
|
| 332 |
print(f"Warning: Could not rebuild index: {e}")
|
| 333 |
self.index = None
|
| 334 |
+
else:
|
| 335 |
+
rag_logger.info(f"βΉοΈ Preferences stored (embeddings disabled, {len(self.documents)} total documents)")
|
| 336 |
+
print(f"βΉοΈ [RAG] Preferences stored (embeddings disabled)")
|
| 337 |
|
| 338 |
self._save_user_data()
|
| 339 |
|
| 340 |
+
    def store_listening_history(self, item_type: str, item_data: Dict[str, Any], user_id: str = None, user_feedback: str = None) -> None:
        """Record one listened item and persist it to disk.

        Args:
            item_type: "music", "news", or "podcast"; any other value is treated
                as a story-like item.
            item_data: Payload describing the item (e.g. {"track": {...}} for music,
                {"news_items": [...]} for news, {"podcast": {...}} for podcasts).
            user_id: Owner of this history entry; without it the entry is stored
                globally (not user-specific).
            user_feedback: Optional free-text feedback from the listener.

        The full record is kept as JSON in metadata["raw_data"] so it can be
        round-tripped by _save_user_data; the Document text is a human-readable
        summary for embedding retrieval. The vector index is rebuilt only every
        5 documents (or when missing) to limit rebuild cost.
        """
        if not user_id:
            rag_logger.warning(f"⚠️ Storing {item_type} history without user_id - data will not be user-specific")
            print(f"⚠️ [RAG] Warning: Storing {item_type} history without user_id")

        # Canonical record persisted to disk (via metadata["raw_data"]).
        history_doc = {
            "type": "history",
            "user_id": user_id,
            "item_type": item_type,  # music, news, podcast, story
            "timestamp": datetime.now().isoformat(),
            "data": item_data,
            "feedback": user_feedback
        }

        # Create descriptive text for better RAG retrieval
        if item_type == "music":
            track = item_data.get("track", {})
            history_text = f"""Music Listening History:
Title: {track.get('title', 'Unknown')}
Artist: {track.get('artist', 'Unknown')}
Genre: {track.get('genre', 'Unknown')}
Source: {track.get('source', 'Unknown')}
Feedback: {user_feedback or 'No feedback'}
"""
            rag_logger.info(f"🎵 STORING MUSIC HISTORY: user_id={user_id}, {track.get('title', 'Unknown')} by {track.get('artist', 'Unknown')} ({track.get('genre', 'Unknown')}) - Feedback: {user_feedback or 'None'}")
            print(f"🎵 [RAG] Storing music for user {user_id}: {track.get('title', 'Unknown')} by {track.get('artist', 'Unknown')}")
        elif item_type == "news":
            history_text = f"""News Listening History:
Items: {len(item_data.get('news_items', []))} news items
Topics: {', '.join([item.get('category', '') for item in item_data.get('news_items', [])[:3]])}
Feedback: {user_feedback or 'No feedback'}
"""
            rag_logger.info(f"📰 STORING NEWS HISTORY: user_id={user_id}, {len(item_data.get('news_items', []))} items - Feedback: {user_feedback or 'None'}")
            print(f"📰 [RAG] Storing news history for user {user_id}: {len(item_data.get('news_items', []))} items")
        elif item_type == "podcast":
            podcast = item_data.get("podcast", {})
            history_text = f"""Podcast Listening History:
Title: {podcast.get('title', 'Unknown')}
Host: {podcast.get('host', 'Unknown')}
Category: {podcast.get('category', 'Unknown')}
Feedback: {user_feedback or 'No feedback'}
"""
            rag_logger.info(f"🎙️ STORING PODCAST HISTORY: user_id={user_id}, {podcast.get('title', 'Unknown')} - Feedback: {user_feedback or 'None'}")
            print(f"🎙️ [RAG] Storing podcast for user {user_id}: {podcast.get('title', 'Unknown')}")
        else:
            # Fallback: anything else is logged as a story-like item.
            history_text = f"""Story Listening History:
Type: {item_type}
Feedback: {user_feedback or 'No feedback'}
"""
            rag_logger.info(f"📖 STORING STORY HISTORY: user_id={user_id}, {item_type} - Feedback: {user_feedback or 'None'}")
            print(f"📖 [RAG] Storing story history for user {user_id}: {item_type}")

        doc = Document(
            text=history_text,
            metadata={
                "type": "history",
                "user_id": user_id,
                "item_type": item_type,
                "timestamp": datetime.now().isoformat(),
                "raw_data": json.dumps(history_doc)
            }
        )
        self.documents.append(doc)

        # Rebuild index if embeddings are available (but only periodically to avoid too many rebuilds)
        # Rebuild every 5 documents or if index doesn't exist
        if self.embedding_available and (not self.index or len(self.documents) % 5 == 0):
            try:
                self.index = VectorStoreIndex.from_documents(
                    self.documents,
                    storage_context=self.storage_context
                )
                if len(self.documents) % 5 == 0:
                    rag_logger.info(f"✅ Vector index rebuilt (total documents: {len(self.documents)})")
                    print(f"✅ [RAG] Index updated (total documents: {len(self.documents)})")
            except Exception as e:
                # On failure, fall back to non-vector mode until the next rebuild attempt.
                rag_logger.error(f"❌ Failed to rebuild index: {e}")
                print(f"Warning: Could not rebuild index: {e}")
                self.index = None

        self._save_user_data()
|
| 422 |
|
| 423 |
+
def get_user_preferences(self, user_id: str = None) -> Dict[str, Any]:
|
| 424 |
+
"""Retrieve latest user preferences for a specific user"""
|
| 425 |
preferences = {}
|
| 426 |
for doc in reversed(self.documents):
|
| 427 |
try:
|
| 428 |
+
# Check metadata first (new format with user_id)
|
| 429 |
+
doc_user_id = doc.metadata.get("user_id")
|
| 430 |
+
if user_id and doc_user_id != user_id:
|
| 431 |
+
continue # Skip documents from other users
|
| 432 |
+
|
| 433 |
+
# Try to parse from metadata raw_data (new format)
|
| 434 |
+
raw_data = doc.metadata.get("raw_data")
|
| 435 |
+
if raw_data:
|
| 436 |
+
data = json.loads(raw_data)
|
| 437 |
+
if data.get("type") == "preferences":
|
| 438 |
+
if not user_id or data.get("user_id") == user_id:
|
| 439 |
+
preferences = data.get("data", {})
|
| 440 |
+
break
|
| 441 |
+
else:
|
| 442 |
+
# Fallback: try to parse doc.text as JSON (old format)
|
| 443 |
+
data = json.loads(doc.text)
|
| 444 |
+
if data.get("type") == "preferences":
|
| 445 |
+
if not user_id or data.get("user_id") == user_id:
|
| 446 |
+
preferences = data.get("data", {})
|
| 447 |
+
break
|
| 448 |
+
except (json.JSONDecodeError, KeyError, AttributeError) as e:
|
| 449 |
+
rag_logger.debug(f"Skipping document in get_user_preferences: {e}")
|
| 450 |
continue
|
| 451 |
|
| 452 |
+
if user_id and not preferences:
|
| 453 |
+
rag_logger.warning(f"β οΈ No preferences found for user_id={user_id}")
|
| 454 |
+
|
| 455 |
return preferences
|
| 456 |
|
| 457 |
+
def get_recommendations(self, query: str, user_id: str = None) -> Dict[str, Any]:
|
| 458 |
+
"""Get personalized recommendations based on user history and preferences using RAG"""
|
| 459 |
if not self.index or not self.llm_available:
|
| 460 |
+
rag_logger.warning(f"β οΈ RAG RECOMMENDATIONS UNAVAILABLE: user_id={user_id}, query='{query}' (no index or LLM)")
|
| 461 |
+
print("βΉοΈ RAG recommendations unavailable (no index or LLM) - using defaults")
|
| 462 |
+
return self._get_default_recommendations(user_id=user_id)
|
| 463 |
|
| 464 |
try:
|
| 465 |
+
rag_logger.info(f"π RAG RECOMMENDATIONS QUERY: '{query}'")
|
| 466 |
+
print(f"π [RAG] Getting recommendations for: '{query[:60]}...'")
|
| 467 |
+
|
| 468 |
+
# Use RAG to query user history and preferences
|
| 469 |
+
query_engine = self.index.as_query_engine(
|
| 470 |
+
similarity_top_k=5, # Get top 5 relevant documents
|
| 471 |
+
response_mode="compact" # Compact response
|
| 472 |
+
)
|
| 473 |
response = query_engine.query(query)
|
| 474 |
|
| 475 |
+
response_text = str(response)
|
| 476 |
+
rag_logger.info(f"β
RAG RECOMMENDATIONS RESPONSE: {response_text[:200]}...")
|
| 477 |
+
print(f"β
[RAG] LLM generated recommendations: {response_text[:150]}...")
|
| 478 |
+
|
| 479 |
+
# Extract recommendations from RAG response
|
| 480 |
+
recommendations = {
|
| 481 |
+
"recommendations": response_text,
|
| 482 |
+
"source": "RAG",
|
| 483 |
+
"query": query
|
| 484 |
}
|
| 485 |
+
|
| 486 |
+
# Also try to extract structured data from response
|
| 487 |
+
response_lower = response_text.lower()
|
| 488 |
+
if "genre" in response_lower or "music" in response_lower:
|
| 489 |
+
# Try to extract genre preferences
|
| 490 |
+
for genre in ["pop", "rock", "jazz", "classical", "electronic", "hip-hop", "country", "indie", "rap", "blues", "folk"]:
|
| 491 |
+
if genre in response_lower:
|
| 492 |
+
recommendations.setdefault("suggested_genres", []).append(genre)
|
| 493 |
+
rag_logger.info(f" π΅ Extracted genre from RAG: {genre}")
|
| 494 |
+
|
| 495 |
+
return recommendations
|
| 496 |
+
except Exception as e:
|
| 497 |
+
rag_logger.error(f"β RAG RECOMMENDATIONS ERROR: {e}")
|
| 498 |
+
import traceback
|
| 499 |
+
rag_logger.error(traceback.format_exc())
|
| 500 |
+
print(f"Error getting RAG recommendations: {e}")
|
| 501 |
+
traceback.print_exc()
|
| 502 |
+
return self._get_default_recommendations(user_id=user_id)
|
| 503 |
+
|
| 504 |
+
def query_user_context(self, query: str, user_id: str = None, top_k: int = 3) -> List[Dict[str, Any]]:
|
| 505 |
+
"""Query user context using vector search - returns relevant documents filtered by user_id"""
|
| 506 |
+
if not self.index or not self.embedding_available:
|
| 507 |
+
rag_logger.warning(f"β οΈ RAG QUERY SKIPPED (no index/embeddings): '{query}'")
|
| 508 |
+
print(f"β οΈ [RAG] Query skipped - embeddings not available")
|
| 509 |
+
return []
|
| 510 |
+
|
| 511 |
+
try:
|
| 512 |
+
rag_logger.info(f"π RAG QUERY: user_id={user_id}, query='{query}' (top_k={top_k})")
|
| 513 |
+
print(f"π [RAG] Querying for user {user_id}: '{query[:60]}...'")
|
| 514 |
+
|
| 515 |
+
# Retrieve more documents than needed, then filter by user_id
|
| 516 |
+
# This ensures we get top_k results for the specific user
|
| 517 |
+
retrieve_count = top_k * 3 if user_id else top_k # Get more if filtering
|
| 518 |
+
retriever = self.index.as_retriever(similarity_top_k=retrieve_count)
|
| 519 |
+
nodes = retriever.retrieve(query)
|
| 520 |
+
|
| 521 |
+
results = []
|
| 522 |
+
for i, node in enumerate(nodes):
|
| 523 |
+
try:
|
| 524 |
+
# Filter by user_id if provided
|
| 525 |
+
node_user_id = node.metadata.get("user_id")
|
| 526 |
+
if user_id and node_user_id != user_id:
|
| 527 |
+
continue # Skip documents from other users
|
| 528 |
+
|
| 529 |
+
score = node.score if hasattr(node, 'score') else None
|
| 530 |
+
node_type = node.metadata.get("type", "unknown")
|
| 531 |
+
item_type = node.metadata.get("item_type", "")
|
| 532 |
+
|
| 533 |
+
data = json.loads(node.metadata.get("raw_data", "{}"))
|
| 534 |
+
result = {
|
| 535 |
+
"text": node.text,
|
| 536 |
+
"score": score,
|
| 537 |
+
"metadata": node.metadata,
|
| 538 |
+
"data": data
|
| 539 |
+
}
|
| 540 |
+
results.append(result)
|
| 541 |
+
|
| 542 |
+
# Log each retrieved document
|
| 543 |
+
preview = node.text[:100].replace('\n', ' ')
|
| 544 |
+
rag_logger.info(f" π Retrieved #{len(results)}: user_id={node_user_id}, type={node_type}, item_type={item_type}, score={score:.4f if score else 'N/A'}, preview='{preview}...'")
|
| 545 |
+
print(f" π [RAG] Retrieved #{len(results)}: {node_type} (user: {node_user_id}, score: {score:.4f if score else 'N/A'}) - {preview}...")
|
| 546 |
+
|
| 547 |
+
# Stop if we have enough results for this user
|
| 548 |
+
if len(results) >= top_k:
|
| 549 |
+
break
|
| 550 |
+
|
| 551 |
+
except Exception as parse_error:
|
| 552 |
+
# Still check user_id even if parsing fails
|
| 553 |
+
node_user_id = node.metadata.get("user_id")
|
| 554 |
+
if user_id and node_user_id != user_id:
|
| 555 |
+
continue
|
| 556 |
+
|
| 557 |
+
score = node.score if hasattr(node, 'score') else None
|
| 558 |
+
result = {
|
| 559 |
+
"text": node.text,
|
| 560 |
+
"score": score,
|
| 561 |
+
"metadata": node.metadata
|
| 562 |
+
}
|
| 563 |
+
results.append(result)
|
| 564 |
+
rag_logger.warning(f" β οΈ Retrieved #{len(results)} (parse error): user_id={node_user_id}, score={score}, text_preview='{node.text[:50]}...'")
|
| 565 |
+
print(f" β οΈ [RAG] Retrieved #{len(results)} (parse error, user: {node_user_id}, score: {score:.4f if score else 'N/A'})")
|
| 566 |
+
|
| 567 |
+
if len(results) >= top_k:
|
| 568 |
+
break
|
| 569 |
+
|
| 570 |
+
rag_logger.info(f"β
RAG QUERY COMPLETE: Retrieved {len(results)} documents for user_id={user_id}")
|
| 571 |
+
print(f"β
[RAG] Query complete: {len(results)} documents retrieved for user {user_id}")
|
| 572 |
+
|
| 573 |
+
return results
|
| 574 |
except Exception as e:
|
| 575 |
+
rag_logger.error(f"β RAG QUERY ERROR: {e}")
|
| 576 |
+
import traceback
|
| 577 |
+
rag_logger.error(traceback.format_exc())
|
| 578 |
+
print(f"β [RAG] Query error: {e}")
|
| 579 |
+
return []
|
| 580 |
|
| 581 |
def _get_default_recommendations(self) -> Dict[str, Any]:
|
| 582 |
"""Return default recommendations when RAG is not available"""
|