SulmanK commited on
Commit
f9db034
·
1 Parent(s): 6489f49

Phase 2.1: Core Agent Infrastructure Complete - PlannerAgent with strategic planning, Pydantic models, BaseAgent class, comprehensive tests (11/11 passing, 90% coverage)

Browse files
pyproject.toml CHANGED
@@ -139,4 +139,11 @@ select = [
139
  ignore = [
140
  "E501", # line too long, handled by black
141
  "B008", # do not perform function calls in argument defaults
142
- ]
 
 
 
 
 
 
 
 
139
  ignore = [
140
  "E501", # line too long, handled by black
141
  "B008", # do not perform function calls in argument defaults
142
+ ]
143
+
144
+ [dependency-groups]
145
+ dev = [
146
+ "pytest>=8.3.5",
147
+ "pytest-asyncio>=0.26.0",
148
+ "pytest-cov>=6.1.1",
149
+ ]
src/__init__.py CHANGED
@@ -1,10 +1,8 @@
1
  """
2
  BeatDebate - Multi-Agent Music Recommendation System
3
 
4
- A sophisticated music recommendation system using 4 specialized AI agents
5
- that demonstrate advanced agentic planning behavior for music discovery.
6
-
7
- Built for the AgentX competition.
8
  """
9
 
10
  __version__ = "0.1.0"
 
1
  """
2
  BeatDebate - Multi-Agent Music Recommendation System
3
 
4
+ A sophisticated music recommendation system using strategic planning
5
+ and multi-agent coordination for the AgentX competition.
 
 
6
  """
7
 
8
  __version__ = "0.1.0"
src/agents/__init__.py CHANGED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agents Package for BeatDebate Multi-Agent System
3
+
4
+ Contains all agent implementations for the music recommendation workflow.
5
+ """
6
+
7
+ from .base_agent import BaseAgent
8
+ from .planner_agent import PlannerAgent
9
+
10
+ __all__ = [
11
+ "BaseAgent",
12
+ "PlannerAgent",
13
+ ]
src/agents/base_agent.py ADDED
@@ -0,0 +1,364 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Base Agent Class for BeatDebate Multi-Agent Music Recommendation System
3
+
4
+ Provides common functionality for all agents including LLM integration,
5
+ logging, error handling, and reasoning chain management.
6
+ """
7
+
8
+ import asyncio
9
+ import time
10
+ from abc import ABC, abstractmethod
11
+ from typing import Dict, List, Any, Optional
12
+ import structlog
13
+ from datetime import datetime
14
+
15
+ from ..models.agent_models import (
16
+ MusicRecommenderState,
17
+ AgentDeliberation,
18
+ ReasoningChain,
19
+ AgentConfig
20
+ )
21
+
22
+ logger = structlog.get_logger(__name__)
23
+
24
+
25
+ class BaseAgent(ABC):
26
+ """
27
+ Base class for all agents in the BeatDebate system.
28
+
29
+ Provides common functionality:
30
+ - LLM integration with Gemini
31
+ - Reasoning chain management
32
+ - Error handling and logging
33
+ - Strategy processing utilities
34
+ - Performance monitoring
35
+ """
36
+
37
+ def __init__(self, config: AgentConfig):
38
+ """
39
+ Initialize base agent with configuration.
40
+
41
+ Args:
42
+ config: Agent configuration including LLM settings
43
+ """
44
+ self.config = config
45
+ self.agent_name = config.agent_name
46
+ self.agent_type = config.agent_type
47
+ self.logger = logger.bind(agent=self.agent_name)
48
+
49
+ # Initialize LLM client (will be set up in subclasses)
50
+ self.llm_client = None
51
+
52
+ # Performance tracking
53
+ self.processing_times: List[float] = []
54
+ self.success_count = 0
55
+ self.error_count = 0
56
+
57
+ self.logger.info(
58
+ "Agent initialized",
59
+ agent_type=self.agent_type,
60
+ llm_model=config.llm_model,
61
+ temperature=config.temperature
62
+ )
63
+
64
+ @abstractmethod
65
+ async def process(self, state: MusicRecommenderState) -> MusicRecommenderState:
66
+ """
67
+ Main processing method that each agent must implement.
68
+
69
+ Args:
70
+ state: Current state of the music recommendation workflow
71
+
72
+ Returns:
73
+ Updated state after agent processing
74
+ """
75
+ pass
76
+
77
+ async def execute_with_monitoring(self, state: MusicRecommenderState) -> MusicRecommenderState:
78
+ """
79
+ Execute agent processing with performance monitoring and error handling.
80
+
81
+ Args:
82
+ state: Current workflow state
83
+
84
+ Returns:
85
+ Updated state after processing
86
+ """
87
+ start_time = time.time()
88
+
89
+ try:
90
+ self.logger.info("Starting agent processing", user_query=state.user_query)
91
+
92
+ # Execute main processing
93
+ updated_state = await self._execute_with_timeout(state)
94
+
95
+ # Record successful execution
96
+ processing_time = time.time() - start_time
97
+ self.processing_times.append(processing_time)
98
+ self.success_count += 1
99
+
100
+ # Add deliberation record
101
+ deliberation = AgentDeliberation(
102
+ agent_name=self.agent_name,
103
+ timestamp=datetime.now(),
104
+ input_data={"user_query": state.user_query},
105
+ reasoning_steps=self._extract_reasoning_steps(updated_state),
106
+ output_data=self._extract_output_data(updated_state),
107
+ confidence=self._calculate_confidence(updated_state),
108
+ processing_time=processing_time
109
+ )
110
+
111
+ updated_state.agent_deliberations.append(deliberation.dict())
112
+
113
+ self.logger.info(
114
+ "Agent processing completed successfully",
115
+ processing_time=processing_time,
116
+ confidence=deliberation.confidence
117
+ )
118
+
119
+ return updated_state
120
+
121
+ except asyncio.TimeoutError:
122
+ self.error_count += 1
123
+ self.logger.error(
124
+ "Agent processing timed out",
125
+ timeout_seconds=self.config.timeout_seconds
126
+ )
127
+ # Return state with error information
128
+ return self._handle_timeout_error(state)
129
+
130
+ except Exception as e:
131
+ self.error_count += 1
132
+ self.logger.error(
133
+ "Agent processing failed",
134
+ error=str(e),
135
+ error_type=type(e).__name__
136
+ )
137
+ # Return state with error information
138
+ return self._handle_processing_error(state, e)
139
+
140
+ async def _execute_with_timeout(self, state: MusicRecommenderState) -> MusicRecommenderState:
141
+ """Execute processing with timeout."""
142
+ return await asyncio.wait_for(
143
+ self.process(state),
144
+ timeout=self.config.timeout_seconds
145
+ )
146
+
147
+ def _extract_reasoning_steps(self, state: MusicRecommenderState) -> List[str]:
148
+ """Extract reasoning steps from the updated state."""
149
+ # Get the most recent reasoning log entries added by this agent
150
+ if hasattr(self, '_reasoning_steps'):
151
+ return self._reasoning_steps
152
+ return ["Processing completed"]
153
+
154
+ def _extract_output_data(self, state: MusicRecommenderState) -> Dict[str, Any]:
155
+ """Extract output data specific to this agent."""
156
+ return {"status": "completed"}
157
+
158
+ def _calculate_confidence(self, state: MusicRecommenderState) -> float:
159
+ """Calculate confidence score for this agent's processing."""
160
+ # Default implementation - subclasses should override
161
+ return 0.8
162
+
163
+ def _handle_timeout_error(self, state: MusicRecommenderState) -> MusicRecommenderState:
164
+ """Handle timeout error by adding error information to state."""
165
+ error_msg = f"{self.agent_name} processing timed out after {self.config.timeout_seconds}s"
166
+ state.reasoning_log.append(f"ERROR: {error_msg}")
167
+ return state
168
+
169
+ def _handle_processing_error(self, state: MusicRecommenderState, error: Exception) -> MusicRecommenderState:
170
+ """Handle processing error by adding error information to state."""
171
+ error_msg = f"{self.agent_name} processing failed: {str(error)}"
172
+ state.reasoning_log.append(f"ERROR: {error_msg}")
173
+ return state
174
+
175
+ def add_reasoning_step(self, step: str, evidence: List[str] = None, confidence: float = 0.8):
176
+ """
177
+ Add a reasoning step for transparency.
178
+
179
+ Args:
180
+ step: Description of the reasoning step
181
+ evidence: Supporting evidence for this step
182
+ confidence: Confidence in this reasoning step
183
+ """
184
+ if not hasattr(self, '_reasoning_steps'):
185
+ self._reasoning_steps = []
186
+
187
+ self._reasoning_steps.append(step)
188
+
189
+ if evidence:
190
+ self._reasoning_steps.append(f"Evidence: {', '.join(evidence)}")
191
+
192
+ self.logger.debug(
193
+ "Reasoning step added",
194
+ step=step,
195
+ confidence=confidence
196
+ )
197
+
198
+ def log_strategy_application(self, strategy: Dict[str, Any], step: str):
199
+ """
200
+ Log how strategy is being applied.
201
+
202
+ Args:
203
+ strategy: Strategy object being applied
204
+ step: Description of strategy application step
205
+ """
206
+ self.logger.info(
207
+ "Applying strategy",
208
+ step=step,
209
+ strategy_keys=list(strategy.keys()) if strategy else []
210
+ )
211
+
212
+ async def call_llm(self, prompt: str, system_prompt: str = None) -> str:
213
+ """
214
+ Call LLM with proper error handling and logging.
215
+
216
+ Args:
217
+ prompt: User prompt for the LLM
218
+ system_prompt: System prompt (optional)
219
+
220
+ Returns:
221
+ LLM response text
222
+ """
223
+ if not self.llm_client:
224
+ raise RuntimeError(f"LLM client not initialized for {self.agent_name}")
225
+
226
+ try:
227
+ self.logger.debug(
228
+ "Calling LLM",
229
+ prompt_length=len(prompt),
230
+ model=self.config.llm_model
231
+ )
232
+
233
+ # This will be implemented by subclasses with actual LLM integration
234
+ response = await self._make_llm_call(prompt, system_prompt)
235
+
236
+ self.logger.debug(
237
+ "LLM response received",
238
+ response_length=len(response)
239
+ )
240
+
241
+ return response
242
+
243
+ except Exception as e:
244
+ self.logger.error(
245
+ "LLM call failed",
246
+ error=str(e),
247
+ prompt_length=len(prompt)
248
+ )
249
+ raise
250
+
251
+ async def _make_llm_call(self, prompt: str, system_prompt: str = None) -> str:
252
+ """
253
+ Make actual LLM call - to be implemented by subclasses.
254
+
255
+ Args:
256
+ prompt: User prompt
257
+ system_prompt: System prompt
258
+
259
+ Returns:
260
+ LLM response
261
+ """
262
+ raise NotImplementedError("Subclasses must implement _make_llm_call")
263
+
264
+ def get_performance_metrics(self) -> Dict[str, Any]:
265
+ """
266
+ Get performance metrics for this agent.
267
+
268
+ Returns:
269
+ Dictionary of performance metrics
270
+ """
271
+ avg_processing_time = (
272
+ sum(self.processing_times) / len(self.processing_times)
273
+ if self.processing_times else 0
274
+ )
275
+
276
+ total_requests = self.success_count + self.error_count
277
+ success_rate = self.success_count / total_requests if total_requests > 0 else 0
278
+
279
+ return {
280
+ "agent_name": self.agent_name,
281
+ "total_requests": total_requests,
282
+ "success_count": self.success_count,
283
+ "error_count": self.error_count,
284
+ "success_rate": success_rate,
285
+ "avg_processing_time": avg_processing_time,
286
+ "processing_times": self.processing_times[-10:] # Last 10 times
287
+ }
288
+
289
+ def validate_strategy(self, strategy: Dict[str, Any], required_keys: List[str]) -> bool:
290
+ """
291
+ Validate that strategy contains required keys.
292
+
293
+ Args:
294
+ strategy: Strategy object to validate
295
+ required_keys: List of required keys
296
+
297
+ Returns:
298
+ True if strategy is valid, False otherwise
299
+ """
300
+ if not strategy:
301
+ self.logger.warning("Strategy is None or empty")
302
+ return False
303
+
304
+ missing_keys = [key for key in required_keys if key not in strategy]
305
+ if missing_keys:
306
+ self.logger.warning(
307
+ "Strategy missing required keys",
308
+ missing_keys=missing_keys,
309
+ available_keys=list(strategy.keys())
310
+ )
311
+ return False
312
+
313
+ return True
314
+
315
+ def extract_strategy_for_agent(self, full_strategy: Dict[str, Any]) -> Dict[str, Any]:
316
+ """
317
+ Extract strategy specific to this agent from full strategy.
318
+
319
+ Args:
320
+ full_strategy: Complete strategy from PlannerAgent
321
+
322
+ Returns:
323
+ Strategy specific to this agent
324
+ """
325
+ if not full_strategy:
326
+ return {}
327
+
328
+ coordination_strategy = full_strategy.get("coordination_strategy", {})
329
+
330
+ # Map agent names to strategy keys
331
+ agent_strategy_map = {
332
+ "GenreMoodAgent": "genre_mood_agent",
333
+ "DiscoveryAgent": "discovery_agent",
334
+ "JudgeAgent": "evaluation_framework"
335
+ }
336
+
337
+ strategy_key = agent_strategy_map.get(self.agent_name)
338
+ if strategy_key and strategy_key in coordination_strategy:
339
+ return coordination_strategy[strategy_key]
340
+
341
+ # Return evaluation framework for JudgeAgent
342
+ if self.agent_name == "JudgeAgent":
343
+ return full_strategy.get("evaluation_framework", {})
344
+
345
+ return {}
346
+
347
+ def format_reasoning_chain(self, steps: List[str]) -> str:
348
+ """
349
+ Format reasoning steps into a coherent chain.
350
+
351
+ Args:
352
+ steps: List of reasoning steps
353
+
354
+ Returns:
355
+ Formatted reasoning chain
356
+ """
357
+ if not steps:
358
+ return "No reasoning steps recorded."
359
+
360
+ formatted_steps = []
361
+ for i, step in enumerate(steps, 1):
362
+ formatted_steps.append(f"{i}. {step}")
363
+
364
+ return "\n".join(formatted_steps)
src/agents/planner_agent.py ADDED
@@ -0,0 +1,583 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PlannerAgent for BeatDebate Multi-Agent Music Recommendation System
3
+
4
+ Strategic coordinator and planning engine that demonstrates sophisticated
5
+ agentic planning behavior for the AgentX competition.
6
+ """
7
+
8
+ import json
9
+ import re
10
+ from typing import Dict, List, Any, Optional
11
+ import structlog
12
+
13
+ from .base_agent import BaseAgent
14
+ from ..models.agent_models import MusicRecommenderState, AgentConfig
15
+
16
+ logger = structlog.get_logger(__name__)
17
+
18
+
19
+ class PlannerAgent(BaseAgent):
20
+ """
21
+ Master planning agent that coordinates the entire music recommendation workflow.
22
+
23
+ Demonstrates agentic planning behavior required for AgentX competition:
24
+ - Strategic task decomposition
25
+ - Resource allocation and coordination
26
+ - Success criteria definition
27
+ - Adaptive execution monitoring
28
+ """
29
+
30
+ def __init__(self, config: AgentConfig, gemini_client=None):
31
+ """
32
+ Initialize PlannerAgent with Gemini LLM client.
33
+
34
+ Args:
35
+ config: Agent configuration
36
+ gemini_client: Gemini LLM client for reasoning
37
+ """
38
+ super().__init__(config)
39
+ self.llm_client = gemini_client
40
+
41
+ # Planning templates and patterns
42
+ self.query_patterns = self._initialize_query_patterns()
43
+ self.strategy_templates = self._initialize_strategy_templates()
44
+
45
+ self.logger.info("PlannerAgent initialized with strategic planning capabilities")
46
+
47
+ async def process(self, state: MusicRecommenderState) -> MusicRecommenderState:
48
+ """
49
+ Create comprehensive music discovery strategy.
50
+
51
+ Args:
52
+ state: Current workflow state with user query
53
+
54
+ Returns:
55
+ Updated state with planning strategy
56
+ """
57
+ self.add_reasoning_step("Starting strategic planning for music discovery")
58
+
59
+ try:
60
+ # Step 1: Analyze user query complexity and intent
61
+ task_analysis = await self._analyze_user_query(state.user_query)
62
+ self.add_reasoning_step(f"Query analysis completed: {task_analysis['primary_goal']}")
63
+
64
+ # Step 2: Create coordination strategy for advocate agents
65
+ coordination_strategy = await self._plan_agent_coordination(state.user_query, task_analysis)
66
+ self.add_reasoning_step("Agent coordination strategy developed")
67
+
68
+ # Step 3: Define evaluation framework for judge
69
+ evaluation_framework = await self._create_evaluation_framework(state.user_query, task_analysis)
70
+ self.add_reasoning_step("Evaluation framework established")
71
+
72
+ # Step 4: Set up execution monitoring
73
+ execution_monitoring = await self._setup_execution_monitoring(task_analysis)
74
+ self.add_reasoning_step("Execution monitoring protocols defined")
75
+
76
+ # Combine into comprehensive strategy
77
+ planning_strategy = {
78
+ "task_analysis": task_analysis,
79
+ "coordination_strategy": coordination_strategy,
80
+ "evaluation_framework": evaluation_framework,
81
+ "execution_monitoring": execution_monitoring
82
+ }
83
+
84
+ # Update state with strategy
85
+ state.planning_strategy = planning_strategy
86
+ state.reasoning_log.append(f"PlannerAgent: Created comprehensive strategy for '{task_analysis['primary_goal']}'")
87
+
88
+ self.logger.info(
89
+ "Strategic planning completed",
90
+ primary_goal=task_analysis['primary_goal'],
91
+ complexity=task_analysis['complexity_level'],
92
+ strategy_components=len(planning_strategy)
93
+ )
94
+
95
+ return state
96
+
97
+ except Exception as e:
98
+ self.logger.error("Strategic planning failed", error=str(e))
99
+ state.reasoning_log.append(f"PlannerAgent ERROR: {str(e)}")
100
+ return state
101
+
102
+ async def _analyze_user_query(self, user_query: str) -> Dict[str, Any]:
103
+ """
104
+ Analyze user query for complexity, intent, and context factors.
105
+
106
+ Args:
107
+ user_query: User's music request
108
+
109
+ Returns:
110
+ Task analysis dictionary
111
+ """
112
+ system_prompt = """You are a strategic music recommendation planner. Analyze the user's query to understand their intent, mood, and context.
113
+
114
+ Extract:
115
+ 1. Primary goal (what they want to achieve with music)
116
+ 2. Complexity level (simple/medium/complex)
117
+ 3. Context factors (activity, mood, setting)
118
+ 4. Mood indicators (energy level, emotional state)
119
+ 5. Genre hints (explicit or implicit preferences)
120
+
121
+ Respond in JSON format."""
122
+
123
+ user_prompt = f"""Analyze this music request:
124
+ "{user_query}"
125
+
126
+ Provide analysis in this JSON format:
127
+ {{
128
+ "primary_goal": "brief description of main intent",
129
+ "complexity_level": "simple|medium|complex",
130
+ "context_factors": ["factor1", "factor2"],
131
+ "mood_indicators": ["mood1", "mood2"],
132
+ "genre_hints": ["genre1", "genre2"]
133
+ }}"""
134
+
135
+ try:
136
+ response = await self.call_llm(user_prompt, system_prompt)
137
+ analysis = self._parse_json_response(response)
138
+
139
+ # Validate and enhance analysis
140
+ analysis = self._enhance_task_analysis(analysis, user_query)
141
+
142
+ return analysis
143
+
144
+ except Exception as e:
145
+ self.logger.warning("LLM analysis failed, using pattern matching", error=str(e))
146
+ return self._fallback_query_analysis(user_query)
147
+
148
+ async def _plan_agent_coordination(self, user_query: str, task_analysis: Dict[str, Any]) -> Dict[str, Any]:
149
+ """
150
+ Plan coordination strategy for GenreMoodAgent and DiscoveryAgent.
151
+
152
+ Args:
153
+ user_query: Original user query
154
+ task_analysis: Analysis of the query
155
+
156
+ Returns:
157
+ Coordination strategy for advocate agents
158
+ """
159
+ system_prompt = """You are planning coordination between two music recommendation agents:
160
+ 1. GenreMoodAgent: Specializes in genre and mood-based search
161
+ 2. DiscoveryAgent: Specializes in similarity and underground discovery
162
+
163
+ Create specific strategies for each agent based on the user's request and analysis."""
164
+
165
+ user_prompt = f"""User Query: "{user_query}"
166
+ Task Analysis: {json.dumps(task_analysis, indent=2)}
167
+
168
+ Create coordination strategy in this JSON format:
169
+ {{
170
+ "genre_mood_agent": {{
171
+ "focus_areas": ["area1", "area2"],
172
+ "energy_level": "low|medium|high",
173
+ "search_tags": ["tag1", "tag2"],
174
+ "mood_priority": "primary mood to target",
175
+ "genre_constraints": ["constraint1", "constraint2"]
176
+ }},
177
+ "discovery_agent": {{
178
+ "novelty_priority": "low|medium|high",
179
+ "similarity_base": "what to base similarity on",
180
+ "underground_bias": 0.0-1.0,
181
+ "discovery_scope": "narrow|medium|broad",
182
+ "exploration_strategy": "strategy description"
183
+ }}
184
+ }}"""
185
+
186
+ try:
187
+ response = await self.call_llm(user_prompt, system_prompt)
188
+ coordination = self._parse_json_response(response)
189
+
190
+ # Validate and enhance coordination strategy
191
+ coordination = self._enhance_coordination_strategy(coordination, task_analysis)
192
+
193
+ return coordination
194
+
195
+ except Exception as e:
196
+ self.logger.warning("LLM coordination planning failed, using templates", error=str(e))
197
+ return self._fallback_coordination_strategy(task_analysis)
198
+
199
+ async def _create_evaluation_framework(self, user_query: str, task_analysis: Dict[str, Any]) -> Dict[str, Any]:
200
+ """
201
+ Create evaluation framework for JudgeAgent decision making.
202
+
203
+ Args:
204
+ user_query: Original user query
205
+ task_analysis: Analysis of the query
206
+
207
+ Returns:
208
+ Evaluation framework for judge
209
+ """
210
+ system_prompt = """You are creating an evaluation framework for a judge agent to select the best music recommendations.
211
+
212
+ The framework should define:
213
+ 1. Primary weights for different criteria
214
+ 2. Diversity targets
215
+ 3. Explanation style preferences
216
+
217
+ Consider the user's specific request and context."""
218
+
219
+ user_prompt = f"""User Query: "{user_query}"
220
+ Task Analysis: {json.dumps(task_analysis, indent=2)}
221
+
222
+ Create evaluation framework in this JSON format:
223
+ {{
224
+ "primary_weights": {{
225
+ "relevance": 0.0-1.0,
226
+ "novelty": 0.0-1.0,
227
+ "quality": 0.0-1.0,
228
+ "mood_match": 0.0-1.0,
229
+ "context_fit": 0.0-1.0
230
+ }},
231
+ "diversity_targets": {{
232
+ "genre": 1-3,
233
+ "era": 1-3,
234
+ "energy": 1-2,
235
+ "artist": 2-3
236
+ }},
237
+ "explanation_style": "detailed|concise|technical|casual",
238
+ "selection_criteria": ["criterion1", "criterion2"]
239
+ }}"""
240
+
241
+ try:
242
+ response = await self.call_llm(user_prompt, system_prompt)
243
+ framework = self._parse_json_response(response)
244
+
245
+ # Validate and enhance framework
246
+ framework = self._enhance_evaluation_framework(framework, task_analysis)
247
+
248
+ return framework
249
+
250
+ except Exception as e:
251
+ self.logger.warning("LLM framework creation failed, using templates", error=str(e))
252
+ return self._fallback_evaluation_framework(task_analysis)
253
+
254
+ async def _setup_execution_monitoring(self, task_analysis: Dict[str, Any]) -> Dict[str, Any]:
255
+ """
256
+ Set up execution monitoring and adaptation protocols.
257
+
258
+ Args:
259
+ task_analysis: Analysis of the user query
260
+
261
+ Returns:
262
+ Execution monitoring configuration
263
+ """
264
+ complexity = task_analysis.get('complexity_level', 'medium')
265
+
266
+ # Define quality thresholds based on complexity
267
+ quality_thresholds = {
268
+ 'simple': {'min_confidence': 0.7, 'min_relevance': 0.8},
269
+ 'medium': {'min_confidence': 0.6, 'min_relevance': 0.7},
270
+ 'complex': {'min_confidence': 0.5, 'min_relevance': 0.6}
271
+ }
272
+
273
+ # Define fallback strategies
274
+ fallback_strategies = [
275
+ "Broaden search criteria if no results found",
276
+ "Reduce novelty requirements if underground tracks unavailable",
277
+ "Adjust mood constraints if mood-specific search fails",
278
+ "Use genre similarity if exact genre match fails"
279
+ ]
280
+
281
+ # Define coordination protocols
282
+ coordination_protocols = {
283
+ "parallel_execution": True,
284
+ "result_sharing": False, # Agents work independently
285
+ "conflict_resolution": "judge_decides",
286
+ "timeout_handling": "partial_results_acceptable"
287
+ }
288
+
289
+ return {
290
+ "quality_thresholds": quality_thresholds.get(complexity, quality_thresholds['medium']),
291
+ "fallback_strategies": fallback_strategies,
292
+ "coordination_protocols": coordination_protocols,
293
+ "success_metrics": {
294
+ "min_recommendations": 2,
295
+ "target_recommendations": 3,
296
+ "max_processing_time": 300 # 5 minutes
297
+ }
298
+ }
299
+
300
+ def _parse_json_response(self, response: str) -> Dict[str, Any]:
301
+ """Parse JSON response from LLM, handling common formatting issues."""
302
+ try:
303
+ # Clean up response - remove markdown formatting
304
+ cleaned = re.sub(r'```json\s*', '', response)
305
+ cleaned = re.sub(r'```\s*$', '', cleaned)
306
+ cleaned = cleaned.strip()
307
+
308
+ return json.loads(cleaned)
309
+
310
+ except json.JSONDecodeError as e:
311
+ self.logger.warning("Failed to parse JSON response", error=str(e), response=response[:200])
312
+ raise
313
+
314
+ def _enhance_task_analysis(self, analysis: Dict[str, Any], user_query: str) -> Dict[str, Any]:
315
+ """Enhance and validate task analysis."""
316
+ # Ensure required fields exist
317
+ analysis.setdefault('primary_goal', 'music_discovery')
318
+ analysis.setdefault('complexity_level', 'medium')
319
+ analysis.setdefault('context_factors', [])
320
+ analysis.setdefault('mood_indicators', [])
321
+ analysis.setdefault('genre_hints', [])
322
+
323
+ # Add pattern-based enhancements
324
+ query_lower = user_query.lower()
325
+
326
+ # Detect activity context
327
+ activity_patterns = {
328
+ 'work': ['work', 'coding', 'study', 'focus', 'concentration'],
329
+ 'exercise': ['workout', 'gym', 'running', 'exercise'],
330
+ 'relax': ['chill', 'relax', 'calm', 'peaceful'],
331
+ 'party': ['party', 'dance', 'energetic', 'upbeat']
332
+ }
333
+
334
+ for activity, keywords in activity_patterns.items():
335
+ if any(keyword in query_lower for keyword in keywords):
336
+ if activity not in analysis['context_factors']:
337
+ analysis['context_factors'].append(activity)
338
+
339
+ return analysis
340
+
341
+ def _enhance_coordination_strategy(self, coordination: Dict[str, Any], task_analysis: Dict[str, Any]) -> Dict[str, Any]:
342
+ """Enhance and validate coordination strategy."""
343
+ # Ensure required structure
344
+ coordination.setdefault('genre_mood_agent', {})
345
+ coordination.setdefault('discovery_agent', {})
346
+
347
+ # Set defaults for GenreMoodAgent
348
+ gma = coordination['genre_mood_agent']
349
+ gma.setdefault('focus_areas', task_analysis.get('genre_hints', ['indie', 'alternative']))
350
+ gma.setdefault('energy_level', 'medium')
351
+ gma.setdefault('search_tags', task_analysis.get('mood_indicators', ['chill']))
352
+
353
+ # Set defaults for DiscoveryAgent
354
+ da = coordination['discovery_agent']
355
+ da.setdefault('novelty_priority', 'medium')
356
+ da.setdefault('underground_bias', 0.6)
357
+ da.setdefault('discovery_scope', 'medium')
358
+
359
+ return coordination
360
+
361
+ def _enhance_evaluation_framework(self, framework: Dict[str, Any], task_analysis: Dict[str, Any]) -> Dict[str, Any]:
362
+ """Enhance and validate evaluation framework."""
363
+ # Ensure required structure
364
+ framework.setdefault('primary_weights', {})
365
+ framework.setdefault('diversity_targets', {})
366
+
367
+ # Set default weights
368
+ weights = framework['primary_weights']
369
+ weights.setdefault('relevance', 0.3)
370
+ weights.setdefault('novelty', 0.25)
371
+ weights.setdefault('quality', 0.25)
372
+ weights.setdefault('mood_match', 0.2)
373
+
374
+ # Normalize weights to sum to 1.0
375
+ total_weight = sum(weights.values())
376
+ if total_weight > 0:
377
+ for key in weights:
378
+ weights[key] = weights[key] / total_weight
379
+
380
+ # Set default diversity targets
381
+ diversity = framework['diversity_targets']
382
+ diversity.setdefault('genre', 2)
383
+ diversity.setdefault('era', 2)
384
+ diversity.setdefault('energy', 1)
385
+ diversity.setdefault('artist', 3)
386
+
387
+ return framework
388
+
389
+ def _fallback_query_analysis(self, user_query: str) -> Dict[str, Any]:
390
+ """Fallback query analysis using pattern matching."""
391
+ query_lower = user_query.lower()
392
+
393
+ # Determine complexity
394
+ complexity_indicators = {
395
+ 'simple': ['play', 'song', 'music'],
396
+ 'complex': ['discover', 'explore', 'recommend', 'find', 'suggest']
397
+ }
398
+
399
+ complexity = 'medium' # default
400
+ for level, indicators in complexity_indicators.items():
401
+ if any(indicator in query_lower for indicator in indicators):
402
+ complexity = level
403
+ break
404
+
405
+ # Extract mood indicators
406
+ mood_patterns = {
407
+ 'chill': ['chill', 'relax', 'calm', 'peaceful'],
408
+ 'energetic': ['energetic', 'upbeat', 'pump', 'hype'],
409
+ 'focus': ['focus', 'concentration', 'study', 'work'],
410
+ 'sad': ['sad', 'melancholy', 'depressing'],
411
+ 'happy': ['happy', 'joyful', 'cheerful']
412
+ }
413
+
414
+ mood_indicators = []
415
+ for mood, keywords in mood_patterns.items():
416
+ if any(keyword in query_lower for keyword in keywords):
417
+ mood_indicators.append(mood)
418
+
419
+ return {
420
+ 'primary_goal': 'music_discovery',
421
+ 'complexity_level': complexity,
422
+ 'context_factors': [],
423
+ 'mood_indicators': mood_indicators or ['general'],
424
+ 'genre_hints': []
425
+ }
426
+
427
+ def _fallback_coordination_strategy(self, task_analysis: Dict[str, Any]) -> Dict[str, Any]:
428
+ """Fallback coordination strategy using templates."""
429
+ return {
430
+ 'genre_mood_agent': {
431
+ 'focus_areas': task_analysis.get('genre_hints', ['indie', 'alternative']),
432
+ 'energy_level': 'medium',
433
+ 'search_tags': task_analysis.get('mood_indicators', ['chill']),
434
+ 'mood_priority': task_analysis.get('mood_indicators', ['general'])[0],
435
+ 'genre_constraints': []
436
+ },
437
+ 'discovery_agent': {
438
+ 'novelty_priority': 'medium',
439
+ 'similarity_base': 'genre_and_mood',
440
+ 'underground_bias': 0.6,
441
+ 'discovery_scope': 'medium',
442
+ 'exploration_strategy': 'balanced_discovery'
443
+ }
444
+ }
445
+
446
+ def _fallback_evaluation_framework(self, task_analysis: Dict[str, Any]) -> Dict[str, Any]:
447
+ """Fallback evaluation framework using templates."""
448
+ return {
449
+ 'primary_weights': {
450
+ 'relevance': 0.3,
451
+ 'novelty': 0.25,
452
+ 'quality': 0.25,
453
+ 'mood_match': 0.2
454
+ },
455
+ 'diversity_targets': {
456
+ 'genre': 2,
457
+ 'era': 2,
458
+ 'energy': 1,
459
+ 'artist': 3
460
+ },
461
+ 'explanation_style': 'detailed',
462
+ 'selection_criteria': ['relevance', 'novelty', 'quality']
463
+ }
464
+
465
+ def _initialize_query_patterns(self) -> Dict[str, List[str]]:
466
+ """Initialize patterns for query analysis."""
467
+ return {
468
+ 'activity_context': {
469
+ 'work': ['work', 'coding', 'study', 'focus', 'concentration', 'productivity'],
470
+ 'exercise': ['workout', 'gym', 'running', 'exercise', 'fitness'],
471
+ 'relax': ['chill', 'relax', 'calm', 'peaceful', 'unwind'],
472
+ 'party': ['party', 'dance', 'energetic', 'upbeat', 'celebration'],
473
+ 'sleep': ['sleep', 'bedtime', 'lullaby', 'peaceful'],
474
+ 'drive': ['driving', 'road trip', 'car', 'travel']
475
+ },
476
+ 'mood_indicators': {
477
+ 'happy': ['happy', 'joyful', 'cheerful', 'uplifting', 'positive'],
478
+ 'sad': ['sad', 'melancholy', 'depressing', 'emotional', 'heartbreak'],
479
+ 'energetic': ['energetic', 'pump up', 'hype', 'motivational'],
480
+ 'calm': ['calm', 'peaceful', 'serene', 'tranquil'],
481
+ 'nostalgic': ['nostalgic', 'throwback', 'memories', 'classic'],
482
+ 'romantic': ['romantic', 'love', 'intimate', 'passionate']
483
+ },
484
+ 'genre_hints': {
485
+ 'rock': ['rock', 'guitar', 'band', 'alternative'],
486
+ 'electronic': ['electronic', 'edm', 'techno', 'house', 'ambient'],
487
+ 'hip_hop': ['hip hop', 'rap', 'beats', 'urban'],
488
+ 'indie': ['indie', 'independent', 'underground', 'alternative'],
489
+ 'classical': ['classical', 'orchestra', 'symphony', 'instrumental'],
490
+ 'jazz': ['jazz', 'blues', 'swing', 'improvisation'],
491
+ 'folk': ['folk', 'acoustic', 'singer-songwriter', 'country']
492
+ }
493
+ }
494
+
495
+ def _initialize_strategy_templates(self) -> Dict[str, Dict[str, Any]]:
496
+ """Initialize strategy templates for different scenarios."""
497
+ return {
498
+ 'work_focus': {
499
+ 'genre_mood_agent': {
500
+ 'focus_areas': ['instrumental', 'ambient', 'post-rock'],
501
+ 'energy_level': 'medium-low',
502
+ 'search_tags': ['focus', 'study', 'instrumental', 'concentration']
503
+ },
504
+ 'discovery_agent': {
505
+ 'novelty_priority': 'medium',
506
+ 'underground_bias': 0.7,
507
+ 'discovery_scope': 'narrow'
508
+ }
509
+ },
510
+ 'workout_energy': {
511
+ 'genre_mood_agent': {
512
+ 'focus_areas': ['electronic', 'rock', 'hip-hop'],
513
+ 'energy_level': 'high',
514
+ 'search_tags': ['energetic', 'pump', 'workout', 'motivational']
515
+ },
516
+ 'discovery_agent': {
517
+ 'novelty_priority': 'low',
518
+ 'underground_bias': 0.3,
519
+ 'discovery_scope': 'broad'
520
+ }
521
+ },
522
+ 'chill_discovery': {
523
+ 'genre_mood_agent': {
524
+ 'focus_areas': ['indie', 'alternative', 'folk'],
525
+ 'energy_level': 'low',
526
+ 'search_tags': ['chill', 'relax', 'mellow', 'peaceful']
527
+ },
528
+ 'discovery_agent': {
529
+ 'novelty_priority': 'high',
530
+ 'underground_bias': 0.8,
531
+ 'discovery_scope': 'broad'
532
+ }
533
+ }
534
+ }
535
+
536
+ async def _make_llm_call(self, prompt: str, system_prompt: str = None) -> str:
537
+ """
538
+ Make LLM call using Gemini client.
539
+
540
+ Args:
541
+ prompt: User prompt
542
+ system_prompt: System prompt
543
+
544
+ Returns:
545
+ LLM response
546
+ """
547
+ if not self.llm_client:
548
+ raise RuntimeError("Gemini client not initialized")
549
+
550
+ try:
551
+ # Combine system and user prompts
552
+ full_prompt = f"{system_prompt}\n\n{prompt}" if system_prompt else prompt
553
+
554
+ # Call Gemini (this will be implemented when we integrate Gemini)
555
+ # For now, return a placeholder
556
+ response = await self.llm_client.generate_content(full_prompt)
557
+ return response.text
558
+
559
+ except Exception as e:
560
+ self.logger.error("Gemini API call failed", error=str(e))
561
+ raise
562
+
563
+ def _extract_output_data(self, state: MusicRecommenderState) -> Dict[str, Any]:
564
+ """Extract PlannerAgent output data."""
565
+ return {
566
+ "planning_strategy_created": state.planning_strategy is not None,
567
+ "strategy_components": len(state.planning_strategy) if state.planning_strategy else 0
568
+ }
569
+
570
+ def _calculate_confidence(self, state: MusicRecommenderState) -> float:
571
+ """Calculate confidence in planning strategy."""
572
+ if not state.planning_strategy:
573
+ return 0.0
574
+
575
+ # Base confidence
576
+ confidence = 0.7
577
+
578
+ # Increase confidence based on strategy completeness
579
+ required_components = ['task_analysis', 'coordination_strategy', 'evaluation_framework']
580
+ present_components = sum(1 for comp in required_components if comp in state.planning_strategy)
581
+ confidence += (present_components / len(required_components)) * 0.3
582
+
583
+ return min(confidence, 1.0)
src/models/__init__.py CHANGED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Models Package for BeatDebate Multi-Agent System
3
+
4
+ Contains Pydantic models for state management and data structures.
5
+ """
6
+
7
+ from .agent_models import (
8
+ MusicRecommenderState,
9
+ AgentStrategy,
10
+ TaskAnalysis,
11
+ AgentCoordinationStrategy,
12
+ EvaluationFramework,
13
+ TrackRecommendation,
14
+ AgentDeliberation,
15
+ ReasoningChain,
16
+ FinalRecommendationResponse,
17
+ AgentConfig,
18
+ SystemConfig
19
+ )
20
+
21
+ __all__ = [
22
+ "MusicRecommenderState",
23
+ "AgentStrategy",
24
+ "TaskAnalysis",
25
+ "AgentCoordinationStrategy",
26
+ "EvaluationFramework",
27
+ "TrackRecommendation",
28
+ "AgentDeliberation",
29
+ "ReasoningChain",
30
+ "FinalRecommendationResponse",
31
+ "AgentConfig",
32
+ "SystemConfig",
33
+ ]
src/models/agent_models.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agent Models for BeatDebate Multi-Agent Music Recommendation System
3
+
4
+ Pydantic models for state management, agent communication, and data structures.
5
+ """
6
+
7
+ from typing import Dict, List, Any, Optional
8
+ from pydantic import BaseModel, Field
9
+ from datetime import datetime
10
+
11
+
12
+ class MusicRecommenderState(BaseModel):
13
+ """Shared state across all agents in the LangGraph workflow"""
14
+
15
+ # Input
16
+ user_query: str = Field(..., description="Original user query for music recommendation")
17
+ user_profile: Optional[Dict[str, Any]] = Field(default=None, description="User preferences and history")
18
+
19
+ # Planning phase
20
+ planning_strategy: Optional[Dict[str, Any]] = Field(default=None, description="Strategy created by PlannerAgent")
21
+ execution_plan: Optional[Dict[str, Any]] = Field(default=None, description="Execution monitoring plan")
22
+
23
+ # Advocate phase
24
+ genre_mood_recommendations: List[Dict] = Field(default_factory=list, description="GenreMoodAgent recommendations")
25
+ discovery_recommendations: List[Dict] = Field(default_factory=list, description="DiscoveryAgent recommendations")
26
+
27
+ # Judge phase
28
+ final_recommendations: List[Dict] = Field(default_factory=list, description="Final selected recommendations")
29
+
30
+ # Reasoning transparency
31
+ reasoning_log: List[str] = Field(default_factory=list, description="Step-by-step reasoning log")
32
+ agent_deliberations: List[Dict] = Field(default_factory=list, description="Agent decision records")
33
+
34
+ # Metadata
35
+ processing_start_time: Optional[float] = Field(default=None, description="Processing start timestamp")
36
+ total_processing_time: Optional[float] = Field(default=None, description="Total processing time in seconds")
37
+ session_id: Optional[str] = Field(default=None, description="Unique session identifier")
38
+
39
+
40
+ class AgentStrategy(BaseModel):
41
+ """Strategy object passed between agents"""
42
+
43
+ task_analysis: Dict[str, Any] = Field(..., description="Analysis of the user query and task")
44
+ coordination_strategy: Dict[str, Any] = Field(..., description="Strategy for each advocate agent")
45
+ evaluation_framework: Dict[str, Any] = Field(..., description="Criteria for judge evaluation")
46
+ execution_monitoring: Dict[str, Any] = Field(..., description="Monitoring and adaptation protocols")
47
+
48
+
49
+ class TaskAnalysis(BaseModel):
50
+ """Analysis of user query complexity and intent"""
51
+
52
+ primary_goal: str = Field(..., description="Main intent extracted from query")
53
+ complexity_level: str = Field(..., description="Query complexity: simple, medium, complex")
54
+ context_factors: List[str] = Field(default_factory=list, description="Context clues from query")
55
+ mood_indicators: List[str] = Field(default_factory=list, description="Mood/energy indicators")
56
+ genre_hints: List[str] = Field(default_factory=list, description="Genre preferences or hints")
57
+
58
+
59
+ class AgentCoordinationStrategy(BaseModel):
60
+ """Coordination strategy for advocate agents"""
61
+
62
+ genre_mood_agent: Dict[str, Any] = Field(..., description="Strategy for GenreMoodAgent")
63
+ discovery_agent: Dict[str, Any] = Field(..., description="Strategy for DiscoveryAgent")
64
+
65
+
66
+ class EvaluationFramework(BaseModel):
67
+ """Framework for judge evaluation"""
68
+
69
+ primary_weights: Dict[str, float] = Field(..., description="Weights for different criteria")
70
+ diversity_targets: Dict[str, int] = Field(..., description="Diversity targets for recommendations")
71
+ explanation_style: str = Field(..., description="Style for generating explanations")
72
+
73
+
74
+ class TrackRecommendation(BaseModel):
75
+ """Individual track recommendation with reasoning"""
76
+
77
+ # Track metadata
78
+ title: str = Field(..., description="Track title")
79
+ artist: str = Field(..., description="Artist name")
80
+ album: Optional[str] = Field(default=None, description="Album name")
81
+ year: Optional[int] = Field(default=None, description="Release year")
82
+
83
+ # External identifiers
84
+ lastfm_url: Optional[str] = Field(default=None, description="Last.fm URL")
85
+ spotify_url: Optional[str] = Field(default=None, description="Spotify URL")
86
+ preview_url: Optional[str] = Field(default=None, description="Audio preview URL")
87
+
88
+ # Metadata
89
+ genres: List[str] = Field(default_factory=list, description="Genre tags")
90
+ tags: List[str] = Field(default_factory=list, description="Mood/style tags")
91
+ similar_artists: List[str] = Field(default_factory=list, description="Similar artists")
92
+
93
+ # Recommendation context
94
+ reasoning_chain: str = Field(..., description="Agent's reasoning for this recommendation")
95
+ confidence_score: float = Field(..., ge=0.0, le=1.0, description="Confidence in recommendation")
96
+ novelty_score: Optional[float] = Field(default=None, ge=0.0, le=1.0, description="Novelty/underground score")
97
+ relevance_score: float = Field(..., ge=0.0, le=1.0, description="Relevance to user query")
98
+
99
+ # Agent attribution
100
+ recommending_agent: str = Field(..., description="Agent that made this recommendation")
101
+ strategy_applied: Dict[str, Any] = Field(..., description="Strategy used for this recommendation")
102
+
103
+
104
+ class AgentDeliberation(BaseModel):
105
+ """Record of agent decision-making process"""
106
+
107
+ agent_name: str = Field(..., description="Name of the agent")
108
+ timestamp: datetime = Field(default_factory=datetime.now, description="When deliberation occurred")
109
+ input_data: Dict[str, Any] = Field(..., description="Input data for the agent")
110
+ reasoning_steps: List[str] = Field(..., description="Step-by-step reasoning process")
111
+ output_data: Dict[str, Any] = Field(..., description="Agent's output/decision")
112
+ confidence: float = Field(..., ge=0.0, le=1.0, description="Agent's confidence in decision")
113
+ processing_time: float = Field(..., description="Time taken for deliberation in seconds")
114
+
115
+
116
+ class ReasoningChain(BaseModel):
117
+ """Structured reasoning chain for transparency"""
118
+
119
+ step_number: int = Field(..., description="Step number in reasoning chain")
120
+ step_type: str = Field(..., description="Type of reasoning step")
121
+ description: str = Field(..., description="Description of reasoning step")
122
+ evidence: List[str] = Field(default_factory=list, description="Evidence supporting this step")
123
+ confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence in this reasoning step")
124
+
125
+
126
+ class FinalRecommendationResponse(BaseModel):
127
+ """Final response format for the user"""
128
+
129
+ recommendations: List[TrackRecommendation] = Field(..., description="Final track recommendations")
130
+ explanation: str = Field(..., description="Overall explanation of recommendations")
131
+ planning_summary: str = Field(..., description="Summary of planning process")
132
+ agent_coordination_summary: str = Field(..., description="Summary of agent coordination")
133
+
134
+ # Metadata
135
+ total_processing_time: float = Field(..., description="Total processing time in seconds")
136
+ agents_involved: List[str] = Field(..., description="List of agents that participated")
137
+ reasoning_transparency: List[AgentDeliberation] = Field(..., description="Full reasoning transparency")
138
+ session_id: str = Field(..., description="Session identifier")
139
+
140
+ # Quality metrics
141
+ diversity_score: float = Field(..., ge=0.0, le=1.0, description="Diversity of recommendations")
142
+ novelty_score: float = Field(..., ge=0.0, le=1.0, description="Average novelty score")
143
+ confidence_score: float = Field(..., ge=0.0, le=1.0, description="Overall confidence")
144
+
145
+
146
+ class AgentConfig(BaseModel):
147
+ """Configuration for individual agents"""
148
+
149
+ agent_name: str = Field(..., description="Name of the agent")
150
+ agent_type: str = Field(..., description="Type of agent (planner, advocate, judge)")
151
+ llm_model: str = Field(default="gemini-2.0-flash-exp", description="LLM model to use")
152
+ temperature: float = Field(default=0.7, ge=0.0, le=2.0, description="LLM temperature")
153
+ max_tokens: int = Field(default=1000, description="Maximum tokens for LLM response")
154
+ timeout_seconds: int = Field(default=30, description="Timeout for agent processing")
155
+
156
+ # Agent-specific configuration
157
+ specialty_config: Dict[str, Any] = Field(default_factory=dict, description="Agent-specific configuration")
158
+
159
+
160
+ class SystemConfig(BaseModel):
161
+ """Overall system configuration"""
162
+
163
+ # API configurations
164
+ gemini_api_key: str = Field(..., description="Gemini API key")
165
+ lastfm_api_key: str = Field(..., description="Last.fm API key")
166
+ spotify_client_id: Optional[str] = Field(default=None, description="Spotify client ID")
167
+ spotify_client_secret: Optional[str] = Field(default=None, description="Spotify client secret")
168
+
169
+ # Rate limiting
170
+ gemini_rate_limit: int = Field(default=15, description="Gemini requests per minute")
171
+ lastfm_rate_limit: float = Field(default=3.0, description="Last.fm requests per second")
172
+ spotify_rate_limit: int = Field(default=50, description="Spotify requests per hour")
173
+
174
+ # Caching
175
+ cache_enabled: bool = Field(default=True, description="Enable caching")
176
+ cache_ttl_hours: int = Field(default=24, description="Cache TTL in hours")
177
+ cache_directory: str = Field(default="data/cache", description="Cache directory path")
178
+
179
+ # Agent configurations
180
+ agent_configs: Dict[str, AgentConfig] = Field(default_factory=dict, description="Configuration for each agent")
181
+
182
+ # Performance settings
183
+ max_concurrent_agents: int = Field(default=2, description="Maximum concurrent agent executions")
184
+ total_timeout_minutes: int = Field(default=5, description="Total workflow timeout in minutes")
src/services/__init__.py CHANGED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Services Package for BeatDebate Multi-Agent System
3
+
4
+ Contains business logic, workflow orchestration, and utility services.
5
+ """
6
+
7
+ # Services will be added as we implement them
8
+ __all__ = []
tests/test_planner_agent.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tests for PlannerAgent
3
+
4
+ Basic tests to verify PlannerAgent functionality and strategic planning behavior.
5
+ """
6
+
7
+ import pytest
8
+ import asyncio
9
+ from unittest.mock import Mock, AsyncMock
10
+
11
+ from src.models.agent_models import MusicRecommenderState, AgentConfig
12
+ from src.agents.planner_agent import PlannerAgent
13
+
14
+
15
+ class TestPlannerAgent:
16
+ """Test suite for PlannerAgent"""
17
+
18
+ @pytest.fixture
19
+ def agent_config(self):
20
+ """Create test agent configuration"""
21
+ return AgentConfig(
22
+ agent_name="PlannerAgent",
23
+ agent_type="planner",
24
+ llm_model="gemini-2.0-flash-exp",
25
+ temperature=0.7,
26
+ timeout_seconds=30
27
+ )
28
+
29
+ @pytest.fixture
30
+ def mock_gemini_client(self):
31
+ """Create mock Gemini client"""
32
+ client = Mock()
33
+ client.generate_content = AsyncMock()
34
+ return client
35
+
36
+ @pytest.fixture
37
+ def planner_agent(self, agent_config, mock_gemini_client):
38
+ """Create PlannerAgent instance for testing"""
39
+ return PlannerAgent(agent_config, mock_gemini_client)
40
+
41
+ @pytest.fixture
42
+ def test_state(self):
43
+ """Create test state with user query"""
44
+ return MusicRecommenderState(
45
+ user_query="I need focus music for coding",
46
+ session_id="test_session_123"
47
+ )
48
+
49
+ def test_planner_agent_initialization(self, planner_agent):
50
+ """Test that PlannerAgent initializes correctly"""
51
+ assert planner_agent.agent_name == "PlannerAgent"
52
+ assert planner_agent.agent_type == "planner"
53
+ assert planner_agent.llm_client is not None
54
+ assert hasattr(planner_agent, 'query_patterns')
55
+ assert hasattr(planner_agent, 'strategy_templates')
56
+
57
+ def test_fallback_query_analysis(self, planner_agent):
58
+ """Test fallback query analysis without LLM"""
59
+ # Test simple query
60
+ analysis = planner_agent._fallback_query_analysis("play some music")
61
+ assert analysis['complexity_level'] == 'simple'
62
+ assert analysis['primary_goal'] == 'music_discovery'
63
+
64
+ # Test complex query
65
+ analysis = planner_agent._fallback_query_analysis("discover underground indie rock for studying")
66
+ assert analysis['complexity_level'] == 'complex'
67
+ assert 'focus' in analysis['mood_indicators']
68
+
69
+ def test_fallback_coordination_strategy(self, planner_agent):
70
+ """Test fallback coordination strategy creation"""
71
+ task_analysis = {
72
+ 'primary_goal': 'focus_music',
73
+ 'complexity_level': 'medium',
74
+ 'mood_indicators': ['focus', 'chill'],
75
+ 'genre_hints': ['indie', 'electronic']
76
+ }
77
+
78
+ coordination = planner_agent._fallback_coordination_strategy(task_analysis)
79
+
80
+ assert 'genre_mood_agent' in coordination
81
+ assert 'discovery_agent' in coordination
82
+ assert coordination['genre_mood_agent']['focus_areas'] == ['indie', 'electronic']
83
+ assert coordination['discovery_agent']['novelty_priority'] == 'medium'
84
+
85
+ def test_fallback_evaluation_framework(self, planner_agent):
86
+ """Test fallback evaluation framework creation"""
87
+ task_analysis = {'complexity_level': 'medium'}
88
+
89
+ framework = planner_agent._fallback_evaluation_framework(task_analysis)
90
+
91
+ assert 'primary_weights' in framework
92
+ assert 'diversity_targets' in framework
93
+ assert sum(framework['primary_weights'].values()) == pytest.approx(1.0, rel=1e-2)
94
+
95
+ def test_enhance_task_analysis(self, planner_agent):
96
+ """Test task analysis enhancement"""
97
+ analysis = {'primary_goal': 'test'}
98
+ query = "I need music for coding and focus"
99
+
100
+ enhanced = planner_agent._enhance_task_analysis(analysis, query)
101
+
102
+ assert 'work' in enhanced['context_factors']
103
+ assert enhanced['complexity_level'] == 'medium' # default
104
+ assert isinstance(enhanced['mood_indicators'], list)
105
+
106
+ def test_parse_json_response(self, planner_agent):
107
+ """Test JSON response parsing"""
108
+ # Test clean JSON
109
+ response = '{"test": "value", "number": 123}'
110
+ parsed = planner_agent._parse_json_response(response)
111
+ assert parsed['test'] == 'value'
112
+ assert parsed['number'] == 123
113
+
114
+ # Test JSON with markdown
115
+ response = '```json\n{"test": "value"}\n```'
116
+ parsed = planner_agent._parse_json_response(response)
117
+ assert parsed['test'] == 'value'
118
+
119
+ @pytest.mark.asyncio
120
+ async def test_process_with_fallback(self, planner_agent, test_state):
121
+ """Test process method using fallback strategies (no LLM calls)"""
122
+ # Mock LLM to raise exception, forcing fallback
123
+ planner_agent.llm_client.generate_content.side_effect = Exception("LLM unavailable")
124
+
125
+ result_state = await planner_agent.process(test_state)
126
+
127
+ # Verify strategy was created using fallbacks
128
+ assert result_state.planning_strategy is not None
129
+ assert 'task_analysis' in result_state.planning_strategy
130
+ assert 'coordination_strategy' in result_state.planning_strategy
131
+ assert 'evaluation_framework' in result_state.planning_strategy
132
+ assert 'execution_monitoring' in result_state.planning_strategy
133
+
134
+ # Verify reasoning log was updated
135
+ assert len(result_state.reasoning_log) > 0
136
+ assert any('PlannerAgent' in log for log in result_state.reasoning_log)
137
+
138
+ @pytest.mark.asyncio
139
+ async def test_process_with_mock_llm(self, planner_agent, test_state):
140
+ """Test process method with mocked LLM responses"""
141
+ # Mock LLM responses
142
+ mock_responses = [
143
+ '{"primary_goal": "focus_music", "complexity_level": "medium", "context_factors": ["work"], "mood_indicators": ["focus"], "genre_hints": ["instrumental"]}',
144
+ '{"genre_mood_agent": {"focus_areas": ["instrumental"], "energy_level": "medium"}, "discovery_agent": {"novelty_priority": "medium", "underground_bias": 0.6}}',
145
+ '{"primary_weights": {"relevance": 0.4, "novelty": 0.3, "quality": 0.3}, "diversity_targets": {"genre": 2, "artist": 3}}'
146
+ ]
147
+
148
+ planner_agent.llm_client.generate_content.side_effect = [
149
+ Mock(text=response) for response in mock_responses
150
+ ]
151
+
152
+ result_state = await planner_agent.process(test_state)
153
+
154
+ # Verify strategy was created
155
+ assert result_state.planning_strategy is not None
156
+ strategy = result_state.planning_strategy
157
+
158
+ # Verify task analysis
159
+ assert strategy['task_analysis']['primary_goal'] == 'focus_music'
160
+ assert strategy['task_analysis']['complexity_level'] == 'medium'
161
+
162
+ # Verify coordination strategy
163
+ assert 'genre_mood_agent' in strategy['coordination_strategy']
164
+ assert 'discovery_agent' in strategy['coordination_strategy']
165
+
166
+ # Verify evaluation framework
167
+ assert 'primary_weights' in strategy['evaluation_framework']
168
+ assert 'diversity_targets' in strategy['evaluation_framework']
169
+
170
+ def test_execution_monitoring_setup(self, planner_agent):
171
+ """Test execution monitoring setup"""
172
+ task_analysis = {'complexity_level': 'complex'}
173
+
174
+ monitoring = asyncio.run(planner_agent._setup_execution_monitoring(task_analysis))
175
+
176
+ assert 'quality_thresholds' in monitoring
177
+ assert 'fallback_strategies' in monitoring
178
+ assert 'coordination_protocols' in monitoring
179
+ assert 'success_metrics' in monitoring
180
+
181
+ # Verify complex query gets lower thresholds
182
+ assert monitoring['quality_thresholds']['min_confidence'] == 0.5
183
+ assert monitoring['success_metrics']['target_recommendations'] == 3
184
+
185
+ def test_strategy_templates_initialization(self, planner_agent):
186
+ """Test that strategy templates are properly initialized"""
187
+ templates = planner_agent.strategy_templates
188
+
189
+ assert 'work_focus' in templates
190
+ assert 'workout_energy' in templates
191
+ assert 'chill_discovery' in templates
192
+
193
+ # Verify template structure
194
+ work_template = templates['work_focus']
195
+ assert 'genre_mood_agent' in work_template
196
+ assert 'discovery_agent' in work_template
197
+ assert work_template['genre_mood_agent']['energy_level'] == 'medium-low'
198
+
199
+ def test_query_patterns_initialization(self, planner_agent):
200
+ """Test that query patterns are properly initialized"""
201
+ patterns = planner_agent.query_patterns
202
+
203
+ assert 'activity_context' in patterns
204
+ assert 'mood_indicators' in patterns
205
+ assert 'genre_hints' in patterns
206
+
207
+ # Verify pattern content
208
+ assert 'work' in patterns['activity_context']
209
+ assert 'coding' in patterns['activity_context']['work']
210
+ assert 'happy' in patterns['mood_indicators']
211
+ assert 'rock' in patterns['genre_hints']
212
+
213
+
214
+ if __name__ == "__main__":
215
+ pytest.main([__file__])
uv.lock CHANGED
@@ -368,6 +368,13 @@ dev = [
368
  { name = "ruff" },
369
  ]
370
 
 
 
 
 
 
 
 
371
  [package.metadata]
372
  requires-dist = [
373
  { name = "aiohttp", specifier = ">=3.9.0" },
@@ -401,6 +408,13 @@ requires-dist = [
401
  { name = "uvicorn", specifier = ">=0.24.0" },
402
  ]
403
 
 
 
 
 
 
 
 
404
  [[package]]
405
  name = "beautifulsoup4"
406
  version = "4.13.4"
 
368
  { name = "ruff" },
369
  ]
370
 
371
+ [package.dev-dependencies]
372
+ dev = [
373
+ { name = "pytest" },
374
+ { name = "pytest-asyncio" },
375
+ { name = "pytest-cov" },
376
+ ]
377
+
378
  [package.metadata]
379
  requires-dist = [
380
  { name = "aiohttp", specifier = ">=3.9.0" },
 
408
  { name = "uvicorn", specifier = ">=0.24.0" },
409
  ]
410
 
411
+ [package.metadata.requires-dev]
412
+ dev = [
413
+ { name = "pytest", specifier = ">=8.3.5" },
414
+ { name = "pytest-asyncio", specifier = ">=0.26.0" },
415
+ { name = "pytest-cov", specifier = ">=6.1.1" },
416
+ ]
417
+
418
  [[package]]
419
  name = "beautifulsoup4"
420
  version = "4.13.4"