Spaces:

meetmendapara
/

SPG_ML

Sleeping

App Files Files Community

SPG_ML / SERVICE_INTEGRATION_GUIDE.md

meetmendapara

Initial commit for ML space

df31aa1 3 months ago

preview code

raw

history blame contribute delete

16.2 kB

	# Service Integration Guide: Cognitive Theory & SHAP

	## Quick Start

	### 1. Install Dependencies

	```bash
	cd ML
	pip install -r requirements.txt

	# For real SHAP support (optional but recommended):
	pip install shap
	```

	### 2. Start ML Service

	```bash
	cd ML
	python main.py
	```

	The service will be available at `http://localhost:8000`.

	### 3. Test Endpoints

	```bash
	# Health check
	curl http://localhost:8000/health

	# Personality analysis
	curl -X POST http://localhost:8000/ml/personality/analyze \
	-H "Content-Type: application/json" \
	-d '{
	"openness": 75,
	"conscientiousness": 80,
	"extraversion": 60,
	"agreeableness": 70,
	"neuroticism": 40
	}'

	# Task prediction with SHAP explanation
	curl -X POST http://localhost:8000/ml/predict/explain \
	-H "Content-Type: application/json" \
	-d '{
	"task": {
	"title": "Complete project report",
	"category": "WORK",
	"priority": "HIGH",
	"estimated_duration": 120,
	"complexity": 4,
	"due_date": "2026-03-10T17:00:00Z",
	"personality": {
	"openness": 75,
	"conscientiousness": 80,
	"extraversion": 60,
	"agreeableness": 70,
	"neuroticism": 40
	}
	}
	}'
	```

	## API Endpoints

	### Personality Analysis

	POST /ml/personality/analyze

	Request:
	```json
	{
	"openness": 75,
	"conscientiousness": 80,
	"extraversion": 60,
	"agreeableness": 70,
	"neuroticism": 40
	}
	```

	Response:
	```json
	{
	"personality_type": "ENTJ - The Commander",
	"type_code": "ENTJ",
	"strengths": ["Strong organization", "Reliable delivery", "Excellent communication"],
	"weaknesses": ["May face team collaboration challenges"],
	"work_style": "Thrives in collaborative environments | Benefits from detailed planning",
	"recommendations": "Use detailed task breakdowns to leverage your planning strength",
	"cognitive_style": {
	"primary_style": "Systematic",
	"scores": {
	"analytical": 67.5,
	"creative": 75.0,
	"systematic": 80.0,
	"social": 65.0
	}
	},
	"traits_analysis": {
	"conscientiousness": {
	"value": 80,
	"level": "high",
	"description": "Highly organized and disciplined...",
	"percentile": 84
	}
	}
	}
	```

	### Task Prediction with SHAP Explanation

	POST /ml/predict/explain

	Request:
	```json
	{
	"task": {
	"title": "Complete project report",
	"description": "Write comprehensive analysis report",
	"category": "WORK",
	"priority": "HIGH",
	"estimated_duration": 120,
	"complexity": 4,
	"due_date": "2026-03-10T17:00:00Z",
	"personality": {
	"openness": 75,
	"conscientiousness": 80,
	"extraversion": 60,
	"agreeableness": 70,
	"neuroticism": 40
	},
	"historical_completion_rate": 0.78
	}
	}
	```

	Response:
	```json
	{
	"prediction_summary": {
	"completion_probability": 0.75,
	"stress_level": 6.5,
	"difficulty": "MODERATE",
	"outcome_assessment": "Likely to succeed with some attention"
	},
	"feature_attribution": {
	"base_value": 0.5,
	"prediction": 0.75,
	"shap_values": {
	"completion_rate": 0.08,
	"trait_conscientiousness": 0.12,
	"time_pressure": -0.05,
	"complexity_normalized": -0.03,
	"pri_attention_demand": 0.06
	},
	"method": "tree_shap",
	"feature_ranking": [
	{
	"feature": "trait_conscientiousness",
	"impact": 0.12,
	"direction": "positive",
	"plain_english": "Your high conscientiousness means you tend to be disciplined and organized, which helps task completion."
	},
	{
	"feature": "completion_rate",
	"impact": 0.08,
	"direction": "positive",
	"plain_english": "You've completed 78% of past tasks on time - this strong track record boosts your predicted success."
	}
	],
	"top_positive_features": [
	{
	"feature": "Conscientiousness",
	"impact": 0.12,
	"plain_english": "Your high conscientiousness..."
	}
	],
	"top_negative_features": [
	{
	"feature": "Time Pressure",
	"impact": 0.05,
	"plain_english": "The deadline is approaching fast..."
	}
	],
	"waterfall_data": [
	{"name": "Base Probability", "value": 0.5, "cumulative": 0.5, "type": "base"},
	{"name": "Conscientiousness", "value": 0.12, "cumulative": 0.62, "type": "positive"},
	{"name": "Completion Rate", "value": 0.08, "cumulative": 0.70, "type": "positive"},
	{"name": "Time Pressure", "value": -0.05, "cumulative": 0.65, "type": "negative"},
	{"name": "Final Prediction", "value": 0.75, "cumulative": 0.75, "type": "total"}
	]
	},
	"counterfactual_scenarios": [
	{
	"feature": "complexity_normalized",
	"current_value": 0.8,
	"suggested_value": 0.5,
	"action": "Break task into smaller subtasks",
	"expected_probability": 0.85,
	"feasibility": "high"
	}
	],
	"recommendations": [
	{
	"title": "Break Down Task",
	"description": "Split into smaller, manageable subtasks",
	"priority": "high",
	"risk_addressed": "completion_risk"
	},
	{
	"title": "Time Block",
	"description": "Reserve dedicated time slots for this task",
	"priority": "medium",
	"risk_addressed": "focus_risk"
	}
	],
	"confidence_assessment": {
	"data_quality": "high",
	"prediction_confidence": 0.85,
	"explanation_confidence": 0.82
	},
	"natural_language_summary": "This task has a moderate 75% completion probability. Your Conscientiousness is working in your favor. However, Time Pressure is a concern. Top recommendation: Reserve dedicated time slots for this task."
	}
	```

	### Cognitive Theory Analysis

	POST /ml/cognitive/analyze

	Request:
	```json
	{
	"task": {
	"title": "Complete project report",
	"category": "WORK",
	"priority": "HIGH",
	"estimated_duration": 120,
	"complexity": 4
	},
	"personality": {
	"openness": 75,
	"conscientiousness": 80,
	"extraversion": 60,
	"agreeableness": 70,
	"neuroticism": 40
	},
	"context": {
	"active_tasks_count": 5,
	"time_pressure": 0.3,
	"high_interruption_risk": false
	},
	"historical_performance": {
	"completion_rate": 0.78,
	"on_time_rate": 0.82
	}
	}
	```

	Response:
	```json
	{
	"success_probability": 0.72,
	"cognitive_load_analysis": {
	"intrinsic_load": 0.68,
	"extraneous_load": 0.25,
	"germane_load": 0.35,
	"total_load": 0.52,
	"overload_risk": false,
	"working_memory_utilization": 52.0,
	"recommendations": [
	"Minimize distractions and interruptions",
	"Schedule during low-interruption periods"
	]
	},
	"personality_task_fit": {
	"overall_fit": 0.15,
	"component_fits": {
	"conscientiousness_fit": 0.216,
	"stress_vulnerability": -0.09
	},
	"fit_level": "excellent",
	"recommendations": [
	"Use external structure: timers, checklists"
	]
	},
	"motivation_analysis": {
	"intrinsic_motivation": 0.65,
	"motivation_type": "intrinsic",
	"needs_satisfaction": {
	"autonomy": 0.7,
	"competence": 0.75,
	"relatedness": 0.5
	},
	"recommendations": [
	"Enhance relatedness: Find accountability partner"
	]
	},
	"flow_state_analysis": {
	"flow_potential": 0.85,
	"challenge_level": 0.72,
	"skill_level": 0.83,
	"challenge_skill_ratio": 0.87,
	"zone": "flow",
	"recommendations": [
	"Optimal conditions for flow state!",
	"Minimize interruptions to maintain flow"
	]
	},
	"integrated_recommendations": [
	"Minimize distractions and interruptions",
	"Schedule during low-interruption periods",
	"Optimal conditions for flow state!",
	"Minimize interruptions to maintain flow"
	],
	"risk_factors": []
	}
	```

	## Backend Integration

	### Java Service Layer

	MLClientService.java - Add cognitive theory endpoint:

	```java
	public CognitiveAnalysisResponse analyzeCognitiveFactors(
	Task task,
	PersonalityProfile personality,
	Map<String, Object> context,
	Map<String, Object> historicalPerformance) {

	Map<String, Object> request = new HashMap<>();
	request.put("task", buildTaskRequest(task));
	request.put("personality", buildPersonalityMap(personality));
	request.put("context", context);
	request.put("historical_performance", historicalPerformance);

	return webClient
	.post()
	.uri("/ml/cognitive/analyze")
	.bodyValue(request)
	.retrieve()
	.bodyToMono(CognitiveAnalysisResponse.class)
	.block();
	}
	```

	PredictionService.java - Enhanced prediction with cognitive theory:

	```java
	public PredictionDetailDTO getPredictionDetail(UUID taskId) {
	Task task = taskRepository.findById(taskId).orElseThrow();
	User user = getCurrentUser();
	PersonalityProfile personality = personalityRepository.findByUserId(user.getId()).orElse(null);

	// Get ML prediction with SHAP
	MLExplainabilityResponse mlExplanation = mlClientService.explainPrediction(task, personality);

	// Get cognitive theory analysis
	Map<String, Object> context = buildContext(user);
	Map<String, Object> historicalPerf = buildHistoricalPerformance(user);
	CognitiveAnalysisResponse cognitiveAnalysis = mlClientService.analyzeCognitiveFactors(
	task, personality, context, historicalPerf
	);

	// Combine into comprehensive response
	return buildEnhancedPrediction(task, mlExplanation, cognitiveAnalysis);
	}
	```

	### Frontend Integration

	api.ts - Add cognitive analysis endpoint:

	```typescript
	async getCognitiveAnalysis(taskId: string): Promise<CognitiveAnalysisResponse> {
	return this.backendRequest<CognitiveAnalysisResponse>(
	`/predictions/task/${taskId}/cognitive`
	);
	}
	```

	PredictionExplainer.tsx - Display SHAP and cognitive insights:

	```typescript
	export function PredictionExplainer({ taskId }: { taskId: string }) {
	const { data: explanation } = useQuery(['explanation', taskId], () =>
	api.getTaskExplanation(taskId)
	);

	const { data: cognitive } = useQuery(['cognitive', taskId], () =>
	api.getCognitiveAnalysis(taskId)
	);

	return (
	<div className="space-y-6">
	{/* SHAP Waterfall Chart */}
	<ShapWaterfall data={explanation?.feature_attribution?.waterfall_data} />

	{/* Feature Contributions */}
	<FeatureContributions
	positive={explanation?.feature_attribution?.top_positive_features}
	negative={explanation?.feature_attribution?.top_negative_features}
	/>

	{/* Cognitive Load Analysis */}
	<CognitiveLoadCard analysis={cognitive?.cognitive_load_analysis} />

	{/* Flow State Indicator */}
	<FlowStateIndicator analysis={cognitive?.flow_state_analysis} />

	{/* Counterfactual Suggestions */}
	<CounterfactualSuggestions
	scenarios={explanation?.counterfactual_scenarios}
	/>

	{/* Integrated Recommendations */}
	<RecommendationCards
	recommendations={cognitive?.integrated_recommendations}
	/>
	</div>
	);
	}
	```

	## Model Training

	### Train Ensemble Models

	```bash
	cd ML
	python train_all_models.py
	```

	This will:
	1. Load synthetic or real training data
	2. Train ensemble models (GradientBoosting, RandomForest, XGBoost)
	3. Evaluate with cross-validation
	4. Save best models to `trained_models/`
	5. Generate SHAP feature importance plots

	### Collect Feedback for Continuous Learning

	```python
	# In your application, collect ground truth
	from feedback import feedback_collector

	feedback_collector.collect_feedback(
	task_id="task-123",
	predicted_probability=0.75,
	actual_completed=True,
	actual_on_time=True,
	user_rating=4,
	features=feature_vector
	)

	# Periodically retrain
	if feedback_collector.get_feedback_count() >= 1000:
	    from training import train_ensemble_models
	    train_ensemble_models(feedback_data)
	```

	## Testing

	### Unit Tests

	```bash
	cd ML
	pytest tests/test_cognitive_theory.py -v
	pytest tests/test_explainability.py -v
	```

	### Integration Tests

	```bash
	cd server
	mvn test -Dtest=PredictionServiceTest
	mvn test -Dtest=MLClientServiceTest
	```

	### End-to-End Tests

	```bash
	cd client
	pnpm test:e2e tests/predictions.spec.ts
	```

	## Monitoring

	### Prometheus Metrics

	```bash
	# View metrics
	curl http://localhost:8000/metrics
	```

	Key metrics:
	- `ml_http_requests_total` - Total requests
	- `ml_http_request_duration_seconds` - Latency
	- `ml_prediction_accuracy` - Model accuracy
	- `ml_shap_computation_time` - SHAP performance

	### Logging

	```bash
	# ML service logs
	tail -f ML/logs/ml_service.log

	# Check SHAP method usage
	grep "SHAP" ML/logs/ml_service.log | grep "method"
	```

	## Troubleshooting

	### SHAP Library Not Available

	If the SHAP library is not installed, the system automatically falls back to a weighted approximation:

	```
	INFO: SHAP library not installed; using approximation-based explainability.
	```

	To enable real SHAP:
	```bash
	pip install shap
	```

	### Model Not Found

	If trained models are not found, the system uses heuristic predictors:

	```
	WARNING: Trained model not found, using heuristic predictor
	```

	To train models:
	```bash
	cd ML
	python train_all_models.py
	```

	### Slow SHAP Computation

	For faster SHAP computation:
	1. Use TreeExplainer (faster than KernelExplainer)
	2. Enable caching in SHAPExplainer
	3. Use batch processing for multiple predictions

	### Memory Issues

	If running out of memory:
	1. Reduce batch size in predictions
	2. Use model quantization
	3. Increase server memory allocation

	## Performance Optimization

	### Caching

	```python
	# Enable Redis caching for predictions
	REDIS_URL = "redis://localhost:6379"
	cache = redis.Redis.from_url(REDIS_URL)

	@cache_result(ttl=3600)
	def get_prediction(task_id):
	    return predict_task(task_id)
	```

	### Async Processing

	```python
	# Use async for non-blocking predictions
	@app.post("/ml/predict/async")
	async def predict_async(request: TaskPredictionRequest, background_tasks: BackgroundTasks):
	    task_id = str(uuid.uuid4())
	    background_tasks.add_task(process_prediction, task_id, request)
	    return {"task_id": task_id, "status": "processing"}
	```

	### Batch Optimization

	```python
	# Process multiple tasks in one request
	@app.post("/ml/predict/batch")
	async def predict_batch(request: BatchPredictionRequest):
	    # Extract features for all tasks at once
	    feature_matrix = extract_features_batch(request.tasks)

	    # Predict in batch (much faster)
	    predictions = model.predict_proba(feature_matrix)

	    # Compute SHAP in batch
	    shap_values = explainer.shap_values(feature_matrix)

	    return format_batch_response(predictions, shap_values)
	```

	## Security Considerations

	### Input Validation

	All inputs are validated using Pydantic models:

	```python
	class TaskPredictionRequest(BaseModel):
	    title: str = Field(..., min_length=1, max_length=500)
	    complexity: Optional[int] = Field(None, ge=1, le=5)
	    estimated_duration: Optional[int] = Field(None, ge=1, le=1440)
	```

	### Rate Limiting

	```python
	from slowapi import Limiter
	from slowapi.util import get_remote_address

	limiter = Limiter(key_func=get_remote_address)

	@app.post("/ml/predict")
	@limiter.limit("100/minute")
	async def predict(request: TaskPredictionRequest):
	    ...
	```

	### Authentication

	Integrate with backend JWT authentication:

	```python
	from fastapi.security import HTTPBearer
	security = HTTPBearer()

	@app.post("/ml/predict")
	async def predict(request: TaskPredictionRequest, token: str = Depends(security)):
	    # Verify token with backend
	    user = verify_token(token)
	    ...
	```

	## Next Steps

	1. Deploy to Production: Use Docker Compose or Kubernetes
	2. Monitor Performance: Set up Grafana dashboards
	3. Collect Feedback: Implement user feedback collection
	4. Retrain Models: Schedule periodic retraining
	5. A/B Testing: Test different model versions
	6. Expand Features: Add more cognitive theory models

	## Support

	For issues or questions:
	- Check logs: `ML/logs/ml_service.log`
	- Review documentation: `COGNITIVE_THEORY_SHAP_IMPLEMENTATION.md`
	- Run tests: `pytest tests/ -v`
	- Check metrics: `http://localhost:8000/metrics`