| """
|
| ENHANCED ML LEARNER - UNIFIED LEARNING SYSTEM
|
| Integrates context manager, Python analyzer, Google learner, and feedback learner
|
| """
|
|
|
| import logging
|
| from typing import Dict, List, Optional, Any
|
| import json
|
| import os
|
| from datetime import datetime
|
|
|
| from context_manager import get_context_manager
|
| from python_analyzer import get_python_analyzer
|
| from google_search_learner import get_google_learner
|
| from feedback_learner import get_feedback_learner
|
|
|
| logger = logging.getLogger(__name__)
|
|
|
class EnhancedMLLearner:
    """Unified learning system that integrates all learning modules.

    Delegates to four project-local subsystems:
      - context manager: per-session conversation history and context prompts
      - python analyzer: static analysis / improvement of Python code
      - google learner: web-search-driven knowledge acquisition
      - feedback learner: learning from explicit user feedback

    NOTE(review): the four `get_*` factories are project-local; their
    return-value contracts are assumed from how they are used here.
    """

    def __init__(self):
        """Wire up the four learning subsystems and restore saved metrics."""
        self.context_manager = get_context_manager()
        self.python_analyzer = get_python_analyzer()
        self.google_learner = get_google_learner()
        self.feedback_learner = get_feedback_learner()

        # Normalized scores (0.0 - 1.0) per learning dimension;
        # 'overall_improvement' is the mean of the other four.
        self.learning_metrics = {
            'context_awareness': 0.0,
            'python_quality': 0.0,
            'web_learning': 0.0,
            'feedback_quality': 0.0,
            'overall_improvement': 0.0
        }

        self.load_metrics()

    def process_message_with_learning(self, session_id: str, message: str,
                                      response: str, handler: str = '',
                                      intent: str = '', confidence: float = 0.0) -> Dict:
        """Process one message/response pair and extract learning from all modules.

        Records the exchange in the context manager, analyzes any Python
        content in the response, and triggers a web search for knowledge
        questions.

        Returns a dict summarizing what each module learned plus a list of
        per-module improvement entries.
        """
        learning_result = {
            'message': message,
            'response': response,
            'handler': handler,
            'learning': {
                'context': None,
                'python_analysis': None,
                'web_search': None,
                'feedback_ready': False
            },
            'improvements': []
        }

        # Always record the exchange in the conversation context.
        context_msg = self.context_manager.add_message(
            session_id, message, response, intent, confidence
        )
        learning_result['learning']['context'] = {
            'added': True,
            'message_number': context_msg['turn_number'],
            'tokens': context_msg['tokens']
        }

        # Analyze Python content when the exchange looks code-related.
        if self._is_python_question(message) or self._contains_python_code(response):
            analysis = self.python_analyzer.analyze_python_code(response)
            if analysis['syntax_valid']:
                learning_result['learning']['python_analysis'] = {
                    'quality_score': analysis['quality_score'],
                    'complexity': analysis['complexity'],
                    'issues_found': len(analysis['issues']),
                    'suggestions': analysis['suggestions'][:3]
                }
                learning_result['improvements'].append({
                    'type': 'python_quality',
                    # A fresh response has no prior score; 0 means "no baseline".
                    'score_before': 0,
                    'score_after': analysis['quality_score']
                })

        # Knowledge questions additionally trigger web learning.
        if self._is_knowledge_question(message):
            search_result = self.google_learner.search_and_learn(message, num_results=3)
            if search_result.get('results'):
                learning_result['learning']['web_search'] = {
                    'searched': True,
                    'results_found': len(search_result['results']),
                    'learned_topics': self._extract_topics(message)
                }
                learning_result['improvements'].append({
                    'type': 'web_learning',
                    'topics_learned': len(learning_result['learning']['web_search']['learned_topics'])
                })

        # The exchange is recorded, so user feedback can now be attached.
        learning_result['learning']['feedback_ready'] = True

        return learning_result

    def handle_user_feedback(self, session_id: str, message_id: str, handler: str,
                             response: str, feedback: Dict) -> Dict:
        """Record user feedback, apply any learned corrections, refresh metrics.

        ``feedback`` may carry 'type', 'rating', 'comment' and 'corrections'
        keys; missing keys fall back to neutral defaults.

        Returns a summary including the handler's updated quality score.
        """
        feedback_type = feedback.get('type', 'neutral')
        rating = feedback.get('rating')
        comment = feedback.get('comment', '')
        corrections = feedback.get('corrections')

        # Persist the raw feedback; the returned record is not needed here
        # (the original bound it to an unused local).
        self.feedback_learner.record_feedback(
            session_id, message_id, handler, response,
            feedback_type, rating, comment, corrections
        )

        # Only explicit corrections drive an immediate improvement pass.
        if corrections:
            improved = self.feedback_learner.apply_learned_improvements(handler, response)
        else:
            improved = {'applied': []}

        self._update_learning_metrics()

        return {
            'feedback_recorded': True,
            'handler': handler,
            'feedback_type': feedback_type,
            'improvements_applied': len(improved['applied']),
            'quality_score': self.feedback_learner.get_handler_quality_score(handler)
        }

    def get_context_aware_response(self, session_id: str, message: str) -> Dict:
        """Build a context-aware view of ``message`` from recent history.

        Pulls the last five messages of the session and returns the context
        text, detected patterns, a ready-made context prompt, and token/
        optimization status from the context manager.
        """
        context = self.context_manager.get_context(session_id, num_messages=5)

        return {
            'message': message,
            'context': context['context_text'],
            'message_count': context['message_count'],
            'patterns_found': len(context['patterns']),
            'context_prompt': self.context_manager.get_context_aware_prompt(message, session_id),
            'should_optimize': self.context_manager.should_optimize_response(),
            'token_status': self.context_manager.get_token_status()
        }

    def improve_python_code(self, code: str) -> Dict:
        """Analyze ``code`` and return the analysis plus an improved version."""
        analysis = self.python_analyzer.analyze_python_code(code)
        improved = self.python_analyzer.improve_python_code(code)

        return {
            'original': code,
            'analysis': analysis,
            'improved': improved['improved'],
            # Fixed: report the measured pre-improvement score instead of a
            # hard-coded 0, so quality_before/quality_after form a real delta.
            'quality_before': analysis['quality_score'],
            'quality_after': improved['quality_after'],
            'improvements_made': improved['improvements']
        }

    def learn_from_web(self, topic: str, query: str) -> Dict:
        """Search the web for ``query`` and distill knowledge about ``topic``.

        When the search yields results, their snippets are combined and fed
        to the knowledge extractor; otherwise ``knowledge`` is None.
        """
        search_result = self.google_learner.search_and_learn(query)

        if search_result.get('results'):
            knowledge = self.google_learner.extract_knowledge_from_text(
                self._combine_results(search_result['results']),
                topic
            )
        else:
            knowledge = None

        return {
            'topic': topic,
            'query': query,
            'search_success': search_result.get('success'),
            'results_found': len(search_result.get('results', [])),
            'knowledge': knowledge,
            'learning_stats': self.google_learner.get_learning_stats()
        }

    def generate_improved_python_from_requirement(self, requirement: str) -> Dict:
        """Generate Python code from a natural-language requirement and grade it.

        On success the analyzer's result dict is augmented with an
        'analysis' entry and a 'quality' score; on failure it is returned
        unchanged.
        """
        result = self.python_analyzer.generate_python_from_requirement(requirement)

        if result['success']:
            analysis = self.python_analyzer.analyze_python_code(result['code'])
            result['analysis'] = analysis
            result['quality'] = analysis['quality_score']

        return result

    def get_overall_learning_status(self) -> Dict:
        """Aggregate a timestamped status snapshot across all learning modules."""
        context_summary = self.context_manager.get_summary()
        feedback_summary = self.feedback_learner.get_feedback_summary()
        google_stats = self.google_learner.get_learning_stats()
        python_patterns = len(self.python_analyzer.learned_patterns)

        return {
            'context': context_summary,
            'feedback_quality': feedback_summary,
            'web_learning': google_stats,
            'python_patterns_learned': python_patterns,
            'learning_metrics': self.learning_metrics,
            'timestamp': datetime.now().isoformat()
        }

    def _is_python_question(self, message: str) -> bool:
        """Heuristic: does ``message`` look Python-related?

        NOTE(review): substring matching means the short keyword 'py' also
        matches inside unrelated words (e.g. 'happy', 'copy') — confirm this
        loose matching is intended before tightening it.
        """
        python_keywords = ['python', 'code', 'function', 'class', 'def', 'py']
        return any(kw in message.lower() for kw in python_keywords)

    def _contains_python_code(self, text: str) -> bool:
        """Heuristic: does ``text`` contain Python-looking statements?"""
        python_patterns = ['def ', 'class ', 'import ', 'for ', 'while ', 'if ']
        return any(pattern in text for pattern in python_patterns)

    def _is_knowledge_question(self, message: str) -> bool:
        """Heuristic: is ``message`` a knowledge-seeking question?"""
        # 'what is' is redundant with 'what' (substring match) but harmless.
        keywords = ['what', 'how', 'why', 'where', 'when', 'explain', 'tell me', 'what is']
        return any(kw in message.lower() for kw in keywords)

    def _extract_topics(self, text: str) -> List[str]:
        """Map ``text`` onto known topic buckets via keyword substring matching.

        Returns matched bucket names in the dict's declaration order.
        """
        topics = []

        topic_keywords = {
            'python': ['python', 'code', 'programming'],
            'web': ['web', 'html', 'css', 'javascript'],
            'database': ['database', 'sql', 'data'],
            'machine_learning': ['machine learning', 'ml', 'neural', 'ai'],
            'general': ['general', 'common', 'basic']
        }

        text_lower = text.lower()
        for topic, keywords in topic_keywords.items():
            if any(kw in text_lower for kw in keywords):
                topics.append(topic)

        return topics

    def _combine_results(self, results: List) -> str:
        """Join the snippets of up to five search-result dicts into one string.

        Entries that are not dicts or lack a 'snippet' key are skipped.
        """
        combined = []
        for result in results[:5]:
            if isinstance(result, dict) and 'snippet' in result:
                combined.append(result['snippet'])
        return ' '.join(combined)

    def _update_learning_metrics(self):
        """Recompute the normalized per-module metrics and the overall mean."""
        # Context awareness saturates at 100 recorded interactions.
        context_summary = self.context_manager.get_summary()
        self.learning_metrics['context_awareness'] = min(
            1.0,
            context_summary['total_interactions'] / 100.0
        )

        # Average quality of learned Python patterns, rescaled 0-100 -> 0-1.
        if self.python_analyzer.learned_patterns:
            avg_quality = sum(
                p['quality'] for p in self.python_analyzer.learned_patterns.values()
            ) / len(self.python_analyzer.learned_patterns)
            self.learning_metrics['python_quality'] = avg_quality / 100.0

        # Web learning saturates at 50 learned topics.
        google_stats = self.google_learner.get_learning_stats()
        self.learning_metrics['web_learning'] = min(
            1.0,
            google_stats['topics_learned'] / 50.0
        )

        # Only overwrite feedback quality once any feedback has been scored.
        feedback_summary = self.feedback_learner.get_feedback_summary()
        if feedback_summary['overall_quality'] > 0:
            self.learning_metrics['feedback_quality'] = feedback_summary['overall_quality']

        # Overall improvement = mean of the four component metrics.
        scores = [v for k, v in self.learning_metrics.items() if k != 'overall_improvement']
        self.learning_metrics['overall_improvement'] = sum(scores) / len(scores) if scores else 0

    def save_all_learning(self):
        """Persist every module's learning data plus the aggregate metrics."""
        self.context_manager.save_context()
        self.python_analyzer.save_learned_patterns()
        self.google_learner.save_knowledge_cache()
        self.feedback_learner.save_feedback_history()
        self._save_metrics()

        logger.info("All learning data saved")

    def _save_metrics(self):
        """Write the metrics dict (with a timestamp) to the JSON metrics file."""
        try:
            os.makedirs('noahski_data', exist_ok=True)

            metrics_file = 'noahski_data/learning_metrics.json'
            with open(metrics_file, 'w', encoding='utf-8') as f:
                json.dump({
                    'metrics': self.learning_metrics,
                    'timestamp': datetime.now().isoformat()
                }, f, indent=2)

            logger.info("Learning metrics saved")
        except Exception as e:
            # Persistence is best-effort: log the failure and keep running.
            logger.error(f"Error saving metrics: {e}")

    def load_metrics(self):
        """Load previously saved metrics, keeping in-memory defaults if absent."""
        try:
            metrics_file = 'noahski_data/learning_metrics.json'
            if os.path.exists(metrics_file):
                with open(metrics_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)

                self.learning_metrics = data.get('metrics', self.learning_metrics)
                logger.info("Learning metrics loaded")
        except Exception as e:
            # Best-effort: a corrupt or unreadable file must not break startup.
            logger.error(f"Error loading metrics: {e}")
|
|
|
|
|
|
|
# Module-level handle for the shared learner; populated lazily.
_enhanced_learner = None


def get_enhanced_ml_learner() -> EnhancedMLLearner:
    """Return the process-wide EnhancedMLLearner, constructing it on first call."""
    global _enhanced_learner
    if _enhanced_learner is not None:
        return _enhanced_learner
    _enhanced_learner = EnhancedMLLearner()
    return _enhanced_learner
|
|
|