Spaces:
Sleeping
Sleeping
Update self_learning_bot.py
Browse files- self_learning_bot.py +389 -361
self_learning_bot.py
CHANGED
|
@@ -8,345 +8,243 @@ import math
|
|
| 8 |
import hashlib
|
| 9 |
import requests
|
| 10 |
from collections import deque
|
| 11 |
-
import time
|
| 12 |
-
from typing import Dict, List, Any, Tuple
|
| 13 |
from bs4 import BeautifulSoup
|
| 14 |
import urllib.parse
|
|
|
|
| 15 |
|
| 16 |
-
class
|
| 17 |
-
def __init__(self, state_file="/tmp/
|
| 18 |
self.state_file = state_file
|
| 19 |
-
self.conversation_memory = deque(maxlen=
|
| 20 |
self.learned_patterns = {}
|
| 21 |
self.response_memory = {}
|
| 22 |
-
self.reward_history = deque(maxlen=
|
| 23 |
-
self.web_cache = {}
|
| 24 |
|
| 25 |
# Learning parameters
|
| 26 |
self.learning_rate = 0.3
|
| 27 |
self.exploration_rate = 0.1
|
| 28 |
|
| 29 |
-
# Web
|
| 30 |
-
self.search_timeout =
|
| 31 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
# Load existing state
|
| 34 |
self.load_state()
|
| 35 |
|
| 36 |
-
print(f"
|
| 37 |
|
| 38 |
-
def
|
| 39 |
-
"""Main chat method
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
# Fallback to learned responses
|
| 53 |
response = self._get_learned_response(user_input)
|
| 54 |
-
self._store_interaction(user_input, response, 0.5,
|
| 55 |
-
return response, False
|
| 56 |
|
| 57 |
-
def _should_search_web(self, user_input
|
| 58 |
"""Determine if we should search the web for this query"""
|
| 59 |
input_lower = user_input.lower()
|
| 60 |
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
'
|
| 64 |
-
'
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
'what is', 'who is', 'where is', 'when was', 'how to', 'why does',
|
| 70 |
-
'explain', 'tell me about', 'information about', 'details about'
|
| 71 |
-
]
|
| 72 |
-
|
| 73 |
-
# Specific topics that change frequently
|
| 74 |
-
dynamic_topics = [
|
| 75 |
-
'weather', 'temperature', 'forecast', 'stock', 'price', 'crypto',
|
| 76 |
-
'sports', 'game', 'score', 'election', 'politics', 'celebrity'
|
| 77 |
]
|
| 78 |
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
-
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
try:
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
except Exception as e:
|
| 111 |
-
print(f"
|
| 112 |
|
| 113 |
-
return
|
| 114 |
|
| 115 |
-
def
|
| 116 |
-
"""Search
|
| 117 |
try:
|
| 118 |
-
#
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
"Accept": "application/json",
|
| 122 |
-
"X-Subscription-Token": "BSA-Your-Free-Key-Here" # Get free key from brave.com
|
| 123 |
-
}
|
| 124 |
-
params = {
|
| 125 |
-
"q": query,
|
| 126 |
-
"count": self.max_results
|
| 127 |
-
}
|
| 128 |
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
data = response.json()
|
| 132 |
-
results = []
|
| 133 |
-
for web_result in data.get('web', {}).get('results', [])[:self.max_results]:
|
| 134 |
-
results.append({
|
| 135 |
-
'title': web_result.get('title', ''),
|
| 136 |
-
'url': web_result.get('url', ''),
|
| 137 |
-
'description': web_result.get('description', '')
|
| 138 |
-
})
|
| 139 |
-
return {'results': results}
|
| 140 |
-
except:
|
| 141 |
-
pass
|
| 142 |
-
return {}
|
| 143 |
-
|
| 144 |
-
def _search_duckduckgo(self, query: str) -> Dict[str, Any]:
|
| 145 |
-
"""Fallback to DuckDuckGo instant answers and web results"""
|
| 146 |
-
try:
|
| 147 |
-
# DuckDuckGo Instant Answer API
|
| 148 |
-
ia_url = f"https://api.duckduckgo.com/"
|
| 149 |
-
params = {
|
| 150 |
-
"q": query,
|
| 151 |
-
"format": "json",
|
| 152 |
-
"no_html": "1",
|
| 153 |
-
"skip_disambig": "1"
|
| 154 |
-
}
|
| 155 |
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
data = response.json()
|
| 159 |
-
|
| 160 |
-
# Check for instant answer
|
| 161 |
-
if data.get('AbstractText'):
|
| 162 |
-
return {
|
| 163 |
-
'results': [{
|
| 164 |
-
'title': data.get('Heading', 'Instant Answer'),
|
| 165 |
-
'url': data.get('AbstractURL', ''),
|
| 166 |
-
'description': data.get('AbstractText', '')
|
| 167 |
-
}]
|
| 168 |
-
}
|
| 169 |
-
|
| 170 |
-
# Check for related topics
|
| 171 |
-
if data.get('RelatedTopics'):
|
| 172 |
-
for topic in data['RelatedTopics'][:self.max_results]:
|
| 173 |
-
if topic.get('Text'):
|
| 174 |
-
return {
|
| 175 |
-
'results': [{
|
| 176 |
-
'title': topic.get('FirstURL', '').split('/')[-1].replace('_', ' '),
|
| 177 |
-
'url': topic.get('FirstURL', ''),
|
| 178 |
-
'description': topic.get('Text', '')
|
| 179 |
-
}]
|
| 180 |
-
}
|
| 181 |
|
| 182 |
-
|
| 183 |
-
|
|
|
|
|
|
|
|
|
|
| 184 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
except Exception as e:
|
| 186 |
-
print(f"
|
| 187 |
-
|
|
|
|
| 188 |
|
| 189 |
-
def
|
| 190 |
-
"""
|
| 191 |
try:
|
| 192 |
url = f"https://html.duckduckgo.com/html/?q={urllib.parse.quote(query)}"
|
| 193 |
headers = {
|
| 194 |
-
'User-Agent':
|
| 195 |
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
| 196 |
-
'Accept-Language': 'en-US,en;q=0.5',
|
| 197 |
}
|
| 198 |
|
| 199 |
response = requests.get(url, headers=headers, timeout=self.search_timeout)
|
| 200 |
soup = BeautifulSoup(response.text, 'html.parser')
|
| 201 |
|
| 202 |
results = []
|
| 203 |
-
for result in soup.find_all('div', class_='result')[:
|
| 204 |
title_elem = result.find('a', class_='result__a')
|
| 205 |
snippet_elem = result.find('a', class_='result__snippet')
|
| 206 |
|
| 207 |
if title_elem and snippet_elem:
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
'description': snippet_elem.get_text().strip()
|
| 212 |
-
})
|
| 213 |
|
| 214 |
-
return
|
| 215 |
|
| 216 |
except Exception as e:
|
| 217 |
-
print(f"
|
| 218 |
-
return
|
| 219 |
-
|
| 220 |
-
def _extract_meaningful_content(self, result: Dict) -> str:
|
| 221 |
-
"""Extract meaningful content from search result"""
|
| 222 |
-
title = result.get('title', '')
|
| 223 |
-
description = result.get('description', '')
|
| 224 |
-
url = result.get('url', '')
|
| 225 |
-
|
| 226 |
-
# Combine title and description for context
|
| 227 |
-
content_parts = []
|
| 228 |
-
if title:
|
| 229 |
-
content_parts.append(title)
|
| 230 |
-
if description:
|
| 231 |
-
content_parts.append(description)
|
| 232 |
-
|
| 233 |
-
full_content = ". ".join(content_parts)
|
| 234 |
-
|
| 235 |
-
# Clean up the content
|
| 236 |
-
full_content = re.sub(r'\[\d+\]', '', full_content) # Remove citation numbers
|
| 237 |
-
full_content = re.sub(r'\s+', ' ', full_content) # Normalize whitespace
|
| 238 |
-
|
| 239 |
-
return full_content.strip()
|
| 240 |
-
|
| 241 |
-
def _create_web_answer(self, user_input: str, web_content: Dict) -> str:
|
| 242 |
-
"""Create an actual answer using web content"""
|
| 243 |
-
content = web_content.get('content', '')
|
| 244 |
-
source = web_content.get('source', '')
|
| 245 |
-
|
| 246 |
-
if not content:
|
| 247 |
-
return "I searched but couldn't find specific information about that. Could you try rephrasing your question?"
|
| 248 |
-
|
| 249 |
-
# Analyze the type of question and create appropriate response
|
| 250 |
-
question_type = self._analyze_question_type(user_input)
|
| 251 |
-
|
| 252 |
-
if question_type == "factual":
|
| 253 |
-
return self._format_factual_answer(user_input, content, source)
|
| 254 |
-
elif question_type == "current_events":
|
| 255 |
-
return self._format_current_events_answer(user_input, content, source)
|
| 256 |
-
elif question_type == "how_to":
|
| 257 |
-
return self._format_how_to_answer(user_input, content, source)
|
| 258 |
-
elif question_type == "weather":
|
| 259 |
-
return self._format_weather_answer(user_input, content, source)
|
| 260 |
-
else:
|
| 261 |
-
return self._format_general_answer(user_input, content, source)
|
| 262 |
-
|
| 263 |
-
def _analyze_question_type(self, user_input: str) -> str:
|
| 264 |
-
"""Analyze what type of question this is"""
|
| 265 |
-
input_lower = user_input.lower()
|
| 266 |
-
|
| 267 |
-
if any(word in input_lower for word in ['weather', 'temperature', 'forecast']):
|
| 268 |
-
return "weather"
|
| 269 |
-
elif any(word in input_lower for word in ['how to', 'how do i', 'tutorial', 'guide']):
|
| 270 |
-
return "how_to"
|
| 271 |
-
elif any(word in input_lower for word in ['news', 'current', 'recent', 'breaking', 'today']):
|
| 272 |
-
return "current_events"
|
| 273 |
-
elif any(word in input_lower for word in ['what is', 'who is', 'where is', 'when was']):
|
| 274 |
-
return "factual"
|
| 275 |
-
else:
|
| 276 |
-
return "general"
|
| 277 |
-
|
| 278 |
-
def _format_factual_answer(self, question: str, content: str, source: str) -> str:
|
| 279 |
-
"""Format factual answers"""
|
| 280 |
-
# Extract the most relevant sentence
|
| 281 |
-
sentences = content.split('. ')
|
| 282 |
-
relevant_sentence = sentences[0] if sentences else content
|
| 283 |
-
|
| 284 |
-
answer = f"**According to web sources:** {relevant_sentence}"
|
| 285 |
-
|
| 286 |
-
if len(sentences) > 1:
|
| 287 |
-
additional_info = '. '.join(sentences[1:3])
|
| 288 |
-
answer += f" {additional_info}."
|
| 289 |
-
|
| 290 |
-
return answer
|
| 291 |
|
| 292 |
-
def
|
| 293 |
-
"""
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
instructions.append(sentence)
|
| 311 |
-
|
| 312 |
-
if instructions:
|
| 313 |
-
answer = "**Here's what I found:**\n" + "\n".join(f"• {inst}" for inst in instructions[:3])
|
| 314 |
-
else:
|
| 315 |
-
answer = f"**Based on available information:** {sentences[0] if sentences else content}"
|
| 316 |
-
|
| 317 |
-
return answer
|
| 318 |
-
|
| 319 |
-
def _format_weather_answer(self, question: str, content: str, source: str) -> str:
|
| 320 |
-
"""Format weather-related answers"""
|
| 321 |
-
# Extract location from question
|
| 322 |
-
location = self._extract_location(question)
|
| 323 |
-
|
| 324 |
-
# Look for temperature and conditions in content
|
| 325 |
-
temp_match = re.search(r'(\d+)\s*°?[CF]', content)
|
| 326 |
-
condition_match = re.search(r'(sunny|rain|cloud|snow|clear|storm)', content.lower())
|
| 327 |
-
|
| 328 |
-
answer_parts = []
|
| 329 |
-
if location:
|
| 330 |
-
answer_parts.append(f"**Weather for {location}:**")
|
| 331 |
-
|
| 332 |
-
if temp_match:
|
| 333 |
-
answer_parts.append(f"Temperature around {temp_match.group(1)}°F")
|
| 334 |
-
|
| 335 |
-
if condition_match:
|
| 336 |
-
answer_parts.append(f"Conditions: {condition_match.group(1).title()}")
|
| 337 |
|
| 338 |
-
|
| 339 |
-
return " ".join(answer_parts) + f"\n*Source: {source}*" if source else ""
|
| 340 |
-
else:
|
| 341 |
-
return f"**Weather information:** {content[:300]}"
|
| 342 |
-
|
| 343 |
-
def _format_general_answer(self, question: str, content: str, source: str) -> str:
|
| 344 |
-
"""Format general answers"""
|
| 345 |
-
return f"**I found this information:** {content[:500]}" + ("..." if len(content) > 500 else "")
|
| 346 |
|
| 347 |
-
def _extract_location(self, text
|
| 348 |
-
"""Extract location from text
|
| 349 |
-
# Common city/country names
|
| 350 |
locations = {
|
| 351 |
'new york', 'london', 'paris', 'tokyo', 'berlin', 'sydney', 'toronto',
|
| 352 |
'mumbai', 'beijing', 'moscow', 'dubai', 'rome', 'madrid', 'amsterdam',
|
|
@@ -356,15 +254,115 @@ class WebEnhancedBot:
|
|
| 356 |
text_lower = text.lower()
|
| 357 |
for location in locations:
|
| 358 |
if location in text_lower:
|
| 359 |
-
return location
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
|
| 361 |
return ""
|
| 362 |
|
| 363 |
-
def
|
| 364 |
-
"""
|
| 365 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 366 |
|
| 367 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 368 |
similar_patterns = self._find_similar_patterns(user_input)
|
| 369 |
if similar_patterns:
|
| 370 |
best_pattern = max(similar_patterns, key=lambda x: x[1]['score'])
|
|
@@ -372,106 +370,116 @@ class WebEnhancedBot:
|
|
| 372 |
return best_pattern[1]['response']
|
| 373 |
|
| 374 |
# Generate contextual response
|
| 375 |
-
return self._generate_contextual_response(user_input
|
| 376 |
|
| 377 |
-
def
|
| 378 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
if context['has_question']:
|
| 380 |
responses = [
|
| 381 |
-
"That's an interesting question. Based on
|
| 382 |
-
"I
|
| 383 |
-
"That's a great question.
|
| 384 |
]
|
| 385 |
-
|
| 386 |
|
| 387 |
if context['topics']:
|
| 388 |
-
|
|
|
|
| 389 |
else:
|
| 390 |
-
return
|
| 391 |
-
|
| 392 |
-
# Conversational responses
|
| 393 |
-
|
| 394 |
-
"I
|
| 395 |
-
"That's interesting.
|
| 396 |
-
"I
|
| 397 |
-
"That's fascinating. I'm learning from our conversation."
|
|
|
|
| 398 |
]
|
| 399 |
-
|
|
|
|
| 400 |
|
| 401 |
-
def _analyze_input(self, text
|
| 402 |
-
"""Analyze user input"""
|
|
|
|
|
|
|
| 403 |
return {
|
| 404 |
-
'words':
|
| 405 |
'topics': self._extract_topics(text),
|
| 406 |
'has_question': '?' in text,
|
| 407 |
-
'sentiment': self._analyze_sentiment(text)
|
|
|
|
| 408 |
}
|
| 409 |
|
| 410 |
-
def _extract_topics(self, text
|
| 411 |
"""Extract topics from text"""
|
| 412 |
topics = []
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
'
|
| 417 |
-
'
|
| 418 |
-
'
|
|
|
|
|
|
|
| 419 |
}
|
| 420 |
|
| 421 |
-
|
| 422 |
-
for topic, keywords in topic_keywords.items():
|
| 423 |
if any(keyword in text_lower for keyword in keywords):
|
| 424 |
topics.append(topic)
|
| 425 |
|
| 426 |
return topics
|
| 427 |
|
| 428 |
-
def _analyze_sentiment(self, text
|
| 429 |
"""Basic sentiment analysis"""
|
| 430 |
-
|
| 431 |
-
|
| 432 |
|
| 433 |
-
|
| 434 |
-
|
|
|
|
| 435 |
|
| 436 |
-
if
|
| 437 |
return "positive"
|
| 438 |
-
elif
|
| 439 |
return "negative"
|
| 440 |
else:
|
| 441 |
return "neutral"
|
| 442 |
|
| 443 |
-
def
|
| 444 |
-
"""Find similar learned patterns"""
|
| 445 |
-
similar = []
|
| 446 |
-
text_words = set(text.split())
|
| 447 |
-
|
| 448 |
-
for pattern, data in self.learned_patterns.items():
|
| 449 |
-
pattern_words = set(pattern.split())
|
| 450 |
-
similarity = len(text_words & pattern_words) / len(text_words | pattern_words)
|
| 451 |
-
if similarity > 0.3:
|
| 452 |
-
similar.append((pattern, data))
|
| 453 |
-
|
| 454 |
-
return similar
|
| 455 |
-
|
| 456 |
-
def _store_interaction(self, user_input: str, response: str, reward: float, web_context: dict):
|
| 457 |
"""Store interaction in memory"""
|
| 458 |
interaction = {
|
| 459 |
'input': user_input,
|
| 460 |
'response': response,
|
| 461 |
'reward': reward,
|
| 462 |
-
'
|
| 463 |
'timestamp': datetime.now().isoformat()
|
| 464 |
}
|
| 465 |
|
| 466 |
self.conversation_memory.append(interaction)
|
| 467 |
-
|
| 468 |
-
# Learn from this interaction
|
| 469 |
self._update_learning(user_input, response, reward)
|
| 470 |
|
| 471 |
-
def _update_learning(self, user_input
|
| 472 |
"""Update learning from interaction"""
|
| 473 |
-
# Extract key phrases for pattern learning
|
| 474 |
-
words = [
|
| 475 |
if words:
|
| 476 |
pattern = ' '.join(words)
|
| 477 |
|
|
@@ -482,58 +490,78 @@ class WebEnhancedBot:
|
|
| 482 |
'count': 1
|
| 483 |
}
|
| 484 |
else:
|
| 485 |
-
|
| 486 |
-
new_score = (
|
| 487 |
self.learned_patterns[pattern]['score'] = new_score
|
| 488 |
self.learned_patterns[pattern]['count'] += 1
|
| 489 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 490 |
# Store reward
|
| 491 |
self.reward_history.append(reward)
|
| 492 |
|
| 493 |
-
#
|
| 494 |
if len(self.conversation_memory) % 10 == 0:
|
| 495 |
self.save_state()
|
| 496 |
|
| 497 |
-
def learn_from_feedback(self, user_input
|
| 498 |
-
"""Learn from explicit feedback"""
|
| 499 |
if self.conversation_memory:
|
| 500 |
-
recent
|
| 501 |
-
|
| 502 |
-
|
|
|
|
| 503 |
|
| 504 |
-
def get_learning_stats(self)
|
| 505 |
"""Get learning statistics"""
|
| 506 |
recent_rewards = list(self.reward_history)[-10:] or [0.5]
|
| 507 |
|
| 508 |
return {
|
| 509 |
'patterns': len(self.learned_patterns),
|
| 510 |
'memory_size': len(self.conversation_memory),
|
| 511 |
-
'avg_score': np.mean(recent_rewards),
|
| 512 |
'recent_rewards': len([r for r in recent_rewards if r > 0.7])
|
| 513 |
}
|
| 514 |
|
| 515 |
def save_state(self):
|
| 516 |
-
"""Save learning state"""
|
| 517 |
try:
|
| 518 |
state = {
|
| 519 |
'learned_patterns': self.learned_patterns,
|
|
|
|
| 520 |
'conversation_memory': list(self.conversation_memory),
|
| 521 |
-
'reward_history': list(self.reward_history)
|
|
|
|
| 522 |
}
|
| 523 |
with open(self.state_file, 'w') as f:
|
| 524 |
json.dump(state, f, indent=2)
|
| 525 |
except Exception as e:
|
| 526 |
-
print(f"
|
| 527 |
|
| 528 |
def load_state(self):
|
| 529 |
-
"""Load learning state"""
|
| 530 |
try:
|
| 531 |
if os.path.exists(self.state_file):
|
| 532 |
with open(self.state_file, 'r') as f:
|
| 533 |
state = json.load(f)
|
| 534 |
|
| 535 |
self.learned_patterns = state.get('learned_patterns', {})
|
| 536 |
-
self.
|
| 537 |
-
self.
|
| 538 |
-
|
| 539 |
-
|
|
|
|
|
|
|
|
|
| 8 |
import hashlib
|
| 9 |
import requests
|
| 10 |
from collections import deque
|
|
|
|
|
|
|
| 11 |
from bs4 import BeautifulSoup
|
| 12 |
import urllib.parse
|
| 13 |
+
import feedparser
|
| 14 |
|
| 15 |
+
class FreeWebBot:
|
| 16 |
+
def __init__(self, state_file="/tmp/free_bot_state.json"):
|
| 17 |
self.state_file = state_file
|
| 18 |
+
self.conversation_memory = deque(maxlen=200)
|
| 19 |
self.learned_patterns = {}
|
| 20 |
self.response_memory = {}
|
| 21 |
+
self.reward_history = deque(maxlen=300)
|
|
|
|
| 22 |
|
| 23 |
# Learning parameters
|
| 24 |
self.learning_rate = 0.3
|
| 25 |
self.exploration_rate = 0.1
|
| 26 |
|
| 27 |
+
# Web settings
|
| 28 |
+
self.search_timeout = 10
|
| 29 |
+
self.user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
| 30 |
+
|
| 31 |
+
# Free news sources (RSS feeds)
|
| 32 |
+
self.news_feeds = {
|
| 33 |
+
"general": [
|
| 34 |
+
"https://feeds.bbci.co.uk/news/rss.xml",
|
| 35 |
+
"https://rss.cnn.com/rss/edition.rss",
|
| 36 |
+
"https://feeds.reuters.com/reuters/topNews",
|
| 37 |
+
],
|
| 38 |
+
"technology": [
|
| 39 |
+
"https://feeds.arstechnica.com/arstechnica/index",
|
| 40 |
+
"https://techcrunch.com/feed/",
|
| 41 |
+
],
|
| 42 |
+
"sports": [
|
| 43 |
+
"https://feeds.espn.com/espn/rss/news",
|
| 44 |
+
]
|
| 45 |
+
}
|
| 46 |
|
| 47 |
# Load existing state
|
| 48 |
self.load_state()
|
| 49 |
|
| 50 |
+
print(f"Free web bot initialized with {len(self.learned_patterns)} learned patterns")
|
| 51 |
|
| 52 |
+
def chat(self, user_input, use_web_search=True):
|
| 53 |
+
"""Main chat method - returns (response, search_used, sources)"""
|
| 54 |
+
user_input = user_input.strip()
|
| 55 |
+
if not user_input:
|
| 56 |
+
return "Please enter a message.", False, []
|
| 57 |
+
|
| 58 |
+
# First, try factual responses
|
| 59 |
+
factual_response = self._get_factual_response(user_input)
|
| 60 |
+
if factual_response:
|
| 61 |
+
return factual_response, False, []
|
| 62 |
+
|
| 63 |
+
# Try free web search for current information
|
| 64 |
+
if use_web_search and self._should_search_web(user_input):
|
| 65 |
+
web_content, sources = self._free_web_search(user_input)
|
| 66 |
+
if web_content and web_content.strip():
|
| 67 |
+
response = self._create_web_answer(user_input, web_content, sources)
|
| 68 |
+
self._store_interaction(user_input, response, 0.8, sources)
|
| 69 |
+
return response, True, sources
|
| 70 |
|
| 71 |
# Fallback to learned responses
|
| 72 |
response = self._get_learned_response(user_input)
|
| 73 |
+
self._store_interaction(user_input, response, 0.5, [])
|
| 74 |
+
return response, False, []
|
| 75 |
|
| 76 |
+
def _should_search_web(self, user_input):
|
| 77 |
"""Determine if we should search the web for this query"""
|
| 78 |
input_lower = user_input.lower()
|
| 79 |
|
| 80 |
+
search_triggers = [
|
| 81 |
+
'news', 'current', 'latest', 'today', 'recent', 'update',
|
| 82 |
+
'weather', 'forecast', 'temperature',
|
| 83 |
+
'sports', 'score', 'game', 'match',
|
| 84 |
+
'stock', 'crypto', 'bitcoin', 'price',
|
| 85 |
+
'how to', 'tutorial', 'guide', 'explain',
|
| 86 |
+
'what is', 'who is', 'where is', 'when was',
|
| 87 |
+
'breaking', 'headlines'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
]
|
| 89 |
|
| 90 |
+
return any(trigger in input_lower for trigger in search_triggers)
|
| 91 |
+
|
| 92 |
+
def _free_web_search(self, query):
|
| 93 |
+
"""Perform free web search using multiple methods"""
|
| 94 |
+
sources = []
|
| 95 |
+
all_content = []
|
| 96 |
+
|
| 97 |
+
# Method 1: RSS Feeds for news/current events
|
| 98 |
+
if any(topic in query.lower() for topic in ['news', 'current', 'latest', 'today', 'breaking']):
|
| 99 |
+
feed_content = self._search_rss_feeds(query)
|
| 100 |
+
all_content.extend(feed_content)
|
| 101 |
+
if feed_content:
|
| 102 |
+
sources.append("News Feeds")
|
| 103 |
+
|
| 104 |
+
# Method 2: Wikipedia for factual information
|
| 105 |
+
if any(word in query.lower() for word in ['what is', 'who is', 'explain', 'definition']):
|
| 106 |
+
wiki_content = self._search_wikipedia(query)
|
| 107 |
+
if wiki_content:
|
| 108 |
+
all_content.append(wiki_content)
|
| 109 |
+
sources.append("Wikipedia")
|
| 110 |
+
|
| 111 |
+
# Method 3: DuckDuckGo for general search
|
| 112 |
+
ddg_content = self._search_duckduckgo(query)
|
| 113 |
+
if ddg_content:
|
| 114 |
+
all_content.append(ddg_content)
|
| 115 |
+
sources.append("Web Search")
|
| 116 |
+
|
| 117 |
+
# Method 4: Weather information
|
| 118 |
+
if any(word in query.lower() for word in ['weather', 'temperature', 'forecast']):
|
| 119 |
+
weather_content = self._get_weather_info(query)
|
| 120 |
+
if weather_content:
|
| 121 |
+
all_content.append(weather_content)
|
| 122 |
+
sources.append("Weather Service")
|
| 123 |
|
| 124 |
+
# Combine all content
|
| 125 |
+
combined_content = " ".join(all_content)
|
| 126 |
+
return combined_content, sources
|
| 127 |
+
|
| 128 |
+
def _search_rss_feeds(self, query):
|
| 129 |
+
"""Search RSS feeds for current information"""
|
| 130 |
+
content = []
|
| 131 |
+
query_words = query.lower().split()
|
| 132 |
+
|
| 133 |
+
# Determine feed category based on query
|
| 134 |
+
category = "general"
|
| 135 |
+
if any(word in query.lower() for word in ['tech', 'technology', 'ai', 'computer', 'software']):
|
| 136 |
+
category = "technology"
|
| 137 |
+
elif any(word in query.lower() for word in ['sports', 'game', 'score', 'match', 'team']):
|
| 138 |
+
category = "sports"
|
| 139 |
+
|
| 140 |
try:
|
| 141 |
+
for feed_url in self.news_feeds.get(category, self.news_feeds["general"]):
|
| 142 |
+
try:
|
| 143 |
+
feed = feedparser.parse(feed_url)
|
| 144 |
+
for entry in feed.entries[:5]: # Top 5 entries
|
| 145 |
+
title = entry.get('title', '')
|
| 146 |
+
summary = entry.get('summary', '')
|
| 147 |
+
|
| 148 |
+
# Check if entry matches query
|
| 149 |
+
entry_text = f"{title} {summary}".lower()
|
| 150 |
+
if any(word in entry_text for word in query_words) or len(query_words) < 2:
|
| 151 |
+
content.append(f"{title}: {summary}")
|
| 152 |
+
|
| 153 |
+
if len(content) >= 3: # Limit to 3 results
|
| 154 |
+
break
|
| 155 |
+
except Exception as e:
|
| 156 |
+
print(f"Error parsing feed {feed_url}: {e}")
|
| 157 |
+
continue
|
| 158 |
+
|
| 159 |
+
if content:
|
| 160 |
+
break
|
| 161 |
+
|
| 162 |
except Exception as e:
|
| 163 |
+
print(f"RSS feed error: {e}")
|
| 164 |
|
| 165 |
+
return content
|
| 166 |
|
| 167 |
+
def _search_wikipedia(self, query):
|
| 168 |
+
"""Search Wikipedia for factual information"""
|
| 169 |
try:
|
| 170 |
+
# Clean query for Wikipedia
|
| 171 |
+
clean_query = re.sub(r'(what is|who is|explain|definition of)', '', query, flags=re.IGNORECASE).strip()
|
| 172 |
+
clean_query = clean_query.replace('?', '').strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
+
if not clean_query:
|
| 175 |
+
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
|
| 177 |
+
# Wikipedia API (completely free)
|
| 178 |
+
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{urllib.parse.quote(clean_query)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
|
| 180 |
+
response = requests.get(
|
| 181 |
+
url,
|
| 182 |
+
headers={'User-Agent': self.user_agent},
|
| 183 |
+
timeout=self.search_timeout
|
| 184 |
+
)
|
| 185 |
|
| 186 |
+
if response.status_code == 200:
|
| 187 |
+
data = response.json()
|
| 188 |
+
extract = data.get('extract', '')
|
| 189 |
+
if extract:
|
| 190 |
+
return f"According to Wikipedia: {extract}"
|
| 191 |
+
|
| 192 |
except Exception as e:
|
| 193 |
+
print(f"Wikipedia search error: {e}")
|
| 194 |
+
|
| 195 |
+
return ""
|
| 196 |
|
| 197 |
+
def _search_duckduckgo(self, query):
|
| 198 |
+
"""Search DuckDuckGo for general information"""
|
| 199 |
try:
|
| 200 |
url = f"https://html.duckduckgo.com/html/?q={urllib.parse.quote(query)}"
|
| 201 |
headers = {
|
| 202 |
+
'User-Agent': self.user_agent,
|
| 203 |
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
|
|
|
| 204 |
}
|
| 205 |
|
| 206 |
response = requests.get(url, headers=headers, timeout=self.search_timeout)
|
| 207 |
soup = BeautifulSoup(response.text, 'html.parser')
|
| 208 |
|
| 209 |
results = []
|
| 210 |
+
for result in soup.find_all('div', class_='result')[:3]:
|
| 211 |
title_elem = result.find('a', class_='result__a')
|
| 212 |
snippet_elem = result.find('a', class_='result__snippet')
|
| 213 |
|
| 214 |
if title_elem and snippet_elem:
|
| 215 |
+
title = title_elem.get_text().strip()
|
| 216 |
+
snippet = snippet_elem.get_text().strip()
|
| 217 |
+
results.append(f"{title}: {snippet}")
|
|
|
|
|
|
|
| 218 |
|
| 219 |
+
return " ".join(results) if results else ""
|
| 220 |
|
| 221 |
except Exception as e:
|
| 222 |
+
print(f"DuckDuckGo search error: {e}")
|
| 223 |
+
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
|
| 225 |
+
def _get_weather_info(self, query):
|
| 226 |
+
"""Get weather information from free sources"""
|
| 227 |
+
try:
|
| 228 |
+
# Extract location from query
|
| 229 |
+
location = self._extract_location(query)
|
| 230 |
+
if not location:
|
| 231 |
+
location = "New York" # Default location
|
| 232 |
|
| 233 |
+
# Use free weather API
|
| 234 |
+
url = f"http://wttr.in/{urllib.parse.quote(location)}?format=%C+%t+%w+%h"
|
| 235 |
+
|
| 236 |
+
response = requests.get(url, timeout=self.search_timeout)
|
| 237 |
+
if response.status_code == 200:
|
| 238 |
+
weather_data = response.text.strip()
|
| 239 |
+
return f"Weather in {location}: {weather_data}"
|
| 240 |
+
|
| 241 |
+
except Exception as e:
|
| 242 |
+
print(f"Weather error: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
|
| 244 |
+
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
|
| 246 |
+
def _extract_location(self, text):
|
| 247 |
+
"""Extract location from text"""
|
|
|
|
| 248 |
locations = {
|
| 249 |
'new york', 'london', 'paris', 'tokyo', 'berlin', 'sydney', 'toronto',
|
| 250 |
'mumbai', 'beijing', 'moscow', 'dubai', 'rome', 'madrid', 'amsterdam',
|
|
|
|
| 254 |
text_lower = text.lower()
|
| 255 |
for location in locations:
|
| 256 |
if location in text_lower:
|
| 257 |
+
return location
|
| 258 |
+
return ""
|
| 259 |
+
|
| 260 |
+
def _create_web_answer(self, user_input, web_content, sources):
|
| 261 |
+
"""Create answer using web content"""
|
| 262 |
+
if not web_content or not web_content.strip():
|
| 263 |
+
return "I searched but couldn't find current information about that. Could you try rephrasing your question?"
|
| 264 |
+
|
| 265 |
+
# Clean and format the content
|
| 266 |
+
sentences = re.split(r'[.!?]+', web_content)
|
| 267 |
+
meaningful_sentences = [s.strip() for s in sentences if len(s.strip()) > 10]
|
| 268 |
+
|
| 269 |
+
if not meaningful_sentences:
|
| 270 |
+
return "I found some information but couldn't extract a clear answer. Try asking more specifically."
|
| 271 |
+
|
| 272 |
+
# Use the most relevant sentences
|
| 273 |
+
answer_sentences = meaningful_sentences[:3]
|
| 274 |
+
answer = ". ".join(answer_sentences)
|
| 275 |
+
|
| 276 |
+
# Ensure the answer ends with proper punctuation
|
| 277 |
+
if not answer.endswith(('.', '!', '?')):
|
| 278 |
+
answer += "."
|
| 279 |
+
|
| 280 |
+
# Add source attribution if available
|
| 281 |
+
if sources:
|
| 282 |
+
source_text = ", ".join(sources)
|
| 283 |
+
answer += f"\n\nSources: {source_text}"
|
| 284 |
+
|
| 285 |
+
return answer
|
| 286 |
+
|
| 287 |
+
def _get_factual_response(self, user_input):
|
| 288 |
+
"""Provide factual responses without web search"""
|
| 289 |
+
input_lower = user_input.lower()
|
| 290 |
+
|
| 291 |
+
# Time and date responses
|
| 292 |
+
if any(word in input_lower for word in ['time', 'clock', 'hour']):
|
| 293 |
+
current_time = datetime.now().strftime("%I:%M %p")
|
| 294 |
+
return f"The current time is {current_time}."
|
| 295 |
+
|
| 296 |
+
if any(word in input_lower for word in ['date', 'today', 'day month']):
|
| 297 |
+
current_date = date.today().strftime("%A, %B %d, %Y")
|
| 298 |
+
return f"Today is {current_date}."
|
| 299 |
+
|
| 300 |
+
if any(word in input_lower for word in ['day of week', 'what day']):
|
| 301 |
+
current_day = date.today().strftime("%A")
|
| 302 |
+
return f"Today is {current_day}."
|
| 303 |
+
|
| 304 |
+
# Math calculations
|
| 305 |
+
math_result = self._solve_math(user_input)
|
| 306 |
+
if math_result:
|
| 307 |
+
return math_result
|
| 308 |
+
|
| 309 |
+
# About the bot
|
| 310 |
+
if any(word in input_lower for word in ['your name', 'who are you']):
|
| 311 |
+
return "I'm Phoenix AI, a completely free chatbot with web search capabilities!"
|
| 312 |
+
|
| 313 |
+
if any(word in input_lower for word in ['what can you do', 'capabilities']):
|
| 314 |
+
return "I can: Answer questions, search the web for free, do math, tell time/date, and learn from our conversations!"
|
| 315 |
+
|
| 316 |
+
if any(word in input_lower for word in ['help', 'what can you help with']):
|
| 317 |
+
return "I can help you with: current news, weather information, factual questions, calculations, and general conversation. I learn from our chats too!"
|
| 318 |
|
| 319 |
return ""
|
| 320 |
|
| 321 |
+
def _solve_math(self, user_input):
|
| 322 |
+
"""Solve mathematical expressions"""
|
| 323 |
+
try:
|
| 324 |
+
# Simple arithmetic
|
| 325 |
+
if re.search(r'\d+\s*[\+\-\*\/]\s*\d+', user_input):
|
| 326 |
+
numbers = re.findall(r'\d+', user_input)
|
| 327 |
+
if len(numbers) >= 2:
|
| 328 |
+
a, b = int(numbers[0]), int(numbers[1])
|
| 329 |
+
|
| 330 |
+
if '+' in user_input:
|
| 331 |
+
return f"{a} + {b} = {a + b}"
|
| 332 |
+
elif '-' in user_input:
|
| 333 |
+
return f"{a} - {b} = {a - b}"
|
| 334 |
+
elif '*' in user_input or '×' in user_input:
|
| 335 |
+
return f"{a} × {b} = {a * b}"
|
| 336 |
+
elif '/' in user_input or '÷' in user_input:
|
| 337 |
+
if b == 0:
|
| 338 |
+
return "I cannot divide by zero - that's mathematically undefined!"
|
| 339 |
+
result = a / b
|
| 340 |
+
return f"{a} ÷ {b} = {result:.2f}"
|
| 341 |
+
|
| 342 |
+
# Square roots
|
| 343 |
+
sqrt_match = re.search(r'sqrt\(?(\d+)\)?', user_input)
|
| 344 |
+
if sqrt_match:
|
| 345 |
+
num = int(sqrt_match.group(1))
|
| 346 |
+
if num < 0:
|
| 347 |
+
return "I cannot calculate square roots of negative numbers!"
|
| 348 |
+
result = math.sqrt(num)
|
| 349 |
+
return f"√{num} = {result:.2f}"
|
| 350 |
+
|
| 351 |
+
# Powers
|
| 352 |
+
power_match = re.search(r'(\d+)\s*\^\s*(\d+)', user_input)
|
| 353 |
+
if power_match:
|
| 354 |
+
base, exponent = int(power_match.group(1)), int(power_match.group(2))
|
| 355 |
+
result = base ** exponent
|
| 356 |
+
return f"{base}^{exponent} = {result}"
|
| 357 |
+
|
| 358 |
+
except Exception as e:
|
| 359 |
+
print(f"Math solving error: {e}")
|
| 360 |
|
| 361 |
+
return ""
|
| 362 |
+
|
| 363 |
+
def _get_learned_response(self, user_input):
|
| 364 |
+
"""Get response from learned patterns or generate contextual response"""
|
| 365 |
+
# Find similar patterns
|
| 366 |
similar_patterns = self._find_similar_patterns(user_input)
|
| 367 |
if similar_patterns:
|
| 368 |
best_pattern = max(similar_patterns, key=lambda x: x[1]['score'])
|
|
|
|
| 370 |
return best_pattern[1]['response']
|
| 371 |
|
| 372 |
# Generate contextual response
|
| 373 |
+
return self._generate_contextual_response(user_input)
|
| 374 |
|
| 375 |
+
def _find_similar_patterns(self, text):
|
| 376 |
+
"""Find similar learned patterns"""
|
| 377 |
+
similar = []
|
| 378 |
+
text_words = set(text.lower().split())
|
| 379 |
+
|
| 380 |
+
for pattern, data in self.learned_patterns.items():
|
| 381 |
+
pattern_words = set(pattern.lower().split())
|
| 382 |
+
common_words = text_words.intersection(pattern_words)
|
| 383 |
+
if common_words:
|
| 384 |
+
similarity = len(common_words) / len(text_words.union(pattern_words))
|
| 385 |
+
if similarity > 0.3:
|
| 386 |
+
similar.append((pattern, data))
|
| 387 |
+
|
| 388 |
+
return similar
|
| 389 |
+
|
| 390 |
+
def _generate_contextual_response(self, user_input):
    """Fall-back reply generator used when no learned pattern matches.

    Questions get a randomly chosen opener plus either a topic-specific
    tail (when _analyze_input found topics) or a generic follow-up;
    plain statements get a random conversational acknowledgement.
    """
    context = self._analyze_input(user_input)

    if context['has_question']:
        openers = (
            "That's an interesting question. Based on what I know, ",
            "I appreciate your question. From my understanding, ",
            "That's a great question. I've been learning that ",
        )
        opener = random.choice(openers)
        topics = context['topics']
        if topics:
            focus = random.choice(topics)
            return opener + f"{focus} is quite fascinating. What specifically would you like to know?"
        return opener + "this topic has many interesting aspects. Could you tell me more about what you're curious about?"

    # Statement (no '?' in the input): acknowledge and invite more detail.
    acknowledgements = (
        "I understand. Tell me more about that.",
        "That's interesting. What are your thoughts on this?",
        "I appreciate you sharing that. How do you feel about it?",
        "That's fascinating. I'm learning from our conversation.",
        "I see. Could you elaborate on that?",
    )
    return random.choice(acknowledgements)
+
def _analyze_input(self, text):
    """Build a lightweight context dict for *text*.

    Keys: 'words' (whitespace tokens), 'topics' (via _extract_topics),
    'has_question' (literal '?' anywhere in the text), 'sentiment'
    (via _analyze_sentiment), and 'word_count'.
    """
    tokens = text.split()
    return {
        'words': tokens,
        'topics': self._extract_topics(text),
        'has_question': '?' in text,
        'sentiment': self._analyze_sentiment(text),
        'word_count': len(tokens),
    }
+
def _extract_topics(self, text):
|
| 432 |
"""Extract topics from text"""
|
| 433 |
topics = []
|
| 434 |
+
text_lower = text.lower()
|
| 435 |
+
|
| 436 |
+
topic_categories = {
|
| 437 |
+
'technology': ['tech', 'computer', 'ai', 'software', 'code', 'internet', 'programming'],
|
| 438 |
+
'science': ['science', 'research', 'discover', 'physics', 'biology', 'chemistry'],
|
| 439 |
+
'sports': ['sports', 'game', 'team', 'player', 'score', 'match', 'tournament'],
|
| 440 |
+
'entertainment': ['movie', 'music', 'show', 'celebrity', 'film', 'song'],
|
| 441 |
+
'health': ['health', 'medical', 'fitness', 'diet', 'exercise', 'nutrition']
|
| 442 |
}
|
| 443 |
|
| 444 |
+
for topic, keywords in topic_categories.items():
|
|
|
|
| 445 |
if any(keyword in text_lower for keyword in keywords):
|
| 446 |
topics.append(topic)
|
| 447 |
|
| 448 |
return topics
|
| 449 |
|
| 450 |
+
def _analyze_sentiment(self, text):
|
| 451 |
"""Basic sentiment analysis"""
|
| 452 |
+
positive_words = ['love', 'like', 'good', 'great', 'awesome', 'happy', 'excited', 'amazing']
|
| 453 |
+
negative_words = ['hate', 'bad', 'terrible', 'awful', 'sad', 'angry', 'upset']
|
| 454 |
|
| 455 |
+
text_lower = text.lower()
|
| 456 |
+
positive_count = sum(1 for word in positive_words if word in text_lower)
|
| 457 |
+
negative_count = sum(1 for word in negative_words if word in text_lower)
|
| 458 |
|
| 459 |
+
if positive_count > negative_count:
|
| 460 |
return "positive"
|
| 461 |
+
elif negative_count > positive_count:
|
| 462 |
return "negative"
|
| 463 |
else:
|
| 464 |
return "neutral"
|
| 465 |
|
| 466 |
+
def _store_interaction(self, user_input, response, reward, sources):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
"""Store interaction in memory"""
|
| 468 |
interaction = {
|
| 469 |
'input': user_input,
|
| 470 |
'response': response,
|
| 471 |
'reward': reward,
|
| 472 |
+
'sources': sources,
|
| 473 |
'timestamp': datetime.now().isoformat()
|
| 474 |
}
|
| 475 |
|
| 476 |
self.conversation_memory.append(interaction)
|
|
|
|
|
|
|
| 477 |
self._update_learning(user_input, response, reward)
|
| 478 |
|
| 479 |
+
def _update_learning(self, user_input, response, reward):
|
| 480 |
"""Update learning from interaction"""
|
| 481 |
+
# Extract key phrases from input for pattern learning
|
| 482 |
+
words = [word for word in user_input.split() if len(word) > 3][:4]
|
| 483 |
if words:
|
| 484 |
pattern = ' '.join(words)
|
| 485 |
|
|
|
|
| 490 |
'count': 1
|
| 491 |
}
|
| 492 |
else:
|
| 493 |
+
old_data = self.learned_patterns[pattern]
|
| 494 |
+
new_score = (old_data['score'] * old_data['count'] + reward) / (old_data['count'] + 1)
|
| 495 |
self.learned_patterns[pattern]['score'] = new_score
|
| 496 |
self.learned_patterns[pattern]['count'] += 1
|
| 497 |
|
| 498 |
+
# Store in response memory
|
| 499 |
+
response_hash = hashlib.md5(response.encode()).hexdigest()[:8]
|
| 500 |
+
if response_hash not in self.response_memory:
|
| 501 |
+
self.response_memory[response_hash] = {
|
| 502 |
+
'response': response,
|
| 503 |
+
'total_score': reward,
|
| 504 |
+
'count': 1,
|
| 505 |
+
'avg_score': reward
|
| 506 |
+
}
|
| 507 |
+
else:
|
| 508 |
+
memory = self.response_memory[response_hash]
|
| 509 |
+
memory['total_score'] += reward
|
| 510 |
+
memory['count'] += 1
|
| 511 |
+
memory['avg_score'] = memory['total_score'] / memory['count']
|
| 512 |
+
|
| 513 |
# Store reward
|
| 514 |
self.reward_history.append(reward)
|
| 515 |
|
| 516 |
+
# Save state periodically
|
| 517 |
if len(self.conversation_memory) % 10 == 0:
|
| 518 |
self.save_state()
|
| 519 |
|
| 520 |
+
def learn_from_feedback(self, user_input, reward):
    """Apply explicit user feedback to the most recent interaction.

    NOTE(review): `user_input` is currently unused — the feedback is
    attached to the last stored exchange regardless of its content.
    Does nothing when there is no conversation history yet.
    """
    if not self.conversation_memory:
        return
    latest = self.conversation_memory[-1]
    latest['reward'] = reward
    self._update_learning(latest['input'], latest['response'], reward)
+
def get_learning_stats(self):
    """Summarise learning progress for display.

    Returns a dict with the number of learned patterns, the memory
    size, the mean of (up to) the last 10 rewards, and how many of
    those recent rewards exceeded 0.7.
    """
    # Fall back to a single neutral reward when there is no history yet,
    # so the mean is always defined.
    window = list(self.reward_history)[-10:] or [0.5]

    return {
        'patterns': len(self.learned_patterns),
        'memory_size': len(self.conversation_memory),
        'avg_score': float(np.mean(window)),
        'recent_rewards': sum(1 for score in window if score > 0.7),
    }
| 539 |
def save_state(self):
    """Persist the learning state to self.state_file as pretty-printed JSON.

    Best-effort: any failure (disk, serialization, missing attribute)
    is logged to stdout and swallowed so the bot keeps running.
    """
    try:
        snapshot = {
            'learned_patterns': self.learned_patterns,
            'response_memory': self.response_memory,
            # Deques are not JSON-serializable; store them as lists.
            'conversation_memory': list(self.conversation_memory),
            'reward_history': list(self.reward_history),
            'last_saved': datetime.now().isoformat(),
        }
        with open(self.state_file, 'w') as fh:
            json.dump(snapshot, fh, indent=2)
    except Exception as e:
        print(f"Error saving state: {e}")
| 554 |
def load_state(self):
    """Restore learning state from self.state_file, if it exists.

    Best-effort: a missing file is silently ignored and any read or
    parse error is logged to stdout and swallowed.
    """
    try:
        if not os.path.exists(self.state_file):
            return
        with open(self.state_file, 'r') as fh:
            state = json.load(fh)

        self.learned_patterns = state.get('learned_patterns', {})
        self.response_memory = state.get('response_memory', {})
        # JSON stored the deques as plain lists; re-bound them on load.
        # NOTE(review): the maxlens here (200/300) should match the
        # deques created in __init__ — confirm they agree.
        self.conversation_memory = deque(state.get('conversation_memory', []), maxlen=200)
        self.reward_history = deque(state.get('reward_history', []), maxlen=300)
        print(f"Loaded state with {len(self.learned_patterns)} patterns")
    except Exception as e:
        print(f"Error loading state: {e}")