dicksinyass committed on
Commit
1be1f90
·
verified ·
1 Parent(s): 14ad231

Update self_learning_bot.py

Browse files
Files changed (1) hide show
  1. self_learning_bot.py +389 -361
self_learning_bot.py CHANGED
@@ -8,345 +8,243 @@ import math
8
  import hashlib
9
  import requests
10
  from collections import deque
11
- import time
12
- from typing import Dict, List, Any, Tuple
13
  from bs4 import BeautifulSoup
14
  import urllib.parse
 
15
 
16
- class WebEnhancedBot:
17
- def __init__(self, state_file="/tmp/web_bot_state.json"):
18
  self.state_file = state_file
19
- self.conversation_memory = deque(maxlen=150)
20
  self.learned_patterns = {}
21
  self.response_memory = {}
22
- self.reward_history = deque(maxlen=200)
23
- self.web_cache = {}
24
 
25
  # Learning parameters
26
  self.learning_rate = 0.3
27
  self.exploration_rate = 0.1
28
 
29
- # Web search settings
30
- self.search_timeout = 15
31
- self.max_results = 3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  # Load existing state
34
  self.load_state()
35
 
36
- print(f"Web-enhanced bot initialized with {len(self.learned_patterns)} learned patterns")
37
 
38
- def chat_with_web_search(self, user_input: str, use_search: bool = True) -> Tuple[str, bool]:
39
- """Main chat method that actually uses web search to answer questions"""
40
-
41
- # Check if this is a question that needs web search
42
- needs_search = self._should_search_web(user_input) and use_search
43
-
44
- if needs_search:
45
- # Actually search and get real answers
46
- web_content = self._get_web_content(user_input)
47
- if web_content and web_content.get('content'):
48
- response = self._create_web_answer(user_input, web_content)
49
- self._store_interaction(user_input, response, 0.8, web_content)
50
- return response, True
 
 
 
 
 
51
 
52
  # Fallback to learned responses
53
  response = self._get_learned_response(user_input)
54
- self._store_interaction(user_input, response, 0.5, {})
55
- return response, False
56
 
57
- def _should_search_web(self, user_input: str) -> bool:
58
  """Determine if we should search the web for this query"""
59
  input_lower = user_input.lower()
60
 
61
- # Questions that need current information
62
- current_info_indicators = [
63
- 'current', 'recent', 'latest', 'today', 'now', 'breaking', 'news',
64
- 'what happened', 'when did', 'update on', 'new', 'just happened'
65
- ]
66
-
67
- # Factual questions that might need verification
68
- factual_questions = [
69
- 'what is', 'who is', 'where is', 'when was', 'how to', 'why does',
70
- 'explain', 'tell me about', 'information about', 'details about'
71
- ]
72
-
73
- # Specific topics that change frequently
74
- dynamic_topics = [
75
- 'weather', 'temperature', 'forecast', 'stock', 'price', 'crypto',
76
- 'sports', 'game', 'score', 'election', 'politics', 'celebrity'
77
  ]
78
 
79
- # Check if input matches any search criteria
80
- if any(indicator in input_lower for indicator in current_info_indicators):
81
- return True
82
-
83
- if any(question in input_lower for question in factual_questions):
84
- return True
85
-
86
- if any(topic in input_lower for topic in dynamic_topics):
87
- return True
88
-
89
- # Questions with question words
90
- if any(word in input_lower for word in ['what', 'who', 'where', 'when', 'how', 'why']) and '?' in user_input:
91
- return True
92
-
93
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
- def _get_web_content(self, query: str) -> Dict[str, Any]:
96
- """Get actual web content for answering questions"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  try:
98
- # Try multiple search methods
99
- search_results = self._search_brave(query) or self._search_duckduckgo(query)
100
-
101
- if search_results and search_results.get('results'):
102
- # Extract actual content from the top result
103
- content = self._extract_meaningful_content(search_results['results'][0])
104
- return {
105
- 'content': content,
106
- 'source': search_results['results'][0].get('url', ''),
107
- 'query': query,
108
- 'results_count': len(search_results['results'])
109
- }
 
 
 
 
 
 
 
 
 
110
  except Exception as e:
111
- print(f"Web search error: {e}")
112
 
113
- return {}
114
 
115
- def _search_brave(self, query: str) -> Dict[str, Any]:
116
- """Search using Brave Search (free tier)"""
117
  try:
118
- # Brave Search API (free for limited use)
119
- url = f"https://api.search.brave.com/res/v1/web/search"
120
- headers = {
121
- "Accept": "application/json",
122
- "X-Subscription-Token": "BSA-Your-Free-Key-Here" # Get free key from brave.com
123
- }
124
- params = {
125
- "q": query,
126
- "count": self.max_results
127
- }
128
 
129
- response = requests.get(url, headers=headers, params=params, timeout=self.search_timeout)
130
- if response.status_code == 200:
131
- data = response.json()
132
- results = []
133
- for web_result in data.get('web', {}).get('results', [])[:self.max_results]:
134
- results.append({
135
- 'title': web_result.get('title', ''),
136
- 'url': web_result.get('url', ''),
137
- 'description': web_result.get('description', '')
138
- })
139
- return {'results': results}
140
- except:
141
- pass
142
- return {}
143
-
144
- def _search_duckduckgo(self, query: str) -> Dict[str, Any]:
145
- """Fallback to DuckDuckGo instant answers and web results"""
146
- try:
147
- # DuckDuckGo Instant Answer API
148
- ia_url = f"https://api.duckduckgo.com/"
149
- params = {
150
- "q": query,
151
- "format": "json",
152
- "no_html": "1",
153
- "skip_disambig": "1"
154
- }
155
 
156
- response = requests.get(ia_url, params=params, timeout=self.search_timeout)
157
- if response.status_code == 200:
158
- data = response.json()
159
-
160
- # Check for instant answer
161
- if data.get('AbstractText'):
162
- return {
163
- 'results': [{
164
- 'title': data.get('Heading', 'Instant Answer'),
165
- 'url': data.get('AbstractURL', ''),
166
- 'description': data.get('AbstractText', '')
167
- }]
168
- }
169
-
170
- # Check for related topics
171
- if data.get('RelatedTopics'):
172
- for topic in data['RelatedTopics'][:self.max_results]:
173
- if topic.get('Text'):
174
- return {
175
- 'results': [{
176
- 'title': topic.get('FirstURL', '').split('/')[-1].replace('_', ' '),
177
- 'url': topic.get('FirstURL', ''),
178
- 'description': topic.get('Text', '')
179
- }]
180
- }
181
 
182
- # Fallback to HTML scraping
183
- return self._scrape_duckduckgo_html(query)
 
 
 
184
 
 
 
 
 
 
 
185
  except Exception as e:
186
- print(f"DuckDuckGo search error: {e}")
187
- return {}
 
188
 
189
- def _scrape_duckduckgo_html(self, query: str) -> Dict[str, Any]:
190
- """Scrape DuckDuckGo HTML results as final fallback"""
191
  try:
192
  url = f"https://html.duckduckgo.com/html/?q={urllib.parse.quote(query)}"
193
  headers = {
194
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
195
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
196
- 'Accept-Language': 'en-US,en;q=0.5',
197
  }
198
 
199
  response = requests.get(url, headers=headers, timeout=self.search_timeout)
200
  soup = BeautifulSoup(response.text, 'html.parser')
201
 
202
  results = []
203
- for result in soup.find_all('div', class_='result')[:self.max_results]:
204
  title_elem = result.find('a', class_='result__a')
205
  snippet_elem = result.find('a', class_='result__snippet')
206
 
207
  if title_elem and snippet_elem:
208
- results.append({
209
- 'title': title_elem.get_text().strip(),
210
- 'url': title_elem.get('href', ''),
211
- 'description': snippet_elem.get_text().strip()
212
- })
213
 
214
- return {'results': results} if results else {}
215
 
216
  except Exception as e:
217
- print(f"DDG HTML scraping error: {e}")
218
- return {}
219
-
220
- def _extract_meaningful_content(self, result: Dict) -> str:
221
- """Extract meaningful content from search result"""
222
- title = result.get('title', '')
223
- description = result.get('description', '')
224
- url = result.get('url', '')
225
-
226
- # Combine title and description for context
227
- content_parts = []
228
- if title:
229
- content_parts.append(title)
230
- if description:
231
- content_parts.append(description)
232
-
233
- full_content = ". ".join(content_parts)
234
-
235
- # Clean up the content
236
- full_content = re.sub(r'\[\d+\]', '', full_content) # Remove citation numbers
237
- full_content = re.sub(r'\s+', ' ', full_content) # Normalize whitespace
238
-
239
- return full_content.strip()
240
-
241
- def _create_web_answer(self, user_input: str, web_content: Dict) -> str:
242
- """Create an actual answer using web content"""
243
- content = web_content.get('content', '')
244
- source = web_content.get('source', '')
245
-
246
- if not content:
247
- return "I searched but couldn't find specific information about that. Could you try rephrasing your question?"
248
-
249
- # Analyze the type of question and create appropriate response
250
- question_type = self._analyze_question_type(user_input)
251
-
252
- if question_type == "factual":
253
- return self._format_factual_answer(user_input, content, source)
254
- elif question_type == "current_events":
255
- return self._format_current_events_answer(user_input, content, source)
256
- elif question_type == "how_to":
257
- return self._format_how_to_answer(user_input, content, source)
258
- elif question_type == "weather":
259
- return self._format_weather_answer(user_input, content, source)
260
- else:
261
- return self._format_general_answer(user_input, content, source)
262
-
263
- def _analyze_question_type(self, user_input: str) -> str:
264
- """Analyze what type of question this is"""
265
- input_lower = user_input.lower()
266
-
267
- if any(word in input_lower for word in ['weather', 'temperature', 'forecast']):
268
- return "weather"
269
- elif any(word in input_lower for word in ['how to', 'how do i', 'tutorial', 'guide']):
270
- return "how_to"
271
- elif any(word in input_lower for word in ['news', 'current', 'recent', 'breaking', 'today']):
272
- return "current_events"
273
- elif any(word in input_lower for word in ['what is', 'who is', 'where is', 'when was']):
274
- return "factual"
275
- else:
276
- return "general"
277
-
278
- def _format_factual_answer(self, question: str, content: str, source: str) -> str:
279
- """Format factual answers"""
280
- # Extract the most relevant sentence
281
- sentences = content.split('. ')
282
- relevant_sentence = sentences[0] if sentences else content
283
-
284
- answer = f"**According to web sources:** {relevant_sentence}"
285
-
286
- if len(sentences) > 1:
287
- additional_info = '. '.join(sentences[1:3])
288
- answer += f" {additional_info}."
289
-
290
- return answer
291
 
292
- def _format_current_events_answer(self, question: str, content: str, source: str) -> str:
293
- """Format current events answers"""
294
- sentences = content.split('. ')
295
-
296
- answer = f"**Latest information:** {content[:400]}"
297
- if len(content) > 400:
298
- answer += "..."
299
 
300
- return answer
301
-
302
- def _format_how_to_answer(self, question: str, content: str, source: str) -> str:
303
- """Format how-to answers"""
304
- # Look for instructional language
305
- instructions = []
306
- sentences = content.split('. ')
307
-
308
- for sentence in sentences[:4]: # Take first 4 sentences
309
- if any(word in sentence.lower() for word in ['step', 'first', 'then', 'next', 'after']):
310
- instructions.append(sentence)
311
-
312
- if instructions:
313
- answer = "**Here's what I found:**\n" + "\n".join(f"• {inst}" for inst in instructions[:3])
314
- else:
315
- answer = f"**Based on available information:** {sentences[0] if sentences else content}"
316
-
317
- return answer
318
-
319
- def _format_weather_answer(self, question: str, content: str, source: str) -> str:
320
- """Format weather-related answers"""
321
- # Extract location from question
322
- location = self._extract_location(question)
323
-
324
- # Look for temperature and conditions in content
325
- temp_match = re.search(r'(\d+)\s*°?[CF]', content)
326
- condition_match = re.search(r'(sunny|rain|cloud|snow|clear|storm)', content.lower())
327
-
328
- answer_parts = []
329
- if location:
330
- answer_parts.append(f"**Weather for {location}:**")
331
-
332
- if temp_match:
333
- answer_parts.append(f"Temperature around {temp_match.group(1)}°F")
334
-
335
- if condition_match:
336
- answer_parts.append(f"Conditions: {condition_match.group(1).title()}")
337
 
338
- if answer_parts:
339
- return " ".join(answer_parts) + f"\n*Source: {source}*" if source else ""
340
- else:
341
- return f"**Weather information:** {content[:300]}"
342
-
343
- def _format_general_answer(self, question: str, content: str, source: str) -> str:
344
- """Format general answers"""
345
- return f"**I found this information:** {content[:500]}" + ("..." if len(content) > 500 else "")
346
 
347
- def _extract_location(self, text: str) -> str:
348
- """Extract location from text (simple version)"""
349
- # Common city/country names
350
  locations = {
351
  'new york', 'london', 'paris', 'tokyo', 'berlin', 'sydney', 'toronto',
352
  'mumbai', 'beijing', 'moscow', 'dubai', 'rome', 'madrid', 'amsterdam',
@@ -356,15 +254,115 @@ class WebEnhancedBot:
356
  text_lower = text.lower()
357
  for location in locations:
358
  if location in text_lower:
359
- return location.title()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360
 
361
  return ""
362
 
363
- def _get_learned_response(self, user_input: str) -> str:
364
- """Get response from learned patterns"""
365
- context = self._analyze_input(user_input)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
 
367
- # Try learned patterns first
 
 
 
 
368
  similar_patterns = self._find_similar_patterns(user_input)
369
  if similar_patterns:
370
  best_pattern = max(similar_patterns, key=lambda x: x[1]['score'])
@@ -372,106 +370,116 @@ class WebEnhancedBot:
372
  return best_pattern[1]['response']
373
 
374
  # Generate contextual response
375
- return self._generate_contextual_response(user_input, context)
376
 
377
- def _generate_contextual_response(self, user_input: str, context: dict) -> str:
378
- """Generate contextual response when no web results"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
379
  if context['has_question']:
380
  responses = [
381
- "That's an interesting question. Based on my knowledge, ",
382
- "I understand you're asking about ",
383
- "That's a great question. From what I've learned, "
384
  ]
385
- base = random.choice(responses)
386
 
387
  if context['topics']:
388
- return base + f"{random.choice(context['topics'])}. Could you tell me more about what specifically interests you?"
 
389
  else:
390
- return base + "this topic. I'm constantly learning from our conversations."
391
-
392
- # Conversational responses
393
- conversational = [
394
- "I appreciate you sharing that. What are your thoughts on this?",
395
- "That's interesting. Tell me more about your perspective.",
396
- "I understand. How does that relate to your experiences?",
397
- "That's fascinating. I'm learning from our conversation."
 
398
  ]
399
- return random.choice(conversational)
 
400
 
401
- def _analyze_input(self, text: str) -> dict:
402
- """Analyze user input"""
 
 
403
  return {
404
- 'words': text.split(),
405
  'topics': self._extract_topics(text),
406
  'has_question': '?' in text,
407
- 'sentiment': self._analyze_sentiment(text)
 
408
  }
409
 
410
- def _extract_topics(self, text: str) -> list:
411
  """Extract topics from text"""
412
  topics = []
413
- topic_keywords = {
414
- 'technology': ['computer', 'tech', 'software', 'ai', 'program', 'code'],
415
- 'science': ['space', 'physics', 'biology', 'research', 'discover'],
416
- 'sports': ['game', 'sports', 'team', 'player', 'score'],
417
- 'entertainment': ['movie', 'music', 'show', 'celebrity'],
418
- 'health': ['health', 'medical', 'fitness', 'diet']
 
 
419
  }
420
 
421
- text_lower = text.lower()
422
- for topic, keywords in topic_keywords.items():
423
  if any(keyword in text_lower for keyword in keywords):
424
  topics.append(topic)
425
 
426
  return topics
427
 
428
- def _analyze_sentiment(self, text: str) -> str:
429
  """Basic sentiment analysis"""
430
- positive = ['love', 'like', 'good', 'great', 'awesome', 'happy']
431
- negative = ['hate', 'bad', 'terrible', 'awful', 'sad', 'angry']
432
 
433
- pos_count = sum(1 for word in positive if word in text)
434
- neg_count = sum(1 for word in negative if word in text)
 
435
 
436
- if pos_count > neg_count:
437
  return "positive"
438
- elif neg_count > pos_count:
439
  return "negative"
440
  else:
441
  return "neutral"
442
 
443
- def _find_similar_patterns(self, text: str) -> list:
444
- """Find similar learned patterns"""
445
- similar = []
446
- text_words = set(text.split())
447
-
448
- for pattern, data in self.learned_patterns.items():
449
- pattern_words = set(pattern.split())
450
- similarity = len(text_words & pattern_words) / len(text_words | pattern_words)
451
- if similarity > 0.3:
452
- similar.append((pattern, data))
453
-
454
- return similar
455
-
456
- def _store_interaction(self, user_input: str, response: str, reward: float, web_context: dict):
457
  """Store interaction in memory"""
458
  interaction = {
459
  'input': user_input,
460
  'response': response,
461
  'reward': reward,
462
- 'web_context': web_context,
463
  'timestamp': datetime.now().isoformat()
464
  }
465
 
466
  self.conversation_memory.append(interaction)
467
-
468
- # Learn from this interaction
469
  self._update_learning(user_input, response, reward)
470
 
471
- def _update_learning(self, user_input: str, response: str, reward: float):
472
  """Update learning from interaction"""
473
- # Extract key phrases for pattern learning
474
- words = [w for w in user_input.split() if len(w) > 3][:4]
475
  if words:
476
  pattern = ' '.join(words)
477
 
@@ -482,58 +490,78 @@ class WebEnhancedBot:
482
  'count': 1
483
  }
484
  else:
485
- old = self.learned_patterns[pattern]
486
- new_score = (old['score'] * old['count'] + reward) / (old['count'] + 1)
487
  self.learned_patterns[pattern]['score'] = new_score
488
  self.learned_patterns[pattern]['count'] += 1
489
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
  # Store reward
491
  self.reward_history.append(reward)
492
 
493
- # Periodic save
494
  if len(self.conversation_memory) % 10 == 0:
495
  self.save_state()
496
 
497
- def learn_from_feedback(self, user_input: str, reward: float):
498
- """Learn from explicit feedback"""
499
  if self.conversation_memory:
500
- recent = self.conversation_memory[-1]
501
- recent['reward'] = reward
502
- self._update_learning(recent['input'], recent['response'], reward)
 
503
 
504
- def get_learning_stats(self) -> dict:
505
  """Get learning statistics"""
506
  recent_rewards = list(self.reward_history)[-10:] or [0.5]
507
 
508
  return {
509
  'patterns': len(self.learned_patterns),
510
  'memory_size': len(self.conversation_memory),
511
- 'avg_score': np.mean(recent_rewards),
512
  'recent_rewards': len([r for r in recent_rewards if r > 0.7])
513
  }
514
 
515
  def save_state(self):
516
- """Save learning state"""
517
  try:
518
  state = {
519
  'learned_patterns': self.learned_patterns,
 
520
  'conversation_memory': list(self.conversation_memory),
521
- 'reward_history': list(self.reward_history)
 
522
  }
523
  with open(self.state_file, 'w') as f:
524
  json.dump(state, f, indent=2)
525
  except Exception as e:
526
- print(f"Save error: {e}")
527
 
528
  def load_state(self):
529
- """Load learning state"""
530
  try:
531
  if os.path.exists(self.state_file):
532
  with open(self.state_file, 'r') as f:
533
  state = json.load(f)
534
 
535
  self.learned_patterns = state.get('learned_patterns', {})
536
- self.conversation_memory = deque(state.get('conversation_memory', []), maxlen=150)
537
- self.reward_history = deque(state.get('reward_history', []), maxlen=200)
538
- except:
539
- pass
 
 
 
8
  import hashlib
9
  import requests
10
  from collections import deque
 
 
11
  from bs4 import BeautifulSoup
12
  import urllib.parse
13
+ import feedparser
14
 
15
+ class FreeWebBot:
16
+ def __init__(self, state_file="/tmp/free_bot_state.json"):
17
  self.state_file = state_file
18
+ self.conversation_memory = deque(maxlen=200)
19
  self.learned_patterns = {}
20
  self.response_memory = {}
21
+ self.reward_history = deque(maxlen=300)
 
22
 
23
  # Learning parameters
24
  self.learning_rate = 0.3
25
  self.exploration_rate = 0.1
26
 
27
+ # Web settings
28
+ self.search_timeout = 10
29
+ self.user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
30
+
31
+ # Free news sources (RSS feeds)
32
+ self.news_feeds = {
33
+ "general": [
34
+ "https://feeds.bbci.co.uk/news/rss.xml",
35
+ "https://rss.cnn.com/rss/edition.rss",
36
+ "https://feeds.reuters.com/reuters/topNews",
37
+ ],
38
+ "technology": [
39
+ "https://feeds.arstechnica.com/arstechnica/index",
40
+ "https://techcrunch.com/feed/",
41
+ ],
42
+ "sports": [
43
+ "https://feeds.espn.com/espn/rss/news",
44
+ ]
45
+ }
46
 
47
  # Load existing state
48
  self.load_state()
49
 
50
+ print(f"Free web bot initialized with {len(self.learned_patterns)} learned patterns")
51
 
52
+ def chat(self, user_input, use_web_search=True):
53
+ """Main chat method - returns (response, search_used, sources)"""
54
+ user_input = user_input.strip()
55
+ if not user_input:
56
+ return "Please enter a message.", False, []
57
+
58
+ # First, try factual responses
59
+ factual_response = self._get_factual_response(user_input)
60
+ if factual_response:
61
+ return factual_response, False, []
62
+
63
+ # Try free web search for current information
64
+ if use_web_search and self._should_search_web(user_input):
65
+ web_content, sources = self._free_web_search(user_input)
66
+ if web_content and web_content.strip():
67
+ response = self._create_web_answer(user_input, web_content, sources)
68
+ self._store_interaction(user_input, response, 0.8, sources)
69
+ return response, True, sources
70
 
71
  # Fallback to learned responses
72
  response = self._get_learned_response(user_input)
73
+ self._store_interaction(user_input, response, 0.5, [])
74
+ return response, False, []
75
 
76
+ def _should_search_web(self, user_input):
77
  """Determine if we should search the web for this query"""
78
  input_lower = user_input.lower()
79
 
80
+ search_triggers = [
81
+ 'news', 'current', 'latest', 'today', 'recent', 'update',
82
+ 'weather', 'forecast', 'temperature',
83
+ 'sports', 'score', 'game', 'match',
84
+ 'stock', 'crypto', 'bitcoin', 'price',
85
+ 'how to', 'tutorial', 'guide', 'explain',
86
+ 'what is', 'who is', 'where is', 'when was',
87
+ 'breaking', 'headlines'
 
 
 
 
 
 
 
 
88
  ]
89
 
90
+ return any(trigger in input_lower for trigger in search_triggers)
91
+
92
+ def _free_web_search(self, query):
93
+ """Perform free web search using multiple methods"""
94
+ sources = []
95
+ all_content = []
96
+
97
+ # Method 1: RSS Feeds for news/current events
98
+ if any(topic in query.lower() for topic in ['news', 'current', 'latest', 'today', 'breaking']):
99
+ feed_content = self._search_rss_feeds(query)
100
+ all_content.extend(feed_content)
101
+ if feed_content:
102
+ sources.append("News Feeds")
103
+
104
+ # Method 2: Wikipedia for factual information
105
+ if any(word in query.lower() for word in ['what is', 'who is', 'explain', 'definition']):
106
+ wiki_content = self._search_wikipedia(query)
107
+ if wiki_content:
108
+ all_content.append(wiki_content)
109
+ sources.append("Wikipedia")
110
+
111
+ # Method 3: DuckDuckGo for general search
112
+ ddg_content = self._search_duckduckgo(query)
113
+ if ddg_content:
114
+ all_content.append(ddg_content)
115
+ sources.append("Web Search")
116
+
117
+ # Method 4: Weather information
118
+ if any(word in query.lower() for word in ['weather', 'temperature', 'forecast']):
119
+ weather_content = self._get_weather_info(query)
120
+ if weather_content:
121
+ all_content.append(weather_content)
122
+ sources.append("Weather Service")
123
 
124
+ # Combine all content
125
+ combined_content = " ".join(all_content)
126
+ return combined_content, sources
127
+
128
+ def _search_rss_feeds(self, query):
129
+ """Search RSS feeds for current information"""
130
+ content = []
131
+ query_words = query.lower().split()
132
+
133
+ # Determine feed category based on query
134
+ category = "general"
135
+ if any(word in query.lower() for word in ['tech', 'technology', 'ai', 'computer', 'software']):
136
+ category = "technology"
137
+ elif any(word in query.lower() for word in ['sports', 'game', 'score', 'match', 'team']):
138
+ category = "sports"
139
+
140
  try:
141
+ for feed_url in self.news_feeds.get(category, self.news_feeds["general"]):
142
+ try:
143
+ feed = feedparser.parse(feed_url)
144
+ for entry in feed.entries[:5]: # Top 5 entries
145
+ title = entry.get('title', '')
146
+ summary = entry.get('summary', '')
147
+
148
+ # Check if entry matches query
149
+ entry_text = f"{title} {summary}".lower()
150
+ if any(word in entry_text for word in query_words) or len(query_words) < 2:
151
+ content.append(f"{title}: {summary}")
152
+
153
+ if len(content) >= 3: # Limit to 3 results
154
+ break
155
+ except Exception as e:
156
+ print(f"Error parsing feed {feed_url}: {e}")
157
+ continue
158
+
159
+ if content:
160
+ break
161
+
162
  except Exception as e:
163
+ print(f"RSS feed error: {e}")
164
 
165
+ return content
166
 
167
+ def _search_wikipedia(self, query):
168
+ """Search Wikipedia for factual information"""
169
  try:
170
+ # Clean query for Wikipedia
171
+ clean_query = re.sub(r'(what is|who is|explain|definition of)', '', query, flags=re.IGNORECASE).strip()
172
+ clean_query = clean_query.replace('?', '').strip()
 
 
 
 
 
 
 
173
 
174
+ if not clean_query:
175
+ return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
+ # Wikipedia API (completely free)
178
+ url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{urllib.parse.quote(clean_query)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
+ response = requests.get(
181
+ url,
182
+ headers={'User-Agent': self.user_agent},
183
+ timeout=self.search_timeout
184
+ )
185
 
186
+ if response.status_code == 200:
187
+ data = response.json()
188
+ extract = data.get('extract', '')
189
+ if extract:
190
+ return f"According to Wikipedia: {extract}"
191
+
192
  except Exception as e:
193
+ print(f"Wikipedia search error: {e}")
194
+
195
+ return ""
196
 
197
+ def _search_duckduckgo(self, query):
198
+ """Search DuckDuckGo for general information"""
199
  try:
200
  url = f"https://html.duckduckgo.com/html/?q={urllib.parse.quote(query)}"
201
  headers = {
202
+ 'User-Agent': self.user_agent,
203
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
 
204
  }
205
 
206
  response = requests.get(url, headers=headers, timeout=self.search_timeout)
207
  soup = BeautifulSoup(response.text, 'html.parser')
208
 
209
  results = []
210
+ for result in soup.find_all('div', class_='result')[:3]:
211
  title_elem = result.find('a', class_='result__a')
212
  snippet_elem = result.find('a', class_='result__snippet')
213
 
214
  if title_elem and snippet_elem:
215
+ title = title_elem.get_text().strip()
216
+ snippet = snippet_elem.get_text().strip()
217
+ results.append(f"{title}: {snippet}")
 
 
218
 
219
+ return " ".join(results) if results else ""
220
 
221
  except Exception as e:
222
+ print(f"DuckDuckGo search error: {e}")
223
+ return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
 
225
+ def _get_weather_info(self, query):
226
+ """Get weather information from free sources"""
227
+ try:
228
+ # Extract location from query
229
+ location = self._extract_location(query)
230
+ if not location:
231
+ location = "New York" # Default location
232
 
233
+ # Use free weather API
234
+ url = f"http://wttr.in/{urllib.parse.quote(location)}?format=%C+%t+%w+%h"
235
+
236
+ response = requests.get(url, timeout=self.search_timeout)
237
+ if response.status_code == 200:
238
+ weather_data = response.text.strip()
239
+ return f"Weather in {location}: {weather_data}"
240
+
241
+ except Exception as e:
242
+ print(f"Weather error: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
 
244
+ return ""
 
 
 
 
 
 
 
245
 
246
+ def _extract_location(self, text):
247
+ """Extract location from text"""
 
248
  locations = {
249
  'new york', 'london', 'paris', 'tokyo', 'berlin', 'sydney', 'toronto',
250
  'mumbai', 'beijing', 'moscow', 'dubai', 'rome', 'madrid', 'amsterdam',
 
254
  text_lower = text.lower()
255
  for location in locations:
256
  if location in text_lower:
257
+ return location
258
+ return ""
259
+
260
+ def _create_web_answer(self, user_input, web_content, sources):
261
+ """Create answer using web content"""
262
+ if not web_content or not web_content.strip():
263
+ return "I searched but couldn't find current information about that. Could you try rephrasing your question?"
264
+
265
+ # Clean and format the content
266
+ sentences = re.split(r'[.!?]+', web_content)
267
+ meaningful_sentences = [s.strip() for s in sentences if len(s.strip()) > 10]
268
+
269
+ if not meaningful_sentences:
270
+ return "I found some information but couldn't extract a clear answer. Try asking more specifically."
271
+
272
+ # Use the most relevant sentences
273
+ answer_sentences = meaningful_sentences[:3]
274
+ answer = ". ".join(answer_sentences)
275
+
276
+ # Ensure the answer ends with proper punctuation
277
+ if not answer.endswith(('.', '!', '?')):
278
+ answer += "."
279
+
280
+ # Add source attribution if available
281
+ if sources:
282
+ source_text = ", ".join(sources)
283
+ answer += f"\n\nSources: {source_text}"
284
+
285
+ return answer
286
+
287
+ def _get_factual_response(self, user_input):
288
+ """Provide factual responses without web search"""
289
+ input_lower = user_input.lower()
290
+
291
+ # Time and date responses
292
+ if any(word in input_lower for word in ['time', 'clock', 'hour']):
293
+ current_time = datetime.now().strftime("%I:%M %p")
294
+ return f"The current time is {current_time}."
295
+
296
+ if any(word in input_lower for word in ['date', 'today', 'day month']):
297
+ current_date = date.today().strftime("%A, %B %d, %Y")
298
+ return f"Today is {current_date}."
299
+
300
+ if any(word in input_lower for word in ['day of week', 'what day']):
301
+ current_day = date.today().strftime("%A")
302
+ return f"Today is {current_day}."
303
+
304
+ # Math calculations
305
+ math_result = self._solve_math(user_input)
306
+ if math_result:
307
+ return math_result
308
+
309
+ # About the bot
310
+ if any(word in input_lower for word in ['your name', 'who are you']):
311
+ return "I'm Phoenix AI, a completely free chatbot with web search capabilities!"
312
+
313
+ if any(word in input_lower for word in ['what can you do', 'capabilities']):
314
+ return "I can: Answer questions, search the web for free, do math, tell time/date, and learn from our conversations!"
315
+
316
+ if any(word in input_lower for word in ['help', 'what can you help with']):
317
+ return "I can help you with: current news, weather information, factual questions, calculations, and general conversation. I learn from our chats too!"
318
 
319
  return ""
320
 
321
+ def _solve_math(self, user_input):
322
+ """Solve mathematical expressions"""
323
+ try:
324
+ # Simple arithmetic
325
+ if re.search(r'\d+\s*[\+\-\*\/]\s*\d+', user_input):
326
+ numbers = re.findall(r'\d+', user_input)
327
+ if len(numbers) >= 2:
328
+ a, b = int(numbers[0]), int(numbers[1])
329
+
330
+ if '+' in user_input:
331
+ return f"{a} + {b} = {a + b}"
332
+ elif '-' in user_input:
333
+ return f"{a} - {b} = {a - b}"
334
+ elif '*' in user_input or '×' in user_input:
335
+ return f"{a} × {b} = {a * b}"
336
+ elif '/' in user_input or '÷' in user_input:
337
+ if b == 0:
338
+ return "I cannot divide by zero - that's mathematically undefined!"
339
+ result = a / b
340
+ return f"{a} ÷ {b} = {result:.2f}"
341
+
342
+ # Square roots
343
+ sqrt_match = re.search(r'sqrt\(?(\d+)\)?', user_input)
344
+ if sqrt_match:
345
+ num = int(sqrt_match.group(1))
346
+ if num < 0:
347
+ return "I cannot calculate square roots of negative numbers!"
348
+ result = math.sqrt(num)
349
+ return f"√{num} = {result:.2f}"
350
+
351
+ # Powers
352
+ power_match = re.search(r'(\d+)\s*\^\s*(\d+)', user_input)
353
+ if power_match:
354
+ base, exponent = int(power_match.group(1)), int(power_match.group(2))
355
+ result = base ** exponent
356
+ return f"{base}^{exponent} = {result}"
357
+
358
+ except Exception as e:
359
+ print(f"Math solving error: {e}")
360
 
361
+ return ""
362
+
363
+ def _get_learned_response(self, user_input):
364
+ """Get response from learned patterns or generate contextual response"""
365
+ # Find similar patterns
366
  similar_patterns = self._find_similar_patterns(user_input)
367
  if similar_patterns:
368
  best_pattern = max(similar_patterns, key=lambda x: x[1]['score'])
 
370
  return best_pattern[1]['response']
371
 
372
  # Generate contextual response
373
+ return self._generate_contextual_response(user_input)
374
 
375
+ def _find_similar_patterns(self, text):
376
+ """Find similar learned patterns"""
377
+ similar = []
378
+ text_words = set(text.lower().split())
379
+
380
+ for pattern, data in self.learned_patterns.items():
381
+ pattern_words = set(pattern.lower().split())
382
+ common_words = text_words.intersection(pattern_words)
383
+ if common_words:
384
+ similarity = len(common_words) / len(text_words.union(pattern_words))
385
+ if similarity > 0.3:
386
+ similar.append((pattern, data))
387
+
388
+ return similar
389
+
390
+ def _generate_contextual_response(self, user_input):
391
+ """Generate contextual response when no specific pattern matches"""
392
+ context = self._analyze_input(user_input)
393
+
394
  if context['has_question']:
395
  responses = [
396
+ "That's an interesting question. Based on what I know, ",
397
+ "I appreciate your question. From my understanding, ",
398
+ "That's a great question. I've been learning that ",
399
  ]
400
+ base_response = random.choice(responses)
401
 
402
  if context['topics']:
403
+ topic = random.choice(context['topics'])
404
+ return base_response + f"{topic} is quite fascinating. What specifically would you like to know?"
405
  else:
406
+ return base_response + "this topic has many interesting aspects. Could you tell me more about what you're curious about?"
407
+
408
+ # Conversational responses for statements
409
+ conversational_responses = [
410
+ "I understand. Tell me more about that.",
411
+ "That's interesting. What are your thoughts on this?",
412
+ "I appreciate you sharing that. How do you feel about it?",
413
+ "That's fascinating. I'm learning from our conversation.",
414
+ "I see. Could you elaborate on that?",
415
  ]
416
+
417
+ return random.choice(conversational_responses)
418
 
419
+ def _analyze_input(self, text):
420
+ """Analyze user input for context"""
421
+ words = text.split()
422
+
423
  return {
424
+ 'words': words,
425
  'topics': self._extract_topics(text),
426
  'has_question': '?' in text,
427
+ 'sentiment': self._analyze_sentiment(text),
428
+ 'word_count': len(words)
429
  }
430
 
431
+ def _extract_topics(self, text):
432
  """Extract topics from text"""
433
  topics = []
434
+ text_lower = text.lower()
435
+
436
+ topic_categories = {
437
+ 'technology': ['tech', 'computer', 'ai', 'software', 'code', 'internet', 'programming'],
438
+ 'science': ['science', 'research', 'discover', 'physics', 'biology', 'chemistry'],
439
+ 'sports': ['sports', 'game', 'team', 'player', 'score', 'match', 'tournament'],
440
+ 'entertainment': ['movie', 'music', 'show', 'celebrity', 'film', 'song'],
441
+ 'health': ['health', 'medical', 'fitness', 'diet', 'exercise', 'nutrition']
442
  }
443
 
444
+ for topic, keywords in topic_categories.items():
 
445
  if any(keyword in text_lower for keyword in keywords):
446
  topics.append(topic)
447
 
448
  return topics
449
 
450
+ def _analyze_sentiment(self, text):
451
  """Basic sentiment analysis"""
452
+ positive_words = ['love', 'like', 'good', 'great', 'awesome', 'happy', 'excited', 'amazing']
453
+ negative_words = ['hate', 'bad', 'terrible', 'awful', 'sad', 'angry', 'upset']
454
 
455
+ text_lower = text.lower()
456
+ positive_count = sum(1 for word in positive_words if word in text_lower)
457
+ negative_count = sum(1 for word in negative_words if word in text_lower)
458
 
459
+ if positive_count > negative_count:
460
  return "positive"
461
+ elif negative_count > positive_count:
462
  return "negative"
463
  else:
464
  return "neutral"
465
 
466
+ def _store_interaction(self, user_input, response, reward, sources):
 
 
 
 
 
 
 
 
 
 
 
 
 
467
  """Store interaction in memory"""
468
  interaction = {
469
  'input': user_input,
470
  'response': response,
471
  'reward': reward,
472
+ 'sources': sources,
473
  'timestamp': datetime.now().isoformat()
474
  }
475
 
476
  self.conversation_memory.append(interaction)
 
 
477
  self._update_learning(user_input, response, reward)
478
 
479
+ def _update_learning(self, user_input, response, reward):
480
  """Update learning from interaction"""
481
+ # Extract key phrases from input for pattern learning
482
+ words = [word for word in user_input.split() if len(word) > 3][:4]
483
  if words:
484
  pattern = ' '.join(words)
485
 
 
490
  'count': 1
491
  }
492
  else:
493
+ old_data = self.learned_patterns[pattern]
494
+ new_score = (old_data['score'] * old_data['count'] + reward) / (old_data['count'] + 1)
495
  self.learned_patterns[pattern]['score'] = new_score
496
  self.learned_patterns[pattern]['count'] += 1
497
 
498
+ # Store in response memory
499
+ response_hash = hashlib.md5(response.encode()).hexdigest()[:8]
500
+ if response_hash not in self.response_memory:
501
+ self.response_memory[response_hash] = {
502
+ 'response': response,
503
+ 'total_score': reward,
504
+ 'count': 1,
505
+ 'avg_score': reward
506
+ }
507
+ else:
508
+ memory = self.response_memory[response_hash]
509
+ memory['total_score'] += reward
510
+ memory['count'] += 1
511
+ memory['avg_score'] = memory['total_score'] / memory['count']
512
+
513
  # Store reward
514
  self.reward_history.append(reward)
515
 
516
+ # Save state periodically
517
  if len(self.conversation_memory) % 10 == 0:
518
  self.save_state()
519
 
520
+ def learn_from_feedback(self, user_input, reward):
521
+ """Learn from explicit user feedback"""
522
  if self.conversation_memory:
523
+ # Update the most recent interaction
524
+ recent_interaction = self.conversation_memory[-1]
525
+ recent_interaction['reward'] = reward
526
+ self._update_learning(recent_interaction['input'], recent_interaction['response'], reward)
527
 
528
+ def get_learning_stats(self):
529
  """Get learning statistics"""
530
  recent_rewards = list(self.reward_history)[-10:] or [0.5]
531
 
532
  return {
533
  'patterns': len(self.learned_patterns),
534
  'memory_size': len(self.conversation_memory),
535
+ 'avg_score': float(np.mean(recent_rewards)),
536
  'recent_rewards': len([r for r in recent_rewards if r > 0.7])
537
  }
538
 
539
  def save_state(self):
540
+ """Save learning state to file"""
541
  try:
542
  state = {
543
  'learned_patterns': self.learned_patterns,
544
+ 'response_memory': self.response_memory,
545
  'conversation_memory': list(self.conversation_memory),
546
+ 'reward_history': list(self.reward_history),
547
+ 'last_saved': datetime.now().isoformat()
548
  }
549
  with open(self.state_file, 'w') as f:
550
  json.dump(state, f, indent=2)
551
  except Exception as e:
552
+ print(f"Error saving state: {e}")
553
 
554
  def load_state(self):
555
+ """Load learning state from file"""
556
  try:
557
  if os.path.exists(self.state_file):
558
  with open(self.state_file, 'r') as f:
559
  state = json.load(f)
560
 
561
  self.learned_patterns = state.get('learned_patterns', {})
562
+ self.response_memory = state.get('response_memory', {})
563
+ self.conversation_memory = deque(state.get('conversation_memory', []), maxlen=200)
564
+ self.reward_history = deque(state.get('reward_history', []), maxlen=300)
565
+ print(f"Loaded state with {len(self.learned_patterns)} patterns")
566
+ except Exception as e:
567
+ print(f"Error loading state: {e}")