dicksinyass committed on
Commit
dea9a55
·
verified ·
1 Parent(s): 653331f

Update self_learning_bot.py

Browse files
Files changed (1) hide show
  1. self_learning_bot.py +391 -476
self_learning_bot.py CHANGED
@@ -10,519 +10,412 @@ import requests
10
  from collections import deque
11
  import time
12
  from typing import Dict, List, Any, Tuple
13
- import markdown
14
  from bs4 import BeautifulSoup
 
15
 
16
- class EnhancedLearningBot:
17
def __init__(self, state_file="/tmp/chatbot_enhanced_state.json"):
    """Create the bot's stores and settings, then restore any saved state.

    Args:
        state_file: Path of the JSON file used by ``load_state`` and
            ``save_state``.
    """
    # Persistence target and the bounded in-memory stores.
    self.state_file = state_file
    self.conversation_memory = deque(maxlen=200)
    self.learned_patterns = {}
    self.response_memory = {}
    self.reward_history = deque(maxlen=300)
    self.web_search_cache = {}

    # Learning settings.
    self.learning_rate, self.exploration_rate, self.min_confidence = 0.4, 0.15, 0.5

    # Web search settings.
    self.search_enabled = True
    self.search_timeout = 10
    self.max_context_length = 6000

    # Runs after the defaults above so that persisted values replace them.
    self.load_state()

    # Built-in answers: each value is either a ready string or a callable
    # that produces the string on demand.
    self.factual_knowledge = {
        "time": self._get_current_time,
        "date": self._get_current_date,
        "day": self._get_current_day,
        "year": lambda: f"The current year is {datetime.now().year}",
        "name": "I'm Phoenix AI, your web-enhanced learning assistant!",
        "capabilities": self._get_capabilities_description,
    }

    pattern_count = len(self.learned_patterns)
    print(f"Enhanced bot initialized with {pattern_count} learned patterns")
49
 
50
def chat(self, user_input: str, use_web_search: bool = True, conversation_history: list = None) -> Tuple[str, dict]:
    """Answer a message using built-in facts, then web search, then learned patterns.

    Args:
        user_input: Raw user message; it is lower-cased and stripped first.
        use_web_search: When False the web search step is skipped.
        conversation_history: Optional prior turns, forwarded to the
            learned-response step.

    Returns:
        A ``(response, search_context)`` pair. ``search_context`` is the
        web search result dict when a search ran, otherwise an empty dict.
    """
    query = user_input.lower().strip()

    # 1. Built-in factual answers win outright.
    canned = self._get_factual_response(query)
    if canned:
        empty_context = {}
        self._store_interaction(query, canned, 0.8, empty_context)
        return canned, empty_context

    # 2. Web search, when allowed and the query looks like it needs one.
    search_context = {}
    if use_web_search and self._requires_web_search(query):
        search_context = self._perform_web_search(query)
        if search_context.get('content'):
            web_reply = self._generate_web_informed_response(query, search_context)
            self._store_interaction(query, web_reply, 0.7, search_context)
            return web_reply, search_context

    # 3. Fall back to learned patterns and memory.
    learned_reply = self._get_learned_response(query, conversation_history)
    return learned_reply, search_context
71
 
72
def _requires_web_search(self, user_input: str) -> bool:
    """Tell whether the query contains any web-search trigger substring.

    Matching is plain substring containment against ``user_input`` as
    given (the caller is expected to have lower-cased it).

    Returns:
        True if any trigger occurs anywhere in ``user_input``.
    """
    triggers = (
        # Freshness, how-to and explanation cues.
        'current', 'recent', 'latest', 'today\'s', 'new', 'update', 'breaking',
        'news', '2025', '2024', 'now', 'what happened', 'when did',
        'how to', 'tutorial', 'guide', 'explain',
        # Comparison and ranking cues.
        'best', 'top', 'review', 'compare', 'versus',
    )
    for trigger in triggers:
        if trigger in user_input:
            return True
    return False
89
 
90
def _perform_web_search(self, query: str) -> dict:
    """Search SearXNG first, then DuckDuckGo, and condense the hits.

    Returns:
        The dict produced by ``_process_search_results``; on any exception
        a dict with empty ``content``/``sources`` and an ``error`` message.
    """
    try:
        hits = self._search_searxng(query)
        # Only hit the fallback engine when the primary found nothing.
        hits = hits if hits.get('results') else self._search_duckduckgo(query)
        return self._process_search_results(hits, query)
    except Exception as e:
        print(f"Web search error: {e}")
        return {'content': '', 'sources': [], 'error': str(e)}
107
-
108
def _search_searxng(self, query: str) -> dict:
    """Query public SearXNG instances in turn and return the first answer.

    Args:
        query: Free-text search query; it is URL-encoded before being
            placed into the instance URL.

    Returns:
        ``{'results': [...up to 3 hits...], 'instance': url_template}``
        from the first instance that answers with HTTP 200, otherwise
        ``{'results': []}``.
    """
    from urllib.parse import quote_plus

    # Public SearXNG instances, tried in order.
    instances = [
        "https://searx.be/search?q={query}&format=json",
        "https://search.unlocked.link/search?q={query}&format=json",
        "https://searx.space/search?q={query}&format=json"
    ]

    # quote_plus also escapes '&', '#', '?', ... which would otherwise
    # corrupt the query string (the old code only replaced spaces).
    encoded_query = quote_plus(query)

    for instance in instances:
        try:
            url = instance.format(query=encoded_query)
            response = requests.get(url, timeout=self.search_timeout)
            if response.status_code == 200:
                data = response.json()
                return {
                    'results': data.get('results', [])[:3],  # Top 3 results
                    'instance': instance
                }
        except Exception as e:
            # Best effort: a failing instance must not abort the rotation,
            # but KeyboardInterrupt/SystemExit are no longer swallowed.
            print(f"SearXNG search error ({instance}): {e}")
            continue

    return {'results': []}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
def _search_duckduckgo(self, query: str) -> dict:
    """Fallback search that scrapes DuckDuckGo's HTML results page.

    Args:
        query: Free-text search query; it is URL-encoded before use.

    Returns:
        ``{'results': [...]}`` with up to three dicts holding ``title``,
        ``url`` and ``snippet``; an empty result list on any error.
    """
    from urllib.parse import quote_plus

    try:
        url = f"https://html.duckduckgo.com/html/?q={quote_plus(query)}"
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        response = requests.get(url, headers=headers, timeout=self.search_timeout)

        soup = BeautifulSoup(response.text, 'html.parser')
        results = []

        # '.result' etc. are CSS selectors, so they must go through
        # select()/select_one(); find_all()/find() treat the string as a
        # tag name and never matched anything.
        for result in soup.select('.result', limit=3):
            title_elem = result.select_one('.result__title')
            link_elem = result.select_one('.result__url')
            snippet_elem = result.select_one('.result__snippet')

            if title_elem and link_elem:
                results.append({
                    'title': title_elem.get_text().strip(),
                    'url': link_elem.get_text().strip(),
                    'snippet': snippet_elem.get_text().strip() if snippet_elem else ''
                })

        return {'results': results}
    except Exception as e:
        print(f"DuckDuckGo search error: {e}")
        return {'results': []}
161
 
162
def _process_search_results(self, search_results: dict, original_query: str) -> dict:
    """Condense the top two search hits into one text blob plus a source list.

    Args:
        search_results: Dict with a ``results`` list of hit dicts. Each hit
            may carry its summary under ``snippet`` (DuckDuckGo scraper) or
            under ``content`` (SearXNG JSON).
        original_query: The query that produced the hits; echoed back.

    Returns:
        Dict with ``content`` (truncated to ``self.max_context_length``),
        ``sources`` (title/url dicts), ``original_query`` and
        ``result_count``.
    """
    content_parts = []
    sources = []

    for i, result in enumerate(search_results.get('results', [])[:2]):
        try:
            title = result.get('title', '')
            url = result.get('url', '')
            # Accept either key and never render a missing summary as "None".
            snippet = result.get('snippet') or result.get('content') or ''
        except AttributeError as e:
            # A malformed (non-dict) hit is skipped rather than aborting.
            print(f"Error processing result {i}: {e}")
            continue

        content_parts.append(f"Source {i+1}: {title}. {snippet}")
        sources.append({'title': title, 'url': url})

    full_content = " ".join(content_parts)[:self.max_context_length]

    return {
        'content': full_content,
        'sources': sources,
        'original_query': original_query,
        'result_count': len(sources)
    }
192
-
193
def _generate_web_informed_response(self, user_input: str, search_context: dict) -> str:
    """Wrap the searched content in a randomly chosen answer template.

    Args:
        user_input: The user's query (not used when building the text).
        search_context: Dict with ``content`` and optional ``sources``.

    Returns:
        The formatted answer, or a fixed apology when there is no content.
    """
    content = search_context.get('content', '')
    sources = search_context.get('sources', [])

    if not content:
        return "I tried to search for current information but couldn't find relevant results. Could you rephrase your question?"

    # "Source 1, Source 2, ..." - one label per source, in order.
    source_attribution = ""
    if sources:
        labels = ', '.join(f"Source {n}" for n in range(1, len(sources) + 1))
        source_attribution = f" [Based on search results including: {labels}]"

    response_templates = [
        "Based on current information I found: {content}.{sources}",
        "Here's what I learned from recent sources: {content}.{sources}",
        "According to available information: {content}.{sources}",
        "My search indicates: {content}.{sources}"
    ]

    # Keep the quoted material short; mark truncation with an ellipsis.
    excerpt = content if len(content) <= 500 else content[:500] + "..."
    return random.choice(response_templates).format(content=excerpt, sources=source_attribution)
 
 
 
 
 
 
 
224
 
225
def _get_learned_response(self, user_input: str, conversation_history: list = None) -> str:
    """Produce a reply from learned patterns, or a fallback when none apply.

    A selected candidate is also stored as an interaction with an initial
    reward of 0.6; the fallback reply is not stored here.
    """
    context = self._analyze_input(user_input, conversation_history)
    candidates = self._generate_enhanced_candidates(user_input, context)

    if candidates:
        chosen = self._select_enhanced_response(user_input, candidates, context)
        self._store_interaction(user_input, chosen, 0.6, {})
        return chosen

    return self._generate_contextual_fallback(context)
237
 
238
def _generate_enhanced_candidates(self, user_input: str, context: dict) -> list:
    """Collect unique candidate replies from every available source.

    Sources, in priority order: learned patterns scoring above
    ``self.min_confidence``, web-context templates (when the context has a
    ``web_context``), topic openers (when topics were detected) and
    successful past replies.

    Args:
        user_input: The user's query (not used by the collection itself).
        context: Analysis dict providing ``similar_patterns`` and
            ``topics`` and optionally ``web_context``.

    Returns:
        Candidates with duplicates removed, in first-seen order.
    """
    # 1. Learned patterns that are confident enough.
    candidates = [
        data['response']
        for _pattern, data, _similarity in context['similar_patterns']
        if data['score'] > self.min_confidence
    ]

    # 2. Web-informed templates, if the context carries web content.
    if context.get('web_context'):
        candidates.extend(self._generate_web_context_candidates(context['web_context']))

    # 3. Topic-specific openers.
    if context['topics']:
        candidates.extend(self._generate_topic_candidates(context))

    # 4. Replies that worked well before.
    candidates.extend(self._generate_memory_candidates(context))

    # dict.fromkeys de-duplicates while keeping order; set() made the
    # order (and so tie-breaking during selection) vary between runs.
    return list(dict.fromkeys(candidates))
263
-
264
def _generate_web_context_candidates(self, web_context: dict) -> list:
    """Turn web content into three templated candidate replies.

    Returns:
        One string per template, each embedding the first 300 characters
        of ``web_context['content']``; an empty list without content.
    """
    content = web_context.get('content', '')
    if not content:
        return []

    excerpt = content[:300]
    templates = (
        "I found some relevant information: {content}",
        "Based on available sources: {content}",
        "Recent information suggests: {content}",
    )
    return [template.format(content=excerpt) for template in templates]
 
 
 
 
280
 
281
def _generate_topic_candidates(self, context: dict) -> list:
    """Build sentence openers for the first two detected topics.

    Returns:
        Four opener strings per topic, grouped by topic in input order.
    """
    openers = []
    for topic in context['topics'][:2]:
        openers += [
            f"I understand you're interested in {topic}. Based on my knowledge, ",
            f"Regarding {topic}, I can share that ",
            f"When it comes to {topic}, ",
            f"I've been learning about {topic}. From what I understand, ",
        ]
    return openers
296
-
297
def _generate_memory_candidates(self, context: dict) -> list:
    """Reuse replies from well-rewarded past turns that share a topic.

    A remembered turn qualifies when its reward exceeds 0.7 and its stored
    topics overlap ``context['topics']``.

    Returns:
        The responses of the first three qualifying turns, oldest first.
    """
    def _qualifies(turn):
        if not turn.get('reward', 0) > 0.7:
            return False
        remembered = set(turn.get('context', {}).get('topics', []))
        return bool(remembered & set(context['topics']))

    qualifying = list(filter(_qualifies, self.conversation_memory))
    return [turn['response'] for turn in qualifying[:3]]
312
 
313
def _select_enhanced_response(self, user_input: str, candidates: list, context: dict) -> str:
    """Pick the top-scoring candidate, occasionally the runner-up instead.

    With probability ``self.exploration_rate`` (and at least two
    candidates) the second-best candidate is returned. With no candidates
    a contextual fallback is returned.
    """
    if not candidates:
        return self._generate_contextual_fallback(context)

    ranked = [
        (candidate, self._enhanced_response_score(candidate, user_input, context))
        for candidate in candidates
    ]

    def _by_score(pair):
        return pair[1]

    winner = max(ranked, key=_by_score)

    # The random draw happens on every call, as before.
    explore = random.random() < self.exploration_rate
    if explore and len(ranked) > 1:
        ranked.remove(winner)
        return max(ranked, key=_by_score)[0]

    return winner[0]
332
-
333
def _enhanced_response_score(self, response: str, user_input: str, context: dict) -> float:
    """Score a candidate reply on a 0.5-based scale capped at 1.0.

    Bonuses are added for a moderate word count, engagement markers,
    topics shared with the input, a good historical average and for not
    repeating one of the last five replies.
    """
    score = 0.5

    # Length: 10-50 words is best, 5-100 words is acceptable.
    n_words = len(response.split())
    if 10 <= n_words <= 50:
        score += 0.2
    elif 5 <= n_words <= 100:
        score += 0.1

    # Engagement markers.
    engagement_markers = ('?', 'tell me', 'what do you think')
    if any(marker in response for marker in engagement_markers):
        score += 0.15

    # Topic overlap between the reply and the input.
    shared_topics = set(self._extract_topics(response.lower())) & set(context['topics'])
    if shared_topics:
        score += 0.2 * len(shared_topics)

    # Historical performance of this exact reply.
    fingerprint = hashlib.md5(response.encode()).hexdigest()[:8]
    if fingerprint in self.response_memory:
        score += self.response_memory[fingerprint]['avg_score'] * 0.3

    # Variety bonus when the reply was not used in the last five turns.
    last_five = list(self.conversation_memory)[-5:]
    if response not in [turn['response'] for turn in last_five]:
        score += 0.1

    return min(score, 1.0)
367
-
368
def learn_from_interaction(self, user_input: str, response: str, search_context: dict):
    """Score the reply automatically and feed that score back as feedback."""
    self.learn_from_feedback(
        user_input,
        self._calculate_auto_reward(response, search_context),
    )
373
-
374
def learn_from_feedback(self, user_input: str, reward: float):
    """Apply a reward to the most recent remembered interaction.

    Does nothing when no interaction has been stored yet. Otherwise the
    reward is written onto the latest interaction, the pattern and
    response statistics are updated from it, and the state is saved
    whenever the memory length is a multiple of five.

    Args:
        user_input: Accepted for interface symmetry; the stored input of
            the latest interaction is what is actually learned from.
        reward: Feedback score for the latest reply.
    """
    if not self.conversation_memory:
        return

    latest = self.conversation_memory[-1]
    latest['reward'] = reward

    self._update_learned_patterns(latest['input'], latest['response'], reward)
    self._update_response_memory(latest['response'], reward)
    self.reward_history.append(reward)

    # Periodic persistence.
    if len(self.conversation_memory) % 5 == 0:
        self.save_state()
395
-
396
def _update_learned_patterns(self, user_input: str, response: str, reward: float):
    """Create or update the learned pattern derived from ``user_input``.

    The pattern key is the sorted, de-duplicated set of the first five
    words longer than three characters. New patterns are stored as-is;
    existing ones get a 70/30 blend of old score and new reward, and their
    response is replaced only when the reward beats the previous score by
    more than 0.2.

    Args:
        user_input: The text the pattern key is derived from.
        response: The reply given for that input.
        reward: Feedback score for the reply.
    """
    key_words = [w for w in user_input.split() if len(w) > 3][:5]
    if not key_words:
        return

    pattern = ' '.join(sorted(set(key_words)))
    now = datetime.now().isoformat()

    if pattern not in self.learned_patterns:
        self.learned_patterns[pattern] = {
            'response': response,
            'score': reward,
            'count': 1,
            'last_used': now,
            'usage_count': 1
        }
        return

    entry = self.learned_patterns[pattern]
    # Capture the score BEFORE blending: `entry` is the stored dict itself,
    # so reading entry['score'] afterwards would yield the new value and
    # make the "significantly better" check below almost never fire.
    previous_score = entry['score']

    entry['score'] = previous_score * 0.7 + reward * 0.3
    entry['count'] = entry.get('count', 0) + 1
    entry['usage_count'] = entry.get('usage_count', 0) + 1
    entry['last_used'] = now

    # Replace the stored reply only when this one did clearly better.
    if reward > previous_score + 0.2:
        entry['response'] = response
423
-
424
def _update_response_memory(self, response: str, reward: float):
    """Accumulate reward statistics for a reply, keyed by a short MD5 prefix.

    The key is the first eight hex digits of the reply's MD5 digest. Each
    record tracks the total score, the use count, their running average
    and the time of the latest update.
    """
    key = hashlib.md5(response.encode()).hexdigest()[:8]

    if key in self.response_memory:
        record = self.response_memory[key]
        record['total_score'] += reward
        record['count'] += 1
        record['avg_score'] = record['total_score'] / record['count']
        record['last_used'] = datetime.now().isoformat()
        return

    # First sighting of this reply.
    self.response_memory[key] = dict(
        response=response,
        total_score=reward,
        count=1,
        avg_score=reward,
        last_used=datetime.now().isoformat(),
    )
442
-
443
def _calculate_auto_reward(self, response: str, search_context: dict) -> float:
    """Estimate a reward for a reply from simple surface signals.

    Starting from 0.5, bonuses are added for a 15-100 word reply, for a
    search context that carries content, and for engagement or
    attribution markers in the reply. The result is capped at 1.0.
    """
    markers = ('?', 'according to', 'based on', 'research')
    bonuses = (
        (15 <= len(response.split()) <= 100, 0.2),
        (bool(search_context and search_context.get('content')), 0.15),
        (any(marker in response for marker in markers), 0.1),
    )

    reward = 0.5
    for earned, bonus in bonuses:
        if earned:
            reward += bonus
    return min(reward, 1.0)
460
-
461
def get_learning_stats(self) -> dict:
    """Summarise what the bot has learned so far.

    Returns:
        Dict with the number of learned patterns, the memory size, the
        mean of the last ten rewards (0.5 when there are none), how many
        of those exceed 0.7, how many remembered turns have a non-empty
        search context, and the exploration rate.
    """
    window = list(self.reward_history)[-10:] or [0.5]
    high_rewards = sum(1 for r in window if r > 0.7)
    searched_turns = sum(1 for turn in self.conversation_memory if turn.get('search_context'))

    stats = {}
    stats['patterns'] = len(self.learned_patterns)
    stats['memory_size'] = len(self.conversation_memory)
    stats['avg_score'] = np.mean(window)
    stats['recent_rewards'] = high_rewards
    stats['web_searches'] = searched_turns
    stats['exploration_rate'] = self.exploration_rate
    return stats
473
-
474
- # Existing helper methods from previous implementation (_get_current_time, _get_current_date, etc.)
475
def _get_current_time(self):
    """Return a sentence stating the local time in 12-hour ``HH:MM AM/PM`` form."""
    return f"The current time is {datetime.now().strftime('%I:%M %p')}. What would you like to know?"
478
-
479
def _get_current_date(self):
    """Return a sentence stating today's date as ``Weekday, Month DD, YYYY``."""
    today = date.today()
    return f"Today is {today:%A, %B %d, %Y}. How can I assist you?"
482
-
483
def _get_current_day(self):
    """Return a sentence naming today's weekday."""
    return f"Today is {date.today():%A}. What would you like to discuss?"
486
-
487
def _get_capabilities_description(self):
    """Return the fixed self-description of what the bot can do."""
    return (
        "I can answer questions, search the web for current information, "
        "learn from our conversations, and improve over time. "
        "I support mathematical calculations, factual queries, and open-ended discussions."
    )
489
-
490
def _get_factual_response(self, user_input: str) -> str:
    """Answer time and date questions directly.

    Keywords are matched as substrings of ``user_input``; time keywords
    are checked before date keywords.

    Returns:
        The matching built-in answer, or an empty string when no keyword
        is found.
    """
    # (keywords, name of the method that produces the answer), in priority order.
    routes = (
        (('time', 'clock', 'hour'), '_get_current_time'),
        (('date', 'today', 'day month'), '_get_current_date'),
    )
    for keywords, handler_name in routes:
        if any(keyword in user_input for keyword in keywords):
            return getattr(self, handler_name)()
    return ""
499
 
500
def _analyze_input(self, text: str, conversation_history: list = None) -> dict:
    """Build the analysis dict that the response pipeline works from.

    Returns:
        Dict with the split ``words``, detected ``topics``, word count
        (``length``), whether the text contains ``?``, the ``sentiment``,
        the ``similar_patterns``, the length of the conversation history
        (0 when absent) and a short MD5 ``input_hash`` of the text.
    """
    words = text.split()
    analysis = {'words': words}
    analysis['topics'] = self._extract_topics(text)
    analysis['length'] = len(words)
    analysis['has_question'] = '?' in text
    analysis['sentiment'] = self._analyze_sentiment(text)
    analysis['similar_patterns'] = self._find_similar_patterns(text)
    analysis['conversation_length'] = len(conversation_history) if conversation_history else 0
    analysis['input_hash'] = hashlib.md5(text.encode()).hexdigest()[:8]
    return analysis
515
 
516
  def _extract_topics(self, text: str) -> list:
517
  """Extract topics from text"""
518
  topics = []
519
  topic_keywords = {
520
- 'technology': ['computer', 'tech', 'software', 'ai', 'program', 'code', 'internet', 'phone', 'app'],
521
- 'science': ['space', 'physics', 'biology', 'research', 'discover', 'experiment', 'study'],
522
- 'health': ['health', 'medical', 'medicine', 'doctor', 'fitness', 'diet', 'exercise'],
523
- 'education': ['learn', 'study', 'school', 'university', 'course', 'education'],
524
- 'business': ['business', 'company', 'market', 'finance', 'investment', 'startup'],
525
- 'entertainment': ['movie', 'music', 'game', 'entertainment', 'show', 'celebrity']
526
  }
527
 
528
  text_lower = text.lower()
@@ -534,8 +427,8 @@ class EnhancedLearningBot:
534
 
535
  def _analyze_sentiment(self, text: str) -> str:
536
  """Basic sentiment analysis"""
537
- positive = ['love', 'like', 'good', 'great', 'awesome', 'happy', 'excited', 'amazing', 'wonderful']
538
- negative = ['hate', 'bad', 'terrible', 'awful', 'sad', 'angry', 'upset', 'horrible', 'boring']
539
 
540
  pos_count = sum(1 for word in positive if word in text)
541
  neg_count = sum(1 for word in negative if word in text)
@@ -556,69 +449,91 @@ class EnhancedLearningBot:
556
  pattern_words = set(pattern.split())
557
  similarity = len(text_words & pattern_words) / len(text_words | pattern_words)
558
  if similarity > 0.3:
559
- similar.append((pattern, data, similarity))
560
 
561
- return sorted(similar, key=lambda x: x[2], reverse=True)[:3]
562
-
563
def _generate_contextual_fallback(self, context: dict) -> str:
    """Return one of four generic "tell me more" replies, chosen at random.

    Args:
        context: Analysis dict; not consulted when choosing the reply.
    """
    options = (
        "I'm continuously learning from our conversations. Could you tell me more about what you're looking for?",
        "I'm developing my understanding of this topic. What specific aspect interests you?",
        "This helps me learn and improve. Could you rephrase or provide more context?",
        "I'm building my knowledge base through our discussions. What would you like to explore?",
    )
    return random.choice(options)
572
 
573
def _store_interaction(self, user_input: str, response: str, initial_reward: float, search_context: dict):
    """Append one question/answer turn to the conversation memory.

    The stored record holds the input, the response, a fresh analysis of
    the input, the search context, an ISO timestamp, the initial reward
    and a confidence estimate.
    """
    record = {'input': user_input, 'response': response}
    record['context'] = self._analyze_input(user_input)
    record['search_context'] = search_context
    record['timestamp'] = datetime.now().isoformat()
    record['reward'] = initial_reward
    record['confidence'] = self._calculate_confidence(user_input, response)

    self.conversation_memory.append(record)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
586
 
587
def _calculate_confidence(self, user_input: str, response: str) -> float:
    """Estimate confidence from the scores of similar learned patterns.

    Returns:
        1.2 times the mean score of the similar patterns, capped at 1.0,
        or 0.3 when no similar pattern exists.
    """
    matches = self._find_similar_patterns(user_input)
    if not matches:
        return 0.3

    scores = [data['score'] for _, data, _ in matches]
    return min(np.mean(scores) * 1.2, 1.0)
 
 
 
594
 
595
def save_state(self):
    """Persist the learning state to ``self.state_file`` as indented JSON.

    The state is serialised completely before the file is opened, so a
    serialisation error can no longer truncate a previously saved file.
    Any failure is reported on stdout and otherwise ignored.
    """
    try:
        state = {
            'learned_patterns': self.learned_patterns,
            'response_memory': self.response_memory,
            'conversation_memory': list(self.conversation_memory),
            'reward_history': list(self.reward_history),
            'web_search_cache': self.web_search_cache,
            'last_saved': datetime.now().isoformat()
        }
        # Serialise first: opening with 'w' empties the file immediately,
        # so dumping straight into it would lose the old state on error.
        payload = json.dumps(state, indent=2)
        with open(self.state_file, 'w') as f:
            f.write(payload)
    except Exception as e:
        print(f"Error saving state: {e}")
610
 
611
def load_state(self):
    """Restore the learning state from ``self.state_file`` if it exists.

    Missing keys fall back to empty defaults. Loading is best effort: on
    any error the bot keeps whatever state it already has, but the problem
    is now reported instead of being hidden.
    """
    try:
        if os.path.exists(self.state_file):
            with open(self.state_file, 'r') as f:
                state = json.load(f)

            self.learned_patterns = state.get('learned_patterns', {})
            self.response_memory = state.get('response_memory', {})
            self.conversation_memory = deque(state.get('conversation_memory', []), maxlen=200)
            self.reward_history = deque(state.get('reward_history', []), maxlen=300)
            self.web_search_cache = state.get('web_search_cache', {})
    except Exception as e:
        # Start fresh if loading fails. `Exception` (not a bare except)
        # lets KeyboardInterrupt and SystemExit propagate.
        print(f"Error loading state: {e}")
 
10
  from collections import deque
11
  import time
12
  from typing import Dict, List, Any, Tuple
 
13
  from bs4 import BeautifulSoup
14
+ import urllib.parse
15
 
16
+ class WebEnhancedBot:
17
def __init__(self, state_file="/tmp/web_bot_state.json"):
    """Create the bot's stores and settings, then restore any saved state.

    Args:
        state_file: Path of the JSON file the state is loaded from.
    """
    # Persistence target and the bounded in-memory stores.
    self.state_file = state_file
    self.conversation_memory = deque(maxlen=150)
    self.learned_patterns = {}
    self.response_memory = {}
    self.reward_history = deque(maxlen=200)
    self.web_cache = {}

    # Learning settings.
    self.learning_rate, self.exploration_rate = 0.3, 0.1

    # Web search settings.
    self.search_timeout, self.max_results = 15, 3

    # Runs after the defaults above are in place.
    self.load_state()

    pattern_count = len(self.learned_patterns)
    print(f"Web-enhanced bot initialized with {pattern_count} learned patterns")
 
 
 
 
 
 
 
 
 
 
37
 
38
def chat_with_web_search(self, user_input: str, use_search: bool = True) -> Tuple[str, bool]:
    """Answer a message, preferring a web-backed answer when appropriate.

    Args:
        user_input: The user's message.
        use_search: When False, web search is never attempted.

    Returns:
        ``(response, used_web)`` where ``used_web`` is True only when the
        reply was built from web content.
    """
    # The classifier is consulted on every call, as before.
    wants_search = self._should_search_web(user_input)

    if wants_search and use_search:
        web_content = self._get_web_content(user_input)
        if web_content and web_content.get('content'):
            web_reply = self._create_web_answer(user_input, web_content)
            self._store_interaction(user_input, web_reply, 0.8, web_content)
            return web_reply, True

    # No usable web content: fall back to learned responses.
    learned_reply = self._get_learned_response(user_input)
    self._store_interaction(user_input, learned_reply, 0.5, {})
    return learned_reply, False
 
 
 
56
 
57
def _should_search_web(self, user_input: str) -> bool:
    """Decide whether a query should be answered from the web.

    The lower-cased input is searched for trigger substrings (freshness
    cues, factual question openers, fast-changing topics). Failing that,
    an input that contains a question word and a ``?`` also qualifies.
    """
    lowered = user_input.lower()

    triggers = (
        # Questions that need current information.
        'current', 'recent', 'latest', 'today', 'now', 'breaking', 'news',
        'what happened', 'when did', 'update on', 'new', 'just happened',
        # Factual questions that might need verification.
        'what is', 'who is', 'where is', 'when was', 'how to', 'why does',
        'explain', 'tell me about', 'information about', 'details about',
        # Topics that change frequently.
        'weather', 'temperature', 'forecast', 'stock', 'price', 'crypto',
        'sports', 'game', 'score', 'election', 'politics', 'celebrity',
    )
    for trigger in triggers:
        if trigger in lowered:
            return True

    # Generic questions: a question word plus a question mark.
    if '?' not in user_input:
        return False
    question_words = ('what', 'who', 'where', 'when', 'how', 'why')
    return any(word in lowered for word in question_words)
94
 
95
def _get_web_content(self, query: str) -> Dict[str, Any]:
    """Search the web and return the cleaned content of the top hit.

    Brave is tried first; DuckDuckGo is used whenever Brave yields no
    hits, including when it answers with an empty result list.

    Args:
        query: The user's question.

    Returns:
        Dict with ``content``, ``source`` (URL of the top hit), ``query``
        and ``results_count``; an empty dict when nothing was found or an
        error occurred.
    """
    try:
        search_results = self._search_brave(query)
        # `brave or ddg` is not enough: {'results': []} is a truthy dict,
        # so an empty Brave answer used to block the fallback.
        if not (search_results and search_results.get('results')):
            search_results = self._search_duckduckgo(query)

        if search_results and search_results.get('results'):
            hits = search_results['results']
            top_hit = hits[0]
            return {
                'content': self._extract_meaningful_content(top_hit),
                'source': top_hit.get('url', ''),
                'query': query,
                'results_count': len(hits)
            }
    except Exception as e:
        print(f"Web search error: {e}")

    return {}
114
+
115
def _search_brave(self, query: str) -> Dict[str, Any]:
    """Search with the Brave Search API.

    The API key is read from the ``BRAVE_API_KEY`` environment variable.
    Without a key no request is made: the previous hard-coded placeholder
    token could only ever produce a rejected request.

    Args:
        query: Free-text search query.

    Returns:
        ``{'results': [...]}`` with up to ``self.max_results`` dicts
        holding ``title``, ``url`` and ``description``; an empty dict when
        no key is configured, the request fails or the status is not 200.
    """
    import os

    api_key = os.environ.get("BRAVE_API_KEY", "").strip()
    if not api_key:
        return {}

    try:
        url = "https://api.search.brave.com/res/v1/web/search"
        headers = {
            "Accept": "application/json",
            "X-Subscription-Token": api_key
        }
        params = {
            "q": query,
            "count": self.max_results
        }

        response = requests.get(url, headers=headers, params=params, timeout=self.search_timeout)
        if response.status_code == 200:
            data = response.json()
            web_results = data.get('web', {}).get('results', [])[:self.max_results]
            return {
                'results': [
                    {
                        'title': web_result.get('title', ''),
                        'url': web_result.get('url', ''),
                        'description': web_result.get('description', '')
                    }
                    for web_result in web_results
                ]
            }
    except Exception as e:
        # Best effort: report the failure and let the caller fall back.
        print(f"Brave search error: {e}")
    return {}
143
+
144
def _search_duckduckgo(self, query: str) -> Dict[str, Any]:
    """Search DuckDuckGo: instant answer, then related topic, then HTML scrape.

    Returns:
        ``{'results': [one hit]}`` built from the instant answer abstract
        or from the first of the leading ``self.max_results`` related
        topics that has text; otherwise whatever
        ``_scrape_duckduckgo_html`` returns. An empty dict on any error.
    """
    def _single_hit(title, url, description):
        return {'results': [{'title': title, 'url': url, 'description': description}]}

    try:
        # DuckDuckGo Instant Answer API.
        response = requests.get(
            "https://api.duckduckgo.com/",
            params={
                "q": query,
                "format": "json",
                "no_html": "1",
                "skip_disambig": "1",
            },
            timeout=self.search_timeout,
        )

        if response.status_code == 200:
            data = response.json()

            # Prefer a direct instant answer.
            if data.get('AbstractText'):
                return _single_hit(
                    data.get('Heading', 'Instant Answer'),
                    data.get('AbstractURL', ''),
                    data.get('AbstractText', ''),
                )

            # Otherwise use the first related topic that carries text.
            if data.get('RelatedTopics'):
                for topic in data['RelatedTopics'][:self.max_results]:
                    if not topic.get('Text'):
                        continue
                    first_url = topic.get('FirstURL', '')
                    # The title is derived from the last URL path segment.
                    title = first_url.split('/')[-1].replace('_', ' ')
                    return _single_hit(title, first_url, topic.get('Text', ''))

        # Nothing usable from the API: scrape the HTML results page.
        return self._scrape_duckduckgo_html(query)

    except Exception as e:
        print(f"DuckDuckGo search error: {e}")
        return {}
188
 
189
def _scrape_duckduckgo_html(self, query: str) -> Dict[str, Any]:
    """Scrape DuckDuckGo's HTML results page as the last-resort search.

    Returns:
        ``{'results': [...]}`` with ``title``, ``url`` and ``description``
        for each of the first ``self.max_results`` result blocks that has
        both a title link and a snippet; an empty dict when none qualify
        or an error occurs.
    """
    try:
        browser_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
        }
        page = requests.get(
            f"https://html.duckduckgo.com/html/?q={urllib.parse.quote(query)}",
            headers=browser_headers,
            timeout=self.search_timeout,
        )
        soup = BeautifulSoup(page.text, 'html.parser')

        hits = []
        for block_div in soup.find_all('div', class_='result')[:self.max_results]:
            title_link = block_div.find('a', class_='result__a')
            snippet_link = block_div.find('a', class_='result__snippet')
            if not (title_link and snippet_link):
                continue
            hits.append({
                'title': title_link.get_text().strip(),
                'url': title_link.get('href', ''),
                'description': snippet_link.get_text().strip()
            })

        if hits:
            return {'results': hits}
        return {}

    except Exception as e:
        print(f"DDG HTML scraping error: {e}")
        return {}
219
 
220
def _extract_meaningful_content(self, result: Dict) -> str:
    """Join a hit's title and description into one cleaned-up string.

    Non-empty parts are joined with ``". "``; bracketed citation numbers
    such as ``[3]`` are removed and whitespace runs collapse to a single
    space.
    """
    parts = [result.get(field, '') for field in ('title', 'description')]
    text = ". ".join(part for part in parts if part)

    text = re.sub(r'\[\d+\]', '', text)  # Remove citation numbers
    text = re.sub(r'\s+', ' ', text)  # Normalize whitespace
    return text.strip()
240
+
241
+ def _create_web_answer(self, user_input: str, web_content: Dict) -> str:
242
+ """Create an actual answer using web content"""
243
+ content = web_content.get('content', '')
244
+ source = web_content.get('source', '')
245
 
246
  if not content:
247
+ return "I searched but couldn't find specific information about that. Could you try rephrasing your question?"
248
+
249
+ # Analyze the type of question and create appropriate response
250
+ question_type = self._analyze_question_type(user_input)
251
+
252
+ if question_type == "factual":
253
+ return self._format_factual_answer(user_input, content, source)
254
+ elif question_type == "current_events":
255
+ return self._format_current_events_answer(user_input, content, source)
256
+ elif question_type == "how_to":
257
+ return self._format_how_to_answer(user_input, content, source)
258
+ elif question_type == "weather":
259
+ return self._format_weather_answer(user_input, content, source)
260
+ else:
261
+ return self._format_general_answer(user_input, content, source)
262
+
263
+ def _analyze_question_type(self, user_input: str) -> str:
264
+ """Analyze what type of question this is"""
265
+ input_lower = user_input.lower()
266
+
267
+ if any(word in input_lower for word in ['weather', 'temperature', 'forecast']):
268
+ return "weather"
269
+ elif any(word in input_lower for word in ['how to', 'how do i', 'tutorial', 'guide']):
270
+ return "how_to"
271
+ elif any(word in input_lower for word in ['news', 'current', 'recent', 'breaking', 'today']):
272
+ return "current_events"
273
+ elif any(word in input_lower for word in ['what is', 'who is', 'where is', 'when was']):
274
+ return "factual"
275
+ else:
276
+ return "general"
277
 
278
+ def _format_factual_answer(self, question: str, content: str, source: str) -> str:
279
+ """Format factual answers"""
280
+ # Extract the most relevant sentence
281
+ sentences = content.split('. ')
282
+ relevant_sentence = sentences[0] if sentences else content
283
 
284
+ answer = f"**According to web sources:** {relevant_sentence}"
 
285
 
286
+ if len(sentences) > 1:
287
+ additional_info = '. '.join(sentences[1:3])
288
+ answer += f" {additional_info}."
289
 
290
+ return answer
291
 
292
+ def _format_current_events_answer(self, question: str, content: str, source: str) -> str:
293
+ """Format current events answers"""
294
+ sentences = content.split('. ')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
 
296
+ answer = f"**Latest information:** {content[:400]}"
297
+ if len(content) > 400:
298
+ answer += "..."
299
+
300
+ return answer
301
 
302
+ def _format_how_to_answer(self, question: str, content: str, source: str) -> str:
303
+ """Format how-to answers"""
304
+ # Look for instructional language
305
+ instructions = []
306
+ sentences = content.split('. ')
 
 
 
 
 
 
 
 
307
 
308
+ for sentence in sentences[:4]: # Take first 4 sentences
309
+ if any(word in sentence.lower() for word in ['step', 'first', 'then', 'next', 'after']):
310
+ instructions.append(sentence)
 
 
 
 
 
 
 
 
 
311
 
312
+ if instructions:
313
+ answer = "**Here's what I found:**\n" + "\n".join(f"• {inst}" for inst in instructions[:3])
314
+ else:
315
+ answer = f"**Based on available information:** {sentences[0] if sentences else content}"
316
 
317
+ return answer
318
 
319
+ def _format_weather_answer(self, question: str, content: str, source: str) -> str:
320
+ """Format weather-related answers"""
321
+ # Extract location from question
322
+ location = self._extract_location(question)
323
 
324
+ # Look for temperature and conditions in content
325
+ temp_match = re.search(r'(\d+)\s*°?[CF]', content)
326
+ condition_match = re.search(r'(sunny|rain|cloud|snow|clear|storm)', content.lower())
 
327
 
328
+ answer_parts = []
329
+ if location:
330
+ answer_parts.append(f"**Weather for {location}:**")
331
 
332
+ if temp_match:
333
+ answer_parts.append(f"Temperature around {temp_match.group(1)}°F")
 
 
334
 
335
+ if condition_match:
336
+ answer_parts.append(f"Conditions: {condition_match.group(1).title()}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
 
338
+ if answer_parts:
339
+ return " ".join(answer_parts) + f"\n*Source: {source}*" if source else ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
  else:
341
+ return f"**Weather information:** {content[:300]}"
342
+
343
+ def _format_general_answer(self, question: str, content: str, source: str) -> str:
344
+ """Format general answers"""
345
+ return f"**I found this information:** {content[:500]}" + ("..." if len(content) > 500 else "")
346
+
347
+ def _extract_location(self, text: str) -> str:
348
+ """Extract location from text (simple version)"""
349
+ # Common city/country names
350
+ locations = {
351
+ 'new york', 'london', 'paris', 'tokyo', 'berlin', 'sydney', 'toronto',
352
+ 'mumbai', 'beijing', 'moscow', 'dubai', 'rome', 'madrid', 'amsterdam',
353
+ 'chicago', 'los angeles', 'san francisco', 'seattle', 'boston', 'miami'
354
+ }
 
 
 
 
 
 
 
355
 
356
+ text_lower = text.lower()
357
+ for location in locations:
358
+ if location in text_lower:
359
+ return location.title()
 
360
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  return ""
362
 
363
+ def _get_learned_response(self, user_input: str) -> str:
364
+ """Get response from learned patterns"""
365
+ context = self._analyze_input(user_input)
366
+
367
+ # Try learned patterns first
368
+ similar_patterns = self._find_similar_patterns(user_input)
369
+ if similar_patterns:
370
+ best_pattern = max(similar_patterns, key=lambda x: x[1]['score'])
371
+ if best_pattern[1]['score'] > 0.6:
372
+ return best_pattern[1]['response']
373
+
374
+ # Generate contextual response
375
+ return self._generate_contextual_response(user_input, context)
376
+
377
+ def _generate_contextual_response(self, user_input: str, context: dict) -> str:
378
+ """Generate contextual response when no web results"""
379
+ if context['has_question']:
380
+ responses = [
381
+ "That's an interesting question. Based on my knowledge, ",
382
+ "I understand you're asking about ",
383
+ "That's a great question. From what I've learned, "
384
+ ]
385
+ base = random.choice(responses)
386
+
387
+ if context['topics']:
388
+ return base + f"{random.choice(context['topics'])}. Could you tell me more about what specifically interests you?"
389
+ else:
390
+ return base + "this topic. I'm constantly learning from our conversations."
391
+
392
+ # Conversational responses
393
+ conversational = [
394
+ "I appreciate you sharing that. What are your thoughts on this?",
395
+ "That's interesting. Tell me more about your perspective.",
396
+ "I understand. How does that relate to your experiences?",
397
+ "That's fascinating. I'm learning from our conversation."
398
+ ]
399
+ return random.choice(conversational)
400
+
401
+ def _analyze_input(self, text: str) -> dict:
402
+ """Analyze user input"""
403
  return {
404
+ 'words': text.split(),
405
+ 'topics': self._extract_topics(text),
 
406
  'has_question': '?' in text,
407
+ 'sentiment': self._analyze_sentiment(text)
 
 
 
408
  }
409
 
410
  def _extract_topics(self, text: str) -> list:
411
  """Extract topics from text"""
412
  topics = []
413
  topic_keywords = {
414
+ 'technology': ['computer', 'tech', 'software', 'ai', 'program', 'code'],
415
+ 'science': ['space', 'physics', 'biology', 'research', 'discover'],
416
+ 'sports': ['game', 'sports', 'team', 'player', 'score'],
417
+ 'entertainment': ['movie', 'music', 'show', 'celebrity'],
418
+ 'health': ['health', 'medical', 'fitness', 'diet']
 
419
  }
420
 
421
  text_lower = text.lower()
 
427
 
428
  def _analyze_sentiment(self, text: str) -> str:
429
  """Basic sentiment analysis"""
430
+ positive = ['love', 'like', 'good', 'great', 'awesome', 'happy']
431
+ negative = ['hate', 'bad', 'terrible', 'awful', 'sad', 'angry']
432
 
433
  pos_count = sum(1 for word in positive if word in text)
434
  neg_count = sum(1 for word in negative if word in text)
 
449
  pattern_words = set(pattern.split())
450
  similarity = len(text_words & pattern_words) / len(text_words | pattern_words)
451
  if similarity > 0.3:
452
+ similar.append((pattern, data))
453
 
454
+ return similar
 
 
 
 
 
 
 
 
 
 
455
 
456
+ def _store_interaction(self, user_input: str, response: str, reward: float, web_context: dict):
457
  """Store interaction in memory"""
458
  interaction = {
459
  'input': user_input,
460
  'response': response,
461
+ 'reward': reward,
462
+ 'web_context': web_context,
463
+ 'timestamp': datetime.now().isoformat()
 
 
464
  }
465
 
466
  self.conversation_memory.append(interaction)
467
+
468
+ # Learn from this interaction
469
+ self._update_learning(user_input, response, reward)
470
+
471
+ def _update_learning(self, user_input: str, response: str, reward: float):
472
+ """Update learning from interaction"""
473
+ # Extract key phrases for pattern learning
474
+ words = [w for w in user_input.split() if len(w) > 3][:4]
475
+ if words:
476
+ pattern = ' '.join(words)
477
+
478
+ if pattern not in self.learned_patterns:
479
+ self.learned_patterns[pattern] = {
480
+ 'response': response,
481
+ 'score': reward,
482
+ 'count': 1
483
+ }
484
+ else:
485
+ old = self.learned_patterns[pattern]
486
+ new_score = (old['score'] * old['count'] + reward) / (old['count'] + 1)
487
+ self.learned_patterns[pattern]['score'] = new_score
488
+ self.learned_patterns[pattern]['count'] += 1
489
+
490
+ # Store reward
491
+ self.reward_history.append(reward)
492
+
493
+ # Periodic save
494
+ if len(self.conversation_memory) % 10 == 0:
495
+ self.save_state()
496
+
497
def learn_from_feedback(self, user_input: str, reward: float):
    """Apply explicit feedback to the most recent stored interaction.

    Does nothing when conversation memory is empty. Otherwise overwrites
    the latest record's ``reward`` and re-runs ``_update_learning`` with
    that record's input/response and the new reward. ``user_input`` is
    accepted but not used.
    """
    if not self.conversation_memory:
        return

    latest = self.conversation_memory[-1]
    latest['reward'] = reward
    self._update_learning(latest['input'], latest['response'], reward)
503
 
504
def get_learning_stats(self) -> dict:
    """Report summary statistics about what has been learned so far.

    Returns:
        Dict with ``patterns`` (number of learned patterns),
        ``memory_size`` (stored interactions), ``avg_score`` (mean of the
        last 10 rewards, or of ``[0.5]`` when there are none) and
        ``recent_rewards`` (how many of those rewards exceed 0.7).
    """
    window = list(self.reward_history)[-10:]
    if not window:
        window = [0.5]

    high_count = sum(1 for value in window if value > 0.7)

    stats = {}
    stats['patterns'] = len(self.learned_patterns)
    stats['memory_size'] = len(self.conversation_memory)
    stats['avg_score'] = np.mean(window)
    stats['recent_rewards'] = high_count
    return stats
514
 
515
  def save_state(self):
516
+ """Save learning state"""
517
  try:
518
  state = {
519
  'learned_patterns': self.learned_patterns,
 
520
  'conversation_memory': list(self.conversation_memory),
521
+ 'reward_history': list(self.reward_history)
 
 
522
  }
523
  with open(self.state_file, 'w') as f:
524
  json.dump(state, f, indent=2)
525
  except Exception as e:
526
+ print(f"Save error: {e}")
527
 
528
  def load_state(self):
529
+ """Load learning state"""
530
  try:
531
  if os.path.exists(self.state_file):
532
  with open(self.state_file, 'r') as f:
533
  state = json.load(f)
534
 
535
  self.learned_patterns = state.get('learned_patterns', {})
536
+ self.conversation_memory = deque(state.get('conversation_memory', []), maxlen=150)
537
+ self.reward_history = deque(state.get('reward_history', []), maxlen=200)
 
 
538
  except:
539
+ pass