import gradio as gr import requests import re import xml.etree.ElementTree as ET import random from datetime import datetime from collections import defaultdict, Counter class SimpleQAAI: def __init__(self): self.knowledge_base = defaultdict(list) self.qa_patterns = {} self.vocabulary = set() self.total_tokens = 0 self.is_trained = False # Initialize with basic Q&A patterns self.initialize_basic_knowledge() def initialize_basic_knowledge(self): """Initialize with basic Q&A knowledge""" basic_qa = { "what is artificial intelligence": "Artificial intelligence is a technology that enables machines to perform tasks that typically require human intelligence, such as learning, reasoning, and problem-solving.", "how do computers work": "Computers work by processing data through electronic circuits, following programmed instructions to perform calculations and operations.", "where is paris": "Paris is located in France and serves as the capital city of the country.", "why is education important": "Education is important because it develops knowledge, critical thinking skills, and prepares people for careers and civic participation.", "what is machine learning": "Machine learning is a subset of artificial intelligence that allows systems to automatically learn and improve from data without being explicitly programmed.", "how does the internet work": "The internet works through a global network of interconnected computers that communicate using standardized protocols to share information.", "what is climate change": "Climate change refers to long-term shifts in global weather patterns and temperatures, largely attributed to human activities.", "why renewable energy": "Renewable energy is important because it provides sustainable power sources that don't deplete natural resources and help reduce environmental impact." } for question, answer in basic_qa.items(): self.qa_patterns[question] = answer words = question.split() + answer.split() self.vocabulary.update(words) self.total_tokens = sum(len(answer.split()) for answer in basic_qa.values()) print(f"🧠 Initialized with {len(basic_qa)} Q&A patterns") def collect_training_data(self): """Collect training data from public sources""" print("🕷️ Collecting training data...") collected_data = [] # Try to collect from news sources news_data = self.fetch_news_data() collected_data.extend(news_data) # Process collected data if collected_data: self.process_collected_data(collected_data) self.is_trained = True return f"✅ Training completed! Collected {len(collected_data)} articles and {self.total_tokens} total tokens." else: # Use fallback training self.is_trained = True return "✅ Training completed using built-in knowledge patterns!" def fetch_news_data(self): """Fetch data from news sources""" news_sources = [ "https://feeds.reuters.com/reuters/worldNews", "https://feeds.bbci.co.uk/news/world/rss.xml" ] articles = [] for source in news_sources: try: response = requests.get(source, timeout=5) if response.status_code == 200: root = ET.fromstring(response.content) for item in root.findall(".//item")[:3]: # Limit to 3 per source title = item.find("title") if title is not None and title.text: clean_title = re.sub(r'[^\w\s]', ' ', title.text).strip() if len(clean_title) > 10: articles.append(clean_title) print(f"📰 Collected {len(articles)} articles from {source}") except Exception as e: print(f"⚠️ Failed to collect from {source}: {str(e)}") continue return articles def process_collected_data(self, data): """Process collected data into knowledge base""" for text in data: # Extract key topics and add to knowledge base words = text.lower().split() self.vocabulary.update(words) # Simple topic extraction if any(word in text.lower() for word in ['technology', 'ai', 'computer']): self.knowledge_base['technology'].append(text) elif any(word in text.lower() for word in ['climate', 'environment', 'energy']): self.knowledge_base['environment'].append(text) elif any(word in text.lower() for word in ['economy', 'market', 'business']): self.knowledge_base['economy'].append(text) else: self.knowledge_base['general'].append(text) # Update token count self.total_tokens += sum(len(text.split()) for text in data) print(f"📚 Processed data into {len(self.knowledge_base)} knowledge categories") def answer_question(self, question): """Answer a question using available knowledge""" if not question.strip(): return "Hello! I'm an AI that learns from data. Ask me a question and I'll try to answer based on what I've learned!" question_clean = question.lower().strip() # Direct pattern matching for pattern, answer in self.qa_patterns.items(): if self.calculate_similarity(question_clean, pattern) > 0.6: return f"Based on my training: {answer}" # Topic-based responses topic_response = self.get_topic_response(question_clean) if topic_response: return topic_response # Fallback response return self.generate_fallback_response(question_clean) def calculate_similarity(self, text1, text2): """Calculate similarity between two texts""" words1 = set(text1.split()) words2 = set(text2.split()) if not words1 or not words2: return 0.0 intersection = len(words1.intersection(words2)) union = len(words1.union(words2)) return intersection / union if union > 0 else 0.0 def get_topic_response(self, question): """Get response based on topic matching""" topic_keywords = { 'technology': ['technology', 'computer', 'ai', 'artificial', 'machine', 'internet', 'digital'], 'environment': ['climate', 'environment', 'energy', 'renewable', 'carbon', 'sustainability'], 'economy': ['economy', 'economic', 'market', 'business', 'finance', 'money'], 'education': ['education', 'learning', 'school', 'university', 'knowledge', 'study'] } # Find matching topic for topic, keywords in topic_keywords.items(): if any(keyword in question for keyword in keywords): if topic in self.knowledge_base and self.knowledge_base[topic]: return f"Based on recent information about {topic}: {self.knowledge_base[topic][0][:150]}..." else: return self.get_topic_template_response(topic, question) return None def get_topic_template_response(self, topic, question): """Get template response for a topic""" templates = { 'technology': "Technology is rapidly evolving and transforming how we work, communicate, and solve problems. Modern technological advances include artificial intelligence, machine learning, and digital innovations.", 'environment': "Environmental issues like climate change require urgent attention. Solutions include renewable energy adoption, sustainable practices, and reduced carbon emissions.", 'economy': "Economic factors influence global markets, employment, and business growth. Understanding economic principles helps in making informed decisions.", 'education': "Education plays a crucial role in personal development and societal progress. It provides knowledge, skills, and opportunities for growth." } base_response = templates.get(topic, "This is an important topic that involves multiple factors and considerations.") if '?' in question: return f"Regarding your question about {topic}: {base_response}" else: return f"About {topic}: {base_response}" def generate_fallback_response(self, question): """Generate fallback response for unknown questions""" fallback_responses = [ "That's an interesting question. Based on general knowledge, this topic involves various factors that need consideration.", "From what I understand, this subject has multiple aspects worth exploring further.", "This is a complex topic that relates to several areas of knowledge and research.", "Based on my training data, this question touches on important concepts that merit detailed analysis." ] return random.choice(fallback_responses) def get_system_status(self): """Get current system status""" status = "🤖 **SIMPLE Q&A AI STATUS**\n\n" if self.is_trained: status += "✅ **System is trained and ready**\n\n" else: status += "⏳ **System ready for training**\n\n" status += "**📊 Statistics:**\n" status += f"• **Total tokens processed:** {self.total_tokens:,}\n" status += f"• **Vocabulary size:** {len(self.vocabulary):,} words\n" status += f"• **Q&A patterns:** {len(self.qa_patterns)} direct patterns\n" status += f"• **Knowledge categories:** {len(self.knowledge_base)}\n" status += f"• **Training status:** {'Completed' if self.is_trained else 'Pending'}\n" status += "\n**🎯 Capabilities:**\n" status += "• Answers questions using pattern matching\n" status += "• Learns from news articles and data\n" status += "• Handles multiple topics and domains\n" status += "• Provides fallback responses for unknown queries\n" return status # Initialize the AI system ai_system = SimpleQAAI() def start_training(): """Start the training process""" try: result = ai_system.collect_training_data() return result except Exception as e: return f"❌ Training failed: {str(e)}" def chat_function(message, history): """Handle chat interactions""" if not message: return history, "" try: response = ai_system.answer_question(message) history.append([message, response]) return history, "" except Exception as e: error_response = f"Sorry, I encountered an error: {str(e)}" history.append([message, error_response]) return history, "" def refresh_status(): """Refresh system status""" return ai_system.get_system_status() # Create Gradio interface with gr.Blocks(theme=gr.themes.Soft(), title="Simple Q&A AI") as app: gr.HTML("""
Learn from data and answer questions intelligently
Stable • Fast • Reliable