# geoai / app.py
# mset's picture
# Update app.py
# b95ca5f verified
# raw
# history blame
# 14.1 kB
import gradio as gr
import requests
import re
import xml.etree.ElementTree as ET
import random
from datetime import datetime
from collections import defaultdict, Counter
class SimpleQAAI:
    """Small rule-based question-answering helper.

    Answers are produced by three layers, tried in order:

    1. a table of canned question/answer patterns, matched by Jaccard
       overlap of the word sets;
    2. topic keyword matching, answered from collected news headlines when
       any exist for the topic and from a fixed template otherwise;
    3. a randomly chosen generic fallback sentence.
    """

    # RSS feeds polled by fetch_news_data().
    NEWS_SOURCES = (
        "https://feeds.reuters.com/reuters/worldNews",
        "https://feeds.bbci.co.uk/news/world/rss.xml",
    )
    # Number of <item> entries read from each feed.
    MAX_ITEMS_PER_SOURCE = 3
    # Per-request timeout, in seconds, passed to requests.get().
    REQUEST_TIMEOUT_SECONDS = 5
    # A stored pattern answers a question only above this Jaccard score.
    SIMILARITY_THRESHOLD = 0.6
    # Keywords up to this length must match a whole word (see _matches_any).
    _WHOLE_WORD_MAX_LEN = 3

    def __init__(self):
        self.knowledge_base = defaultdict(list)  # topic -> list of headlines
        self.qa_patterns = {}                    # question -> canned answer
        self.vocabulary = set()                  # lower-cased words seen so far
        self.total_tokens = 0
        self.is_trained = False
        # Initialize with basic Q&A patterns
        self.initialize_basic_knowledge()

    @staticmethod
    def _tokenize(text):
        """Return the lower-cased word tokens of *text*, punctuation dropped."""
        return re.findall(r"\w+", text.lower())

    @classmethod
    def _matches_any(cls, text, keywords):
        """Return True when *text* mentions at least one of *keywords*.

        Short keywords such as 'ai' occur inside many unrelated words
        ("paris", "said", "rain"), so they must equal a whole word. Longer
        keywords keep substring matching so that plurals and compounds
        ("computers", "biotechnology") still count.
        """
        lowered = text.lower()
        tokens = set(cls._tokenize(lowered))
        return any(
            (keyword in tokens)
            if len(keyword) <= cls._WHOLE_WORD_MAX_LEN
            else (keyword in lowered)
            for keyword in keywords
        )

    def initialize_basic_knowledge(self):
        """Seed the pattern table, vocabulary and token count with built-in Q&A."""
        basic_qa = {
            "what is artificial intelligence": "Artificial intelligence is a technology that enables machines to perform tasks that typically require human intelligence, such as learning, reasoning, and problem-solving.",
            "how do computers work": "Computers work by processing data through electronic circuits, following programmed instructions to perform calculations and operations.",
            "where is paris": "Paris is located in France and serves as the capital city of the country.",
            "why is education important": "Education is important because it develops knowledge, critical thinking skills, and prepares people for careers and civic participation.",
            "what is machine learning": "Machine learning is a subset of artificial intelligence that allows systems to automatically learn and improve from data without being explicitly programmed.",
            "how does the internet work": "The internet works through a global network of interconnected computers that communicate using standardized protocols to share information.",
            "what is climate change": "Climate change refers to long-term shifts in global weather patterns and temperatures, largely attributed to human activities.",
            "why renewable energy": "Renewable energy is important because it provides sustainable power sources that don't deplete natural resources and help reduce environmental impact.",
        }
        for question, answer in basic_qa.items():
            self.qa_patterns[question] = answer
            # Same normalisation as process_collected_data(), so the
            # vocabulary does not hold "Paris", "paris" and "paris." apart.
            self.vocabulary.update(self._tokenize(question))
            self.vocabulary.update(self._tokenize(answer))
        self.total_tokens = sum(len(answer.split()) for answer in basic_qa.values())
        print(f"🧠 Initialized with {len(basic_qa)} Q&A patterns")

    def collect_training_data(self):
        """Fetch news headlines, fold them into the knowledge base, mark trained.

        Returns:
            A human-readable summary. When no headline could be fetched the
            system is still marked as trained and relies on built-in patterns.
        """
        print("🕷️ Collecting training data...")
        collected_data = []
        # Try to collect from news sources
        collected_data.extend(self.fetch_news_data())
        if collected_data:
            self.process_collected_data(collected_data)
            self.is_trained = True
            return f"✅ Training completed! Collected {len(collected_data)} articles and {self.total_tokens} total tokens."
        # Use fallback training
        self.is_trained = True
        return "✅ Training completed using built-in knowledge patterns!"

    def fetch_news_data(self):
        """Download headline titles from the configured RSS feeds.

        Best effort: a feed that fails to download or parse is reported on
        stdout and skipped.

        Returns:
            List of titles with punctuation replaced by spaces; titles of
            10 characters or fewer are dropped.
        """
        articles = []
        for source in self.NEWS_SOURCES:
            try:
                response = requests.get(source, timeout=self.REQUEST_TIMEOUT_SECONDS)
                if response.status_code != 200:
                    print(f"⚠️ Skipping {source}: HTTP {response.status_code}")
                    continue
                # NOTE(security): xml.etree is not hardened against malicious
                # XML; acceptable only while the feed list stays hard-coded.
                root = ET.fromstring(response.content)
                titles = []
                for item in root.findall(".//item")[:self.MAX_ITEMS_PER_SOURCE]:
                    title = item.find("title")
                    if title is not None and title.text:
                        clean_title = re.sub(r'[^\w\s]', ' ', title.text).strip()
                        if len(clean_title) > 10:
                            titles.append(clean_title)
                articles.extend(titles)
                # Report this feed's own count, not the running total.
                print(f"📰 Collected {len(titles)} articles from {source}")
            except Exception as e:
                # Deliberately broad: one broken feed must not abort training.
                print(f"⚠️ Failed to collect from {source}: {str(e)}")
        return articles

    def process_collected_data(self, data):
        """File each text in *data* under a topic and update the statistics.

        Texts already stored under their topic are skipped, so running the
        training again does not duplicate headlines or inflate the token
        count.
        """
        topic_rules = (
            ('technology', ('technology', 'ai', 'computer')),
            ('environment', ('climate', 'environment', 'energy')),
            ('economy', ('economy', 'market', 'business')),
        )
        for text in data:
            # Simple topic extraction: first rule that matches wins.
            category = 'general'
            for topic, keywords in topic_rules:
                if self._matches_any(text, keywords):
                    category = topic
                    break
            if text in self.knowledge_base[category]:
                continue
            self.knowledge_base[category].append(text)
            self.vocabulary.update(self._tokenize(text))
            self.total_tokens += len(text.split())
        print(f"📚 Processed data into {len(self.knowledge_base)} knowledge categories")

    def answer_question(self, question):
        """Answer *question* using patterns, then topics, then a fallback.

        An empty, whitespace-only or None question yields a greeting.
        """
        if not question or not question.strip():
            return "Hello! I'm an AI that learns from data. Ask me a question and I'll try to answer based on what I've learned!"
        question_clean = question.lower().strip()
        # Direct pattern matching: take the best-scoring stored question.
        best_pattern = None
        best_score = 0.0
        for pattern in self.qa_patterns:
            score = self.calculate_similarity(question_clean, pattern)
            if score > best_score:
                best_pattern, best_score = pattern, score
        if best_pattern is not None and best_score > self.SIMILARITY_THRESHOLD:
            return f"Based on my training: {self.qa_patterns[best_pattern]}"
        # Topic-based responses
        topic_response = self.get_topic_response(question_clean)
        if topic_response:
            return topic_response
        # Fallback response
        return self.generate_fallback_response(question_clean)

    def calculate_similarity(self, text1, text2):
        """Return the Jaccard similarity (0.0-1.0) of the two texts' word sets.

        Words are compared case-insensitively and without punctuation, so a
        trailing '?' does not prevent a match. Returns 0.0 when either text
        has no words.
        """
        words1 = set(self._tokenize(text1))
        words2 = set(self._tokenize(text2))
        if not words1 or not words2:
            return 0.0
        return len(words1 & words2) / len(words1 | words2)

    def get_topic_response(self, question):
        """Return a topic-based answer, or None when no topic keyword matches.

        The first topic whose keywords match decides the answer: its first
        stored headline (cut to 150 characters) if there is one, otherwise
        the topic's template text.
        """
        topic_keywords = {
            'technology': ['technology', 'computer', 'ai', 'artificial', 'machine', 'internet', 'digital'],
            'environment': ['climate', 'environment', 'energy', 'renewable', 'carbon', 'sustainability'],
            'economy': ['economy', 'economic', 'market', 'business', 'finance', 'money'],
            'education': ['education', 'learning', 'school', 'university', 'knowledge', 'study'],
        }
        for topic, keywords in topic_keywords.items():
            if not self._matches_any(question, keywords):
                continue
            # Membership test first so the defaultdict gains no empty entry.
            if topic in self.knowledge_base and self.knowledge_base[topic]:
                return f"Based on recent information about {topic}: {self.knowledge_base[topic][0][:150]}..."
            return self.get_topic_template_response(topic, question)
        return None

    def get_topic_template_response(self, topic, question):
        """Return the canned paragraph for *topic*, phrased by question form.

        The prefix differs depending on whether *question* contains a '?'.
        Unknown topics get a generic sentence.
        """
        templates = {
            'technology': "Technology is rapidly evolving and transforming how we work, communicate, and solve problems. Modern technological advances include artificial intelligence, machine learning, and digital innovations.",
            'environment': "Environmental issues like climate change require urgent attention. Solutions include renewable energy adoption, sustainable practices, and reduced carbon emissions.",
            'economy': "Economic factors influence global markets, employment, and business growth. Understanding economic principles helps in making informed decisions.",
            'education': "Education plays a crucial role in personal development and societal progress. It provides knowledge, skills, and opportunities for growth.",
        }
        base_response = templates.get(topic, "This is an important topic that involves multiple factors and considerations.")
        if '?' in question:
            return f"Regarding your question about {topic}: {base_response}"
        return f"About {topic}: {base_response}"

    def generate_fallback_response(self, question):
        """Return a random generic sentence; *question* is currently unused."""
        fallback_responses = [
            "That's an interesting question. Based on general knowledge, this topic involves various factors that need consideration.",
            "From what I understand, this subject has multiple aspects worth exploring further.",
            "This is a complex topic that relates to several areas of knowledge and research.",
            "Based on my training data, this question touches on important concepts that merit detailed analysis.",
        ]
        return random.choice(fallback_responses)

    def get_system_status(self):
        """Return a Markdown-flavoured, multi-line status report."""
        if self.is_trained:
            readiness = "✅ **System is trained and ready**"
        else:
            readiness = "⏳ **System ready for training**"
        lines = [
            "🤖 **SIMPLE Q&A AI STATUS**",
            "",
            readiness,
            "",
            "**📊 Statistics:**",
            f"• **Total tokens processed:** {self.total_tokens:,}",
            f"• **Vocabulary size:** {len(self.vocabulary):,} words",
            f"• **Q&A patterns:** {len(self.qa_patterns)} direct patterns",
            f"• **Knowledge categories:** {len(self.knowledge_base)}",
            f"• **Training status:** {'Completed' if self.is_trained else 'Pending'}",
            "",
            "**🎯 Capabilities:**",
            "• Answers questions using pattern matching",
            "• Learns from news articles and data",
            "• Handles multiple topics and domains",
            "• Provides fallback responses for unknown queries",
        ]
        return "\n".join(lines) + "\n"
# Module-level AI instance used by the callback functions in this file
# (start_training, chat_function, refresh_status). Constructing it only
# seeds the built-in Q&A patterns; feeds are fetched when training runs.
ai_system = SimpleQAAI()
def start_training():
    """Run data collection on the shared AI instance and report the outcome.

    Returns:
        The summary produced by ``collect_training_data()``, or a failure
        message when that call raises.
    """
    try:
        return ai_system.collect_training_data()
    except Exception as exc:
        # UI boundary: surface the problem as text instead of raising.
        return f"❌ Training failed: {str(exc)}"
def chat_function(message, history):
    """Handle one chat turn.

    Args:
        message: Text typed by the user; an empty value is ignored.
        history: Existing list of ``[user, bot]`` pairs; ``None`` is
            treated as an empty conversation.

    Returns:
        ``(updated_history, "")`` - the second element clears the input box.
    """
    # Work on a copy: tolerates a missing history and leaves the caller's
    # list untouched.
    history = list(history or [])
    if not message:
        return history, ""
    try:
        response = ai_system.answer_question(message)
    except Exception as e:
        # UI boundary: show the failure in the chat instead of raising.
        response = f"Sorry, I encountered an error: {str(e)}"
    history.append([message, response])
    return history, ""
def refresh_status():
    """Return the current status report of the shared AI instance."""
    current_status = ai_system.get_system_status()
    return current_status
# Create Gradio interface: chat on the left, system controls on the right.
with gr.Blocks(theme=gr.themes.Soft(), title="Simple Q&A AI") as app:
    gr.HTML("""
    <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 20px;">
        <h1>🤖 Simple Question Answering AI</h1>
        <p><b>Learn from data and answer questions intelligently</b></p>
        <p>Stable • Fast • Reliable</p>
    </div>
    """)

    with gr.Row():
        # Wide column: conversation, question box and send/clear buttons.
        with gr.Column(scale=3):
            gr.Markdown("### 💬 Chat with AI")
            chatbot = gr.Chatbot(
                value=[],
                label="AI Assistant",
                height=400
            )
            msg_input = gr.Textbox(
                label="Your Question",
                placeholder="Ask me anything: What is AI? How does technology work?",
                lines=2
            )
            with gr.Row():
                send_btn = gr.Button("💬 Send", variant="primary")
                clear_btn = gr.Button("🗑️ Clear", variant="secondary")

        # Narrow column: read-only status report plus training controls.
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ System Control")
            status_box = gr.Textbox(
                label="System Status",
                value=ai_system.get_system_status(),
                lines=16,
                interactive=False
            )
            train_btn = gr.Button("🚀 Start Training", variant="primary")
            refresh_btn = gr.Button("🔄 Refresh Status", variant="secondary")

    # Example questions
    gr.Examples(
        examples=[
            "What is artificial intelligence?",
            "How do computers work?",
            "Why is education important?",
            "What is climate change?",
            "How does the internet work?",
            "What is machine learning?"
        ],
        inputs=msg_input,
        label="📝 Try these questions"
    )

    # Event handlers: the Send button and pressing Enter share one callback.
    send_btn.click(
        fn=chat_function,
        inputs=[msg_input, chatbot],
        outputs=[chatbot, msg_input]
    )
    msg_input.submit(
        fn=chat_function,
        inputs=[msg_input, chatbot],
        outputs=[chatbot, msg_input]
    )
    # Clear resets both the conversation and the input box.
    clear_btn.click(
        fn=lambda: ([], ""),
        outputs=[chatbot, msg_input]
    )
    # Both buttons write their text result into the status box.
    train_btn.click(
        fn=start_training,
        outputs=[status_box]
    )
    refresh_btn.click(
        fn=refresh_status,
        outputs=[status_box]
    )

# Launch the app
if __name__ == "__main__":
    app.launch()