import gradio as gr
import requests
import re
import xml.etree.ElementTree as ET
import random
from datetime import datetime
from collections import defaultdict, Counter

class SimpleQAAI:
    """Rule-based question answering over canned Q&A pairs and news headlines.

    A question is answered in three stages (see ``answer_question``): a fuzzy
    match against the stored question patterns, then a keyword-driven topic
    response, and finally a randomly chosen generic fallback.

    Attributes:
        knowledge_base: ``defaultdict(list)`` mapping a topic name to the
            headlines filed under it by ``process_collected_data``.
        qa_patterns: dict mapping a lowercase question to its canned answer.
        vocabulary: set of lowercase word tokens seen so far.
        total_tokens: running count of whitespace-separated words processed.
        is_trained: True once ``collect_training_data`` has run.
    """

    # A stored question matches only when its Jaccard score exceeds this.
    SIMILARITY_THRESHOLD = 0.6

    # Keywords this short are matched as whole words only; as substrings they
    # fire inside unrelated words ("ai" in "Ukraine", "explain", "rain").
    WHOLE_WORD_MAX_LEN = 3

    # Word tokenizer: runs of word characters, keeping inner apostrophes
    # ("don't" stays one token) and dropping surrounding punctuation.
    _TOKEN_RE = re.compile(r"\w+(?:'\w+)*")

    # Topic -> keywords used to file headlines; checked in this order, the
    # first topic that matches wins, anything else is filed under 'general'.
    _ARTICLE_TOPICS = (
        ('technology', ('technology', 'ai', 'computer')),
        ('environment', ('climate', 'environment', 'energy')),
        ('economy', ('economy', 'market', 'business')),
    )

    # Topic -> keywords used to route questions; checked in this order.
    _QUESTION_TOPICS = (
        ('technology', ('technology', 'computer', 'ai', 'artificial', 'machine', 'internet', 'digital')),
        ('environment', ('climate', 'environment', 'energy', 'renewable', 'carbon', 'sustainability')),
        ('economy', ('economy', 'economic', 'market', 'business', 'finance', 'money')),
        ('education', ('education', 'learning', 'school', 'university', 'knowledge', 'study')),
    )

    def __init__(self):
        self.knowledge_base = defaultdict(list)
        self.qa_patterns = {}
        self.vocabulary = set()
        self.total_tokens = 0
        self.is_trained = False

        # Initialize with basic Q&A patterns
        self.initialize_basic_knowledge()

    @classmethod
    def _tokenize(cls, text):
        """Return the lowercase word tokens of ``text`` without punctuation."""
        return cls._TOKEN_RE.findall(text.lower())

    @classmethod
    def _mentions_any(cls, text, keywords):
        """Return True if ``text`` mentions at least one of ``keywords``.

        Matching is case-insensitive. Keywords longer than
        ``WHOLE_WORD_MAX_LEN`` match as substrings (so 'computer' still hits
        'computers'); shorter ones must equal a whole word of the text.
        """
        lowered = text.lower()
        words = set(cls._tokenize(lowered))
        return any(
            keyword in words if len(keyword) <= cls.WHOLE_WORD_MAX_LEN else keyword in lowered
            for keyword in keywords
        )

    @staticmethod
    def _parse_feed_titles(xml_content, limit=3):
        """Extract cleaned ``<item><title>`` texts from an RSS document.

        Args:
            xml_content: RSS XML as ``bytes`` or ``str``.
            limit: maximum number of ``<item>`` elements to inspect.

        Returns:
            List of titles with punctuation replaced by spaces and whitespace
            collapsed; titles of 10 characters or fewer are dropped.

        Raises:
            xml.etree.ElementTree.ParseError: if the document is not
                well-formed XML.
        """
        # NOTE(security): the feed is remote, untrusted input and the stdlib
        # ElementTree parser is documented as not hardened against maliciously
        # constructed XML. Consider a hardened parser if the source list grows.
        root = ET.fromstring(xml_content)
        titles = []
        for item in root.findall(".//item")[:limit]:
            title = item.find("title")
            if title is None or not title.text:
                continue
            clean_title = ' '.join(re.sub(r'[^\w\s]', ' ', title.text).split())
            if len(clean_title) > 10:
                titles.append(clean_title)
        return titles

    def initialize_basic_knowledge(self):
        """Seed ``qa_patterns``, ``vocabulary`` and ``total_tokens`` with built-in Q&A pairs."""
        basic_qa = {
            "what is artificial intelligence": "Artificial intelligence is a technology that enables machines to perform tasks that typically require human intelligence, such as learning, reasoning, and problem-solving.",
            "how do computers work": "Computers work by processing data through electronic circuits, following programmed instructions to perform calculations and operations.",
            "where is paris": "Paris is located in France and serves as the capital city of the country.",
            "why is education important": "Education is important because it develops knowledge, critical thinking skills, and prepares people for careers and civic participation.",
            "what is machine learning": "Machine learning is a subset of artificial intelligence that allows systems to automatically learn and improve from data without being explicitly programmed.",
            "how does the internet work": "The internet works through a global network of interconnected computers that communicate using standardized protocols to share information.",
            "what is climate change": "Climate change refers to long-term shifts in global weather patterns and temperatures, largely attributed to human activities.",
            "why renewable energy": "Renewable energy is important because it provides sustainable power sources that don't deplete natural resources and help reduce environmental impact."
        }

        for question, answer in basic_qa.items():
            self.qa_patterns[question] = answer
            # Same normalization as process_collected_data, so "Paris" and
            # "paris," do not inflate the vocabulary as separate entries.
            self.vocabulary.update(self._tokenize(question))
            self.vocabulary.update(self._tokenize(answer))

        self.total_tokens = sum(len(answer.split()) for answer in basic_qa.values())
        print(f"🧠 Initialized with {len(basic_qa)} Q&A patterns")

    def collect_training_data(self):
        """Fetch news headlines, file them in the knowledge base, mark the system trained.

        Returns:
            A human-readable completion message. When no headline could be
            fetched the built-in patterns are used alone and the message says so.
        """
        print("🕷️ Collecting training data...")

        collected_data = []

        # Try to collect from news sources
        news_data = self.fetch_news_data()
        collected_data.extend(news_data)

        # Process collected data
        if collected_data:
            self.process_collected_data(collected_data)
            self.is_trained = True
            return f"✅ Training completed! Collected {len(collected_data)} articles and {self.total_tokens} total tokens."
        else:
            # Use fallback training
            self.is_trained = True
            return "✅ Training completed using built-in knowledge patterns!"

    def fetch_news_data(self):
        """Download headline titles from the configured RSS feeds.

        Best effort: a source that cannot be reached, answers with a non-200
        status, or returns malformed XML is reported on stdout and skipped.

        Returns:
            List of cleaned headline strings (at most 3 per source).
        """
        news_sources = [
            "https://feeds.reuters.com/reuters/worldNews",
            "https://feeds.bbci.co.uk/news/world/rss.xml"
        ]

        articles = []

        for source in news_sources:
            try:
                response = requests.get(source, timeout=5)
                if response.status_code != 200:
                    print(f"⚠️ Failed to collect from {source}: HTTP {response.status_code}")
                    continue
                titles = self._parse_feed_titles(response.content, limit=3)
            except (requests.RequestException, ET.ParseError) as e:
                print(f"⚠️ Failed to collect from {source}: {str(e)}")
                continue

            articles.extend(titles)
            # Report this source's own contribution, not the running total.
            print(f"📰 Collected {len(titles)} articles from {source}")

        return articles

    def process_collected_data(self, data):
        """File each text under a topic and update vocabulary and token count.

        Args:
            data: iterable of headline strings.
        """
        for text in data:
            self.vocabulary.update(self._tokenize(text))

            # First topic whose keywords appear wins; otherwise 'general'.
            topic = next(
                (name for name, keywords in self._ARTICLE_TOPICS
                 if self._mentions_any(text, keywords)),
                'general',
            )
            self.knowledge_base[topic].append(text)

        # Update token count
        self.total_tokens += sum(len(text.split()) for text in data)
        print(f"📚 Processed data into {len(self.knowledge_base)} knowledge categories")

    def answer_question(self, question):
        """Answer ``question`` from stored patterns, topic knowledge, or a fallback.

        Args:
            question: the user's text; ``None``, empty or whitespace-only
                input yields a greeting.

        Returns:
            The answer string.
        """
        if not question or not question.strip():
            return "Hello! I'm an AI that learns from data. Ask me a question and I'll try to answer based on what I've learned!"

        question_clean = question.lower().strip()

        # Direct pattern matching: take the best-scoring stored question
        # rather than the first one that happens to clear the threshold.
        best_score, best_answer = 0.0, None
        for pattern, answer in self.qa_patterns.items():
            score = self.calculate_similarity(question_clean, pattern)
            if score > best_score:
                best_score, best_answer = score, answer
        if best_answer is not None and best_score > self.SIMILARITY_THRESHOLD:
            return f"Based on my training: {best_answer}"

        # Topic-based responses (punctuation is kept: the template checks for '?')
        topic_response = self.get_topic_response(question_clean)
        if topic_response:
            return topic_response

        # Fallback response
        return self.generate_fallback_response(question_clean)

    def calculate_similarity(self, text1, text2):
        """Return the Jaccard similarity of the two texts' word sets.

        Words are compared case-insensitively with punctuation removed, so
        "Where is Paris?" and "where is paris" score 1.0.

        Returns:
            A float in [0.0, 1.0]; 0.0 when either text has no words.
        """
        words1 = set(self._tokenize(text1))
        words2 = set(self._tokenize(text2))

        if not words1 or not words2:
            return 0.0

        return len(words1 & words2) / len(words1 | words2)

    def get_topic_response(self, question):
        """Return a topic-based answer, or ``None`` if no topic keyword matches.

        If headlines are stored for the matched topic, the first one is quoted
        (cut to 150 characters); otherwise a canned template is used.
        """
        for topic, keywords in self._QUESTION_TOPICS:
            if self._mentions_any(question, keywords):
                # .get() avoids creating an empty entry in the defaultdict.
                stored = self.knowledge_base.get(topic)
                if stored:
                    return f"Based on recent information about {topic}: {stored[0][:150]}..."
                return self.get_topic_template_response(topic, question)

        return None

    def get_topic_template_response(self, topic, question):
        """Return the canned paragraph for ``topic``, prefixed according to whether ``question`` contains '?'."""
        templates = {
            'technology': "Technology is rapidly evolving and transforming how we work, communicate, and solve problems. Modern technological advances include artificial intelligence, machine learning, and digital innovations.",
            'environment': "Environmental issues like climate change require urgent attention. Solutions include renewable energy adoption, sustainable practices, and reduced carbon emissions.",
            'economy': "Economic factors influence global markets, employment, and business growth. Understanding economic principles helps in making informed decisions.",
            'education': "Education plays a crucial role in personal development and societal progress. It provides knowledge, skills, and opportunities for growth."
        }

        base_response = templates.get(topic, "This is an important topic that involves multiple factors and considerations.")

        if '?' in question:
            return f"Regarding your question about {topic}: {base_response}"
        return f"About {topic}: {base_response}"

    def generate_fallback_response(self, question):
        """Return a randomly chosen generic reply; ``question`` is not inspected."""
        fallback_responses = [
            "That's an interesting question. Based on general knowledge, this topic involves various factors that need consideration.",
            "From what I understand, this subject has multiple aspects worth exploring further.",
            "This is a complex topic that relates to several areas of knowledge and research.",
            "Based on my training data, this question touches on important concepts that merit detailed analysis."
        ]

        return random.choice(fallback_responses)

    def get_system_status(self):
        """Return a Markdown-formatted report of training state and statistics."""
        readiness = (
            "✅ **System is trained and ready**\n\n"
            if self.is_trained
            else "⏳ **System ready for training**\n\n"
        )
        parts = [
            "🤖 **SIMPLE Q&A AI STATUS**\n\n",
            readiness,
            "**📊 Statistics:**\n",
            f"• **Total tokens processed:** {self.total_tokens:,}\n",
            f"• **Vocabulary size:** {len(self.vocabulary):,} words\n",
            f"• **Q&A patterns:** {len(self.qa_patterns)} direct patterns\n",
            f"• **Knowledge categories:** {len(self.knowledge_base)}\n",
            f"• **Training status:** {'Completed' if self.is_trained else 'Pending'}\n",
            "\n**🎯 Capabilities:**\n",
            "• Answers questions using pattern matching\n",
            "• Learns from news articles and data\n",
            "• Handles multiple topics and domains\n",
            "• Provides fallback responses for unknown queries\n",
        ]
        return "".join(parts)

# Module-level instance shared by start_training, chat_function,
# refresh_status and the UI below. Constructing it runs
# initialize_basic_knowledge(), which prints a startup line.
ai_system = SimpleQAAI()

def start_training():
    """Run data collection on the shared AI system and report the outcome.

    Returns:
        The completion message from ``collect_training_data``, or a
        "Training failed" message carrying the error text if it raised.
    """
    try:
        return ai_system.collect_training_data()
    except Exception as exc:
        return f"❌ Training failed: {exc!s}"

def chat_function(message, history):
    """Append one question/answer exchange to the chat history.

    Args:
        message: the user's text; a falsy value leaves ``history`` untouched.
        history: list of ``[user_message, bot_reply]`` pairs, mutated in place.

    Returns:
        ``(history, "")`` — the history plus an empty string for the input box.
    """
    if not message:
        return history, ""

    # Any failure while answering becomes the bot's reply instead of
    # propagating out of the handler.
    try:
        reply = ai_system.answer_question(message)
    except Exception as exc:
        reply = f"Sorry, I encountered an error: {exc!s}"

    history.append([message, reply])
    return history, ""

def refresh_status():
    """Return the shared AI system's current status report."""
    current_status = ai_system.get_system_status()
    return current_status

# Build the Gradio interface. Components are created inside nested
# Blocks/Row/Column context managers, so statement order determines layout.
with gr.Blocks(theme=gr.themes.Soft(), title="Simple Q&A AI") as app:
    
    # Page header banner (static HTML).
    gr.HTML("""
    <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 20px;">
        <h1>🤖 Simple Question Answering AI</h1>
        <p><b>Learn from data and answer questions intelligently</b></p>
        <p>Stable • Fast • Reliable</p>
    </div>
    """)
    
    with gr.Row():
        # Left column (scale 3): chat transcript, question box, Send/Clear.
        with gr.Column(scale=3):
            gr.Markdown("### 💬 Chat with AI")
            
            # Starts empty; chat_function appends [question, answer] pairs.
            chatbot = gr.Chatbot(
                value=[],
                label="AI Assistant",
                height=400
            )
            
            msg_input = gr.Textbox(
                label="Your Question",
                placeholder="Ask me anything: What is AI? How does technology work?",
                lines=2
            )
            
            with gr.Row():
                send_btn = gr.Button("💬 Send", variant="primary")
                clear_btn = gr.Button("🗑️ Clear", variant="secondary")
        
        # Right column (scale 1): status display and training controls.
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ System Control")
            
            # Read-only; initial text is the status computed while the UI is built.
            status_box = gr.Textbox(
                label="System Status",
                value=ai_system.get_system_status(),
                lines=16,
                interactive=False
            )
            
            train_btn = gr.Button("🚀 Start Training", variant="primary")
            refresh_btn = gr.Button("🔄 Refresh Status", variant="secondary")
    
    # Example questions; selecting one fills msg_input.
    gr.Examples(
        examples=[
            "What is artificial intelligence?",
            "How do computers work?",
            "Why is education important?",
            "What is climate change?",
            "How does the internet work?",
            "What is machine learning?"
        ],
        inputs=msg_input,
        label="📝 Try these questions"
    )
    
    # Event handlers
    # The Send button and the textbox's submit event share one handler:
    # chat_function returns the updated history and "" to clear the input.
    send_btn.click(
        fn=chat_function,
        inputs=[msg_input, chatbot],
        outputs=[chatbot, msg_input]
    )
    
    msg_input.submit(
        fn=chat_function,
        inputs=[msg_input, chatbot],
        outputs=[chatbot, msg_input]
    )
    
    # Clear: reset the transcript to an empty list and the input to "".
    clear_btn.click(
        fn=lambda: ([], ""),
        outputs=[chatbot, msg_input]
    )
    
    # Training writes start_training's result message into status_box,
    # replacing the status text shown there.
    train_btn.click(
        fn=start_training,
        outputs=[status_box]
    )
    
    # Refresh writes the full status report back into status_box.
    refresh_btn.click(
        fn=refresh_status,
        outputs=[status_box]
    )

# Launch the app only when this file is executed directly; importing the
# module still builds `app` above but does not call launch().
if __name__ == "__main__":
    app.launch()