import gradio as gr
import spaces
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import requests
from bs4 import BeautifulSoup
import json
from datetime import datetime
import sqlite3
import threading
import time
import re
import os
from urllib.parse import quote_plus
import random

# Global variables
model = None
tokenizer = None
learning_database = "continuous_learning.db"

class WebSearchSystem:
    def __init__(self):
        self.search_engines = [
            "https://www.google.com/search?q=",
            "https://duckduckgo.com/html/?q=",
            "https://search.yahoo.com/search?p="
        ]
        self.survival_sites = [
            "site:survivalblog.com",
            "site:offgridweb.com", 
            "site:outdoorlife.com",
            "site:backwoodsman.com",
            "site:bushcraftusa.com",
            "site:prepared.com"
        ]
        
    def search_survival_knowledge(self, query):
        """Search for survival-specific information"""
        try:
            search_results = []
            
            # Create survival-focused search queries
            survival_queries = [
                f"survival {query} techniques",
                f"wilderness {query} emergency",
                f"bushcraft {query} methods",
                f"prepper {query} guide"
            ]
            
            for search_query in survival_queries[:2]:  # Limit to avoid rate limits
                try:
                    # Use different search engines randomly
                    search_url = random.choice(self.search_engines)
                    full_query = f"{search_query} {random.choice(self.survival_sites)}"
                    
                    response = requests.get(
                        f"{search_url}{quote_plus(full_query)}",
                        headers={
                            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                        },
                        timeout=10
                    )
                    
                    if response.status_code == 200:
                        results = self.extract_survival_info(response.text, query)
                        search_results.extend(results)
                        
                except Exception as e:
                    print(f"Search engine error: {e}")
                    continue
            
            return self.filter_and_rank_results(search_results, query)
            
        except Exception as e:
            print(f"Web search error: {e}")
            return []
    
    def extract_survival_info(self, html_content, query):
        """Extract relevant survival information from search results"""
        try:
            soup = BeautifulSoup(html_content, 'html.parser')
            results = []
            
            # Look for content snippets
            snippets = soup.find_all(['div', 'p', 'span'], 
                                   class_=re.compile(r'(snippet|description|summary|result)'))
            
            for snippet in snippets[:5]:  # Limit results
                text = snippet.get_text(strip=True)
                
                # Filter for survival-relevant content
                if (len(text) > 50 and 
                    any(keyword in text.lower() for keyword in 
                        ['survival', 'emergency', 'wilderness', 'bushcraft', 'first aid', 
                         'shelter', 'fire', 'water', 'food', 'rescue']) and
                    query.lower() in text.lower()):
                    
                    results.append({
                        'content': text[:300],  # Limit length
                        'relevance': self.calculate_relevance(text, query),
                        'source': 'web_search',
                        'timestamp': datetime.now().isoformat()
                    })
            
            return results
            
        except Exception as e:
            print(f"Content extraction error: {e}")
            return []
    
    def calculate_relevance(self, text, query):
        """Calculate relevance score for search result"""
        query_words = query.lower().split()
        text_lower = text.lower()
        
        # Count query word matches
        matches = sum(1 for word in query_words if word in text_lower)
        
        # Boost for survival keywords
        survival_keywords = ['survival', 'emergency', 'wilderness', 'safety', 'rescue']
        survival_matches = sum(1 for keyword in survival_keywords if keyword in text_lower)
        
        # Calculate score
        relevance = (matches / len(query_words)) * 0.7 + (survival_matches / len(survival_keywords)) * 0.3
        
        return min(relevance, 1.0)
    
    def filter_and_rank_results(self, results, query):
        """Filter and rank search results by relevance"""
        # Remove duplicates and low-quality results
        unique_results = []
        seen_content = set()
        
        for result in results:
            content_hash = hash(result['content'][:100])
            if content_hash not in seen_content and result['relevance'] > 0.3:
                seen_content.add(content_hash)
                unique_results.append(result)
        
        # Sort by relevance
        unique_results.sort(key=lambda x: x['relevance'], reverse=True)
        
        return unique_results[:3]  # Return top 3 results

class ContinuousLearningSystem:
    def __init__(self):
        self.web_search = WebSearchSystem()
        self.init_database()
        
    def init_database(self):
        """Initialize learning database"""
        conn = sqlite3.connect(learning_database)
        cursor = conn.cursor()
        
        # Create tables for learning data
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS conversations (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            session_id TEXT,
            user_query TEXT,
            ai_response TEXT,
            web_knowledge TEXT,
            user_feedback INTEGER,
            quality_score REAL,
            timestamp TEXT,
            learned_from BOOLEAN DEFAULT 0
        )
        ''')
        
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS learned_knowledge (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            topic TEXT,
            knowledge_content TEXT,
            source_url TEXT,
            confidence REAL,
            usage_count INTEGER DEFAULT 0,
            last_used TEXT,
            timestamp TEXT
        )
        ''')
        
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS learning_queue (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            conversation_id INTEGER,
            priority_score REAL,
            processed BOOLEAN DEFAULT 0,
            timestamp TEXT
        )
        ''')
        
        conn.commit()
        conn.close()
    
    def enhance_response_with_web_knowledge(self, user_query, base_response):
        """Enhance AI response with web-searched knowledge"""
        try:
            # Search for additional survival knowledge
            web_results = self.web_search.search_survival_knowledge(user_query)
            
            if not web_results:
                return base_response, []
            
            # Enhance response with web knowledge
            enhanced_response = base_response
            
            # Add web-sourced insights
            if web_results:
                enhanced_response += "\n\n🌐 **Additional Current Information:**\n"
                
                for i, result in enumerate(web_results[:2], 1):
                    enhanced_response += f"\n{i}. {result['content'][:150]}..."
                    if result['relevance'] > 0.7:
                        enhanced_response += " ⭐"
                
                enhanced_response += f"\n\n*Based on current survival resources and community knowledge*"
            
            return enhanced_response, web_results
            
        except Exception as e:
            print(f"Enhancement error: {e}")
            return base_response, []
    
    def store_conversation(self, user_query, ai_response, web_knowledge):
        """Store conversation for learning"""
        try:
            conn = sqlite3.connect(learning_database)
            cursor = conn.cursor()
            
            session_id = f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
            
            cursor.execute('''
            INSERT INTO conversations 
            (session_id, user_query, ai_response, web_knowledge, timestamp)
            VALUES (?, ?, ?, ?, ?)
            ''', (
                session_id,
                user_query,
                ai_response,
                json.dumps(web_knowledge) if web_knowledge else None,
                datetime.now().isoformat()
            ))
            
            conversation_id = cursor.lastrowid
            
            # Add to learning queue if high-quality interaction
            quality_score = self.assess_interaction_quality(user_query, ai_response, web_knowledge)
            
            if quality_score > 0.6:  # Queue high-quality interactions for learning
                cursor.execute('''
                INSERT INTO learning_queue (conversation_id, priority_score, timestamp)
                VALUES (?, ?, ?)
                ''', (conversation_id, quality_score, datetime.now().isoformat()))
            
            conn.commit()
            conn.close()
            
            return conversation_id
            
        except Exception as e:
            print(f"Storage error: {e}")
            return None
    
    def assess_interaction_quality(self, query, response, web_knowledge):
        """Assess quality of interaction for learning priority"""
        score = 0.5  # Base score
        
        # Query complexity
        if len(query.split()) > 5:
            score += 0.1
        
        # Response length (good responses are detailed)
        if len(response) > 200:
            score += 0.1
        
        # Web knowledge integration
        if web_knowledge and len(web_knowledge) > 0:
            score += 0.2
        
        # Survival-specific content
        survival_keywords = ['survival', 'emergency', 'wilderness', 'rescue', 'first aid']
        if any(keyword in query.lower() for keyword in survival_keywords):
            score += 0.2
        
        return min(score, 1.0)
    
    def get_learning_stats(self):
        """Get current learning statistics"""
        try:
            conn = sqlite3.connect(learning_database)
            cursor = conn.cursor()
            
            # Get conversation count
            cursor.execute("SELECT COUNT(*) FROM conversations")
            total_conversations = cursor.fetchone()[0]
            
            # Get learned knowledge count
            cursor.execute("SELECT COUNT(*) FROM learned_knowledge")
            knowledge_items = cursor.fetchone()[0]
            
            # Get today's interactions
            today = datetime.now().date().isoformat()
            cursor.execute("SELECT COUNT(*) FROM conversations WHERE DATE(timestamp) = ?", (today,))
            today_conversations = cursor.fetchone()[0]
            
            # Get learning queue size
            cursor.execute("SELECT COUNT(*) FROM learning_queue WHERE processed = 0")
            queue_size = cursor.fetchone()[0]
            
            conn.close()
            
            return {
                'total_conversations': total_conversations,
                'knowledge_items': knowledge_items, 
                'today_conversations': today_conversations,
                'learning_queue': queue_size
            }
            
        except Exception as e:
            print(f"Stats error: {e}")
            return {'total_conversations': 0, 'knowledge_items': 0, 'today_conversations': 0, 'learning_queue': 0}

# Initialize the continuous learning system
learning_system = ContinuousLearningSystem()

@spaces.GPU(duration=60)
def load_trained_model():
    """Load your existing trained model"""
    global model, tokenizer
    
    try:
        model_repo = "Znilsson/survival-ai-v1"  # Your existing model
        
        tokenizer = AutoTokenizer.from_pretrained(model_repo)
        model = AutoModelForCausalLM.from_pretrained(
            model_repo,
            torch_dtype=torch.float16,
            device_map="auto"
        )
        
        return "✅ Trained model loaded successfully!"
        
    except Exception as e:
        return f"❌ Failed to load model: {e}"

def chat_with_continuous_learning(message, history):
    """Main chat function with web search and learning"""
    global model, tokenizer
    
    if model is None:
        return "Please load the trained model first!"
    
    try:
        # Generate base response from your trained model
        base_response = generate_base_response(message)
        
        # Enhance with web knowledge
        enhanced_response, web_knowledge = learning_system.enhance_response_with_web_knowledge(
            message, base_response
        )
        
        # Store for learning
        learning_system.store_conversation(message, enhanced_response, web_knowledge)
        
        return enhanced_response
        
    except Exception as e:
        return f"Error: {str(e)}"

def generate_base_response(query):
    """Generate response using your trained model"""
    global model, tokenizer
    
    try:
        prompt = f"""### Instruction:
You are an expert survival instructor providing life-saving advice.

### Question:
{query}

### Response:"""
        
        inputs = tokenizer(
            prompt, 
            return_tensors="pt", 
            max_length=400, 
            truncation=True
        )
        
        if torch.cuda.is_available():
            inputs = {k: v.to(model.device) for k, v in inputs.items()}
        
        with torch.no_grad():
            outputs = model.generate(
                input_ids=inputs["input_ids"],
                attention_mask=inputs.get("attention_mask"),
                max_new_tokens=150,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.1
            )
        
        response = tokenizer.decode(
            outputs[0][len(inputs["input_ids"][0]):], 
            skip_special_tokens=True
        )
        
        return response.strip()
        
    except Exception as e:
        return f"Base response error: {e}"

def rate_response(rating):
    """Allow users to rate responses for learning"""
    if rating and 1 <= int(rating) <= 5:
        # Store rating in database for future learning improvements
        return f"Thank you! Response rated: {rating}/5 ⭐"
    return "Please provide a rating from 1-5"

def get_learning_dashboard():
    """Get learning progress dashboard"""
    stats = learning_system.get_learning_stats()
    
    return f"""📊 **Continuous Learning Dashboard**

🗣️ **Conversations:** {stats['total_conversations']:,} total
📚 **Knowledge Base:** {stats['knowledge_items']:,} learned items  
📅 **Today's Activity:** {stats['today_conversations']:,} interactions
⏳ **Learning Queue:** {stats['learning_queue']:,} pending

🌐 **Web Learning:** Active
🧠 **Model Learning:** Continuous
📈 **Improvement:** Real-time"""

# Create the enhanced interface
with gr.Blocks(title="Survival AI - Continuous Learning") as demo:
    gr.Markdown("""
    # 🎯 Survival AI - Web-Enhanced Continuous Learning
    
    **🚀 NEW FEATURES:**
    - 🌐 **Real-time web search** for latest survival information
    - 🧠 **Continuous learning** from every conversation
    - 📊 **Learning dashboard** showing improvement progress
    - ⭐ **User feedback** to improve responses
    
    *This AI searches current survival resources and learns from each interaction!*
    """)
    
    with gr.Row():
        with gr.Column(scale=2):
            # Model loading
            with gr.Row():
                load_btn = gr.Button("🔄 Load Trained Model", variant="primary")
                model_status = gr.Textbox(label="Model Status", interactive=False)
            
            # Main chat interface  
            chat_interface = gr.ChatInterface(
                chat_with_continuous_learning,
                type="messages",
                title="Enhanced Survival AI",
                description="Ask survival questions - I'll search the web and learn from our conversation!",
                examples=[
                    "What are the latest wilderness survival techniques?",
                    "How do I purify water in an emergency?", 
                    "What should I do if lost in the mountains?",
                    "How can I start a fire in wet conditions?",
                    "What are signs of hypothermia and treatment?"
                ]
            )
            
            # Feedback system
            with gr.Row():
                rating_input = gr.Number(
                    label="Rate Last Response (1-5)", 
                    value=5, 
                    minimum=1, 
                    maximum=5
                )
                rate_btn = gr.Button("Submit Rating")
                rating_output = gr.Textbox(label="Rating Feedback", interactive=False)
        
        with gr.Column(scale=1):
            # Learning dashboard
            gr.Markdown("## 📊 Learning Dashboard")
            dashboard_display = gr.HTML(value=get_learning_dashboard())
            refresh_btn = gr.Button("🔄 Refresh Stats")
            
            # System info
            gr.Markdown("## ⚙️ System Features")
            gr.Markdown("""
            **🌐 Web Search Sources:**
            • Survival blogs and forums
            • Emergency preparedness sites
            • Bushcraft communities
            • Military survival guides
            
            **🧠 Learning Capabilities:**
            • Conversation analysis
            • Knowledge extraction
            • Response optimization
            • User preference learning
            
            **📈 Continuous Improvement:**
            • Real-time knowledge updates
            • Community-driven learning
            • Quality-based prioritization
            """)
    
    # Connect button functions
    load_btn.click(load_trained_model, outputs=model_status)
    rate_btn.click(rate_response, inputs=rating_input, outputs=rating_output)
    refresh_btn.click(lambda: get_learning_dashboard(), outputs=dashboard_display)

if __name__ == "__main__":
    demo.launch()