import sys
import os
import re
from pathlib import Path
import gradio as gr
import pandas as pd
import torch
import matplotlib.pyplot as plt
from googleapiclient.discovery import build

try:
    from data_preprocessing import Vocabulary, clean_text
    from model_bilstm_attention import BiLSTMAttention
except ImportError:
    sys.path.append(str(Path(__file__).parent))
    from src.data_preprocessing import Vocabulary, clean_text
    from src.model_bilstm_attention import BiLSTMAttention


API_KEY = "AIzaSyAlKTUhY9t3yaJvk0E2goCuLEtcsTOFMBM" 

# Modeller artik ana dizinde (root)
MODELS_DIR = Path(__file__).parent 
DEVICE = torch.device('cpu')

# ==================== LOAD MODEL ====================

def load_trained_model():
    try:
        # Dosyalar artik yan yana
        vocab_path = MODELS_DIR / "vocabulary.pkl"
        model_path = MODELS_DIR / "BiLSTM_Attention_best.pt"
        
        vocab = Vocabulary.load(vocab_path)
        
        checkpoint = torch.load(model_path, map_location=DEVICE)
        
        config = checkpoint['config'].copy()
        config.pop('bidirectional', None)

        model = BiLSTMAttention(**config)
        model.load_state_dict(checkpoint['model_state_dict'])
        model.to(DEVICE)
        model.eval()
        
        return model, vocab
    except Exception as e:
        print(f"Error loading model: {e}")
        return None, None

MODEL, VOCAB = load_trained_model()

def extract_video_id(url):
    patterns = [
        r"(?:https?://)?(?:www\.)?youtube\.com/watch\?v=([^&]+)",
        r"(?:https?://)?(?:www\.)?youtube\.com/embed/([^?]+)",
        r"(?:https?://)?(?:www\.)?youtube\.com/v/([^?]+)",
        r"(?:https?://)?youtu\.be/([^?]+)"
    ]
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None

def get_video_title(video_id):
    try:
        youtube = build("youtube", "v3", developerKey=API_KEY)
        request = youtube.videos().list(part="snippet", id=video_id)
        response = request.execute()
        if "items" in response and len(response["items"]) > 0:
            return response["items"][0]["snippet"]["title"]
        return "Unknown Video"
    except:
        return "Unknown Video"

def get_comments(video_id, max_results=100):
    try:
        youtube = build("youtube", "v3", developerKey=API_KEY)
        comments = []
        next_page_token = None
        while len(comments) < max_results:
            request = youtube.commentThreads().list(
                part="snippet",
                videoId=video_id,
                maxResults=min(100, max_results - len(comments)),
                textFormat="plainText",
                pageToken=next_page_token
            )
            response = request.execute()
            for item in response.get("items", []):
                comment = item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
                comments.append(comment)
            next_page_token = response.get("nextPageToken")
            if not next_page_token:
                break
        return comments[:max_results], None
    except Exception as e:
        return [], str(e)

def predict_sentiment(text, model, vocab, max_length=128):
    if model is None or vocab is None:
        return "neutral", [0.33, 0.34, 0.33]
    
    cleaned = clean_text(text)
    if not cleaned:
        return "neutral", [0.33, 0.34, 0.33]
    
    encoded = vocab.encode(cleaned)
    encoded = encoded + [0] * (max_length - len(encoded)) if len(encoded) < max_length else encoded[:max_length]
    
    input_ids = torch.tensor([encoded], dtype=torch.long).to(DEVICE)
    length = torch.tensor([min(len(vocab.encode(cleaned)), max_length)], dtype=torch.long).to(DEVICE)
    
    with torch.no_grad():
        logits = model(input_ids, length)
        probabilities = torch.softmax(logits, dim=1).cpu().numpy()[0]
    
    label_map = {0: 'negative', 1: 'neutral', 2: 'positive'}
    return label_map[probabilities.argmax()], probabilities.tolist()

def analyze_comments(comments):
    results = []
    sentiment_counts = {"positive": 0, "neutral": 0, "negative": 0}
    
    for comment in comments:
        sentiment, probs = predict_sentiment(comment, MODEL, VOCAB)
        sentiment_counts[sentiment] += 1
        results.append({
            "Comment": comment[:100] + "..." if len(comment) > 100 else comment,
            "Sentiment": sentiment.capitalize(),
            "Confidence": f"{max(probs):.2%}"
        })
    return results, sentiment_counts

def plot_pie_chart(sentiment_counts, video_title):
    fig, ax = plt.subplots(figsize=(10, 7))
    colors = {'positive': '#4CAF50', 'neutral': '#FFC107', 'negative': '#F44336'}
    chart_colors = [colors[label] for label in sentiment_counts.keys()]
    
    wedges, texts, autotexts = ax.pie(
        sentiment_counts.values(),
        labels=[label.capitalize() for label in sentiment_counts.keys()],
        autopct='%1.1f%%',
        startangle=140,
        colors=chart_colors,
        textprops={'fontsize': 12, 'weight': 'bold'}
    )
    for autotext in autotexts:
        autotext.set_color('white')
    
    ax.set_title(f"Sentiment Analysis\n{video_title[:60]}", fontsize=14, fontweight='bold', pad=20)
    return fig

def get_overall_sentiment(sentiment_counts):
    total = sum(sentiment_counts.values())
    if total == 0: return "No comments analyzed"
    dominant = max(sentiment_counts, key=sentiment_counts.get)
    return f"Overall Sentiment: {dominant.upper()} ({sentiment_counts[dominant]/total*100:.1f}%)"

def youtube_sentiment_analysis(url, num_of_comments):
    if MODEL is None or VOCAB is None:
        return "Model loading failed. Check logs.", None, None
    
    video_id = extract_video_id(url)
    if not video_id: return "Invalid URL", None, None
    
    video_title = get_video_title(video_id)
    comments, error = get_comments(video_id, int(num_of_comments))
    
    if error: return f"Error: {error}", None, None
    if not comments: return "No comments found", None, None
    
    results, sentiment_counts = analyze_comments(comments)
    chart = plot_pie_chart(sentiment_counts, video_title)
    summary = get_overall_sentiment(sentiment_counts)
    
    return summary, chart, pd.DataFrame(results).head(10)

example_urls = [
    ["https://www.youtube.com/watch?v=0e9WuB0Ua98"],
    ["https://www.youtube.com/watch?v=3JZ_D3ELwOQ"],
]

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.HTML("<h1 style='text-align: center;'>YouTube Sentiment Analysis</h1>")
    gr.HTML("<p style='text-align: center;'>Custom Bi-LSTM + Attention Model | Umut ABALI</p>")
    
    with gr.Row():
        with gr.Column():
            youtube_url = gr.Textbox(label="YouTube Video URL")
            num_comments = gr.Slider(10, 500, step=10, value=100, label="Max Comments")
            submit_btn = gr.Button("Analyze", variant="primary")
            gr.Examples(example_urls, inputs=youtube_url)
        
        with gr.Column():
            output_summary = gr.Textbox(label="Summary")
            output_chart = gr.Plot(label="Chart")
            output_table = gr.Dataframe(label="Top 10 Comments")
    
    submit_btn.click(
        youtube_sentiment_analysis,
        inputs=[youtube_url, num_comments],
        outputs=[output_summary, output_chart, output_table]
    )

if __name__ == "__main__":
    demo.launch(share=False)