Spaces:

lanretto
/

shakespeare-authenticator

Running

File size: 12,539 Bytes

# ==============================================================================
# Shakespeare Authenticator - Standalone Gradio Dashboard
# ==============================================================================

import gradio as gr
import torch
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import time
import os

# Startup diagnostics: report the PyTorch build and whether CUDA is usable,
# which decides the device load_model() will prefer.
print("🚀 Starting Shakespeare Authenticator...")
print(f"📦 PyTorch version: {torch.__version__}")
print(f"🔧 CUDA available: {torch.cuda.is_available()}")

# Configuration
# MODEL_NAME is the Hugging Face Hub repo id handed to from_pretrained() and
# also used to build the "View Model" link in the results HTML.
MODEL_NAME = "lanretto/shakespeare-authenticator"  # Hub repo id of the fine-tuned classifier
# TITLE is used both as the browser page title and as the Markdown heading.
TITLE = "🎭 Shakespeare Authenticator"
# DESCRIPTION is rendered as Markdown directly under the heading.
DESCRIPTION = """
Distinguish authentic Shakespearean text from modern imitations using AI. 
This model analyzes linguistic patterns, vocabulary, and stylistic elements 
to determine if text was written by William Shakespeare or is a modern creation.
"""

# Global variables for model caching.
# They stay None until load_model() succeeds; classify_shakespeare() reads them
# and triggers a lazy load when `model` is still None.
model = None
tokenizer = None
device = None

def _load_model_on(target_device, **model_kwargs):
    """Load the classifier and tokenizer for MODEL_NAME onto ``target_device``.

    Nothing is written to module globals here, so a failure part-way through
    (for example the tokenizer download failing after the model loaded) cannot
    leave a half-initialised cache behind.

    Args:
        target_device: ``torch.device`` the model is moved to.
        **model_kwargs: Extra keyword arguments forwarded to
            ``AutoModelForSequenceClassification.from_pretrained``.

    Returns:
        Tuple ``(model, tokenizer)`` with the model in evaluation mode.
    """
    loaded_model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        **model_kwargs
    )
    loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    # Set to evaluation mode before moving to the target device
    loaded_model.eval()
    return loaded_model.to(target_device), loaded_tokenizer


def load_model():
    """Load model and tokenizer once and cache them in module globals.

    The first attempt loads with ``trust_remote_code=True`` onto CUDA when it
    is available (CPU otherwise). If that attempt raises for any reason, a
    second attempt loads without ``trust_remote_code`` onto the CPU.

    The globals ``model``, ``tokenizer`` and ``device`` are only assigned after
    a complete, successful load, so callers never observe a partially loaded
    state.

    Returns:
        Tuple ``(model, tokenizer, device)``.

    Raises:
        Exception: Whatever the fallback attempt raised, when both attempts fail.
    """
    global model, tokenizer, device

    # The cache is only valid when every piece is present.
    if model is not None and tokenizer is not None and device is not None:
        return model, tokenizer, device

    print("🔄 Loading model from Hugging Face Hub...")
    start_time = time.time()

    try:
        preferred_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # NOTE(security): trust_remote_code=True lets the Hub repository run its
        # own Python code during loading. Keep it only for repositories you control.
        loaded_model, loaded_tokenizer = _load_model_on(
            preferred_device,
            trust_remote_code=True
        )
        loaded_device = preferred_device

        load_time = time.time() - start_time
        print(f"✅ Model loaded successfully in {load_time:.2f}s")
        print(f"📊 Model device: {loaded_device}")
        print(f"🏷️ Model labels: {loaded_model.config.id2label}")

    except Exception as e:
        print(f"❌ Error loading model: {e}")
        # Fallback to CPU if CUDA fails
        try:
            loaded_device = torch.device('cpu')
            loaded_model, loaded_tokenizer = _load_model_on(loaded_device)
        except Exception as e2:
            print(f"❌ Complete failure loading model: {e2}")
            # Bare raise keeps the original traceback intact.
            raise
        print("✅ Model loaded on CPU as fallback")

    # Publish to the cache only now that everything loaded successfully.
    model, tokenizer, device = loaded_model, loaded_tokenizer, loaded_device
    return model, tokenizer, device

# Warm the model cache while the module is being imported. A failure here is
# reported but not fatal: classify_shakespeare() attempts the load again when
# it finds `model` still unset.
try:
    model, tokenizer, device = load_model()
except Exception as preload_error:
    print(f"⚠️ Model loading failed: {preload_error}")
else:
    print("🎉 Model pre-loaded and ready for inference!")

def _classification_error(message):
    """Build the result dict returned when no prediction could be made.

    It carries the same keys as a successful result (all set to None except
    ``error``) so consumers can rely on one shape.
    """
    return {
        "error": message,
        "prediction": None,
        "confidence": None,
        "detailed_breakdown": None,
        "raw_scores": None
    }


def classify_shakespeare(text):
    """Classify whether text is authentic Shakespeare or a modern imitation.

    Args:
        text: The passage to analyze. ``None``, non-string values, and
            empty/whitespace-only strings are rejected with an error result
            instead of raising.

    Returns:
        A dict with the keys ``error``, ``prediction``, ``confidence``,
        ``detailed_breakdown`` and ``raw_scores``. On success ``error`` is
        None, ``prediction`` is the winning label, ``confidence`` and
        ``detailed_breakdown`` hold percentages formatted to one decimal, and
        ``raw_scores`` holds the unformatted percentages as floats. On failure
        only ``error`` is set and the other keys are None.
    """
    # Reject missing/blank input before touching the model.
    if not isinstance(text, str) or not text.strip():
        return _classification_error("Please enter some text to analyze!")

    # Ensure model is loaded (the startup pre-load may have failed).
    if model is None or tokenizer is None:
        try:
            load_model()
        except Exception:
            # load_model() already printed the details; only real errors are
            # caught here, so KeyboardInterrupt/SystemExit still propagate.
            return _classification_error("Model failed to load. Please refresh the page.")
        if model is None or tokenizer is None:
            return _classification_error("Model failed to load. Please refresh the page.")

    try:
        # Tokenize the input text; anything beyond 512 tokens is truncated.
        inputs = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512
        )

        # Move every input tensor to the device the model lives on
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # Make prediction without tracking gradients
        with torch.no_grad():
            logits = model(**inputs).logits
            probabilities = torch.softmax(logits, dim=1)
            prediction = torch.argmax(logits, dim=1).item()
            confidence = probabilities[0][prediction].item()

        # Map prediction index to the display label.
        # NOTE(review): assumes class 0 = modern and class 1 = Shakespeare;
        # confirm against the model's config.id2label.
        labels = {0: "Modern Creation", 1: "Authentic Shakespeare"}
        result = labels[prediction]

        # Confidence scores as percentages
        confidence_pct = confidence * 100
        modern_confidence = probabilities[0][0].item() * 100
        shakespeare_confidence = probabilities[0][1].item() * 100

        return {
            "error": None,
            "prediction": result,
            "confidence": f"{confidence_pct:.1f}%",
            "detailed_breakdown": {
                "Modern Creation": f"{modern_confidence:.1f}%",
                "Authentic Shakespeare": f"{shakespeare_confidence:.1f}%"
            },
            "raw_scores": {
                "modern": modern_confidence,
                "shakespeare": shakespeare_confidence
            }
        }

    except Exception as e:
        # Boundary for the UI: report the failure instead of crashing the request.
        return _classification_error(f"Prediction error: {str(e)}")

def create_visual_output(result):
    """Render a classification result dict as an HTML fragment.

    Args:
        result: Dict as produced by ``classify_shakespeare``. When its
            ``error`` entry is truthy only that message is rendered; otherwise
            ``prediction``, ``confidence`` and ``raw_scores`` (with ``modern``
            and ``shakespeare`` percentages) are read.

    Returns:
        An HTML string: either an error panel, or the verdict with one
        percentage bar per class and a link to the model page.
    """
    # Function-scope import keeps this block self-contained.
    import html

    error_message = result.get("error")
    if error_message:
        # The message can embed arbitrary exception text, so escape it to keep
        # characters like "<" from being interpreted as markup.
        safe_error = html.escape(str(error_message))
        return f"""
        <div style="text-align: center; padding: 20px; color: #d63031;">
            <h3>❌ Error</h3>
            <p>{safe_error}</p>
        </div>
        """

    # Determine emoji and color based on prediction
    if "Authentic" in result["prediction"]:
        emoji = "✅"
        color = "#00b894"
        explanation = "This text exhibits characteristics of authentic Shakespearean writing."
    else:
        emoji = "🔄"
        color = "#e17055"
        explanation = "This text appears to be a modern creation or imitation."

    # Escape the text fields as well so the fragment stays well-formed whatever
    # labels are passed in.
    safe_prediction = html.escape(str(result["prediction"]))
    safe_confidence = html.escape(str(result["confidence"]))

    # Create confidence bar visualization; each score doubles as the CSS width.
    modern_score = result["raw_scores"]["modern"]
    shakespeare_score = result["raw_scores"]["shakespeare"]

    confidence_bars = f"""
    <div style="margin: 20px 0;">
        <div style="display: flex; justify-content: space-between; margin-bottom: 5px;">
            <span style="font-weight: 500;">Modern Creation</span>
            <span style="font-weight: 600;">{modern_score:.1f}%</span>
        </div>
        <div style="background: #e0e0e0; border-radius: 10px; height: 20px; overflow: hidden;">
            <div style="background: #ff6b6b; width: {modern_score}%; height: 100%; border-radius: 10px; transition: width 0.5s ease;"></div>
        </div>

        <div style="display: flex; justify-content: space-between; margin: 15px 0 5px 0;">
            <span style="font-weight: 500;">Authentic Shakespeare</span>
            <span style="font-weight: 600;">{shakespeare_score:.1f}%</span>
        </div>
        <div style="background: #e0e0e0; border-radius: 10px; height: 20px; overflow: hidden;">
            <div style="background: #4ecdc4; width: {shakespeare_score}%; height: 100%; border-radius: 10px; transition: width 0.5s ease;"></div>
        </div>
    </div>
    """

    output = f"""
    <div style="padding: 20px; border-radius: 10px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white;">
        <h2 style="margin: 0; text-align: center;">{emoji} Analysis Results</h2>
    </div>

    <div style="padding: 20px;">
        <div style="text-align: center; margin-bottom: 20px;">
            <h3 style="color: {color}; margin: 0;">{safe_prediction}</h3>
            <p style="font-size: 1.2em; font-weight: bold; margin: 10px 0;">Overall Confidence: {safe_confidence}</p>
        </div>

        <p style="text-align: center; color: #666; font-style: italic;">{explanation}</p>

        <h4>Confidence Breakdown:</h4>
        {confidence_bars}

        <div style="margin-top: 20px; padding-top: 20px; border-top: 1px solid #e0e0e0; text-align: center; color: #888; font-size: 0.9em;">
            Powered by fine-tuned BERT • 
            <a href="https://huggingface.co/{MODEL_NAME}" target="_blank" style="color: #667eea;">View Model on Hugging Face</a>
        </div>
    </div>
    """

    return output

def predict_shakespeare(text):
    """Main prediction function for the Gradio interface.

    Classifies ``text``, logs how long it took, and returns the result
    rendered as HTML.

    Args:
        text: Contents of the input textbox. ``None`` is treated as an empty
            string so that a missing value yields the normal "please enter
            some text" result instead of raising.

    Returns:
        The HTML string produced by ``create_visual_output``.
    """
    # len(None) and None.strip() both raise, so normalise a missing value first.
    if text is None:
        text = ""

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments.
    started = time.perf_counter()
    result = classify_shakespeare(text)
    processing_time = time.perf_counter() - started

    print(f"🔍 Processed text ({len(text)} chars) in {processing_time:.2f}s")

    return create_visual_output(result)

# Clickable sample inputs for the UI. gr.Examples receives one row per example;
# each row is a single-item list because the only input is the text box.
examples = [
    [sample_text]
    for sample_text in (
        "To be or not to be, that is the question",
        "Friends, Romans, countrymen, lend me your ears",
        "What light through yonder window breaks?",
        "Shall I compare thee to a summer's day?",
        "The meeting is scheduled for 2 PM in the conference room",
        "I think therefore I am - modern philosophical statement",
        "Now is the winter of our discontent made glorious summer by this sun of York",
        "O Romeo, Romeo, wherefore art thou Romeo?",
    )
]

# Placeholder shown in the results panel before the first analysis and again
# after "Clear". Defined once so both places cannot drift apart.
_PLACEHOLDER_HTML = """<div style='text-align: center; color: #666; padding: 40px; border: 2px dashed #ddd; border-radius: 10px;'>
                    <h3>👆 Enter text to analyze</h3>
                    <p>Paste any text above and click "Analyze Text" to see if it's authentic Shakespeare!</p>
                </div>"""

# Create the Gradio interface
with gr.Blocks(
    theme=gr.themes.Soft(),
    title=TITLE,
    css="""
    .gradio-container {
        max-width: 800px !important;
        margin: 0 auto !important;
    }
    .example-text {
        font-style: italic;
        color: #666;
    }
    footer {
        display: none !important;
    }
    """
) as demo:

    # Header section.
    # Built without leading indentation on purpose: DESCRIPTION's lines start at
    # column 0, so an indented triple-quoted template cannot be uniformly
    # de-indented, and Markdown treats a 4-space-indented "# title" line as a
    # code block instead of a heading.
    gr.Markdown(f"# {TITLE}\n\n{DESCRIPTION.strip()}")

    with gr.Row():
        with gr.Column(scale=1):
            # Input section
            text_input = gr.Textbox(
                label="📝 Enter Text to Analyze",
                placeholder="Paste Shakespearean text or modern writing here...",
                lines=4,
                max_lines=6,
                elem_id="text-input"
            )

            with gr.Row():
                submit_btn = gr.Button("🔍 Analyze Text", variant="primary", scale=2)
                clear_btn = gr.Button("🗑️ Clear", variant="secondary", scale=1)

            # Examples: clicking one fills the text box
            gr.Examples(
                examples=examples,
                inputs=text_input,
                label="💡 Try these examples:",
                examples_per_page=4
            )

        with gr.Column(scale=1):
            # Output section, initially showing the placeholder
            output = gr.HTML(
                label="📊 Analysis Results",
                value=_PLACEHOLDER_HTML
            )

    # Model information
    with gr.Accordion("ℹ️ About This Model", open=False):
        gr.Markdown(f"""
        **Model Details**
        - **Model**: `{MODEL_NAME}` on Hugging Face Hub
        - **Architecture**: BERT-base fine-tuned on Shakespearean text classification
        - **Training Data**: 400,000+ samples of Shakespeare vs modern dialogue
        - **Task**: Binary text classification (Authentic Shakespeare vs Modern Creation)
        
        **How It Works**
        - Analyzes linguistic patterns, vocabulary, and stylistic elements
        - Uses transformer architecture to understand context and syntax
        - Returns confidence scores for both classification categories
        
        **Best Practices**
        - Works best with complete sentences or passages
        - More accurate with longer text samples
        - Designed for Early Modern English vs Contemporary English distinction
        """)

    # Event handlers: the button click and pressing Enter in the text box run
    # the same analysis with the same wiring.
    for register_trigger in (submit_btn.click, text_input.submit):
        register_trigger(
            fn=predict_shakespeare,
            inputs=text_input,
            outputs=output
        )

    # Clear empties the text box and restores the placeholder panel.
    clear_btn.click(
        fn=lambda: ("", _PLACEHOLDER_HTML),
        inputs=[],
        outputs=[text_input, output]
    )

# Launch the application only when this file is executed as a script, so that
# importing the module does not start a server. launch() is called with no
# arguments (Gradio's defaults); the original note says this was simplified
# for Hugging Face Spaces.
if __name__ == "__main__":
    demo.launch()