Spaces:

lanretto
/

shakespeare-authenticator

Sleeping

App Files Files Community

shakespeare-authenticator / app.py

lanretto

Update app.py

5bb3c1a verified about 2 months ago

raw

history blame contribute delete

12.5 kB

	# ==============================================================================
	# Shakespeare Authenticator - Standalone Gradio Dashboard
	# ==============================================================================

	import gradio as gr
	import torch
	import numpy as np
	from transformers import AutoModelForSequenceClassification, AutoTokenizer
	import time
	import os

	# Startup diagnostics: report the torch build and CUDA visibility in the log
	# before any model loading is attempted.
	print("🚀 Starting Shakespeare Authenticator...")
	print(f"📦 PyTorch version: {torch.__version__}")
	print(f"🔧 CUDA available: {torch.cuda.is_available()}")

	# Configuration
	# MODEL_NAME is the Hub repository id handed to from_pretrained() and is also
	# used to build the "View Model" link in the results HTML.
	MODEL_NAME = "lanretto/shakespeare-authenticator" # Your model on HF Hub
	# TITLE is used both as the gr.Blocks title and as the Markdown page heading.
	TITLE = "🎭 Shakespeare Authenticator"
	# DESCRIPTION is rendered under the heading by gr.Markdown.
	DESCRIPTION = """
	Distinguish authentic Shakespearean text from modern imitations using AI.
	This model analyzes linguistic patterns, vocabulary, and stylistic elements
	to determine if text was written by William Shakespeare or is a modern creation.
	"""

	# Global variables for model caching
	# All three start as None and are filled in by load_model(); the classifier
	# checks `model is None` to decide whether a lazy load is still needed.
	model = None
	tokenizer = None
	device = None

	def _load_pretrained(target_device, **model_kwargs):
	    """Load the MODEL_NAME model and tokenizer, in eval mode, on *target_device*.

	    Nothing is written to the module globals here, so a failure part-way
	    through leaves the cache untouched.

	    Args:
	        target_device: torch.device the model is moved to.
	        **model_kwargs: extra keyword arguments forwarded to the model's
	            ``from_pretrained`` call (the tokenizer is loaded without them).

	    Returns:
	        tuple: ``(model, tokenizer)``.
	    """
	    loaded_model = AutoModelForSequenceClassification.from_pretrained(
	        MODEL_NAME,
	        **model_kwargs
	    )
	    loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

	    # Set to evaluation mode
	    loaded_model.eval()
	    return loaded_model.to(target_device), loaded_tokenizer


	def load_model():
	    """Load the classifier and tokenizer once and cache them in module globals.

	    The first attempt loads with ``trust_remote_code=True`` and places the
	    model on CUDA when it is available, otherwise on the CPU. If anything in
	    that attempt raises, a second attempt loads without remote code and
	    forces the CPU.

	    The globals ``model``, ``tokenizer`` and ``device`` are assigned together
	    and only after an attempt has fully succeeded, so a half-finished load
	    (e.g. model fetched, tokenizer failed) is never mistaken for a valid cache
	    on the next call.

	    Returns:
	        tuple: ``(model, tokenizer, device)``.

	    Raises:
	        Exception: whatever the CPU fallback attempt raised, when both
	            attempts fail.
	    """
	    global model, tokenizer, device

	    # Only trust the cache when all three pieces are present
	    if model is not None and tokenizer is not None and device is not None:
	        return model, tokenizer, device

	    print("🔄 Loading model from Hugging Face Hub...")
	    start_time = time.time()
	    used_fallback = False

	    try:
	        # Load model with explicit trust for remote code
	        target = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	        loaded_model, loaded_tokenizer = _load_pretrained(
	            target,
	            trust_remote_code=True
	        )
	    except Exception as e:
	        print(f"❌ Error loading model: {e}")
	        # Fallback: plain load (no remote code) pinned to the CPU
	        try:
	            target = torch.device('cpu')
	            loaded_model, loaded_tokenizer = _load_pretrained(target)
	        except Exception as e2:
	            print(f"❌ Complete failure loading model: {e2}")
	            # Bare raise keeps the original traceback of the fallback failure
	            raise
	        used_fallback = True

	    # Commit the cache atomically, after everything has loaded
	    model, tokenizer, device = loaded_model, loaded_tokenizer, target

	    if used_fallback:
	        print("✅ Model loaded on CPU as fallback")
	    else:
	        load_time = time.time() - start_time
	        print(f"✅ Model loaded successfully in {load_time:.2f}s")
	        print(f"📊 Model device: {device}")
	        print(f"🏷️ Model labels: {model.config.id2label}")

	    return model, tokenizer, device

	# Warm the model cache while the module is being imported. A failure is only
	# logged here: classify_shakespeare() retries the load when `model` is None.
	try:
	    model, tokenizer, device = load_model()
	    print("🎉 Model pre-loaded and ready for inference!")
	except Exception as preload_error:
	    print(f"⚠️ Model loading failed: {preload_error}")

	def _error_result(message):
	    """Build the result dict returned on every failure path of classify_shakespeare."""
	    return {
	        "error": message,
	        "prediction": None,
	        "confidence": None,
	        "detailed_breakdown": None
	    }


	def classify_shakespeare(text):
	    """Classify whether text is authentic Shakespeare or a modern imitation.

	    Args:
	        text: the passage to analyze. ``None``, an empty string and a
	            whitespace-only string are all reported as missing input.

	    Returns:
	        dict: always contains ``error``, ``prediction``, ``confidence`` and
	        ``detailed_breakdown``. On failure ``error`` holds a message and the
	        other three are ``None``. On success ``error`` is ``None``,
	        ``prediction`` is the label text, ``confidence`` and the
	        ``detailed_breakdown`` values are percentages formatted to one
	        decimal, and an extra ``raw_scores`` key carries the two class
	        percentages as floats.

	    No exception is propagated for load or inference failures; they are
	    converted into an ``error`` result instead.
	    """
	    # `not text` covers None as well as "", so .strip() is never called on None
	    if not text or not text.strip():
	        return _error_result("Please enter some text to analyze!")

	    # Ensure model is loaded (the startup pre-load may have failed)
	    if model is None:
	        try:
	            load_model()
	        except Exception:
	            # Exception rather than a bare except, so KeyboardInterrupt and
	            # SystemExit are not swallowed
	            return _error_result("Model failed to load. Please refresh the page.")

	    try:
	        # Tokenize the input text; anything beyond 512 tokens is truncated
	        inputs = tokenizer(
	            text,
	            return_tensors="pt",
	            truncation=True,
	            padding=True,
	            max_length=512
	        )

	        # Move every input tensor to the device the model lives on
	        inputs = {k: v.to(device) for k, v in inputs.items()}

	        # Make prediction without tracking gradients
	        with torch.no_grad():
	            outputs = model(**inputs)
	            logits = outputs.logits
	            probabilities = torch.softmax(logits, dim=1)
	            prediction = torch.argmax(logits, dim=1).item()
	            confidence = probabilities[0][prediction].item()

	        # NOTE(review): this index -> label mapping is hard-coded, while
	        # load_model() prints model.config.id2label; confirm the two agree.
	        labels = {0: "Modern Creation", 1: "Authentic Shakespeare"}
	        result = labels[prediction]

	        # Confidence scores as percentages
	        confidence_pct = confidence * 100
	        modern_confidence = probabilities[0][0].item() * 100
	        shakespeare_confidence = probabilities[0][1].item() * 100

	        return {
	            "error": None,
	            "prediction": result,
	            "confidence": f"{confidence_pct:.1f}%",
	            "detailed_breakdown": {
	                "Modern Creation": f"{modern_confidence:.1f}%",
	                "Authentic Shakespeare": f"{shakespeare_confidence:.1f}%"
	            },
	            "raw_scores": {
	                "modern": modern_confidence,
	                "shakespeare": shakespeare_confidence
	            }
	        }

	    except Exception as e:
	        return _error_result(f"Prediction error: {str(e)}")

	def create_visual_output(result):
	    """Render a classification result dict as an HTML fragment.

	    Args:
	        result: dict with an ``error`` key. When ``error`` is truthy only
	            that key is read. Otherwise ``prediction``, ``confidence`` and
	            ``raw_scores`` (with float percentages under ``modern`` and
	            ``shakespeare``) are read as well.

	    Returns:
	        str: an error panel when ``result["error"]`` is truthy, otherwise the
	        results card with the predicted label, overall confidence and one
	        bar per class.

	    Text taken from ``result`` is HTML-escaped before interpolation, because
	    the error message can carry arbitrary exception text.
	    """
	    # Function-scope import keeps this block self-contained
	    from html import escape

	    if result["error"]:
	        safe_error = escape(str(result["error"]))
	        return f"""
	<div style="text-align: center; padding: 20px; color: #d63031;">
	<h3>❌ Error</h3>
	<p>{safe_error}</p>
	</div>
	"""

	    # Determine emoji and color based on prediction
	    if "Authentic" in result["prediction"]:
	        emoji = "✅"
	        color = "#00b894"
	        explanation = "This text exhibits characteristics of authentic Shakespearean writing."
	    else:
	        emoji = "🔄"
	        color = "#e17055"
	        explanation = "This text appears to be a modern creation or imitation."

	    safe_prediction = escape(str(result["prediction"]))
	    safe_confidence = escape(str(result["confidence"]))

	    # Create confidence bar visualization; each bar's CSS width is the class percentage
	    modern_score = result["raw_scores"]["modern"]
	    shakespeare_score = result["raw_scores"]["shakespeare"]

	    confidence_bars = f"""
	<div style="margin: 20px 0;">
	<div style="display: flex; justify-content: space-between; margin-bottom: 5px;">
	<span style="font-weight: 500;">Modern Creation</span>
	<span style="font-weight: 600;">{modern_score:.1f}%</span>
	</div>
	<div style="background: #e0e0e0; border-radius: 10px; height: 20px; overflow: hidden;">
	<div style="background: #ff6b6b; width: {modern_score}%; height: 100%; border-radius: 10px; transition: width 0.5s ease;"></div>
	</div>

	<div style="display: flex; justify-content: space-between; margin: 15px 0 5px 0;">
	<span style="font-weight: 500;">Authentic Shakespeare</span>
	<span style="font-weight: 600;">{shakespeare_score:.1f}%</span>
	</div>
	<div style="background: #e0e0e0; border-radius: 10px; height: 20px; overflow: hidden;">
	<div style="background: #4ecdc4; width: {shakespeare_score}%; height: 100%; border-radius: 10px; transition: width 0.5s ease;"></div>
	</div>
	</div>
	"""

	    output = f"""
	<div style="padding: 20px; border-radius: 10px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white;">
	<h2 style="margin: 0; text-align: center;">{emoji} Analysis Results</h2>
	</div>

	<div style="padding: 20px;">
	<div style="text-align: center; margin-bottom: 20px;">
	<h3 style="color: {color}; margin: 0;">{safe_prediction}</h3>
	<p style="font-size: 1.2em; font-weight: bold; margin: 10px 0;">Overall Confidence: {safe_confidence}</p>
	</div>

	<p style="text-align: center; color: #666; font-style: italic;">{explanation}</p>

	<h4>Confidence Breakdown:</h4>
	{confidence_bars}

	<div style="margin-top: 20px; padding-top: 20px; border-top: 1px solid #e0e0e0; text-align: center; color: #888; font-size: 0.9em;">
	Powered by fine-tuned BERT •
	<a href="https://huggingface.co/{MODEL_NAME}" target="_blank" style="color: #667eea;">View Model on Hugging Face</a>
	</div>
	</div>
	"""

	    return output

	def predict_shakespeare(text):
	    """Main prediction function for the Gradio interface.

	    Classifies *text*, logs the input length and elapsed time, and returns
	    the result rendered as HTML.

	    Args:
	        text: the value of the input textbox.

	    Returns:
	        str: the HTML produced by create_visual_output() for the result.
	    """
	    # perf_counter is monotonic, so the interval cannot be skewed by system
	    # clock adjustments the way a time.time() difference can
	    start_time = time.perf_counter()
	    result = classify_shakespeare(text)
	    processing_time = time.perf_counter() - start_time

	    # Guard the length so a None input is logged as 0 chars instead of
	    # raising TypeError in the log line
	    print(f"🔍 Processed text ({len(text or '')} chars) in {processing_time:.2f}s")

	    return create_visual_output(result)

	# Example texts
	# Passed to gr.Examples as one-click sample inputs; each inner list holds
	# the value for the single text input component.
	examples = [
	["To be or not to be, that is the question"],
	["Friends, Romans, countrymen, lend me your ears"],
	["What light through yonder window breaks?"],
	["Shall I compare thee to a summer's day?"],
	["The meeting is scheduled for 2 PM in the conference room"],
	["I think therefore I am - modern philosophical statement"],
	["Now is the winter of our discontent made glorious summer by this sun of York"],
	["O Romeo, Romeo, wherefore art thou Romeo?"]
	]

	# Empty-state markup for the results panel. It is both the initial value of
	# the output component and what the Clear button restores, so it is defined
	# once to keep the two from drifting apart.
	EMPTY_STATE_HTML = """<div style='text-align: center; color: #666; padding: 40px; border: 2px dashed #ddd; border-radius: 10px;'>
	<h3>👆 Enter text to analyze</h3>
	<p>Paste any text above and click "Analyze Text" to see if it's authentic Shakespeare!</p>
	</div>"""

	# Create the Gradio interface
	# NOTE(review): the nesting of the layout contexts below (Row / Column /
	# Accordion) was reconstructed from a copy whose indentation had been
	# flattened - confirm it against the deployed app.py.
	with gr.Blocks(
	    theme=gr.themes.Soft(),
	    title=TITLE,
	    css="""
	.gradio-container {
	max-width: 800px !important;
	margin: 0 auto !important;
	}
	.example-text {
	font-style: italic;
	color: #666;
	}
	footer {
	display: none !important;
	}
	"""
	) as demo:

	    # Header section
	    gr.Markdown(f"""
	# {TITLE}

	{DESCRIPTION}
	""")

	    with gr.Row():
	        with gr.Column(scale=1):
	            # Input section
	            text_input = gr.Textbox(
	                label="📝 Enter Text to Analyze",
	                placeholder="Paste Shakespearean text or modern writing here...",
	                lines=4,
	                max_lines=6,
	                elem_id="text-input"
	            )

	            with gr.Row():
	                submit_btn = gr.Button("🔍 Analyze Text", variant="primary", scale=2)
	                clear_btn = gr.Button("🗑️ Clear", variant="secondary", scale=1)

	            # Examples: clicking one fills text_input with the sample text
	            gr.Examples(
	                examples=examples,
	                inputs=text_input,
	                label="💡 Try these examples:",
	                examples_per_page=4
	            )

	        with gr.Column(scale=1):
	            # Output section, starting in the empty state
	            output = gr.HTML(
	                label="📊 Analysis Results",
	                value=EMPTY_STATE_HTML
	            )

	    # Model information
	    with gr.Accordion("ℹ️ About This Model", open=False):
	        gr.Markdown(f"""
	Model Details
	- Model: `{MODEL_NAME}` on Hugging Face Hub
	- Architecture: BERT-base fine-tuned on Shakespearean text classification
	- Training Data: 400,000+ samples of Shakespeare vs modern dialogue
	- Task: Binary text classification (Authentic Shakespeare vs Modern Creation)

	How It Works
	- Analyzes linguistic patterns, vocabulary, and stylistic elements
	- Uses transformer architecture to understand context and syntax
	- Returns confidence scores for both classification categories

	Best Practices
	- Works best with complete sentences or passages
	- More accurate with longer text samples
	- Designed for Early Modern English vs Contemporary English distinction
	""")

	    # Event handlers: the button click and pressing Enter in the textbox both
	    # run the same prediction function
	    submit_btn.click(
	        fn=predict_shakespeare,
	        inputs=text_input,
	        outputs=output
	    )

	    text_input.submit(
	        fn=predict_shakespeare,
	        inputs=text_input,
	        outputs=output
	    )

	    # Clear empties the textbox and puts the results panel back in its empty state
	    clear_btn.click(
	        fn=lambda: ("", EMPTY_STATE_HTML),
	        inputs=[],
	        outputs=[text_input, output]
	    )

	# Launch the application - SIMPLIFIED FOR SPACES
	if __name__ == "__main__":
	    demo.launch()