Spaces:
Runtime error

from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, BitsAndBytesConfig
import gradio as gr
import torch
from concurrent.futures import ThreadPoolExecutor
from threading import Lock

# Global cache settings and lock for thread-safety
CACHE_SIZE = 100
prediction_cache = {}
cache_lock = Lock()
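
# Note: functools.lru_cache(maxsize=CACHE_SIZE) would give bounded, thread-safe
# memoization with less code; the manual dict + Lock is kept here so that error
# results can be excluded from the cache (see analyze_text below).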

# Load a model for text classification, applying 8-bit quantization when a GPU
# is available. bitsandbytes quantization requires CUDA (plus the bitsandbytes
# and accelerate packages), so on CPU we fall back to full precision.
def load_quantized_model(model_name):
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        if torch.cuda.is_available():
            model = AutoModelForSequenceClassification.from_pretrained(
                model_name,
                quantization_config=BitsAndBytesConfig(load_in_8bit=True),
                device_map="auto",
            )
            # Quantized models are already placed on the GPU by accelerate,
            # so no `device` argument is passed to the pipeline.
            pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
        else:
            model = AutoModelForSequenceClassification.from_pretrained(model_name)
            pipe = pipeline("text-classification", model=model, tokenizer=tokenizer, device=-1)
        print(f"Loaded model: {model_name}")
        return pipe
    except Exception as e:
        print(f"Error loading model '{model_name}': {e}")
        raise

# Load both models concurrently at startup
with ThreadPoolExecutor() as executor:
    sentiment_future = executor.submit(load_quantized_model, "cardiffnlp/twitter-roberta-base-sentiment")
    emotion_future = executor.submit(load_quantized_model, "bhadresh-savani/bert-base-uncased-emotion")
    sentiment_pipeline = sentiment_future.result()
    emotion_pipeline = emotion_future.result()
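
# (Loading in threads mainly overlaps the checkpoint downloads and disk I/O of
# the two models, which release the GIL; any speedup on warm starts is smaller.)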

def analyze_text(text):
    # Check the cache first (under the lock for thread-safety)
    with cache_lock:
        if text in prediction_cache:
            return prediction_cache[text]
    try:
        # Run both model inferences in parallel
        with ThreadPoolExecutor() as executor:
            sentiment_future = executor.submit(sentiment_pipeline, text)
            emotion_future = executor.submit(emotion_pipeline, text)
            sentiment_result = sentiment_future.result()[0]
            emotion_result = emotion_future.result()[0]
        # Prepare a clear, rounded output
        result = {
            "Sentiment": {sentiment_result['label']: round(sentiment_result['score'], 4)},
            "Emotion": {emotion_result['label']: round(emotion_result['score'], 4)}
        }
    except Exception as e:
        # Don't cache failures: transient errors shouldn't be replayed from the cache
        return {"error": str(e)}
    # Update the cache (FIFO eviction) under the lock
    with cache_lock:
        if len(prediction_cache) >= CACHE_SIZE:
            prediction_cache.pop(next(iter(prediction_cache)))
        prediction_cache[text] = result
    return result
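
# Example result shape (labels and scores below are illustrative; the sentiment
# model emits LABEL_0/LABEL_1/LABEL_2 for negative/neutral/positive):
#   {"Sentiment": {"LABEL_2": 0.9876}, "Emotion": {"joy": 0.9912}}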

# Gradio interface: gr.JSON displays the structured output
demo = gr.Interface(
    fn=analyze_text,
    inputs=gr.Textbox(placeholder="Enter your text here...", label="Input Text"),
    outputs=gr.JSON(label="Analysis Results"),
    title="🚀 Fast Sentiment & Emotion Analysis",
    description="An optimized application using 8-bit quantized models and parallel processing for fast inference.",
    examples=[
        ["I'm thrilled to start this new adventure!"],
        ["This situation is making me really frustrated."],
        ["I feel so heartbroken and lost."]
    ],
    theme="soft",
    allow_flagging="never"
)

# Warm up the models with a sample input to reduce first-call latency
_ = analyze_text("Warming up models...")

if __name__ == "__main__":
    demo.launch()
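
# Dependencies (a minimal requirements.txt sketch; package names only, since any
# version pins would be assumptions). accelerate and bitsandbytes are needed
# only for the 8-bit GPU path above:
#   gradio
#   transformers
#   torch
#   accelerate
#   bitsandbytes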
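
# Querying the running app programmatically: a sketch using gradio_client; the
# URL assumes a default local launch, and "/predict" is gr.Interface's default
# endpoint name:
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   print(client.predict("I'm thrilled to start this new adventure!", api_name="/predict"))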