Spaces:
Runtime error

from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, BitsAndBytesConfig
import gradio as gr
import torch
from concurrent.futures import ThreadPoolExecutor
from threading import Lock

# Global cache settings and lock for thread-safety
CACHE_SIZE = 100
prediction_cache = {}
cache_lock = Lock()
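
# Note: functools.lru_cache(maxsize=CACHE_SIZE) would give bounded, thread-safe
# memoization with less code; the manual dict + Lock is kept here so that error
# results can be excluded from the cache (see analyze_text below).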

# Load a model for text classification, applying 8-bit quantization when a GPU
# is available. bitsandbytes quantization requires CUDA (plus the bitsandbytes
# and accelerate packages), so on CPU we fall back to full precision.
def load_quantized_model(model_name):
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        if torch.cuda.is_available():
            model = AutoModelForSequenceClassification.from_pretrained(
                model_name,
                quantization_config=BitsAndBytesConfig(load_in_8bit=True),
                device_map="auto",
            )
            # Quantized models are already placed on the GPU by accelerate,
            # so no `device` argument is passed to the pipeline.
            pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
        else:
            model = AutoModelForSequenceClassification.from_pretrained(model_name)
            pipe = pipeline("text-classification", model=model, tokenizer=tokenizer, device=-1)
        print(f"Loaded model: {model_name}")
        return pipe
    except Exception as e:
        print(f"Error loading model '{model_name}': {e}")
        raise

# Load both models concurrently at startup
with ThreadPoolExecutor() as executor:
    sentiment_future = executor.submit(load_quantized_model, "cardiffnlp/twitter-roberta-base-sentiment")
    emotion_future = executor.submit(load_quantized_model, "bhadresh-savani/bert-base-uncased-emotion")
    sentiment_pipeline = sentiment_future.result()
    emotion_pipeline = emotion_future.result()
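
# (Loading in threads mainly overlaps the checkpoint downloads and disk I/O of
# the two models, which release the GIL; any speedup on warm starts is smaller.)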

def analyze_text(text):
    # Check the cache first (under the lock for thread-safety)
    with cache_lock:
        if text in prediction_cache:
            return prediction_cache[text]
    try:
        # Run both model inferences in parallel
        with ThreadPoolExecutor() as executor:
            sentiment_future = executor.submit(sentiment_pipeline, text)
            emotion_future = executor.submit(emotion_pipeline, text)
            sentiment_result = sentiment_future.result()[0]
            emotion_result = emotion_future.result()[0]
        # Prepare a clear, rounded output
        result = {
            "Sentiment": {sentiment_result['label']: round(sentiment_result['score'], 4)},
            "Emotion": {emotion_result['label']: round(emotion_result['score'], 4)}
        }
    except Exception as e:
        # Don't cache failures: transient errors shouldn't be replayed from the cache
        return {"error": str(e)}
    # Update the cache (FIFO eviction) under the lock
    with cache_lock:
        if len(prediction_cache) >= CACHE_SIZE:
            prediction_cache.pop(next(iter(prediction_cache)))
        prediction_cache[text] = result
    return result
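
# Example result shape (labels and scores below are illustrative; the sentiment
# model emits LABEL_0/LABEL_1/LABEL_2 for negative/neutral/positive):
#   {"Sentiment": {"LABEL_2": 0.9876}, "Emotion": {"joy": 0.9912}}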

# Gradio interface: gr.JSON displays the structured output
demo = gr.Interface(
    fn=analyze_text,
    inputs=gr.Textbox(placeholder="Enter your text here...", label="Input Text"),
    outputs=gr.JSON(label="Analysis Results"),
    title="🚀 Fast Sentiment & Emotion Analysis",
    description="An optimized application using 8-bit quantized models and parallel processing for fast inference.",
    examples=[
        ["I'm thrilled to start this new adventure!"],
        ["This situation is making me really frustrated."],
        ["I feel so heartbroken and lost."]
    ],
    theme="soft",
    allow_flagging="never"
)

# Warm up the models with a sample input to reduce first-call latency
_ = analyze_text("Warming up models...")

if __name__ == "__main__":
    demo.launch()
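
# Dependencies (a minimal requirements.txt sketch; package names only, since any
# version pins would be assumptions). accelerate and bitsandbytes are needed
# only for the 8-bit GPU path above:
#   gradio
#   transformers
#   torch
#   accelerate
#   bitsandbytes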
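
# Querying the running app programmatically: a sketch using gradio_client; the
# URL assumes a default local launch, and "/predict" is gr.Interface's default
# endpoint name:
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   print(client.predict("I'm thrilled to start this new adventure!", api_name="/predict"))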