# marshal-yash — "Update app.py" (commit 929c0eb, verified)
import gradio as gr
from transformers import AutoProcessor, SeamlessM4Tv2Model, pipeline, XLMRobertaTokenizer, AutoModelForSequenceClassification
import torch
import librosa
import numpy as np
# --- 1. CONFIGURATION ---
# Sentiment Model (Multilingual: Hindi, English, etc.)
# Hugging Face Hub id of the XLM-RoBERTa sentiment classifier.
SENTIMENT_MODEL_ID = "cardiffnlp/twitter-xlm-roberta-base-sentiment"
# Audio Model (SeamlessM4T v2 Large)
# Hub id of the speech model used for transcription (ASR via text generation).
AUDIO_MODEL_ID = "facebook/seamless-m4t-v2-large"
# Auto-select GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"πŸš€ Cloud Brain Running on: {device.upper()}")
# --- 2. LOAD MODELS ---
# A. Load Sentiment Model
# Tokenizer and classification head are loaded separately and wired into a
# text-classification pipeline; `device=0` selects the first CUDA device,
# `-1` keeps the pipeline on CPU (transformers pipeline convention).
print(f"⏳ Loading Sentiment Model ({SENTIMENT_MODEL_ID})...")
tokenizer = XLMRobertaTokenizer.from_pretrained(SENTIMENT_MODEL_ID)
sent_model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_MODEL_ID)
sentiment_pipeline = pipeline(
"text-classification",
model=sent_model,
tokenizer=tokenizer,
device=0 if device == "cuda" else -1
)
# B. Load Audio Model (SeamlessM4T)
# The processor handles feature extraction + tokenization; the model itself
# is moved to the selected device explicitly.
print(f"⏳ Loading Audio Model ({AUDIO_MODEL_ID})...")
processor = AutoProcessor.from_pretrained(AUDIO_MODEL_ID)
audio_model = SeamlessM4Tv2Model.from_pretrained(AUDIO_MODEL_ID).to(device)
print("βœ… All Models Loaded Successfully!")
# --- 3. INTELLIGENCE FUNCTIONS ---
def analyze_sentiment(text):
    """
    Classify the sentiment of *text* with the multilingual XLM-RoBERTa pipeline.

    Parameters
    ----------
    text : str or None
        Input text in any language the model supports (e.g. Hindi, English).

    Returns
    -------
    tuple[str, float]
        A human-readable sentiment label (with emoji) and the model's
        confidence score in [0, 1]. Returns ("Error", 0.0) if inference fails.
    """
    # Empty / whitespace-only input: short-circuit to Neutral. Use the same
    # emoji-decorated label as the mapped results so the UI stays consistent
    # (the original returned a bare "Neutral" here).
    if not text or not text.strip():
        return "Neutral 🟑", 0.0
    try:
        # Run inference; the pipeline returns a list of {label, score} dicts.
        results = sentiment_pipeline(text)
        raw_label = results[0]['label']
        confidence = results[0]['score']
        # Cardiff NLP checkpoints may emit generic LABEL_n ids or lowercase
        # class names depending on the model config — map both spellings.
        label_map = {
            "LABEL_0": "Negative πŸ”΄",
            "LABEL_1": "Neutral 🟑",
            "LABEL_2": "Positive 🟒",
            "negative": "Negative πŸ”΄",
            "neutral": "Neutral 🟑",
            "positive": "Positive 🟒"
        }
        # Unknown labels fall through unchanged rather than raising.
        nice_label = label_map.get(raw_label, raw_label)
        return nice_label, confidence
    except Exception as e:
        # Best-effort: surface the failure in the UI instead of crashing the app.
        print(f"Sentiment Error: {e}")
        return "Error", 0.0
def process_pipeline(audio_path, language_code, text_input):
    """
    Master function driving the Gradio app.

    1. If audio is provided, transcribe it with SeamlessM4T in the selected
       language.
    2. Otherwise fall back to the typed text.
    3. Run sentiment analysis on the resulting text.

    Parameters
    ----------
    audio_path : str or None
        Filesystem path of the uploaded/recorded audio clip (Gradio filepath).
    language_code : str
        SeamlessM4T target-language code, e.g. "hin", "guj", "eng".
    text_input : str or None
        Optional typed text, used only when no transcription was produced.

    Returns
    -------
    tuple[str, str, float]
        (transcription, sentiment label, confidence rounded to 3 decimals).
    """
    transcribed_text = ""
    # --- Step 1: Transcription (if audio was supplied) ---
    if audio_path is not None:
        print(f"🎀 Processing Audio: {audio_path} | Language: {language_code}")
        try:
            # librosa resamples to the 16 kHz mono signal SeamlessM4T expects;
            # the returned sample rate is always 16000 here, so discard it.
            waveform, _ = librosa.load(audio_path, sr=16000)
            inputs = processor(audio=waveform, return_tensors="pt", sampling_rate=16000).to(device)
            # tgt_lang pins the transcription language; generate_speech=False
            # requests text tokens only (no speech synthesis output).
            output_tokens = audio_model.generate(
                **inputs,
                tgt_lang=language_code,
                generate_speech=False
            )[0].cpu().numpy().squeeze()
            transcribed_text = processor.decode(output_tokens, skip_special_tokens=True)
            print(f"πŸ“ Transcribed: {transcribed_text}")
        except Exception as e:
            # Report the failure through the UI outputs rather than raising
            # inside the Gradio callback.
            return f"Error in transcription: {str(e)}", "Error ⚠️", 0.0
    # --- Step 2: Fall back to typed text (ignore whitespace-only input) ---
    if not transcribed_text and text_input and text_input.strip():
        transcribed_text = text_input.strip()
    if not transcribed_text:
        return "", "Neutral 🟑", 0.0
    # --- Step 3: Sentiment analysis ---
    sentiment_label, confidence = analyze_sentiment(transcribed_text)
    # Return: Transcription, Sentiment Label, Confidence Score
    return transcribed_text, sentiment_label, round(confidence, 3)
# --- 4. UI CONSTRUCTION ---
with gr.Interface(
fn=process_pipeline,
inputs=[
gr.Audio(type="filepath", label="🎀 Upload Audio or Speak"),
# Dropdown prevents the crash by letting user define language
gr.Dropdown(
choices=["hin", "guj", "eng"],
value="hin",
label="πŸ—£οΈ Select Language Spoken (hin=Hindi, guj=Gujarati)"
),
gr.Textbox(label="⌨️ Or Type Text Here")
],
outputs=[
gr.Textbox(label="πŸ“ Transcription"),
gr.Label(label="Sentiment Analysis"),
gr.Number(label="Confidence Score")
],
title="SGP-IV: Voice Sentiment Brain",
description="Select your language, speak, and get real-time sentiment analysis."
) as demo:
pass
if __name__ == "__main__":
demo.launch()