| import gradio as gr |
| from transformers import AutoProcessor, SeamlessM4Tv2Model, pipeline, XLMRobertaTokenizer, AutoModelForSequenceClassification |
| import torch |
| import librosa |
| import numpy as np |
|
|
# Hugging Face model identifiers.
SENTIMENT_MODEL_ID = "cardiffnlp/twitter-xlm-roberta-base-sentiment"
AUDIO_MODEL_ID = "facebook/seamless-m4t-v2-large"


# Prefer the GPU when one is visible to torch; otherwise run on CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"🚀 Cloud Brain Running on: {device.upper()}")
|
|
| |
|
|
| |
# --- Sentiment model: multilingual XLM-RoBERTa fine-tuned on tweets ---
print(f"⏳ Loading Sentiment Model ({SENTIMENT_MODEL_ID})...")
tokenizer = XLMRobertaTokenizer.from_pretrained(SENTIMENT_MODEL_ID)
sent_model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_MODEL_ID)
# transformers pipelines take a CUDA device index, or -1 for CPU.
pipeline_device = 0 if device == "cuda" else -1
sentiment_pipeline = pipeline(
    "text-classification",
    model=sent_model,
    tokenizer=tokenizer,
    device=pipeline_device,
)
|
|
| |
# --- Speech model: SeamlessM4T v2 (used here for speech-to-text) ---
print(f"⏳ Loading Audio Model ({AUDIO_MODEL_ID})...")
processor = AutoProcessor.from_pretrained(AUDIO_MODEL_ID)
audio_model = SeamlessM4Tv2Model.from_pretrained(AUDIO_MODEL_ID).to(device)

print("✅ All Models Loaded Successfully!")
|
|
| |
|
|
def analyze_sentiment(text):
    """Classify *text* as Negative / Neutral / Positive with XLM-RoBERTa.

    Returns a ``(label, confidence)`` tuple. Empty or whitespace-only
    input yields ``("Neutral", 0.0)``; a pipeline failure yields
    ``("Error", 0.0)`` after logging the exception.
    """
    # Guard clause: nothing to classify.
    if not text or not text.strip():
        return "Neutral", 0.0

    # cardiffnlp checkpoints emit either generic LABEL_n ids or lowercase
    # class names depending on the config shipped with the model — map
    # both spellings onto the human-friendly display labels.
    pretty = {
        "LABEL_0": "Negative 🔴",
        "LABEL_1": "Neutral 🟡",
        "LABEL_2": "Positive 🟢",
        "negative": "Negative 🔴",
        "neutral": "Neutral 🟡",
        "positive": "Positive 🟢",
    }

    try:
        top = sentiment_pipeline(text)[0]
        # Fall back to the raw label when it isn't in the map.
        return pretty.get(top["label"], top["label"]), top["score"]
    except Exception as exc:
        print(f"Sentiment Error: {exc}")
        return "Error", 0.0
|
|
def process_pipeline(audio_path, language_code, text_input):
    """Transcribe speech (if any), then analyze sentiment.

    Priority order:
      1. A supplied audio file is transcribed with SeamlessM4T using the
         selected target language.
      2. Otherwise the typed text is used as-is.
      3. The resulting text is scored by :func:`analyze_sentiment`.

    Returns ``(text, sentiment_label, confidence)``.
    """
    text = ""

    # --- 1) Speech-to-text -------------------------------------------------
    if audio_path is not None:
        print(f"🎤 Processing Audio: {audio_path} | Language: {language_code}")
        try:
            # SeamlessM4T expects 16 kHz input; librosa resamples on load.
            waveform, _ = librosa.load(audio_path, sr=16000)
            model_inputs = processor(
                audio=waveform, return_tensors="pt", sampling_rate=16000
            ).to(device)
            # generate_speech=False → text tokens only (ASR mode).
            tokens = audio_model.generate(
                **model_inputs,
                tgt_lang=language_code,
                generate_speech=False,
            )[0].cpu().numpy().squeeze()
            text = processor.decode(tokens, skip_special_tokens=True)
            print(f"📝 Transcribed: {text}")
        except Exception as exc:
            return f"Error in transcription: {str(exc)}", "Error ⚠️", 0.0

    # --- 2) Typed-text fallback -------------------------------------------
    if not text and text_input:
        text = text_input

    # Nothing to analyze at all.
    if not text:
        return "", "Neutral 🟡", 0.0

    # --- 3) Sentiment ------------------------------------------------------
    label, score = analyze_sentiment(text)
    return text, label, round(score, 3)
|
|
| |
# Gradio UI: audio or typed text in; transcription, sentiment, confidence out.
# gr.Interface builds its own layout in __init__, so a plain assignment is
# the idiomatic form — the original `with gr.Interface(...) as demo: pass`
# opened a Blocks context only to do nothing inside it.
demo = gr.Interface(
    fn=process_pipeline,
    inputs=[
        gr.Audio(type="filepath", label="🎤 Upload Audio or Speak"),
        # SeamlessM4T target-language codes (ISO 639-3).
        gr.Dropdown(
            choices=["hin", "guj", "eng"],
            value="hin",
            label="🗣️ Select Language Spoken (hin=Hindi, guj=Gujarati)",
        ),
        gr.Textbox(label="⌨️ Or Type Text Here"),
    ],
    outputs=[
        gr.Textbox(label="📝 Transcription"),
        gr.Label(label="Sentiment Analysis"),
        gr.Number(label="Confidence Score"),
    ],
    title="SGP-IV: Voice Sentiment Brain",
    description="Select your language, speak, and get real-time sentiment analysis.",
)


if __name__ == "__main__":
    demo.launch()