| import gradio as gr |
| from transformers import AutoProcessor, SeamlessM4Tv2Model, pipeline, XLMRobertaTokenizer, AutoModelForSequenceClassification |
| import torch |
| import librosa |
| import numpy as np |
|
|
# Hugging Face model identifiers.
SENTIMENT_MODEL_ID = "cardiffnlp/twitter-xlm-roberta-base-sentiment"
AUDIO_MODEL_ID = "facebook/seamless-m4t-v2-large"


# Prefer the GPU when one is visible to torch; otherwise run on CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"🚀 Cloud Brain Running on: {device.upper()}")
|
|
| |
|
|
| |
# --- Sentiment model: multilingual XLM-RoBERTa fine-tuned on tweets ---
print(f"⏳ Loading Sentiment Model ({SENTIMENT_MODEL_ID})...")
tokenizer = XLMRobertaTokenizer.from_pretrained(SENTIMENT_MODEL_ID)
sent_model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_MODEL_ID)
# transformers pipelines take a CUDA device index, or -1 for CPU.
pipeline_device = 0 if device == "cuda" else -1
sentiment_pipeline = pipeline(
    "text-classification",
    model=sent_model,
    tokenizer=tokenizer,
    device=pipeline_device,
)
|
|
| |
# --- Speech model: SeamlessM4T v2 (used here for speech-to-text) ---
print(f"⏳ Loading Audio Model ({AUDIO_MODEL_ID})...")
processor = AutoProcessor.from_pretrained(AUDIO_MODEL_ID)
audio_model = SeamlessM4Tv2Model.from_pretrained(AUDIO_MODEL_ID).to(device)

print("✅ All Models Loaded Successfully!")
|
|
| |
|
|
def analyze_sentiment(text):
    """Classify *text* as Negative / Neutral / Positive with XLM-RoBERTa.

    Returns a ``(label, confidence)`` tuple. Empty or whitespace-only
    input yields ``("Neutral", 0.0)``; a pipeline failure yields
    ``("Error", 0.0)`` after logging the exception.
    """
    # Guard clause: nothing to classify.
    if not text or not text.strip():
        return "Neutral", 0.0

    # cardiffnlp checkpoints emit either generic LABEL_n ids or lowercase
    # class names depending on the config shipped with the model — map
    # both spellings onto the human-friendly display labels.
    pretty = {
        "LABEL_0": "Negative 🔴",
        "LABEL_1": "Neutral 🟡",
        "LABEL_2": "Positive 🟢",
        "negative": "Negative 🔴",
        "neutral": "Neutral 🟡",
        "positive": "Positive 🟢",
    }

    try:
        top = sentiment_pipeline(text)[0]
        # Fall back to the raw label when it isn't in the map.
        return pretty.get(top["label"], top["label"]), top["score"]
    except Exception as exc:
        print(f"Sentiment Error: {exc}")
        return "Error", 0.0
|
|
def process_pipeline(audio_path, language_code, text_input):
    """Transcribe speech (if any), then analyze sentiment.

    Priority order:
      1. A supplied audio file is transcribed with SeamlessM4T using the
         selected target language.
      2. Otherwise the typed text is used as-is.
      3. The resulting text is scored by :func:`analyze_sentiment`.

    Returns ``(text, sentiment_label, confidence)``.
    """
    text = ""

    # --- 1) Speech-to-text -------------------------------------------------
    if audio_path is not None:
        print(f"🎤 Processing Audio: {audio_path} | Language: {language_code}")
        try:
            # SeamlessM4T expects 16 kHz input; librosa resamples on load.
            waveform, _ = librosa.load(audio_path, sr=16000)
            model_inputs = processor(
                audio=waveform, return_tensors="pt", sampling_rate=16000
            ).to(device)
            # generate_speech=False → text tokens only (ASR mode).
            tokens = audio_model.generate(
                **model_inputs,
                tgt_lang=language_code,
                generate_speech=False,
            )[0].cpu().numpy().squeeze()
            text = processor.decode(tokens, skip_special_tokens=True)
            print(f"📝 Transcribed: {text}")
        except Exception as exc:
            return f"Error in transcription: {str(exc)}", "Error ⚠️", 0.0

    # --- 2) Typed-text fallback -------------------------------------------
    if not text and text_input:
        text = text_input

    # Nothing to analyze at all.
    if not text:
        return "", "Neutral 🟡", 0.0

    # --- 3) Sentiment ------------------------------------------------------
    label, score = analyze_sentiment(text)
    return text, label, round(score, 3)
|
|
| |
# Gradio UI: audio or typed text in; transcription, sentiment, confidence out.
# gr.Interface builds its own layout in __init__, so a plain assignment is
# the idiomatic form — the original `with gr.Interface(...) as demo: pass`
# opened a Blocks context only to do nothing inside it.
demo = gr.Interface(
    fn=process_pipeline,
    inputs=[
        gr.Audio(type="filepath", label="🎤 Upload Audio or Speak"),
        # SeamlessM4T target-language codes (ISO 639-3).
        gr.Dropdown(
            choices=["hin", "guj", "eng"],
            value="hin",
            label="🗣️ Select Language Spoken (hin=Hindi, guj=Gujarati)",
        ),
        gr.Textbox(label="⌨️ Or Type Text Here"),
    ],
    outputs=[
        gr.Textbox(label="📝 Transcription"),
        gr.Label(label="Sentiment Analysis"),
        gr.Number(label="Confidence Score"),
    ],
    title="SGP-IV: Voice Sentiment Brain",
    description="Select your language, speak, and get real-time sentiment analysis.",
)


if __name__ == "__main__":
    demo.launch()