Spaces: Build error
Refactor model loading and inference logic in app.py; update requirements.txt for package versions
Browse files
- app.py +71 -38
- requirements.txt +6 -10
app.py
CHANGED
@@ -2,60 +2,93 @@ import gradio as gr
 import torch
 import librosa
 from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
+import os
 
-#
-
+# Global model cache
+model = None
+processor = None
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
-
-
-
-
-
-
-
-
+
+def load_model():
+    global model, processor
+    if model is None:
+        repo_id = "MERaLiON/MERaLiON-2-10B"
+        print("Loading MERaLiON-2-10B model...")
+        processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=True)
+        model = AutoModelForSpeechSeq2Seq.from_pretrained(
+            repo_id,
+            use_safetensors=True,
+            trust_remote_code=True,
+            attn_implementation="flash_attention_2",
+            torch_dtype=torch.bfloat16,
+            device_map="auto",
+        )
+        print("Model loaded successfully!")
+    return model, processor
+
 
 def meralion_inference(prompt, uploaded_file):
+    global model, processor
+
     if uploaded_file is None:
         return "Please upload an audio file."
-
-    # Prompt template and example prompts
-    prompt_template = "Instruction: {query} \nFollow the text instruction based on the following audio: <SpeechHere>"
 
-
+    # Load model on first run
+    model, processor = load_model()
 
-
-
-
-        conversation=conversation, tokenize=False, add_generation_prompt=True
-    )
+    try:
+        # Load audio at 16kHz
+        audio_array, sr = librosa.load(uploaded_file.name, sr=16000)
 
-
-
+        # Prompt template
+        prompt_template = "Instruction: {query}\nFollow the text instruction based on the following audio: <SpeechHere>"
+        conversation = [
+            {"role": "user", "content": prompt_template.format(query=prompt)}
+        ]
+        chat_prompt = processor.tokenizer.apply_chat_template(
+            conversation=conversation, tokenize=False, add_generation_prompt=True
+        )
 
-
-
-        if isinstance(value, torch.Tensor):
-            inputs[key] = inputs[key].to(device)
-            if value.dtype == torch.float32:
-                inputs[key] = inputs[key].to(torch.bfloat16)
+        # Process inputs
+        inputs = processor(text=chat_prompt, audios=audio_array)
 
-
-
-
-
-
+        # Move to device and fix dtype
+        for key, value in inputs.items():
+            if isinstance(value, torch.Tensor):
+                inputs[key] = value.to(device)
+                if value.dtype == torch.float32:
+                    inputs[key] = inputs[key].to(torch.bfloat16)
+
+        # Generate
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs, max_new_tokens=256, do_sample=True, temperature=0.7
+            )
+        generated_ids = outputs[:, inputs["input_ids"].size(1) :]
+        response = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+        return response
+
+    except Exception as e:
+        return f"Error during inference: {str(e)}"
 
 
 with gr.Blocks() as demo:
-    gr.Markdown("#
+    gr.Markdown("# MERaLiON-2-10B Audio Demo")
     with gr.Row():
-        prompt_input = gr.Textbox(
-
-
+        prompt_input = gr.Textbox(
+            label="Enter Prompt", value="Please transcribe this speech.", lines=2
+        )
+        file_input = gr.File(
+            label="Upload Audio File (WAV/MP3, max 300s)",
+            file_types=[".wav", ".mp3", ".m4a"],
+        )
+        output_text = gr.Textbox(label="Model Output", lines=8)
 
-    submit_btn = gr.Button("Run
-    submit_btn.click(
+    submit_btn = gr.Button("Run Inference", variant="primary")
+    submit_btn.click(
+        meralion_inference, inputs=[prompt_input, file_input], outputs=output_text
+    )
 
 demo.launch()
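A note on the new inference path: `librosa.load(uploaded_file.name, ...)` assumes the value passed by `gr.File` is a tempfile-like object with a `.name` attribute. Recent Gradio releases (including the 4.44.0 pinned in requirements.txt below) pass a plain filepath string by default, on which `.name` raises `AttributeError`. A minimal sketch of a helper that tolerates both behaviors; `resolve_audio_path` is illustrative and not part of this commit:

def resolve_audio_path(uploaded_file):
    # Gradio 4.x gr.File passes a filepath string by default; older releases
    # passed a tempfile-backed object that exposes the path via .name.
    if isinstance(uploaded_file, str):
        return uploaded_file
    return uploaded_file.name

Calling `librosa.load(resolve_audio_path(uploaded_file), sr=16000)` then works under either convention.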
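A related judgment call in the dtype loop: `load_model()` uses `device_map="auto"`, which may shard the checkpoint across devices, so the module-level `device` string is not guaranteed to match where the input embeddings live. A hedged alternative, assuming a standard `transformers` model object, derives the target device from the model itself:

import torch

def move_inputs(inputs: dict, model: torch.nn.Module) -> dict:
    # Under device_map="auto" the weights decide the placement; read it off
    # the loaded model instead of hard-coding "cuda"/"cpu".
    target_device = next(model.parameters()).device
    for key, value in inputs.items():
        if isinstance(value, torch.Tensor):
            inputs[key] = value.to(target_device)
    return inputs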
requirements.txt
CHANGED
@@ -1,13 +1,9 @@
-#
-
-
+# Core requirements for MERaLiON-2-10B
+transformers==4.50.1
+gradio==4.44.0
 librosa==0.10.0
-safetensors==0.
-accelerate==0.
+safetensors==0.4.5
+accelerate==0.41.0
 soundfile==0.12.1
-
-# NOTE: `torch` should be installed via the official PyTorch wheels that match
-# your CUDA version (or CPU-only). See the README.md for Windows CPU/CUDA
-# install commands and pick the appropriate wheel. To keep this file simple
-# we do not pin `torch` here.
 torch
+flash-attn
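A plausible cause of the Build error badge at the top of this page: `flash-attn` compiles CUDA extensions at install time, requires `torch` (and a CUDA toolchain) to be present before its build starts, and is usually installed with `pip install flash-attn --no-build-isolation`, none of which a plain requirements.txt install guarantees. A hedged sketch of a runtime fallback that avoids the hard dependency; `"sdpa"` is a standard `attn_implementation` value in recent `transformers` releases:

import importlib.util

# Use flash_attention_2 only when the flash_attn package is importable;
# otherwise fall back to PyTorch's built-in scaled-dot-product attention.
attn_impl = (
    "flash_attention_2"
    if importlib.util.find_spec("flash_attn") is not None
    else "sdpa"
)

`load_model()` could then pass `attn_implementation=attn_impl` instead of hard-coding `"flash_attention_2"`, making `flash-attn` an optional accelerator rather than a build-time requirement.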