Cheeky Sparrow committed on
Commit
59b01a4
·
1 Parent(s): d2e4f4f

better app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -100
app.py CHANGED
@@ -2,7 +2,7 @@ import re
2
  import tempfile
3
  from collections import Counter
4
  from pathlib import Path
5
- from typing import Literal
6
 
7
  import gradio as gr
8
  import torch
@@ -12,95 +12,92 @@ from NatureLM.models.NatureLM import NatureLM
12
  from NatureLM.utils import generate_sample_batches, prepare_sample_waveforms
13
  import spaces
14
 
15
- CONFIG: Config = None
16
- MODEL: NatureLM = None
17
- MODEL_LOADED = False
18
- MODEL_LOADING = False
19
- MODEL_LOAD_FAILED = False
20
 
21
-
22
- def check_model_availability():
23
- """Check if the model is available for download"""
24
- try:
25
- from huggingface_hub import model_info
26
- info = model_info("EarthSpeciesProject/NatureLM-audio")
27
- return True, "Model is available"
28
- except Exception as e:
29
- return False, f"Model not available: {str(e)}"
30
-
31
-
32
- def reset_model_state():
33
- """Reset the model loading state to allow retrying after a failure"""
34
- global MODEL, MODEL_LOADED, MODEL_LOADING, MODEL_LOAD_FAILED
35
- MODEL = None
36
- MODEL_LOADED = False
37
- MODEL_LOADING = False
38
- MODEL_LOAD_FAILED = False
39
- return get_model_status()
40
-
41
-
42
- def get_model_status():
43
- """Get the current model loading status"""
44
- if MODEL_LOADED:
45
- return "βœ… Model loaded and ready"
46
- elif MODEL_LOADING:
47
- return "πŸ”„ Loading model... Please wait"
48
- elif MODEL_LOAD_FAILED:
49
- return "❌ Model failed to load. Please check the configuration."
50
- else:
51
- return "⏳ Ready to load model on first use"
52
-
53
-
54
- def load_model_if_needed():
55
- """Lazy load the model when first needed"""
56
- global MODEL, MODEL_LOADED, MODEL_LOADING, MODEL_LOAD_FAILED
57
 
58
- if MODEL_LOADED:
59
- return MODEL
 
 
 
 
60
 
61
- if MODEL_LOADING:
62
- # Model is currently loading, return a message to try again
63
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
- if MODEL_LOAD_FAILED:
66
- # Model has already failed to load, don't try again
67
- return None
 
 
 
 
 
 
 
68
 
69
- if MODEL is None:
 
 
 
 
 
 
 
70
  try:
71
- MODEL_LOADING = True
72
  print("Loading model...")
73
 
74
  # Check if model is available first
75
- available, message = check_model_availability()
76
  if not available:
77
  raise Exception(f"Model not available: {message}")
78
 
79
  model = NatureLM.from_pretrained("EarthSpeciesProject/NatureLM-audio")
80
- model.to("cuda") # Use CPU for HuggingFace Spaces
81
  model.eval()
82
- MODEL = model
83
- MODEL_LOADED = True
84
- MODEL_LOADING = False
 
85
  print("Model loaded successfully!")
86
- return MODEL
 
87
  except Exception as e:
88
  print(f"Error loading model: {e}")
89
- MODEL_LOADING = False
90
- MODEL_LOAD_FAILED = True
91
  return None
92
-
93
- return MODEL
 
 
 
94
 
95
  @spaces.GPU
96
- def prompt_lm(audios: list[str], messages: list[dict[str, str]]):
97
- # Always try to load the model if needed
98
- model = load_model_if_needed()
99
 
100
  if model is None:
101
- if MODEL_LOADING:
102
  return "πŸ”„ Loading model... This may take a few minutes on first use. Please try again in a moment."
103
- elif MODEL_LOAD_FAILED:
104
  return "❌ Model failed to load. This could be due to:\nβ€’ No internet connection\nβ€’ Insufficient disk space\nβ€’ Model repository access issues\n\nPlease check your connection and try again using the retry button."
105
  else:
106
  return "Demo mode: Model not loaded. Please check the model configuration."
@@ -115,12 +112,12 @@ def prompt_lm(audios: list[str], messages: list[dict[str, str]]):
115
  r"<\|start_header_id\|>system<\|end_header_id\|>\n\nCutting Knowledge Date: [^\n]+\nToday Date: [^\n]+\n\n<\|eot_id\|>",
116
  "",
117
  prompt_text,
118
- ) # exclude the system header from the prompt
119
- prompt_text = re.sub("\\n", r"\\n", prompt_text) # FIXME this is a hack to fix the issue #34
120
 
121
  print(f"{prompt_text=}")
122
  with torch.cuda.amp.autocast(dtype=torch.float16):
123
- llm_answer = model.generate(samples, CONFIG.generate, prompts=[prompt_text])
124
  return llm_answer[0]
125
 
126
 
@@ -159,8 +156,9 @@ def combine_model_inputs(msgs: list[dict[str, str]]) -> dict[str, list[str]]:
159
  files.append(path)
160
  case _:
161
  messages.append(msg)
 
 
162
  joined_messages = []
163
- # join consecutive messages from the same role
164
  for msg in messages:
165
  if joined_messages and joined_messages[-1]["role"] == msg["role"]:
166
  joined_messages[-1]["content"] += msg["content"]
@@ -175,20 +173,19 @@ def bot_response(history: list):
175
  combined_inputs = combine_model_inputs(history)
176
  response = prompt_lm(combined_inputs["files"], combined_inputs["messages"])
177
  history.append({"role": "assistant", "content": response})
178
-
179
  return history
180
 
181
 
182
  def _chat_tab(examples):
183
- # Add status indicator
184
  status_text = gr.Textbox(
185
- value=get_model_status(),
186
  label="Model Status",
187
  interactive=False,
188
  visible=True
189
  )
190
 
191
- # Add retry button that only shows when model failed to load
192
  retry_button = gr.Button(
193
  "πŸ”„ Retry Loading Model",
194
  visible=False,
@@ -201,7 +198,6 @@ def _chat_tab(examples):
201
  bubble_full_width=False,
202
  type="messages",
203
  render_markdown=False,
204
- # editable="user", # disable because of https://github.com/gradio-app/gradio/issues/10320
205
  resizeable=True,
206
  )
207
 
@@ -218,20 +214,20 @@ def _chat_tab(examples):
218
  )
219
 
220
  # Update status after bot response
221
- bot_msg.then(lambda: get_model_status(), None, [status_text])
222
  bot_msg.then(lambda: gr.ClearButton(visible=True), None, [clear_button])
223
  clear_button.click(lambda: gr.ClearButton(visible=False), None, [clear_button])
224
 
225
  # Handle retry button
226
  retry_button.click(
227
- reset_model_state,
228
  None,
229
  [status_text]
230
  )
231
 
232
  # Show/hide retry button based on model status
233
  def update_retry_button_visibility():
234
- return gr.Button(visible=MODEL_LOAD_FAILED)
235
 
236
  # Update retry button visibility when status changes
237
  bot_msg.then(update_retry_button_visibility, None, [retry_button])
@@ -253,11 +249,11 @@ def summarize_batch_results(results):
253
 
254
 
255
  def run_batch_inference(files, task, progress=gr.Progress()) -> str:
256
- model = load_model_if_needed()
257
  if model is None:
258
- if MODEL_LOADING:
259
  return "πŸ”„ Loading model... This may take a few minutes on first use. Please try again in a moment."
260
- elif MODEL_LOAD_FAILED:
261
  return "❌ Model failed to load. This could be due to:\nβ€’ No internet connection\nβ€’ Insufficient disk space\nβ€’ Model repository access issues\n\nPlease check your connection and try again."
262
  else:
263
  return "Demo mode: Model not loaded. Please check the model configuration."
@@ -310,10 +306,6 @@ def to_raven_format(outputs: dict[int, str], chunk_len: int = 10) -> str:
310
  last_label = ""
311
  row = 1
312
 
313
- # The "Selection" column is just the row number.
314
- # The "view" column will always say "Spectrogram 1".
315
- # Channel can always be "1".
316
- # For the frequency bounds we can just use 0 and 1/2 the sample rate
317
  for offset, label in sorted(outputs.items()):
318
  if label != last_label and last_label:
319
  raven_output.append(get_line(row, current_offset, offset, last_label))
@@ -332,11 +324,11 @@ def to_raven_format(outputs: dict[int, str], chunk_len: int = 10) -> str:
332
 
333
 
334
  def _run_long_recording_inference(file, task, chunk_len: int = 10, hop_len: int = 5, progress=gr.Progress()):
335
- model = load_model_if_needed()
336
  if model is None:
337
- if MODEL_LOADING:
338
  return "πŸ”„ Loading model... This may take a few minutes on first use. Please try again in a moment.", None
339
- elif MODEL_LOAD_FAILED:
340
  return "❌ Model failed to load. This could be due to:\nβ€’ No internet connection\nβ€’ Insufficient disk space\nβ€’ Model repository access issues\n\nPlease check your connection and try again.", None
341
  else:
342
  return "Demo mode: Model not loaded. Please check the model configuration.", None
@@ -346,12 +338,12 @@ def _run_long_recording_inference(file, task, chunk_len: int = 10, hop_len: int
346
  offset = 0
347
 
348
  prompt = f"<Audio><AudioHere></Audio> {task}"
349
- prompt = CONFIG.model.prompt_template.format(prompt)
350
 
351
  for batch in progress.tqdm(generate_sample_batches(file, cuda_enabled, chunk_len=chunk_len, hop_len=hop_len)):
352
  prompt_strs = [prompt] * len(batch["audio_chunk_sizes"])
353
  with torch.cuda.amp.autocast(dtype=torch.float16):
354
- llm_answers = model.generate(batch, CONFIG.generate, prompts=prompt_strs)
355
  for answer in llm_answers:
356
  outputs[offset] = answer
357
  offset += hop_len
@@ -400,23 +392,22 @@ def _long_recording_tab():
400
  [output, download_raven],
401
  )
402
 
403
- @spaces.GPU
404
  def main(
405
  assets_dir: Path,
406
  cfg_path: str | Path,
407
  options: list[str] = [],
408
  device: str = "cuda",
409
  ):
410
- global CONFIG
411
-
412
  try:
413
  cfg = Config.from_sources(yaml_file=cfg_path, cli_args=options)
414
- CONFIG = cfg
415
  print("Configuration loaded successfully")
416
  except Exception as e:
417
  print(f"Warning: Could not load config: {e}")
418
  print("Running in demo mode")
419
- CONFIG = None
420
 
421
  # Check if assets directory exists, if not create a placeholder
422
  if not assets_dir.exists():
@@ -466,15 +457,15 @@ def main(
466
  _long_recording_tab()
467
 
468
  return app
469
-
470
- # At the bottom of the file:
 
471
  app = main(
472
  assets_dir=Path("assets"),
473
  cfg_path=Path("configs/inference.yml"),
474
  options=[],
475
- device="cuda", # TODO: from config depending on zerogpu! (to change)
476
  )
477
 
478
- # Launch the app
479
  if __name__ == "__main__":
480
  app.launch()
 
2
  import tempfile
3
  from collections import Counter
4
  from pathlib import Path
5
+ from typing import Literal, Optional
6
 
7
  import gradio as gr
8
  import torch
 
12
  from NatureLM.utils import generate_sample_batches, prepare_sample_waveforms
13
  import spaces
14
 
 
 
 
 
 
15
 
16
class ModelManager:
    """Manages lazy loading and lifecycle state of the NatureLM model.

    State:
        model       -- the loaded NatureLM instance, or None.
        config      -- inference Config; assigned externally (by main()).
        is_loaded   -- True once load_model() has succeeded.
        is_loading  -- True while a load attempt is in progress.
        load_failed -- True after a failed load; blocks retries until
                       reset_state() is called.
    """

    def __init__(self):
        self.model: Optional[NatureLM] = None
        self.config: Optional[Config] = None
        self.is_loaded = False
        self.is_loading = False
        self.load_failed = False

    def check_availability(self) -> tuple[bool, str]:
        """Check if the model is available for download.

        Returns:
            (ok, message) -- ok is True when the hub repo is reachable.
        """
        try:
            # Imported lazily so the app can start even if huggingface_hub
            # is missing; any failure is reported, not raised.
            from huggingface_hub import model_info

            model_info("EarthSpeciesProject/NatureLM-audio")
            return True, "Model is available"
        except Exception as e:
            return False, f"Model not available: {str(e)}"

    def reset_state(self) -> str:
        """Reset the model loading state to allow retrying after a failure.

        Returns:
            The refreshed human-readable status string.
        """
        self.model = None
        self.is_loaded = False
        self.is_loading = False
        self.load_failed = False
        return self.get_status()

    def get_status(self) -> str:
        """Get a human-readable description of the current loading state."""
        if self.is_loaded:
            return "✅ Model loaded and ready"
        elif self.is_loading:
            return "🔄 Loading model... Please wait"
        elif self.load_failed:
            return "❌ Model failed to load. Please check the configuration."
        else:
            return "⏳ Ready to load model on first use"

    def load_model(self, device: str = "cuda") -> Optional[NatureLM]:
        """Load the model if needed and return it.

        Returns None while a load is already in progress or after a
        previous failure (call reset_state() to allow a retry).

        Args:
            device: torch device to place the model on (default "cuda").
        """
        if self.is_loaded:
            return self.model

        if self.is_loading or self.load_failed:
            return None

        try:
            self.is_loading = True
            print("Loading model...")

            # Fail fast with a clear message when the hub is unreachable.
            available, message = self.check_availability()
            if not available:
                raise Exception(f"Model not available: {message}")

            model = NatureLM.from_pretrained("EarthSpeciesProject/NatureLM-audio")
            model.to(device)
            model.eval()

            self.model = model
            self.is_loaded = True
            self.is_loading = False
            print("Model loaded successfully!")
            return model

        except Exception as e:
            print(f"Error loading model: {e}")
            self.is_loading = False
            self.load_failed = True
            return None


# Global model manager instance shared by all Gradio handlers.
model_manager = ModelManager()
90
+
91
 
92
  @spaces.GPU
93
+ def prompt_lm(audios: list[str], messages: list[dict[str, str]]) -> str:
94
+ """Generate response using the model"""
95
+ model = model_manager.load_model()
96
 
97
  if model is None:
98
+ if model_manager.is_loading:
99
  return "πŸ”„ Loading model... This may take a few minutes on first use. Please try again in a moment."
100
+ elif model_manager.load_failed:
101
  return "❌ Model failed to load. This could be due to:\nβ€’ No internet connection\nβ€’ Insufficient disk space\nβ€’ Model repository access issues\n\nPlease check your connection and try again using the retry button."
102
  else:
103
  return "Demo mode: Model not loaded. Please check the model configuration."
 
112
  r"<\|start_header_id\|>system<\|end_header_id\|>\n\nCutting Knowledge Date: [^\n]+\nToday Date: [^\n]+\n\n<\|eot_id\|>",
113
  "",
114
  prompt_text,
115
+ )
116
+ prompt_text = re.sub("\\n", r"\\n", prompt_text)
117
 
118
  print(f"{prompt_text=}")
119
  with torch.cuda.amp.autocast(dtype=torch.float16):
120
+ llm_answer = model.generate(samples, model_manager.config.generate, prompts=[prompt_text])
121
  return llm_answer[0]
122
 
123
 
 
156
  files.append(path)
157
  case _:
158
  messages.append(msg)
159
+
160
+ # Join consecutive messages from the same role
161
  joined_messages = []
 
162
  for msg in messages:
163
  if joined_messages and joined_messages[-1]["role"] == msg["role"]:
164
  joined_messages[-1]["content"] += msg["content"]
 
173
  combined_inputs = combine_model_inputs(history)
174
  response = prompt_lm(combined_inputs["files"], combined_inputs["messages"])
175
  history.append({"role": "assistant", "content": response})
 
176
  return history
177
 
178
 
179
  def _chat_tab(examples):
180
+ # Status indicator
181
  status_text = gr.Textbox(
182
+ value=model_manager.get_status(),
183
  label="Model Status",
184
  interactive=False,
185
  visible=True
186
  )
187
 
188
+ # Retry button that only shows when model failed to load
189
  retry_button = gr.Button(
190
  "πŸ”„ Retry Loading Model",
191
  visible=False,
 
198
  bubble_full_width=False,
199
  type="messages",
200
  render_markdown=False,
 
201
  resizeable=True,
202
  )
203
 
 
214
  )
215
 
216
  # Update status after bot response
217
+ bot_msg.then(lambda: model_manager.get_status(), None, [status_text])
218
  bot_msg.then(lambda: gr.ClearButton(visible=True), None, [clear_button])
219
  clear_button.click(lambda: gr.ClearButton(visible=False), None, [clear_button])
220
 
221
  # Handle retry button
222
  retry_button.click(
223
+ model_manager.reset_state,
224
  None,
225
  [status_text]
226
  )
227
 
228
  # Show/hide retry button based on model status
229
  def update_retry_button_visibility():
230
+ return gr.Button(visible=model_manager.load_failed)
231
 
232
  # Update retry button visibility when status changes
233
  bot_msg.then(update_retry_button_visibility, None, [retry_button])
 
249
 
250
 
251
  def run_batch_inference(files, task, progress=gr.Progress()) -> str:
252
+ model = model_manager.load_model()
253
  if model is None:
254
+ if model_manager.is_loading:
255
  return "πŸ”„ Loading model... This may take a few minutes on first use. Please try again in a moment."
256
+ elif model_manager.load_failed:
257
  return "❌ Model failed to load. This could be due to:\nβ€’ No internet connection\nβ€’ Insufficient disk space\nβ€’ Model repository access issues\n\nPlease check your connection and try again."
258
  else:
259
  return "Demo mode: Model not loaded. Please check the model configuration."
 
306
  last_label = ""
307
  row = 1
308
 
 
 
 
 
309
  for offset, label in sorted(outputs.items()):
310
  if label != last_label and last_label:
311
  raven_output.append(get_line(row, current_offset, offset, last_label))
 
324
 
325
 
326
  def _run_long_recording_inference(file, task, chunk_len: int = 10, hop_len: int = 5, progress=gr.Progress()):
327
+ model = model_manager.load_model()
328
  if model is None:
329
+ if model_manager.is_loading:
330
  return "πŸ”„ Loading model... This may take a few minutes on first use. Please try again in a moment.", None
331
+ elif model_manager.load_failed:
332
  return "❌ Model failed to load. This could be due to:\nβ€’ No internet connection\nβ€’ Insufficient disk space\nβ€’ Model repository access issues\n\nPlease check your connection and try again.", None
333
  else:
334
  return "Demo mode: Model not loaded. Please check the model configuration.", None
 
338
  offset = 0
339
 
340
  prompt = f"<Audio><AudioHere></Audio> {task}"
341
+ prompt = model_manager.config.model.prompt_template.format(prompt)
342
 
343
  for batch in progress.tqdm(generate_sample_batches(file, cuda_enabled, chunk_len=chunk_len, hop_len=hop_len)):
344
  prompt_strs = [prompt] * len(batch["audio_chunk_sizes"])
345
  with torch.cuda.amp.autocast(dtype=torch.float16):
346
+ llm_answers = model.generate(batch, model_manager.config.generate, prompts=prompt_strs)
347
  for answer in llm_answers:
348
  outputs[offset] = answer
349
  offset += hop_len
 
392
  [output, download_raven],
393
  )
394
 
395
+
396
  def main(
397
  assets_dir: Path,
398
  cfg_path: str | Path,
399
  options: list[str] = [],
400
  device: str = "cuda",
401
  ):
402
+ # Load configuration
 
403
  try:
404
  cfg = Config.from_sources(yaml_file=cfg_path, cli_args=options)
405
+ model_manager.config = cfg
406
  print("Configuration loaded successfully")
407
  except Exception as e:
408
  print(f"Warning: Could not load config: {e}")
409
  print("Running in demo mode")
410
+ model_manager.config = None
411
 
412
  # Check if assets directory exists, if not create a placeholder
413
  if not assets_dir.exists():
 
457
  _long_recording_tab()
458
 
459
  return app
460
+
461
+
462
# Build the Gradio app at import time so both `python app.py` and hosted
# runners (e.g. HuggingFace Spaces importing `app`) get the same instance.
app = main(
    assets_dir=Path("assets"),
    cfg_path=Path("configs/inference.yml"),
    options=[],
    device="cuda",
)

if __name__ == "__main__":
    # Launch the UI only when executed directly as a script.
    app.launch()