Luis J Camargo committed on
Commit
ec249fb
Β·
1 Parent(s): 84dac14

test pre return

Browse files
Files changed (1) hide show
  1. app.py +83 -91
app.py CHANGED
@@ -5,6 +5,8 @@ import numpy as np
5
  from transformers import WhisperProcessor, AutoConfig, AutoModel, WhisperConfig, WhisperPreTrainedModel
6
  from transformers.models.whisper.modeling_whisper import WhisperEncoder
7
  import torch.nn as nn
 
 
8
 
9
  # === CUSTOM MODEL CLASSES ===
10
  class WhisperEncoderOnlyConfig(WhisperConfig):
@@ -81,9 +83,6 @@ model.eval()
81
 
82
  print("Model loaded successfully!")
83
 
84
- import psutil
85
- import gc
86
-
87
  def get_mem_usage():
88
  process = psutil.Process(os.getpid())
89
  return process.memory_info().rss / (1024 ** 2) # In MB
@@ -91,87 +90,85 @@ def get_mem_usage():
91
  # === INFERENCE FUNCTION ===
92
  def predict_language(audio):
93
  if audio is None:
94
- yield "⚠️ No audio provided", {}, {}, {}
95
- return
96
-
97
- log_buffer = "--- [LOG] New Request ---\n"
98
- yield log_buffer, {}, {}, {}
99
 
100
- try:
101
- gc.collect()
102
- start_mem = get_mem_usage()
103
- sample_rate, audio_array = audio
104
- audio_len_sec = len(audio_array) / sample_rate
105
-
106
- log_buffer += f"RAM: {start_mem:.2f} MB | Len: {audio_len_sec:.2f}s | SR: {sample_rate}\n"
107
- yield log_buffer, {}, {}, {}
108
-
109
- # Normalization
110
- log_buffer += "Step 1: Normalizing...\n"
111
- yield log_buffer, {}, {}, {}
112
- if audio_array.dtype == np.int16:
113
- audio_array = audio_array.astype(np.float32) / 32768.0
114
- elif audio_array.dtype == np.int32:
115
- audio_array = audio_array.astype(np.float32) / 2147483648.0
116
-
117
- # Resampling
118
- if sample_rate != 16000:
119
- log_buffer += f"Step 2: Resampling {sample_rate}Hz -> 16kHz...\n"
120
- yield log_buffer, {}, {}, {}
121
- import librosa
122
- audio_array = librosa.resample(audio_array, orig_sr=sample_rate, target_sr=16000)
123
- log_buffer += f"Mem post-resample: {get_mem_usage():.2f} MB\n"
124
- yield log_buffer, {}, {}, {}
125
-
126
- # Preprocessing
127
- log_buffer += "Step 3: Extracting features...\n"
128
- yield log_buffer, {}, {}, {}
129
- inputs = processor(
130
- audio_array,
131
- sampling_rate=16000,
132
- return_tensors="pt"
133
- )
134
- del audio_array
135
- gc.collect()
136
- log_buffer += f"Mem post-features: {get_mem_usage():.2f} MB\n"
137
- yield log_buffer, {}, {}, {}
138
-
139
- # Inference
140
- log_buffer += "Step 4: Running Model (CPU)... \n"
141
- yield log_buffer, {}, {}, {}
142
- with torch.no_grad():
143
- outputs = model(input_features=inputs.input_features)
144
-
145
- del inputs
146
- gc.collect()
147
- log_buffer += f"Mem post-inference: {get_mem_usage():.2f} MB\n"
148
- yield log_buffer, {}, {}, {}
149
-
150
- # Post-processing
151
- log_buffer += "Step 5: Formatting results...\n"
152
- yield log_buffer, {}, {}, {}
153
- fam_probs = torch.softmax(outputs["fam_logits"], dim=-1)
154
- super_probs = torch.softmax(outputs["super_logits"], dim=-1)
155
- code_probs = torch.softmax(outputs["code_logits"], dim=-1)
156
-
157
- fam_idx = outputs["fam_logits"].argmax(-1).item()
158
- super_idx = outputs["super_logits"].argmax(-1).item()
159
- code_idx = outputs["code_logits"].argmax(-1).item()
160
-
161
- fam_conf = fam_probs[0, fam_idx].item()
162
- super_conf = super_probs[0, super_idx].item()
163
- code_conf = code_probs[0, code_idx].item()
164
-
165
- log_buffer += "--- [LOG] Finished Successfully ---"
166
- yield (
167
- log_buffer,
168
- {f"{fam_idx}": fam_conf},
169
- {f"{super_idx}": super_conf},
170
- {f"{code_idx}": code_conf}
171
- )
172
- except Exception as e:
173
- log_buffer += f"\n❌ CRASH: {str(e)}"
174
- yield log_buffer, {}, {}, {}
 
 
175
 
176
  # === UI COMPONENTS ===
177
  with gr.Blocks() as demo:
@@ -196,9 +193,6 @@ with gr.Blocks() as demo:
196
  clear_btn = gr.Button("πŸ—‘οΈ Clear", variant="secondary")
197
  submit_btn = gr.Button("πŸš€ Classify", variant="primary")
198
 
199
- # Persistent Log Output
200
- status_logs = gr.Textbox(label="πŸ” Persistent Status Log (Visible after crash)", interactive=False, lines=10)
201
-
202
  with gr.Column(scale=1):
203
  gr.Markdown("### πŸ“Š 2. Classification Results")
204
  fam_output = gr.Label(num_top_classes=1, label="🌍 Language Family")
@@ -208,16 +202,15 @@ with gr.Blocks() as demo:
208
  submit_btn.click(
209
  fn=predict_language,
210
  inputs=audio_input,
211
- outputs=[status_logs, fam_output, super_output, code_output]
212
  )
213
 
214
  clear_btn.click(
215
- fn=lambda: ("", None, None, None, None),
216
  inputs=None,
217
- outputs=[status_logs, audio_input, fam_output, super_output, code_output]
218
  )
219
 
220
-
221
  gr.Markdown(
222
  """
223
  ---
@@ -234,7 +227,6 @@ with gr.Blocks() as demo:
234
  )
235
 
236
  if __name__ == "__main__":
237
- # Increased concurrency for CPU stability
238
  demo.launch(
239
  theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"),
240
  ssr_mode=False,
 
5
  from transformers import WhisperProcessor, AutoConfig, AutoModel, WhisperConfig, WhisperPreTrainedModel
6
  from transformers.models.whisper.modeling_whisper import WhisperEncoder
7
  import torch.nn as nn
8
+ import psutil
9
+ import gc
10
 
11
  # === CUSTOM MODEL CLASSES ===
12
  class WhisperEncoderOnlyConfig(WhisperConfig):
 
83
 
84
  print("Model loaded successfully!")
85
 
 
 
 
86
def get_mem_usage():
    """Return this process's resident set size (RSS) in megabytes."""
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return rss_bytes / (1024 ** 2)
 
90
  # === INFERENCE FUNCTION ===
def predict_language(audio):
    """Classify the language of a recorded audio clip.

    Args:
        audio: Gradio audio payload as a ``(sample_rate, np.ndarray)`` tuple,
            or ``None`` when no recording was provided.

    Returns:
        A 3-tuple of ``{label: confidence}`` dicts feeding the family,
        supergroup and language-code ``gr.Label`` outputs. All three are
        empty dicts when no audio is given (an empty dict renders as a
        blank Label).
    """
    if audio is None:
        print("[LOG] No audio provided")
        # gr.Label expects a {label: confidence} dict, not a bare string.
        return {}, {}, {}

    gc.collect()  # start from a clean heap so the memory log is meaningful
    start_mem = get_mem_usage()

    sample_rate, audio_array = audio
    audio_len_sec = len(audio_array) / sample_rate

    print("\n--- [LOG] New Request ---")
    print(f"[LOG] Start Memory: {start_mem:.2f} MB")
    print(f"[LOG] Audio duration: {audio_len_sec:.2f}s, SR: {sample_rate}")

    # Step 1: normalize integer PCM to float32 in [-1.0, 1.0].
    print("[LOG] Step 1: Normalizing audio...")
    if audio_array.dtype == np.int16:
        audio_array = audio_array.astype(np.float32) / 32768.0
    elif audio_array.dtype == np.int32:
        audio_array = audio_array.astype(np.float32) / 2147483648.0
    print(f"[LOG] Memory after normalization: {get_mem_usage():.2f} MB")

    # Step 2: the Whisper feature extractor expects 16 kHz input.
    # NOTE: the debug "DID RESAMPLE" print + early `return None` from the
    # "test pre return" commit are removed so the pipeline runs to completion.
    if sample_rate != 16_000:
        print(f"[LOG] Step 2: Resampling {sample_rate}Hz -> 16000Hz...")
        import librosa  # lazy import: only needed for non-16 kHz input
        audio_array = librosa.resample(audio_array, orig_sr=sample_rate, target_sr=16_000)
        print(f"[LOG] Memory after resampling: {get_mem_usage():.2f} MB")

    # Step 3: feature extraction.
    print("[LOG] Step 3: Extracting features...")
    inputs = processor(
        audio_array,
        sampling_rate=16_000,
        do_normalize=True,
        device="cpu",
        return_tensors="pt",
    )
    # The raw waveform now lives inside `inputs`; free it eagerly to keep RSS low.
    del audio_array
    gc.collect()
    print(f"[LOG] Memory after preprocessing: {get_mem_usage():.2f} MB")

    # Step 4: forward pass on CPU, no gradients needed for inference.
    print("[LOG] Step 4: Running model inference...")
    with torch.no_grad():
        outputs = model(input_features=inputs.input_features)

    # Cleanup inputs before post-processing to minimize peak memory.
    del inputs
    gc.collect()
    print(f"[LOG] Memory after inference: {get_mem_usage():.2f} MB")

    # Step 5: convert each head's logits into a {index: confidence} dict.
    print("[LOG] Step 5: Post-processing results...")
    fam_probs = torch.softmax(outputs["fam_logits"], dim=-1)
    super_probs = torch.softmax(outputs["super_logits"], dim=-1)
    code_probs = torch.softmax(outputs["code_logits"], dim=-1)

    fam_idx = outputs["fam_logits"].argmax(-1).item()
    super_idx = outputs["super_logits"].argmax(-1).item()
    code_idx = outputs["code_logits"].argmax(-1).item()

    fam_conf = fam_probs[0, fam_idx].item()
    super_conf = super_probs[0, super_idx].item()
    code_conf = code_probs[0, code_idx].item()

    print(f"[LOG] Final Memory: {get_mem_usage():.2f} MB")
    print("--- [LOG] Request Finished ---\n")

    return (
        {f"{fam_idx}": fam_conf},
        {f"{super_idx}": super_conf},
        {f"{code_idx}": code_conf},
    )
172
 
173
  # === UI COMPONENTS ===
174
  with gr.Blocks() as demo:
 
193
  clear_btn = gr.Button("πŸ—‘οΈ Clear", variant="secondary")
194
  submit_btn = gr.Button("πŸš€ Classify", variant="primary")
195
 
 
 
 
196
  with gr.Column(scale=1):
197
  gr.Markdown("### πŸ“Š 2. Classification Results")
198
  fam_output = gr.Label(num_top_classes=1, label="🌍 Language Family")
 
202
  submit_btn.click(
203
  fn=predict_language,
204
  inputs=audio_input,
205
+ outputs=[fam_output, super_output, code_output]
206
  )
207
 
208
  clear_btn.click(
209
+ fn=lambda: (None, None, None, None),
210
  inputs=None,
211
+ outputs=[audio_input, fam_output, super_output, code_output]
212
  )
213
 
 
214
  gr.Markdown(
215
  """
216
  ---
 
227
  )
228
 
229
  if __name__ == "__main__":
 
230
  demo.launch(
231
  theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"),
232
  ssr_mode=False,