Luis J Camargo committed on
Commit
a22ea4f
·
1 Parent(s): 90f1441

feat: Add detailed logging for audio processing and inference, remove unused imports, and adjust Gradio launch configuration.

Browse files
Files changed (2) hide show
  1. app.py +29 -3
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import gradio as gr
2
  import torch
3
  import numpy as np
@@ -80,15 +81,27 @@ model.eval()
80
 
81
  print("Model loaded successfully!")
82
 
 
 
 
 
 
 
 
83
  # === INFERENCE FUNCTION ===
84
  def predict_language(audio):
85
  if audio is None:
86
  return "⚠️ No audio provided", "", ""
87
 
 
 
 
88
  sample_rate, audio_array = audio
89
  audio_len_sec = len(audio_array) / sample_rate
 
90
  print(f"\n--- [LOG] New Request ---")
91
- print(f"[LOG] Audio length: {audio_len_sec:.2f}s, SR: {sample_rate}")
 
92
 
93
  # Normalization
94
  print("[LOG] Step 1: Normalizing audio...")
@@ -96,12 +109,15 @@ def predict_language(audio):
96
  audio_array = audio_array.astype(np.float32) / 32768.0
97
  elif audio_array.dtype == np.int32:
98
  audio_array = audio_array.astype(np.float32) / 2147483648.0
 
99
 
100
  # Resampling
101
  if sample_rate != 16000:
102
  print(f"[LOG] Step 2: Resampling {sample_rate}Hz -> 16000Hz...")
103
  import librosa
 
104
  audio_array = librosa.resample(audio_array, orig_sr=sample_rate, target_sr=16000)
 
105
 
106
  # Preprocessing
107
  print("[LOG] Step 3: Extracting features...")
@@ -110,12 +126,21 @@ def predict_language(audio):
110
  sampling_rate=16000,
111
  return_tensors="pt"
112
  )
 
 
 
 
113
 
114
  # Inference
115
- print("[LOG] Step 4: Running model inference (CPU intensive)...")
116
  with torch.no_grad():
117
  outputs = model(input_features=inputs.input_features)
118
 
 
 
 
 
 
119
  # Post-processing
120
  print("[LOG] Step 5: Post-processing results...")
121
  fam_probs = torch.softmax(outputs["fam_logits"], dim=-1)
@@ -130,7 +155,7 @@ def predict_language(audio):
130
  super_conf = super_probs[0, super_idx].item()
131
  code_conf = code_probs[0, code_idx].item()
132
 
133
- print(f"[LOG] Prediction successful: Family {fam_idx}")
134
  print(f"--- [LOG] Request Finished ---\n")
135
 
136
  # Formatting results
@@ -140,6 +165,7 @@ def predict_language(audio):
140
  {f"{code_idx}": code_conf}
141
  )
142
 
 
143
  # === UI COMPONENTS ===
144
  with gr.Blocks() as demo:
145
  gr.HTML(
 
1
+ import os
2
  import gradio as gr
3
  import torch
4
  import numpy as np
 
81
 
82
  print("Model loaded successfully!")
83
 
84
+ import psutil
85
+ import gc
86
+
87
+ def get_mem_usage():
88
+ process = psutil.Process(os.getpid())
89
+ return process.memory_info().rss / (1024 ** 2) # In MB
90
+
91
  # === INFERENCE FUNCTION ===
92
  def predict_language(audio):
93
  if audio is None:
94
  return "⚠️ No audio provided", "", ""
95
 
96
+ gc.collect() # Start clean
97
+ start_mem = get_mem_usage()
98
+
99
  sample_rate, audio_array = audio
100
  audio_len_sec = len(audio_array) / sample_rate
101
+
102
  print(f"\n--- [LOG] New Request ---")
103
+ print(f"[LOG] Start Memory: {start_mem:.2f} MB")
104
+ print(f"[LOG] Audio duration: {audio_len_sec:.2f}s, SR: {sample_rate}")
105
 
106
  # Normalization
107
  print("[LOG] Step 1: Normalizing audio...")
 
109
  audio_array = audio_array.astype(np.float32) / 32768.0
110
  elif audio_array.dtype == np.int32:
111
  audio_array = audio_array.astype(np.float32) / 2147483648.0
112
+ print(f"[LOG] Memory after normalization: {get_mem_usage():.2f} MB")
113
 
114
  # Resampling
115
  if sample_rate != 16000:
116
  print(f"[LOG] Step 2: Resampling {sample_rate}Hz -> 16000Hz...")
117
  import librosa
118
+ # Use res_type="kaiser_fast" to save memory/cpu if needed, but default is usually fine
119
  audio_array = librosa.resample(audio_array, orig_sr=sample_rate, target_sr=16000)
120
+ print(f"[LOG] Memory after resampling: {get_mem_usage():.2f} MB")
121
 
122
  # Preprocessing
123
  print("[LOG] Step 3: Extracting features...")
 
126
  sampling_rate=16000,
127
  return_tensors="pt"
128
  )
129
+ # Delete raw audio array immediately as it's now in 'inputs'
130
+ del audio_array
131
+ gc.collect()
132
+ print(f"[LOG] Memory after preprocessing: {get_mem_usage():.2f} MB")
133
 
134
  # Inference
135
+ print("[LOG] Step 4: Running model inference...")
136
  with torch.no_grad():
137
  outputs = model(input_features=inputs.input_features)
138
 
139
+ # Cleanup inputs
140
+ del inputs
141
+ gc.collect()
142
+ print(f"[LOG] Memory after inference: {get_mem_usage():.2f} MB")
143
+
144
  # Post-processing
145
  print("[LOG] Step 5: Post-processing results...")
146
  fam_probs = torch.softmax(outputs["fam_logits"], dim=-1)
 
155
  super_conf = super_probs[0, super_idx].item()
156
  code_conf = code_probs[0, code_idx].item()
157
 
158
+ print(f"[LOG] Final Memory: {get_mem_usage():.2f} MB")
159
  print(f"--- [LOG] Request Finished ---\n")
160
 
161
  # Formatting results
 
165
  {f"{code_idx}": code_conf}
166
  )
167
 
168
+
169
  # === UI COMPONENTS ===
170
  with gr.Blocks() as demo:
171
  gr.HTML(
requirements.txt CHANGED
@@ -4,4 +4,5 @@ transformers
4
  numpy
5
  librosa
6
  huggingface_hub
7
- safetensors
 
 
4
  numpy
5
  librosa
6
  huggingface_hub
7
+ safetensors
8
+ psutil