oddadmix committed on
Commit
58404fd
·
verified ·
1 Parent(s): 1c40fc0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -17
app.py CHANGED
@@ -9,22 +9,24 @@ import os
9
  model = None
10
  processor = None
11
 
12
- print("Loading model...")
13
- model, _ = FastModel.from_pretrained(
14
- model_name = "oddadmix/gemma-4b-egyptian-code-switching-b4-g2",
15
- dtype = None,
16
- max_seq_length = 2048,
17
- load_in_4bit = True, # Enable 4bit for GPU memory efficiency
18
- full_finetuning = False,
19
- )
20
-
21
- processor = Gemma3nProcessor.from_pretrained("google/gemma-3n-E4B-it")
22
-
23
- # Set model to inference mode
24
- FastLanguageModel.for_inference(model)
25
- print("Model loaded successfully!")
26
-
27
-
 
 
28
  @spaces.GPU
29
  def transcribe_audio(audio_path, max_tokens=128):
30
  """Transcribe audio file using the loaded model"""
@@ -140,4 +142,5 @@ with gr.Blocks(title="Egyptian Arabic ASR") as demo:
140
 
141
  # Launch the app
142
  if __name__ == "__main__":
143
- demo.launch()
 
 
9
  model = None
10
  processor = None
11
 
12
+ def load_model():
13
+ """Load the model and processor once at startup"""
14
+ global model, processor
15
+
16
+ print("Loading model...")
17
+ model, _ = FastModel.from_pretrained(
18
+ model_name = "oddadmix/gemma-4b-egyptian-code-switching-b4-g2",
19
+ dtype = None,
20
+ max_seq_length = 2048,
21
+ load_in_4bit = True, # Enable 4bit for GPU memory efficiency
22
+ full_finetuning = False,
23
+ )
24
+
25
+ processor = Gemma3nProcessor.from_pretrained("google/gemma-3n-E4B-it")
26
+
27
+ # Set model to inference mode
28
+ FastLanguageModel.for_inference(model)
29
+ print("Model loaded successfully!")
30
  @spaces.GPU
31
  def transcribe_audio(audio_path, max_tokens=128):
32
  """Transcribe audio file using the loaded model"""
 
142
 
143
  # Launch the app
144
  if __name__ == "__main__":
145
+ demo.launch()
146
+