Abid Ali Awan committed on
Commit
e8491b9
·
1 Parent(s): 4098191

Refactor app.py to streamline model setup by removing unnecessary device specification and loading the model directly in int8 format, enhancing code clarity.

Browse files
Files changed (1) hide show
  1. app.py +2 -7
app.py CHANGED
@@ -20,23 +20,18 @@ torch.set_num_threads(4)
20
 
21
  logging.set_verbosity_error()
22
 
23
- # —— Model & device setup ——
24
- device = "cpu"
25
  model_id = "kingabzpro/whisper-base-urdu-full"
26
 
27
- # Load in fp32 and quantize to int8
28
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
29
  model_id,
30
- torch_dtype=torch.float32,
31
  use_safetensors=True,
32
  )
33
- model.eval()
34
  model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
35
 
36
  processor = AutoProcessor.from_pretrained(model_id)
37
 
38
- processor = AutoProcessor.from_pretrained(model_id)
39
-
40
  # Build a CPU-based pipeline with chunking
41
  transcriber = pipeline(
42
  task="automatic-speech-recognition",
 
20
 
21
  logging.set_verbosity_error()
22
 
23
+ # —— Model setup ——
 
24
  model_id = "kingabzpro/whisper-base-urdu-full"
25
 
26
+ # Load and quantize to int8
27
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
28
  model_id,
 
29
  use_safetensors=True,
30
  )
 
31
  model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
32
 
33
  processor = AutoProcessor.from_pretrained(model_id)
34
 
 
 
35
  # Build a CPU-based pipeline with chunking
36
  transcriber = pipeline(
37
  task="automatic-speech-recognition",