Abid Ali Awan committed on
Commit
e8491b9
·
1 Parent(s): 4098191

Refactor app.py to streamline model setup by removing unnecessary device specification and loading the model directly in int8 format, enhancing code clarity.

Browse files
Files changed (1) hide show
  1. app.py +2 -7
app.py CHANGED
@@ -20,23 +20,18 @@ torch.set_num_threads(4)
20
 
21
  logging.set_verbosity_error()
22
 
23
- # —— Model & device setup ——
24
- device = "cpu"
25
  model_id = "kingabzpro/whisper-base-urdu-full"
26
 
27
- # Load in fp32 and quantize to int8
28
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
29
  model_id,
30
- torch_dtype=torch.float32,
31
  use_safetensors=True,
32
  )
33
- model.eval()
34
  model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
35
 
36
  processor = AutoProcessor.from_pretrained(model_id)
37
 
38
- processor = AutoProcessor.from_pretrained(model_id)
39
-
40
  # Build a CPU-based pipeline with chunking
41
  transcriber = pipeline(
42
  task="automatic-speech-recognition",
 
20
 
21
  logging.set_verbosity_error()
22
 
23
+ # —— Model setup ——
 
24
  model_id = "kingabzpro/whisper-base-urdu-full"
25
 
26
+ # Load and quantize to int8
27
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
28
  model_id,
 
29
  use_safetensors=True,
30
  )
 
31
  model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
32
 
33
  processor = AutoProcessor.from_pretrained(model_id)
34
 
 
 
35
  # Build a CPU-based pipeline with chunking
36
  transcriber = pipeline(
37
  task="automatic-speech-recognition",