Update main.py
main.py
CHANGED
@@ -10,17 +10,15 @@ from transformers import (
     AutoModelForSeq2SeqLM,
     pipeline
 )
-from huggingface_hub import snapshot_download
 from torch.quantization import quantize_dynamic
 import logging
 import ffmpeg
 import tempfile
 
-# Force HF cache to /tmp
+# ========== Force HF cache to /tmp ==========
 os.environ["HF_HOME"] = "/tmp/huggingface"
 os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface/transformers"
 os.environ["HF_HUB_CACHE"] = "/tmp/huggingface/hub"
-
 os.makedirs(os.environ["HF_HOME"], exist_ok=True)
 
 # Silence all transformers and huggingface logging
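A side note on this hunk: huggingface_hub and transformers resolve their cache directories from these environment variables when the modules are first imported, so the new block only takes effect reliably if it runs before the transformers imports above it, and recent transformers releases deprecate TRANSFORMERS_CACHE in favor of HF_HOME / HF_HUB_CACHE. A minimal sketch of the safer ordering; only the /tmp paths come from this commit, the rest is illustrative:

import os

# Point every Hugging Face cache at /tmp (writable in most containers/Spaces).
# Set these BEFORE importing transformers or huggingface_hub, because both
# libraries read the variables when their modules are first imported.
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["HF_HUB_CACHE"] = "/tmp/huggingface/hub"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface/transformers"  # legacy name; deprecated in newer transformers
os.makedirs(os.environ["HF_HOME"], exist_ok=True)

from transformers import WhisperProcessor, WhisperForConditionalGeneration  # noqa: E402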
@@ -31,22 +29,20 @@ logging.getLogger("huggingface_hub").setLevel(logging.ERROR)
 app = Flask(__name__)
 CORS(app)
 
-# ========== Load Whisper Model (quantized…
-def load_whisper_model(model_size="small"…
-    os.makedirs(save_dir, exist_ok=True)
+# ========== Load Whisper Model (quantized) ==========
+def load_whisper_model(model_size="small"):
     model_name = f"openai/whisper-{model_size}"
-    processor = WhisperProcessor.from_pretrained(model_name…
-    model = WhisperForConditionalGeneration.from_pretrained(model_name…
+    processor = WhisperProcessor.from_pretrained(model_name)
+    model = WhisperForConditionalGeneration.from_pretrained(model_name)
     model = quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
     model.to("cuda" if torch.cuda.is_available() else "cpu")
     return processor, model
 
 # ========== Load Grammar Correction Model (quantized) ==========
-def load_grammar_model(…
-    os.makedirs(save_dir, exist_ok=True)
+def load_grammar_model():
     model_name = "prithivida/grammar_error_correcter_v1"
-    tokenizer = AutoTokenizer.from_pretrained(model_name…
-    model = AutoModelForSeq2SeqLM.from_pretrained(model_name…
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
     model = quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
     grammar_pipeline = pipeline(
         "text2text-generation",
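One caveat in both loaders: quantize_dynamic (torch.quantization, re-homed as torch.ao.quantization in newer PyTorch) replaces nn.Linear with int8 kernels that only run on CPU backends, so the quantized model and the following model.to("cuda" ...) pull in opposite directions. A hedged sketch of one way to reconcile them; the model name matches the commit, but the branching is my assumption, not what main.py does:

import torch
from torch.quantization import quantize_dynamic
from transformers import WhisperForConditionalGeneration

def load_whisper(model_size="small"):
    model = WhisperForConditionalGeneration.from_pretrained(f"openai/whisper-{model_size}")
    if torch.cuda.is_available():
        # GPU path: keep float weights; dynamically quantized Linear has no CUDA kernels.
        return model.to("cuda")
    # CPU path: int8 dynamic quantization shrinks the weights and speeds up matmuls.
    return quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)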
@@ -115,10 +111,8 @@ def correct_grammar(text, grammar_pipeline):
     return '. '.join([r['generated_text'] for r in results])
 
 # ========== Initialize Models ==========
-
-
-processor = WhisperProcessor.from_pretrained(model_name)
-model = WhisperForConditionalGeneration.from_pretrained(model_name)
+processor, whisper_model = load_whisper_model("small")
+grammar_pipeline = load_grammar_model()
 
 # ========== Warm-Up Models ==========
 def warm_up_models():
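For context, the processor, whisper_model pair that replaces the old module-level from_pretrained calls is typically consumed like this; transcribe_array and the 16 kHz mono float32 input are my assumptions, not code from main.py:

import torch

def transcribe_array(audio, processor, model, sampling_rate=16000):
    # The processor converts raw audio into log-mel input features; generate()
    # produces token ids, and batch_decode maps them back to text.
    inputs = processor(audio, sampling_rate=sampling_rate, return_tensors="pt")
    with torch.no_grad():
        predicted_ids = model.generate(inputs.input_features)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]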
@@ -158,4 +152,4 @@ def transcribe():
 
 # ========== Run App ==========
 if __name__ == '__main__':
-    app.run(host="0.0.0.0", debug=False, port=7860)
+    app.run(host="0.0.0.0", debug=False, port=7860)
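debug=False matters here beyond quieter logs: Flask's debug mode starts the auto-reloader, which imports the module a second time and would load and quantize every model twice; 7860 is the default port Hugging Face Spaces routes to. A small sketch with an explicit warm-up before serving; warm_up_models is defined in this file, but calling it here is my assumption:

if __name__ == "__main__":
    # Warm the models once so the first request doesn't pay the cold-start cost.
    warm_up_models()
    # debug=False keeps the auto-reloader off, so models load exactly once.
    app.run(host="0.0.0.0", port=7860, debug=False)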