Arwaaaa committed on
Commit
14bda6c
·
1 Parent(s): 3ce1394
Files changed (3) hide show
  1. .env +6 -6
  2. src/database.py +3 -1
  3. src/model.py +21 -37
.env CHANGED
@@ -1,7 +1,7 @@
1
- SAMPLE_RATE = 16000
2
- CHANNELS = 1
3
- SAMPLE_WIDTH = 2
4
 
5
- CHUNK_DURATION_MS = 500
6
- DB_PATH = "audio_sessions.db"
7
- AUDIO_STORAGE_DIR = "audio_chunks"
 
1
+ SAMPLE_RATE=16000
2
+ CHANNELS=1
3
+ SAMPLE_WIDTH=2
4
 
5
+ CHUNK_DURATION_MS=500
6
+ DB_PATH=/tmp/audio_sessions.db
7
+ AUDIO_STORAGE_DIR=/tmp/audio_chunks
src/database.py CHANGED
@@ -17,7 +17,9 @@ def init_db():
17
  actual_duration_ms REAL,
18
  sura_number INTEGER,
19
  ayat_begin INTEGER,
20
- ayat_end INTEGER
 
 
21
  )
22
  ''')
23
  conn.commit()
 
17
  actual_duration_ms REAL,
18
  sura_number INTEGER,
19
  ayat_begin INTEGER,
20
+ ayat_end INTEGER,
21
+ word_begin INTEGER,
22
+ word_end INTEGER
23
  )
24
  ''')
25
  conn.commit()
src/model.py CHANGED
@@ -1,58 +1,42 @@
1
  import os
2
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
3
 
4
- # Hugging Face model repo
5
  MODEL_NAME = "tarteel-ai/whisper-tiny-ar-quran"
6
 
7
- # Local cache path (works locally, or will fallback to /tmp in Spaces)
8
- LOCAL_SAVE_DIR = os.path.abspath("./whisper-tiny-ar-quran-local")
9
- HF_SPACE_CACHE = "/tmp/whisper-tiny-ar-quran"
 
 
 
 
 
 
 
 
10
 
11
- # Globals (singleton pattern)
12
  _model = None
13
  _processor = None
14
 
15
-
16
  def load_model():
17
- """
18
- Load Whisper model + processor.
19
- Priority:
20
- 1. Reuse in-memory model if already loaded.
21
- 2. Load from local_save_dir if exists.
22
- 3. Otherwise, download from HuggingFace Hub.
23
- - If running on HuggingFace Spaces, use /tmp to avoid permission issues.
24
- - Save locally when possible.
25
- """
26
  global _model, _processor
27
 
28
  if _model is not None and _processor is not None:
29
  return _model, _processor
30
 
31
- # Pick best cache dir (local if writable, else /tmp)
32
- target_dir = LOCAL_SAVE_DIR
33
- try:
34
- os.makedirs(LOCAL_SAVE_DIR, exist_ok=True)
35
- test_path = os.path.join(LOCAL_SAVE_DIR, "write_test.txt")
36
- with open(test_path, "w") as f:
37
- f.write("ok")
38
- os.remove(test_path)
39
- except (PermissionError, OSError):
40
- target_dir = HF_SPACE_CACHE
41
- os.makedirs(target_dir, exist_ok=True)
42
-
43
- local_config_file = os.path.join(target_dir, "preprocessor_config.json")
44
 
45
  if os.path.exists(local_config_file):
46
- _processor = WhisperProcessor.from_pretrained(target_dir)
47
- _model = WhisperForConditionalGeneration.from_pretrained(target_dir)
48
  else:
49
- _processor = WhisperProcessor.from_pretrained(MODEL_NAME)
50
- _model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME)
51
 
52
- _model.save_pretrained(target_dir)
53
- _processor.save_pretrained(target_dir)
54
 
55
- # Patch configs (for Arabic transcribe)
56
  if not hasattr(_model.generation_config, "lang_to_id"):
57
  _model.generation_config.lang_to_id = {"arabic": 50361}
58
  if not hasattr(_model.generation_config, "task_to_id"):
@@ -61,5 +45,5 @@ def load_model():
61
  _model.generation_config.language = "arabic"
62
  _model.generation_config.task = "transcribe"
63
 
64
- print(f"✅ Model and processor loaded from: {target_dir}")
65
- return _model, _processor
 
1
  import os
2
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
3
 
 
4
  MODEL_NAME = "tarteel-ai/whisper-tiny-ar-quran"
5
 
6
+ # Use /tmp for all writable storage
7
+ CACHE_DIR = "/tmp/hf_cache"
8
+ LOCAL_SAVE_DIR = "/tmp/whisper-tiny-ar-quran-local"
9
+
10
+ # Make sure dirs exist
11
+ os.makedirs(CACHE_DIR, exist_ok=True)
12
+ os.makedirs(LOCAL_SAVE_DIR, exist_ok=True)
13
+
14
+ os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
15
+ os.environ["HF_HOME"] = CACHE_DIR
16
+ os.environ["HF_HUB_CACHE"] = CACHE_DIR
17
 
 
18
  _model = None
19
  _processor = None
20
 
 
21
  def load_model():
 
 
 
 
 
 
 
 
 
22
  global _model, _processor
23
 
24
  if _model is not None and _processor is not None:
25
  return _model, _processor
26
 
27
+ local_config_file = os.path.join(LOCAL_SAVE_DIR, "preprocessor_config.json")
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  if os.path.exists(local_config_file):
30
+ _processor = WhisperProcessor.from_pretrained(LOCAL_SAVE_DIR)
31
+ _model = WhisperForConditionalGeneration.from_pretrained(LOCAL_SAVE_DIR)
32
  else:
33
+ _processor = WhisperProcessor.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
34
+ _model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
35
 
36
+ _model.save_pretrained(LOCAL_SAVE_DIR)
37
+ _processor.save_pretrained(LOCAL_SAVE_DIR)
38
 
39
+ # Patch the generation config for Arabic transcription
40
  if not hasattr(_model.generation_config, "lang_to_id"):
41
  _model.generation_config.lang_to_id = {"arabic": 50361}
42
  if not hasattr(_model.generation_config, "task_to_id"):
 
45
  _model.generation_config.language = "arabic"
46
  _model.generation_config.task = "transcribe"
47
 
48
+ print(f"✅ Model and processor loaded from: {LOCAL_SAVE_DIR}")
49
+ return _model, _processor