fariedalfarizi commited on
Commit
6dc05e5
·
1 Parent(s): 60be371

feat: Unified articulation analysis with dual-mode (PER + Clarity)

Browse files

- Add unified ArticulationService supporting WITH/WITHOUT reference text
- WITH reference: PER (40%) + Clarity (30%) + Stability (20%) + Energy (10%)
- WITHOUT reference: Clarity (50%) + Stability (30%) + Energy (20%)
- Add separate FillerWordsService for filler word detection
- Filler words included as bonus info in articulation (not in scoring)
- Fix audio_processor parameter from transcribed_text to transcript
- Add protobuf==3.20.3 to fix Wav2Vec2 compatibility
- Use HF_HOME environment variable for cache directory
- Model: indonesian-nlp/wav2vec2-indonesian-javanese-sundanese

app/services/articulation.py CHANGED
@@ -21,10 +21,16 @@ class ArticulationService:
21
 
22
  # Load Wav2Vec2 Indonesian model untuk phoneme detection
23
  model_name = "indonesian-nlp/wav2vec2-indonesian-javanese-sundanese"
 
 
 
 
 
24
  try:
25
  print(f"📦 Loading Wav2Vec2 model: {model_name}")
26
- self.processor = Wav2Vec2Processor.from_pretrained(model_name, cache_dir="/.cache")
27
- self.model = Wav2Vec2ForCTC.from_pretrained(model_name, cache_dir="/.cache")
 
28
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
29
  self.model.to(self.device)
30
  self.model_loaded = True
 
21
 
22
  # Load Wav2Vec2 Indonesian model untuk phoneme detection
23
  model_name = "indonesian-nlp/wav2vec2-indonesian-javanese-sundanese"
24
+
25
+ # Set cache directory (production: /.cache, local: default)
26
+ import os
27
+ cache_dir = os.environ.get('HF_HOME', '/.cache')
28
+
29
  try:
30
  print(f"📦 Loading Wav2Vec2 model: {model_name}")
31
+ print(f"📁 Cache directory: {cache_dir}")
32
+ self.processor = Wav2Vec2Processor.from_pretrained(model_name, cache_dir=cache_dir)
33
+ self.model = Wav2Vec2ForCTC.from_pretrained(model_name, cache_dir=cache_dir)
34
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
35
  self.model.to(self.device)
36
  self.model_loaded = True
app/services/audio_processor.py CHANGED
@@ -131,15 +131,14 @@ class AudioProcessor:
131
  print(f"✅ Tempo score: {results['tempo']['score']}/5\n")
132
 
133
  # 3. Articulation Analysis
134
- if analyze_articulation and reference_text:
135
  print("🗣️ Step 3/6: Analyzing articulation...")
136
  results['articulation'] = self.articulation_service.analyze(
137
- transcribed_text=transcript,
138
- reference_text=reference_text
 
139
  )
140
  print(f"✅ Articulation score: {results['articulation']['score']}/5\n")
141
- elif analyze_articulation:
142
- print("⚠️ Step 3/6: Skipping articulation (no reference text)\n")
143
 
144
  # 4. Structure Analysis
145
  if analyze_structure:
 
131
  print(f"✅ Tempo score: {results['tempo']['score']}/5\n")
132
 
133
  # 3. Articulation Analysis
134
+ if analyze_articulation:
135
  print("🗣️ Step 3/6: Analyzing articulation...")
136
  results['articulation'] = self.articulation_service.analyze(
137
+ audio_path=audio_path,
138
+ transcript=transcript,
139
+ reference_text=reference_text if reference_text else None
140
  )
141
  print(f"✅ Articulation score: {results['articulation']['score']}/5\n")
 
 
142
 
143
  # 4. Structure Analysis
144
  if analyze_structure: