Commit
·
6dc05e5
1
Parent(s):
60be371
feat: Unified articulation analysis with dual-mode (PER + Clarity)
Browse files
- Add unified ArticulationService supporting WITH/WITHOUT reference text
- WITH reference: PER (40%) + Clarity (30%) + Stability (20%) + Energy (10%)
- WITHOUT reference: Clarity (50%) + Stability (30%) + Energy (20%)
- Add separate FillerWordsService for filler word detection
- Filler words included as bonus info in articulation (not in scoring)
- Fix audio_processor parameter from transcribed_text to transcript
- Add protobuf==3.20.3 to fix Wav2Vec2 compatibility
- Use HF_HOME environment variable for cache directory
- Model: indonesian-nlp/wav2vec2-indonesian-javanese-sundanese
app/services/articulation.py
CHANGED
|
@@ -21,10 +21,16 @@ class ArticulationService:
|
|
| 21 |
|
| 22 |
# Load Wav2Vec2 Indonesian model untuk phoneme detection
|
| 23 |
model_name = "indonesian-nlp/wav2vec2-indonesian-javanese-sundanese"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
try:
|
| 25 |
print(f"📦 Loading Wav2Vec2 model: {model_name}")
|
| 26 |
-
|
| 27 |
-
self.
|
|
|
|
| 28 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 29 |
self.model.to(self.device)
|
| 30 |
self.model_loaded = True
|
|
|
|
| 21 |
|
| 22 |
# Load Wav2Vec2 Indonesian model untuk phoneme detection
|
| 23 |
model_name = "indonesian-nlp/wav2vec2-indonesian-javanese-sundanese"
|
| 24 |
+
|
| 25 |
+
# Set cache directory (production: /.cache, local: default)
|
| 26 |
+
import os
|
| 27 |
+
cache_dir = os.environ.get('HF_HOME', '/.cache')
|
| 28 |
+
|
| 29 |
try:
|
| 30 |
print(f"📦 Loading Wav2Vec2 model: {model_name}")
|
| 31 |
+
print(f"📁 Cache directory: {cache_dir}")
|
| 32 |
+
self.processor = Wav2Vec2Processor.from_pretrained(model_name, cache_dir=cache_dir)
|
| 33 |
+
self.model = Wav2Vec2ForCTC.from_pretrained(model_name, cache_dir=cache_dir)
|
| 34 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 35 |
self.model.to(self.device)
|
| 36 |
self.model_loaded = True
|
app/services/audio_processor.py
CHANGED
|
@@ -131,15 +131,14 @@ class AudioProcessor:
|
|
| 131 |
print(f"✅ Tempo score: {results['tempo']['score']}/5\n")
|
| 132 |
|
| 133 |
# 3. Articulation Analysis
|
| 134 |
-
if analyze_articulation
|
| 135 |
print("🗣️ Step 3/6: Analyzing articulation...")
|
| 136 |
results['articulation'] = self.articulation_service.analyze(
|
| 137 |
-
|
| 138 |
-
|
|
|
|
| 139 |
)
|
| 140 |
print(f"✅ Articulation score: {results['articulation']['score']}/5\n")
|
| 141 |
-
elif analyze_articulation:
|
| 142 |
-
print("⚠️ Step 3/6: Skipping articulation (no reference text)\n")
|
| 143 |
|
| 144 |
# 4. Structure Analysis
|
| 145 |
if analyze_structure:
|
|
|
|
| 131 |
print(f"✅ Tempo score: {results['tempo']['score']}/5\n")
|
| 132 |
|
| 133 |
# 3. Articulation Analysis
|
| 134 |
+
if analyze_articulation:
|
| 135 |
print("🗣️ Step 3/6: Analyzing articulation...")
|
| 136 |
results['articulation'] = self.articulation_service.analyze(
|
| 137 |
+
audio_path=audio_path,
|
| 138 |
+
transcript=transcript,
|
| 139 |
+
reference_text=reference_text if reference_text else None
|
| 140 |
)
|
| 141 |
print(f"✅ Articulation score: {results['articulation']['score']}/5\n")
|
|
|
|
|
|
|
| 142 |
|
| 143 |
# 4. Structure Analysis
|
| 144 |
if analyze_structure:
|