Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -38,13 +38,6 @@ except Exception as e:
|
|
| 38 |
print(f"Error loading pyannote pipeline: {type(e).__name__}: {e}. Diarization will be skipped.")
|
| 39 |
diarization_pipeline = None
|
| 40 |
global_diarizer = diarization_pipeline
|
| 41 |
-
ALIGN_MODEL_MAP = {
|
| 42 |
-
"ur": "kingabzpro/wav2vec2-large-xls-r-300m-Urdu",
|
| 43 |
-
"pa": "kingabzpro/wav2vec2-large-xls-r-300m-Urdu",
|
| 44 |
-
"sd": "Abdullah104/wav2vec2-large-xls-r-300m-sindhi-kaggle",
|
| 45 |
-
"ps": "ihanif/wav2vec2-xls-r-300m-pashto",
|
| 46 |
-
}
|
| 47 |
-
global_align_model_cache = {}
|
| 48 |
model_name = "large-v3"
|
| 49 |
class TimelineItem(BaseModel):
|
| 50 |
start: float
|
|
@@ -165,7 +158,6 @@ def analyze_audio(audio_file: str,
|
|
| 165 |
preprocess_params: Optional[Dict[str, Any]] = None) -> AnalysisResults:
|
| 166 |
|
| 167 |
results = AnalysisResults()
|
| 168 |
-
global global_align_model_cache, ALIGN_MODEL_MAP
|
| 169 |
ends: List[float] = []
|
| 170 |
rows: List[Dict[str, Any]] = []
|
| 171 |
rawTranscriptionText: str = ""
|
|
@@ -210,27 +202,6 @@ def analyze_audio(audio_file: str,
|
|
| 210 |
print(f"Detected language: {languageCode}. Aligning transcription...")
|
| 211 |
align_model = None
|
| 212 |
metadata = None
|
| 213 |
-
if language_code not in global_align_model_cache:
|
| 214 |
-
align_model_name = ALIGN_MODEL_MAP.get(language_code)
|
| 215 |
-
try:
|
| 216 |
-
if align_model_name:
|
| 217 |
-
print(f"Loading custom alignment model for {language_code}: {align_model_name}...")
|
| 218 |
-
align_model, metadata = whisperx.load_align_model(
|
| 219 |
-
language_code=language_code,
|
| 220 |
-
model_name=align_model_name,
|
| 221 |
-
device=device
|
| 222 |
-
)
|
| 223 |
-
global_align_model_cache[language_code] = (align_model, metadata)
|
| 224 |
-
print(f"Alignment model loaded/cached for language: {language_code}")
|
| 225 |
-
|
| 226 |
-
except Exception as e:
|
| 227 |
-
warn(results, "ALIGN_LOAD_FAIL", f"Failed to load alignment model for {language_code}: {e}. Alignment skipped.")
|
| 228 |
-
global_align_model_cache[language_code] = (None, None)
|
| 229 |
-
else:
|
| 230 |
-
align_model, metadata = global_align_model_cache[language_code]
|
| 231 |
-
if align_model:
|
| 232 |
-
print(f"Alignment model loaded from cache for language: {language_code}")
|
| 233 |
-
|
| 234 |
if align_model:
|
| 235 |
try:
|
| 236 |
print("Performing word-level alignment...")
|
|
|
|
| 38 |
print(f"Error loading pyannote pipeline: {type(e).__name__}: {e}. Diarization will be skipped.")
|
| 39 |
diarization_pipeline = None
|
| 40 |
global_diarizer = diarization_pipeline
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
model_name = "large-v3"
|
| 42 |
class TimelineItem(BaseModel):
|
| 43 |
start: float
|
|
|
|
| 158 |
preprocess_params: Optional[Dict[str, Any]] = None) -> AnalysisResults:
|
| 159 |
|
| 160 |
results = AnalysisResults()
|
|
|
|
| 161 |
ends: List[float] = []
|
| 162 |
rows: List[Dict[str, Any]] = []
|
| 163 |
rawTranscriptionText: str = ""
|
|
|
|
| 202 |
print(f"Detected language: {languageCode}. Aligning transcription...")
|
| 203 |
align_model = None
|
| 204 |
metadata = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
if align_model:
|
| 206 |
try:
|
| 207 |
print("Performing word-level alignment...")
|