Spaces:
Sleeping
Sleeping
| import os | |
| from huggingface_hub import snapshot_download | |
| from transformers import pipeline | |
# Keep the model cache under /tmp, which is always writable in Hugging Face
# Spaces.
cache_dir = "/tmp/hf_cache"
os.makedirs(cache_dir, exist_ok=True)
# NOTE(review): this variable is exported after `transformers` has already
# been imported above -- confirm the library still honours it at this point.
os.environ.update({"TRANSFORMERS_CACHE": cache_dir})

# Fetch the Whisper checkpoint into the writable cache, then build the
# speech-recognition pipeline from that local snapshot.
model_path = snapshot_download(
    repo_id="openai/whisper-base",
    cache_dir=cache_dir,
)
asr = pipeline(
    task="automatic-speech-recognition",
    model=model_path,
)
# Vocabulary cues as (accent, points, phrases). An accent earns `points` at
# most once per row, when ANY phrase of that row occurs in the transcript.
# Row order fixes the key order of the score dict, and therefore which accent
# wins a tie (the earliest listed one).
# NOTE(review): "only" is an extremely common English word, so it will still
# award Indian +2 for most speakers -- consider restricting or dropping it.
_ACCENT_CUES = (
    ("American", 2, ("gonna", "wanna", "dude", "gotta")),
    ("American", 1, ("elevator", "sidewalk", "apartment")),
    ("British", 2, ("mate", "cheers", "lorry", "flat", "rubbish")),
    ("British", 1, ("colour", "favourite", "centre")),
    ("Australian", 2, ("yeah nah", "arvo", "barbie", "brekkie")),
    ("Australian", 1, ("g'day", "no worries")),
    ("Indian", 2, ("kindly do the needful", "revert back", "only", "prepone")),
    ("Indian", 1, ("co-brother", "timepass", "out of station")),
)


def _score_accents(text):
    """Return a dict mapping each accent name to its cue score for *text*.

    *text* is expected to be lower-cased already. Cues are matched as whole
    words/phrases, so "only" no longer fires on "commonly" and "mate" no
    longer fires on "estimate".
    """
    import re  # local import: the file's import block does not provide `re`

    # Treat the typographic apostrophe like a plain one so that "g'day" is
    # recognised whichever form the transcript uses.
    normalized = text.replace("\u2019", "'")

    scores = {}
    for accent, points, phrases in _ACCENT_CUES:
        scores.setdefault(accent, 0)
        alternation = "|".join(re.escape(phrase) for phrase in phrases)
        # Lookarounds instead of \b so the rule stays valid for any cue,
        # including ones that contain hyphens, apostrophes or spaces.
        if re.search(r"(?<!\w)(?:" + alternation + r")(?!\w)", normalized):
            scores[accent] += points
    return scores


def detect_accent(audio_path: str):
    """Transcribe an audio file and guess the speaker's English accent.

    The guess is a vocabulary heuristic over the transcript, not an acoustic
    classifier, and the confidence value is a simple mock score.

    Args:
        audio_path: Path of the audio file handed to the module-level ``asr``
            pipeline.

    Returns:
        A ``(accent, confidence, text)`` tuple. ``accent`` is one of
        "American", "British", "Australian", "Indian" or "Unknown";
        ``confidence`` is an int (50 when no cue matched, otherwise
        ``50 + 10 * score`` capped at 95); ``text`` is the lower-cased
        transcript.
    """
    # NOTE(review): return_timestamps=True is presumably here so Whisper can
    # handle long recordings -- confirm against the pipeline documentation.
    result = asr(audio_path, return_timestamps=True)
    text = result["text"].lower()

    accent_scores = _score_accents(text)

    # max() keeps the first accent on ties (dict insertion order).
    top_accent = max(accent_scores, key=accent_scores.get)
    top_score = accent_scores[top_accent]
    if top_score == 0:
        return "Unknown", 50, text  # no cue matched: default fallback

    confidence = top_score * 10 + 50  # simple mock scoring
    return top_accent, min(confidence, 95), text