"""Heuristic English-accent guesser built on a Whisper transcription.

The audio is transcribed with the ``openai/whisper-base`` model and the
resulting text is scanned for vocabulary associated with a handful of
English varieties.  The "confidence" value is a simple mock score derived
from keyword hits, not a calibrated probability.
"""
import os
import re
from typing import Dict, Tuple

# Use /tmp, which is always writable in Hugging Face Spaces.
cache_dir = "/tmp/hf_cache"
os.makedirs(cache_dir, exist_ok=True)
# Set the cache location BEFORE importing transformers: the library resolves
# its cache path from the environment when it is imported, so assigning the
# variable afterwards has no effect.
os.environ["TRANSFORMERS_CACHE"] = cache_dir

from huggingface_hub import snapshot_download  # noqa: E402
from transformers import pipeline  # noqa: E402

# Download the model into the writable tmp directory.
model_path = snapshot_download(repo_id="openai/whisper-base", cache_dir=cache_dir)

# Load the pipeline from the local path.
asr = pipeline("automatic-speech-recognition", model=model_path)

# (accent, weight, phrases): a rule adds ``weight`` to ``accent`` once if ANY
# of its phrases occurs in the transcript.  The order of first appearance of
# each accent also decides ties (earlier accents win).
# NOTE(review): "only" is an extremely common word, so almost any transcript
# earns Indian +2 -- consider replacing it with a more specific marker.
_ACCENT_RULES = (
    ("American", 2, ("gonna", "wanna", "dude", "gotta")),
    ("American", 1, ("elevator", "sidewalk", "apartment")),
    ("British", 2, ("mate", "cheers", "lorry", "flat", "rubbish")),
    ("British", 1, ("colour", "favourite", "centre")),
    ("Australian", 2, ("yeah nah", "arvo", "barbie", "brekkie")),
    ("Australian", 1, ("g'day", "no worries")),
    ("Indian", 2, ("kindly do the needful", "revert back", "only", "prepone")),
    ("Indian", 1, ("co-brother", "timepass", "out of station")),
)

# Typographic apostrophes are folded to "'" so that both "g’day" and "g'day"
# in a transcript match the same rule.
_APOSTROPHE_TABLE = str.maketrans({"\u2018": "'", "\u2019": "'"})


def _compile_rule(phrases):
    """Return a regex matching any of *phrases* as whole words/phrases.

    Lookarounds are used instead of plain substring tests so that "only" does
    not fire on "commonly", nor "mate" on "estimate".  An optional trailing
    "s" keeps simple plurals ("flats", "elevators") matching.
    """
    alternation = "|".join(re.escape(phrase) for phrase in phrases)
    return re.compile(r"(?<!\w)(?:" + alternation + r")s?(?!\w)")


_COMPILED_RULES = tuple(
    (accent, weight, _compile_rule(phrases))
    for accent, weight, phrases in _ACCENT_RULES
)


def _score_text(text: str) -> Dict[str, int]:
    """Return the per-accent keyword score for an already lower-cased text."""
    normalized = text.translate(_APOSTROPHE_TABLE)
    # dict.fromkeys keeps first-seen order: American, British, Australian, Indian.
    scores = dict.fromkeys((accent for accent, _, _ in _ACCENT_RULES), 0)
    for accent, weight, pattern in _COMPILED_RULES:
        if pattern.search(normalized):
            scores[accent] += weight
    return scores


def detect_accent(audio_path: str) -> Tuple[str, int, str]:
    """Transcribe *audio_path* and guess the speaker's English accent.

    Args:
        audio_path: Path of the audio file handed to the ASR pipeline.

    Returns:
        A ``(accent, confidence, text)`` tuple where ``accent`` is one of
        "American", "British", "Australian", "Indian" or "Unknown",
        ``confidence`` is a mock score (50 when nothing matched, otherwise
        ``score * 10 + 50`` capped at 95) and ``text`` is the lower-cased
        transcript.
    """
    result = asr(audio_path, return_timestamps=True)
    text = result["text"].lower()

    scores = _score_text(text)

    # max() returns the first maximal key, so ties go to the accent listed
    # first in _ACCENT_RULES.
    top_accent = max(scores, key=scores.get)
    if scores[top_accent] == 0:
        return "Unknown", 50, text  # default fallback

    confidence = min(scores[top_accent] * 10 + 50, 95)  # simple mock scoring
    return top_accent, confidence, text