Spaces:

AmirFARES
/

accentometer

Sleeping

App Files Files Community

accentometer / src /detector.py

AmirFARES

fixed detector

f6b78b1 10 months ago

raw

history blame contribute delete

2.16 kB

	import os
	import re

	from huggingface_hub import snapshot_download
	from transformers import pipeline

	# /tmp is used for every model file because it is always writable in
	# Hugging Face Spaces.
	cache_dir = "/tmp/hf_cache"
	# NOTE(review): transformers is already imported at this point, so it may read
	# this variable too late for the setting to take effect — confirm.
	os.environ["TRANSFORMERS_CACHE"] = cache_dir
	os.makedirs(cache_dir, exist_ok=True)

	# Fetch the Whisper checkpoint into the writable cache and keep its local path.
	model_path = snapshot_download(
	    repo_id="openai/whisper-base",
	    cache_dir=cache_dir,
	)

	# Build the speech-recognition pipeline from the downloaded files.
	asr = pipeline(task="automatic-speech-recognition", model=model_path)

	# Vocabulary cues per accent, grouped into (phrases, points) tiers. Dict order
	# matters: when two accents tie on score, the one listed first wins.
	_ACCENT_CUES = {
	    "American": (
	        (("gonna", "wanna", "dude", "gotta"), 2),
	        (("elevator", "sidewalk", "apartment"), 1),
	    ),
	    "British": (
	        (("mate", "cheers", "lorry", "flat", "rubbish"), 2),
	        (("colour", "favourite", "centre"), 1),
	    ),
	    "Australian": (
	        (("yeah nah", "arvo", "barbie", "brekkie"), 2),
	        (("g'day", "no worries"), 1),
	    ),
	    "Indian": (
	        (("kindly do the needful", "revert back", "only", "prepone"), 2),
	        (("co-brother", "timepass", "out of station"), 1),
	    ),
	}


	def _compile_phrases(phrases):
	    """Build one regex that matches any of *phrases* as a whole word or phrase."""
	    alternatives = "|".join(re.escape(phrase) for phrase in phrases)
	    return re.compile(rf"\b(?:{alternatives})\b")


	# Compiled once at import time so each call only runs the searches.
	_ACCENT_PATTERNS = {
	    accent: [(_compile_phrases(phrases), points) for phrases, points in tiers]
	    for accent, tiers in _ACCENT_CUES.items()
	}


	def detect_accent(audio_path: str, *, transcriber=None) -> tuple:
	    """Transcribe an audio file and guess the speaker's accent from vocabulary.

	    The transcript is lower-cased and searched for accent-specific words and
	    phrases. Each accent has a 2-point tier and a 1-point tier; a tier scores
	    once if any of its phrases occurs. Phrases must match as whole words, so
	    "flat" does not fire on "inflation" and "only" does not fire on "commonly".

	    Args:
	        audio_path: Path of the audio file, passed straight to the transcriber.
	        transcriber: Optional callable invoked as
	            ``transcriber(audio_path, return_timestamps=True)`` that returns a
	            mapping with a ``"text"`` entry. Defaults to the module-level
	            ``asr`` pipeline.

	    Returns:
	        A ``(accent, confidence, text)`` tuple. ``accent`` is the top-scoring
	        accent name, or ``"Unknown"`` with confidence 50 when nothing matched.
	        ``confidence`` is ``50 + 10 * score``, capped at 95. ``text`` is the
	        lower-cased transcript.
	    """
	    recognizer = asr if transcriber is None else transcriber
	    result = recognizer(audio_path, return_timestamps=True)
	    text = result["text"].lower()

	    # Match against a copy with typographic apostrophes straightened so that
	    # "g’day" and "g'day" are treated alike; the returned text is left as-is.
	    searchable = text.replace("\u2019", "'").replace("\u2018", "'")

	    accent_scores = {
	        accent: sum(points for pattern, points in tiers if pattern.search(searchable))
	        for accent, tiers in _ACCENT_PATTERNS.items()
	    }

	    # Determine best guess or fall back to "Unknown".
	    top_accent = max(accent_scores, key=accent_scores.get)
	    if accent_scores[top_accent] == 0:
	        return "Unknown", 50, text  # default fallback

	    confidence = accent_scores[top_accent] * 10 + 50  # simple mock scoring
	    return top_accent, min(confidence, 95), text