Spaces:

AmirFARES
/

accentometer

Sleeping

App Files Files Community

AmirFARES committed on May 29, 2025

Commit

6d410a9

1 Parent(s): 731b37b

upgraded the accent detector

Browse files

Files changed (1) hide show

src/detector.py +40 -10

src/detector.py CHANGED Viewed

@@ -14,15 +14,45 @@ model_path = snapshot_download(repo_id="openai/whisper-base", cache_dir=cache_di
 asr = pipeline("automatic-speech-recognition", model=model_path)
 def detect_accent(audio_path: str):
-    result = asr(audio_path, return_timestamps=False, generate_kwargs={"language": "en"})
     text = result["text"].lower()
-    # Heuristic accent classification (mocked rules)
-    if "cheers" in text or "mate" in text:
-        return "British", 85, text
-    elif "gonna" in text or "dude" in text:
-        return "American", 90, text
-    elif "bro" in text or "yeah nah" in text:
-        return "Australian", 80, text
-    else:
-        return "Uncertain", 50, text

 # Speech-recognition pipeline built from the model at model_path; detect_accent calls it to transcribe audio.
 asr = pipeline("automatic-speech-recognition", model=model_path)
 def detect_accent(audio_path: str):
     """Transcribe an audio file and guess the speaker's accent from vocabulary.

     The audio is transcribed with the module-level ``asr`` pipeline and the
     lowercased transcript is scanned for accent-typical words and phrases.
     Each rule that fires adds its weight to one accent's score, and the
     accent with the highest score wins. This is a vocabulary heuristic, not
     an acoustic classifier.

     Args:
         audio_path: Path of the audio file, passed straight to ``asr``.

     Returns:
         A ``(accent, confidence, text)`` tuple. ``accent`` is one of
         "American", "British", "Australian", "Indian", or "Other" (returned
         when no cue matched at all). ``confidence`` is a mock percentage,
         ``50 + 10 * score`` capped at 95. ``text`` is the lowercased
         transcript.
     """
     # Local import: the file's import block is not part of this hunk.
     import re

     result = asr(audio_path, return_timestamps=False)
     text = result["text"].lower()

     # Match against a copy with typographic apostrophes folded to ASCII, so
     # both "g'day" and "g’day" are recognised. The returned text is untouched.
     searchable = text.replace("\u2019", "'")

     # (accent, weight, cue words/phrases). Strong slang cues weigh 2,
     # weaker vocabulary/spelling cues weigh 1.
     rules = (
         ("American", 2, ("gonna", "wanna", "dude", "gotta")),
         ("American", 1, ("elevator", "sidewalk", "apartment")),
         ("British", 2, ("mate", "cheers", "lorry", "flat", "rubbish")),
         ("British", 1, ("colour", "favourite", "centre")),
         ("Australian", 2, ("yeah nah", "arvo", "barbie", "brekkie", "mate")),
         ("Australian", 1, ("g'day", "no worries")),
         ("Indian", 2, ("kindly do the needful", "revert back", "only", "prepone")),
         ("Indian", 1, ("co-brother", "timepass", "out of station")),
     )

     # Score dictionary for multiple accents. "Other" never accumulates
     # points; it is the fallback label when nothing matched.
     accent_scores = {
         "American": 0,
         "British": 0,
         "Australian": 0,
         "Indian": 0,
         "Other": 0,
     }

     for accent, weight, cues in rules:
         # Whole-word match (optional plural "s") instead of a raw substring
         # test, so "only" no longer fires on "commonly", nor "mate" on
         # "estimate", nor "flat" on "inflation".
         alternatives = "|".join(re.escape(cue) for cue in cues)
         pattern = r"(?<!\w)(?:" + alternatives + r")s?(?!\w)"
         if re.search(pattern, searchable):
             accent_scores[accent] += weight

     # Determine best guess. On a tie, max() keeps the first key in insertion
     # order (e.g. "mate" alone scores British and Australian equally and
     # resolves to British).
     top_accent = max(accent_scores, key=accent_scores.get)
     top_score = accent_scores[top_accent]
     if top_score == 0:
         # No evidence at all: do not default to the first key ("American").
         top_accent = "Other"

     confidence = top_score * 10 + 50  # basic mock confidence
     return top_accent, min(confidence, 95), text