AmirFARES committed on
Commit
6d410a9
·
1 Parent(s): 731b37b

upgraded the accent detector

Browse files
Files changed (1) hide show
  1. src/detector.py +40 -10
src/detector.py CHANGED
@@ -14,15 +14,45 @@ model_path = snapshot_download(repo_id="openai/whisper-base", cache_dir=cache_di
14
  asr = pipeline("automatic-speech-recognition", model=model_path)
15
 
16
  def detect_accent(audio_path: str):
17
- result = asr(audio_path, return_timestamps=False, generate_kwargs={"language": "en"})
18
  text = result["text"].lower()
19
 
20
- # Heuristic accent classification (mocked rules)
21
- if "cheers" in text or "mate" in text:
22
- return "British", 85, text
23
- elif "gonna" in text or "dude" in text:
24
- return "American", 90, text
25
- elif "bro" in text or "yeah nah" in text:
26
- return "Australian", 80, text
27
- else:
28
- return "Uncertain", 50, text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  asr = pipeline("automatic-speech-recognition", model=model_path)
15
 
16
  def detect_accent(audio_path: str):
17
+ result = asr(audio_path, return_timestamps=False)
18
  text = result["text"].lower()
19
 
20
+ # Score dictionary for multiple accents
21
+ accent_scores = {
22
+ "American": 0,
23
+ "British": 0,
24
+ "Australian": 0,
25
+ "Indian": 0,
26
+ "Other": 0,
27
+ }
28
+
29
+ # American patterns
30
+ if any(word in text for word in ["gonna", "wanna", "dude", "gotta"]):
31
+ accent_scores["American"] += 2
32
+ if any(word in text for word in ["elevator", "sidewalk", "apartment"]):
33
+ accent_scores["American"] += 1
34
+
35
+ # British patterns
36
+ if any(word in text for word in ["mate", "cheers", "lorry", "flat", "rubbish"]):
37
+ accent_scores["British"] += 2
38
+ if any(word in text for word in ["colour", "favourite", "centre"]):
39
+ accent_scores["British"] += 1
40
+
41
+ # Australian patterns
42
+ if any(word in text for word in ["yeah nah", "arvo", "barbie", "brekkie", "mate"]):
43
+ accent_scores["Australian"] += 2
44
+ if "g’day" in text or "no worries" in text:
45
+ accent_scores["Australian"] += 1
46
+
47
+ # Indian English patterns
48
+ if any(phrase in text for phrase in ["kindly do the needful", "revert back", "only", "prepone"]):
49
+ accent_scores["Indian"] += 2
50
+ if any(word in text for word in ["co-brother", "timepass", "out of station"]):
51
+ accent_scores["Indian"] += 1
52
+
53
+ # Determine best guess
54
+ top_accent = max(accent_scores, key=accent_scores.get)
55
+ confidence = accent_scores[top_accent] * 10 + 50 # basic mock confidence
56
+
57
+ return top_accent, min(confidence, 95), text
58
+