AmirFARES committed on
Commit
f6b78b1
·
1 Parent(s): 355154b

fixed detector

Browse files
Files changed (1) hide show
  1. src/detector.py +7 -7
src/detector.py CHANGED
@@ -14,7 +14,7 @@ model_path = snapshot_download(repo_id="openai/whisper-base", cache_dir=cache_di
14
  asr = pipeline("automatic-speech-recognition", model=model_path)
15
 
16
  def detect_accent(audio_path: str):
17
- result = asr(audio_path, return_timestamps=False)
18
  text = result["text"].lower()
19
 
20
  # Score dictionary for multiple accents
@@ -23,7 +23,6 @@ def detect_accent(audio_path: str):
23
  "British": 0,
24
  "Australian": 0,
25
  "Indian": 0,
26
- "Other": 0,
27
  }
28
 
29
  # American patterns
@@ -39,7 +38,7 @@ def detect_accent(audio_path: str):
39
  accent_scores["British"] += 1
40
 
41
  # Australian patterns
42
- if any(word in text for word in ["yeah nah", "arvo", "barbie", "brekkie", "mate"]):
43
  accent_scores["Australian"] += 2
44
  if "g’day" in text or "no worries" in text:
45
  accent_scores["Australian"] += 1
@@ -50,9 +49,10 @@ def detect_accent(audio_path: str):
50
  if any(word in text for word in ["co-brother", "timepass", "out of station"]):
51
  accent_scores["Indian"] += 1
52
 
53
- # Determine best guess
54
  top_accent = max(accent_scores, key=accent_scores.get)
55
- confidence = accent_scores[top_accent] * 10 + 50 # basic mock confidence
56
-
57
- return top_accent, min(confidence, 95), text
58
 
 
 
 
14
  asr = pipeline("automatic-speech-recognition", model=model_path)
15
 
16
  def detect_accent(audio_path: str):
17
+ result = asr(audio_path, return_timestamps=True)
18
  text = result["text"].lower()
19
 
20
  # Score dictionary for multiple accents
 
23
  "British": 0,
24
  "Australian": 0,
25
  "Indian": 0,
 
26
  }
27
 
28
  # American patterns
 
38
  accent_scores["British"] += 1
39
 
40
  # Australian patterns
41
+ if any(word in text for word in ["yeah nah", "arvo", "barbie", "brekkie"]):
42
  accent_scores["Australian"] += 2
43
  if "g’day" in text or "no worries" in text:
44
  accent_scores["Australian"] += 1
 
49
  if any(word in text for word in ["co-brother", "timepass", "out of station"]):
50
  accent_scores["Indian"] += 1
51
 
52
+ # Determine best guess or fallback to "Unknown"
53
  top_accent = max(accent_scores, key=accent_scores.get)
54
+ if accent_scores[top_accent] == 0:
55
+ return "Unknown", 50, text # default fallback
 
56
 
57
+ confidence = accent_scores[top_accent] * 10 + 50 # simple mock scoring
58
+ return top_accent, min(confidence, 95), text