Spaces:
Sleeping
Sleeping
| import tempfile | |
| import os | |
| from groq import Groq | |
| from models import SpeechResult | |
| client = Groq(api_key=os.environ.get("GROQ_API_KEY")) | |
class SpeechAnalyzer:
    """Transcribe audio through the Groq Whisper endpoint and score delivery.

    ``transcribe`` is the public entry point.  The private helpers turn the
    ``segments`` list of a ``verbose_json`` transcription into four metrics:
    words per minute, a pace label, a count of long pauses and a 0-100
    clarity score.  Segments are accessed as mappings with ``start``,
    ``end``, ``text`` and (optionally) ``avg_logprob`` keys.
    """

    # A gap between consecutive segments longer than this many seconds
    # counts as one pause.
    PAUSE_GAP_SECONDS = 2

    # Pace boundaries in words per minute:
    #   wpm < SLOW            -> "too_slow"
    #   SLOW <= wpm <= NORMAL -> "normal"
    #   NORMAL < wpm <= FAST  -> "slightly_fast"
    #   wpm > FAST            -> "too_fast"
    SLOW_WPM = 70
    NORMAL_MAX_WPM = 160
    FAST_MAX_WPM = 190

    def transcribe(self, audio_bytes: bytes) -> "SpeechResult":
        """Transcribe ``audio_bytes`` and return a populated ``SpeechResult``.

        The bytes are written to a temporary ``.wav`` file which is then
        uploaded via the module-level Groq ``client``.  This method does not
        raise for ordinary errors: any ``Exception`` is converted into a
        ``SpeechResult`` with ``success=False`` and the error text in
        ``message``.

        Args:
            audio_bytes: Raw audio content to transcribe.

        Returns:
            A ``SpeechResult`` carrying the transcript, detected language and
            delivery metrics on success, or zeroed/"unknown" fields on failure.
        """
        tmp_path = None
        try:
            # delete=False so the file can be closed and reopened for reading
            # before it is uploaded; removal is handled in ``finally``.
            with tempfile.NamedTemporaryFile(
                suffix=".wav", delete=False
            ) as tmp:
                tmp_path = tmp.name
                tmp.write(audio_bytes)

            with open(tmp_path, "rb") as f:
                result = client.audio.transcriptions.create(
                    file=f,
                    model="whisper-large-v3-turbo",
                    response_format="verbose_json",
                )

            metrics = self._extract_metrics(result)
            return SpeechResult(
                success=True,
                text=result.text.strip(),
                language=result.language,
                words_per_minute=metrics["words_per_minute"],
                pause_count=metrics["pause_count"],
                clarity_score=metrics["clarity_score"],
                speech_pace=metrics["speech_pace"],
            )
        except Exception as e:
            return SpeechResult(
                success=False,
                message=str(e),
                text="",
                language="unknown",
                words_per_minute=0,
                pause_count=0,
                clarity_score=0,
                speech_pace="unknown",
            )
        finally:
            # Always remove the temp file, including when the upload or the
            # metric extraction failed.  Cleanup is best-effort: a failed
            # unlink must not turn a good transcription into an error.
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    pass

    # ---------------------------------------------
    # Private helpers
    # ---------------------------------------------

    def _extract_metrics(self, result) -> dict:
        """Derive delivery metrics from a transcription result.

        Args:
            result: Object whose optional ``segments`` attribute is a list of
                mappings with ``start``, ``end`` and ``text`` keys.

        Returns:
            A dict with keys ``words_per_minute``, ``pause_count``,
            ``clarity_score`` and ``speech_pace``.  When there are no
            segments every number is 0 and the pace is ``"unknown"``.
        """
        segments = getattr(result, "segments", []) or []
        if not segments:
            return {
                "words_per_minute": 0,
                "pause_count": 0,
                "clarity_score": 0,
                "speech_pace": "unknown",
            }

        # -- Words per minute and pace label --
        total_duration = segments[-1]["end"] - segments[0]["start"]
        total_words = sum(len(s["text"].split()) for s in segments)

        if total_duration > 0:
            wpm = round(total_words / total_duration * 60, 2)
            if wpm < self.SLOW_WPM:
                pace = "too_slow"
            elif wpm <= self.NORMAL_MAX_WPM:
                pace = "normal"
            elif wpm <= self.FAST_MAX_WPM:
                pace = "slightly_fast"
            else:
                pace = "too_fast"
        else:
            # Without a positive duration no rate can be computed, so report
            # "unknown" rather than labelling the placeholder 0 as too slow.
            wpm = 0
            pace = "unknown"

        # -- Pauses: silent gaps between consecutive segments --
        pause_count = sum(
            1
            for prev, cur in zip(segments, segments[1:])
            if cur["start"] - prev["end"] > self.PAUSE_GAP_SECONDS
        )

        # -- Clarity --
        clarity_score = self._calculate_clarity(segments)

        return {
            "words_per_minute": wpm,
            "pause_count": pause_count,
            "clarity_score": clarity_score,
            "speech_pace": pace,
        }

    def _calculate_clarity(self, segments: list) -> float:
        """Map the mean segment ``avg_logprob`` onto a 0-100 clarity score.

        ``avg_logprob`` is normally negative; closer to 0 means clearer
        speech.  The range [-1.0, 0.0] is mapped linearly onto [0, 100] and
        values outside it are clamped:

            -0.0 -> 100
            -0.2 -> 80   (good interview speech)
            -0.5 -> 50
            -1.0 -> 0

        Segments without an ``avg_logprob`` are ignored.  If none has one,
        the marker-based ``_clarity_fallback`` is used instead.
        """
        logprobs = [
            s["avg_logprob"]
            for s in segments
            if s.get("avg_logprob") is not None
        ]
        if not logprobs:
            return self._clarity_fallback(segments)

        avg_logprob = sum(logprobs) / len(logprobs)  # normally negative
        score = (avg_logprob + 1.0) * 100.0
        return round(max(0.0, min(100.0, score)), 2)

    def _clarity_fallback(self, segments: list) -> float:
        """Estimate clarity from unclear-speech markers in the segment text.

        Used when no segment carries ``avg_logprob``.  Every (segment,
        marker) hit is counted, so a segment containing several different
        markers counts several times; the hit count divided by the number of
        segments is subtracted from 1 and the result is clamped to [0, 100].

        NOTE: the filler markers " uh " and " um " are padded with spaces, so
        they only match in the middle of a segment's text.

        Returns:
            0.0 for an empty segment list, otherwise a score in [0, 100].
        """
        if not segments:
            return 0.0

        unclear_markers = ["[inaudible]", "[unclear]", "...", " uh ", " um "]
        unclear_count = 0
        for s in segments:
            # A present-but-None "text" is treated like an empty string
            # instead of raising AttributeError on .lower().
            text = (s.get("text") or "").lower()
            unclear_count += sum(
                1 for marker in unclear_markers if marker in text
            )

        unclear_ratio = unclear_count / len(segments)
        return round(max(0.0, min(100.0, (1 - unclear_ratio) * 100)), 2)