Spaces:

UcsTurkey
/

flare

Paused

App Files Community

ciyidogan committed on Jul 11, 2025

Commit

59e181e

verified ·

1 Parent(s): 89d0af3

Update stt/stt_google.py

Browse files

Files changed (1) hide show

stt/stt_google.py +56 -57

stt/stt_google.py CHANGED Viewed

@@ -187,6 +187,42 @@ class GoogleSTT(STTInterface):
                 log_info(f"📊 Transcribing {len(audio_data)} bytes of audio")
                 # ✅ Audio analizi
                 self._analyze_audio_content(audio_data)
@@ -197,69 +233,32 @@ class GoogleSTT(STTInterface):
                     log_warning("⚠️ Audio too short after trimming")
                     return None
-                # ✅ Test kodundan EXACT aynı format - wave modülü kullan
-                wav_audio = self._create_wav_like_test(trimmed_audio, config.sample_rate)
-                log_info(f"🔧 WAV conversion: {len(trimmed_audio)} PCM → {len(wav_audio)} WAV")
-                # Configure recognition - TEST KODUNDAN EXACT AYNI
-                recognition_config = RecognitionConfig(
-                    encoding=RecognitionConfig.AudioEncoding.LINEAR16,
-                    sample_rate_hertz=config.sample_rate,
-                    language_code="tr-TR",  # Hardcode tr-TR like test
-                    audio_channel_count=1,
-                    enable_separate_recognition_per_channel=False,
-                )
-                log_debug(f"Recognition config: language=tr-TR, sample_rate={config.sample_rate}")
-                # ✅ Create audio object with WAV data
-                audio = RecognitionAudio(content=wav_audio)
-                # Perform synchronous recognition
-                log_info(f"🔄 Sending {len(wav_audio)} bytes WAV to Google Cloud Speech API...")
-                response = self.client.recognize(config=recognition_config, audio=audio)
-                # ✅ Detaylı response analizi
-                log_debug(f"API Response: {response}")
-                log_info(f"🔍 Google response details:")
-                log_info(f"- Has results: {bool(response.results)}")
-                log_info(f"- Results count: {len(response.results)}")
-                # ✅ Request ID'yi logla
-                if hasattr(response, '_pb') and hasattr(response._pb, 'request_id'):
-                    log_info(f"- Request ID: {response._pb.request_id}")
-                if hasattr(response, 'total_billed_time'):
-                    billed_seconds = response.total_billed_time.total_seconds()
-                    log_info(f"- Billed time: {billed_seconds}s")
-                    # ✅ Eğer billed time 0 ise, Google hiç audio işlememiş demektir
-                    if billed_seconds == 0:
-                        log_error("❌ Google didn't process any audio - possible format issue")
-                        return None
-                else:
-                    log_info(f"- Billed time: 0s (no audio processed)")
-                # Process results
-                if response.results:
-                    for i, result in enumerate(response.results):
-                        log_debug(f"Result {i}: {result}")
-                        if result.alternatives:
-                            alternative = result.alternatives[0]
-                            transcription = TranscriptionResult(
-                                text=alternative.transcript,
-                                confidence=alternative.confidence,
-                                timestamp=datetime.now().timestamp(),
-                                language="tr-TR",
-                                word_timestamps=None
-                            )
-                            log_info(f"✅ Transcription: '{alternative.transcript}' (confidence: {alternative.confidence:.2f})")
-                            return transcription
-                log_warning("⚠️ No transcription results - Google couldn't recognize speech")
                 return None
             except Exception as e:

                 log_info(f"📊 Transcribing {len(audio_data)} bytes of audio")
+                # ✅ Raw audio'yu direkt WAV olarak kaydet ve test et
+                import tempfile
+                import os
+                import wave
+                # Raw audio'yu WAV olarak kaydet
+                raw_wav_file = f"/tmp/raw_audio_{datetime.now().strftime('%H%M%S')}.wav"
+                with wave.open(raw_wav_file, 'wb') as wav_file:
+                    wav_file.setnchannels(1)
+                    wav_file.setsampwidth(2)
+                    wav_file.setframerate(config.sample_rate)
+                    wav_file.writeframes(audio_data)
+                log_info(f"🎯 RAW audio saved as WAV: {raw_wav_file}")
+                # Test koduyla test et
+                try:
+                    import subprocess
+                    result = subprocess.run([
+                        'python', '/app/test_single_wav.py', raw_wav_file
+                    ], capture_output=True, text=True, timeout=30)
+                    log_info(f"🔍 Raw WAV test result: {result.stdout}")
+                    if result.stderr:
+                        log_error(f"🔍 Raw WAV test error: {result.stderr}")
+                    # Eğer raw audio çalışıyorsa, sorun trimming'te
+                    if "Transcript:" in result.stdout:
+                        log_info("✅ RAW audio works! Problem is in our processing.")
+                    else:
+                        log_error("❌ Even RAW audio doesn't work - problem in frontend!")
+                except Exception as e:
+                    log_warning(f"Could not run raw audio test: {e}")
                 # ✅ Audio analizi
                 self._analyze_audio_content(audio_data)
                     log_warning("⚠️ Audio too short after trimming")
                     return None
+                # Trimmed audio'yu da kaydet
+                trimmed_wav_file = f"/tmp/trimmed_audio_{datetime.now().strftime('%H%M%S')}.wav"
+                with wave.open(trimmed_wav_file, 'wb') as wav_file:
+                    wav_file.setnchannels(1)
+                    wav_file.setsampwidth(2)
+                    wav_file.setframerate(config.sample_rate)
+                    wav_file.writeframes(trimmed_audio)
+                log_info(f"🎯 TRIMMED audio saved as WAV: {trimmed_wav_file}")
+                # Trimmed audio'yu da test et
+                try:
+                    result = subprocess.run([
+                        'python', '/app/test_single_wav.py', trimmed_wav_file
+                    ], capture_output=True, text=True, timeout=30)
+                    log_info(f"🔍 Trimmed WAV test result: {result.stdout}")
+                    if result.stderr:
+                        log_error(f"🔍 Trimmed WAV test error: {result.stderr}")
+                except Exception as e:
+                    log_warning(f"Could not run trimmed audio test: {e}")
+                # Sonuç olarak Google'a gönderme
+                log_info("❌ Skipping Google API call for debugging")
                 return None
             except Exception as e: