Spaces:
Paused
Paused
Update stt/stt_google.py
Browse files- stt/stt_google.py +56 -57
stt/stt_google.py
CHANGED
|
@@ -187,6 +187,42 @@ class GoogleSTT(STTInterface):
|
|
| 187 |
|
| 188 |
log_info(f"📊 Transcribing {len(audio_data)} bytes of audio")
|
| 189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
# ✅ Audio analizi
|
| 191 |
self._analyze_audio_content(audio_data)
|
| 192 |
|
|
@@ -197,69 +233,32 @@ class GoogleSTT(STTInterface):
|
|
| 197 |
log_warning("⚠️ Audio too short after trimming")
|
| 198 |
return None
|
| 199 |
|
| 200 |
-
#
|
| 201 |
-
|
| 202 |
-
log_info(f"🔧 WAV conversion: {len(trimmed_audio)} PCM → {len(wav_audio)} WAV")
|
| 203 |
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
audio_channel_count=1,
|
| 210 |
-
enable_separate_recognition_per_channel=False,
|
| 211 |
-
)
|
| 212 |
-
|
| 213 |
-
log_debug(f"Recognition config: language=tr-TR, sample_rate={config.sample_rate}")
|
| 214 |
-
|
| 215 |
-
# ✅ Create audio object with WAV data
|
| 216 |
-
audio = RecognitionAudio(content=wav_audio)
|
| 217 |
|
| 218 |
-
|
| 219 |
-
log_info(f"🔄 Sending {len(wav_audio)} bytes WAV to Google Cloud Speech API...")
|
| 220 |
-
response = self.client.recognize(config=recognition_config, audio=audio)
|
| 221 |
-
|
| 222 |
-
# ✅ Detaylı response analizi
|
| 223 |
-
log_debug(f"API Response: {response}")
|
| 224 |
-
log_info(f"🔍 Google response details:")
|
| 225 |
-
log_info(f"- Has results: {bool(response.results)}")
|
| 226 |
-
log_info(f"- Results count: {len(response.results)}")
|
| 227 |
|
| 228 |
-
#
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
billed_seconds = response.total_billed_time.total_seconds()
|
| 234 |
-
log_info(f"- Billed time: {billed_seconds}s")
|
| 235 |
|
| 236 |
-
|
| 237 |
-
if
|
| 238 |
-
log_error("
|
| 239 |
-
return None
|
| 240 |
-
else:
|
| 241 |
-
log_info(f"- Billed time: 0s (no audio processed)")
|
| 242 |
-
|
| 243 |
-
# Process results
|
| 244 |
-
if response.results:
|
| 245 |
-
for i, result in enumerate(response.results):
|
| 246 |
-
log_debug(f"Result {i}: {result}")
|
| 247 |
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
transcription = TranscriptionResult(
|
| 252 |
-
text=alternative.transcript,
|
| 253 |
-
confidence=alternative.confidence,
|
| 254 |
-
timestamp=datetime.now().timestamp(),
|
| 255 |
-
language="tr-TR",
|
| 256 |
-
word_timestamps=None
|
| 257 |
-
)
|
| 258 |
-
|
| 259 |
-
log_info(f"✅ Transcription: '{alternative.transcript}' (confidence: {alternative.confidence:.2f})")
|
| 260 |
-
return transcription
|
| 261 |
|
| 262 |
-
|
|
|
|
| 263 |
return None
|
| 264 |
|
| 265 |
except Exception as e:
|
|
|
|
| 187 |
|
| 188 |
log_info(f"📊 Transcribing {len(audio_data)} bytes of audio")
|
| 189 |
|
| 190 |
+
# ✅ Raw audio'yu direkt WAV olarak kaydet ve test et
|
| 191 |
+
import tempfile
|
| 192 |
+
import os
|
| 193 |
+
import wave
|
| 194 |
+
|
| 195 |
+
# Raw audio'yu WAV olarak kaydet
|
| 196 |
+
raw_wav_file = f"/tmp/raw_audio_{datetime.now().strftime('%H%M%S')}.wav"
|
| 197 |
+
|
| 198 |
+
with wave.open(raw_wav_file, 'wb') as wav_file:
|
| 199 |
+
wav_file.setnchannels(1)
|
| 200 |
+
wav_file.setsampwidth(2)
|
| 201 |
+
wav_file.setframerate(config.sample_rate)
|
| 202 |
+
wav_file.writeframes(audio_data)
|
| 203 |
+
|
| 204 |
+
log_info(f"🎯 RAW audio saved as WAV: {raw_wav_file}")
|
| 205 |
+
|
| 206 |
+
# Test koduyla test et
|
| 207 |
+
try:
|
| 208 |
+
import subprocess
|
| 209 |
+
result = subprocess.run([
|
| 210 |
+
'python', '/app/test_single_wav.py', raw_wav_file
|
| 211 |
+
], capture_output=True, text=True, timeout=30)
|
| 212 |
+
|
| 213 |
+
log_info(f"🔍 Raw WAV test result: {result.stdout}")
|
| 214 |
+
if result.stderr:
|
| 215 |
+
log_error(f"🔍 Raw WAV test error: {result.stderr}")
|
| 216 |
+
|
| 217 |
+
# Eğer raw audio çalışıyorsa, sorun trimming'te
|
| 218 |
+
if "Transcript:" in result.stdout:
|
| 219 |
+
log_info("✅ RAW audio works! Problem is in our processing.")
|
| 220 |
+
else:
|
| 221 |
+
log_error("❌ Even RAW audio doesn't work - problem in frontend!")
|
| 222 |
+
|
| 223 |
+
except Exception as e:
|
| 224 |
+
log_warning(f"Could not run raw audio test: {e}")
|
| 225 |
+
|
| 226 |
# ✅ Audio analizi
|
| 227 |
self._analyze_audio_content(audio_data)
|
| 228 |
|
|
|
|
| 233 |
log_warning("⚠️ Audio too short after trimming")
|
| 234 |
return None
|
| 235 |
|
| 236 |
+
# Trimmed audio'yu da kaydet
|
| 237 |
+
trimmed_wav_file = f"/tmp/trimmed_audio_{datetime.now().strftime('%H%M%S')}.wav"
|
|
|
|
| 238 |
|
| 239 |
+
with wave.open(trimmed_wav_file, 'wb') as wav_file:
|
| 240 |
+
wav_file.setnchannels(1)
|
| 241 |
+
wav_file.setsampwidth(2)
|
| 242 |
+
wav_file.setframerate(config.sample_rate)
|
| 243 |
+
wav_file.writeframes(trimmed_audio)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
|
| 245 |
+
log_info(f"🎯 TRIMMED audio saved as WAV: {trimmed_wav_file}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
|
| 247 |
+
# Trimmed audio'yu da test et
|
| 248 |
+
try:
|
| 249 |
+
result = subprocess.run([
|
| 250 |
+
'python', '/app/test_single_wav.py', trimmed_wav_file
|
| 251 |
+
], capture_output=True, text=True, timeout=30)
|
|
|
|
|
|
|
| 252 |
|
| 253 |
+
log_info(f"🔍 Trimmed WAV test result: {result.stdout}")
|
| 254 |
+
if result.stderr:
|
| 255 |
+
log_error(f"🔍 Trimmed WAV test error: {result.stderr}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
|
| 257 |
+
except Exception as e:
|
| 258 |
+
log_warning(f"Could not run trimmed audio test: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
|
| 260 |
+
# Sonuç olarak Google'a gönderme
|
| 261 |
+
log_info("❌ Skipping Google API call for debugging")
|
| 262 |
return None
|
| 263 |
|
| 264 |
except Exception as e:
|