kj03 committed on
Commit
90d94e7
·
verified ·
1 Parent(s): 3c875dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -21,13 +21,17 @@ def bangla_reader(image):
21
  generated_ids = model.generate(pixel_values)
22
  ocr_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
23
 
24
- # TTS (without speaker argument)
 
 
 
 
25
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
26
- tts.tts_to_file(text=ocr_text, file_path=tmp.name, language="bn") # Removed speaker
27
  audio_path = tmp.name
28
 
29
  return f"OCR ফলাফল: {ocr_text}", audio_path
30
-
31
  # Gradio UI
32
  demo = gr.Interface(
33
  fn=bangla_reader,
 
21
  generated_ids = model.generate(pixel_values)
22
  ocr_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
23
 
24
+ # Choose speaker safely
25
+ available_speakers = tts.speakers
26
+ speaker_id = available_speakers[0] if available_speakers else None
27
+
28
+ # TTS
29
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
30
+ tts.tts_to_file(text=ocr_text, file_path=tmp.name, language="bn", speaker=speaker_id)
31
  audio_path = tmp.name
32
 
33
  return f"OCR ফলাফল: {ocr_text}", audio_path
34
+
35
  # Gradio UI
36
  demo = gr.Interface(
37
  fn=bangla_reader,