ibrahimlasfar committed on
Commit
dbecd18
·
1 Parent(s): 75ac719

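Fix yield syntax in generation.py for TTS and image analysis (read the audio buffer once into audio_data and reuse it, so the yielded and cached bytes are no longer empty from a second read())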

Browse files
Files changed (1) hide show
  1. utils/generation.py +29 -24
utils/generation.py CHANGED
@@ -165,23 +165,24 @@ def request_generation(
165
  return
166
 
167
  # معالجة تحويل النص إلى صوت (TTS)
168
- if model_name == TTS_MODEL or output_format == "audio":
169
- task_type = "text_to_speech"
170
- try:
171
- model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
172
- processor = AutoProcessor.from_pretrained(TTS_MODEL)
173
- inputs = processor(text=message, return_tensors="pt")
174
- audio = model.generate(**inputs)
175
- audio_file = io.BytesIO()
176
- torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
177
- audio_file.seek(0)
178
- yield audio_file.read()
179
- cache[cache_key] = [audio_file.read()]
180
- return
181
- except Exception as e:
182
- logger.error(f"Text-to-speech failed: {e}")
183
- yield f"Error: Text-to-speech failed: {e}"
184
- return
 
185
 
186
  # معالجة الصور
187
  if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data:
@@ -203,7 +204,8 @@ if model_name == TTS_MODEL or output_format == "audio":
203
  audio_file = io.BytesIO()
204
  torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
205
  audio_file.seek(0)
206
- yield audio_file.read()
 
207
  else:
208
  yield result
209
  cache[cache_key] = [result]
@@ -353,8 +355,9 @@ if model_name == TTS_MODEL or output_format == "audio":
353
  audio_file = io.BytesIO()
354
  torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
355
  audio_file.seek(0)
356
- cached_chunks.append(audio_file.read())
357
- yield audio_file.read()
 
358
  except Exception as e:
359
  logger.error(f"Text-to-speech conversion failed: {e}")
360
  yield f"Error: Text-to-speech conversion failed: {e}"
@@ -460,8 +463,9 @@ if model_name == TTS_MODEL or output_format == "audio":
460
  audio_file = io.BytesIO()
461
  torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
462
  audio_file.seek(0)
463
- cached_chunks.append(audio_file.read())
464
- yield audio_file.read()
 
465
  except Exception as e:
466
  logger.error(f"Text-to-speech conversion failed: {e}")
467
  yield f"Error: Text-to-speech conversion failed: {e}"
@@ -519,8 +523,9 @@ if model_name == TTS_MODEL or output_format == "audio":
519
  audio_file = io.BytesIO()
520
  torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
521
  audio_file.seek(0)
522
- cached_chunks.append(audio_file.read())
523
- yield audio_file.read()
 
524
  except Exception as e:
525
  logger.error(f"Text-to-speech conversion failed: {e}")
526
  yield f"Error: Text-to-speech conversion failed: {e}"
 
165
  return
166
 
167
  # معالجة تحويل النص إلى صوت (TTS)
168
+ if model_name == TTS_MODEL or output_format == "audio":
169
+ task_type = "text_to_speech"
170
+ try:
171
+ model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
172
+ processor = AutoProcessor.from_pretrained(TTS_MODEL)
173
+ inputs = processor(text=message, return_tensors="pt")
174
+ audio = model.generate(**inputs)
175
+ audio_file = io.BytesIO()
176
+ torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
177
+ audio_file.seek(0)
178
+ audio_data = audio_file.read()
179
+ yield audio_data # ← تصحيح: استخدام yield مع البيانات مباشرة
180
+ cache[cache_key] = [audio_data]
181
+ return
182
+ except Exception as e:
183
+ logger.error(f"Text-to-speech failed: {e}")
184
+ yield f"Error: Text-to-speech failed: {e}"
185
+ return
186
 
187
  # معالجة الصور
188
  if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data:
 
204
  audio_file = io.BytesIO()
205
  torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
206
  audio_file.seek(0)
207
+ audio_data = audio_file.read()
208
+ yield audio_data
209
  else:
210
  yield result
211
  cache[cache_key] = [result]
 
355
  audio_file = io.BytesIO()
356
  torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
357
  audio_file.seek(0)
358
+ audio_data = audio_file.read()
359
+ cached_chunks.append(audio_data)
360
+ yield audio_data
361
  except Exception as e:
362
  logger.error(f"Text-to-speech conversion failed: {e}")
363
  yield f"Error: Text-to-speech conversion failed: {e}"
 
463
  audio_file = io.BytesIO()
464
  torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
465
  audio_file.seek(0)
466
+ audio_data = audio_file.read()
467
+ cached_chunks.append(audio_data)
468
+ yield audio_data
469
  except Exception as e:
470
  logger.error(f"Text-to-speech conversion failed: {e}")
471
  yield f"Error: Text-to-speech conversion failed: {e}"
 
523
  audio_file = io.BytesIO()
524
  torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
525
  audio_file.seek(0)
526
+ audio_data = audio_file.read()
527
+ cached_chunks.append(audio_data)
528
+ yield audio_data
529
  except Exception as e:
530
  logger.error(f"Text-to-speech conversion failed: {e}")
531
  yield f"Error: Text-to-speech conversion failed: {e}"