Spaces:
Runtime error
Runtime error
Commit
·
dbecd18
1
Parent(s):
75ac719
Fix yield syntax in generation.py for TTS and image analysis
Browse files
- utils/generation.py +29 -24
utils/generation.py
CHANGED
|
@@ -165,23 +165,24 @@ def request_generation(
|
|
| 165 |
return
|
| 166 |
|
| 167 |
# معالجة تحويل النص إلى صوت (TTS)
|
| 168 |
-
if model_name == TTS_MODEL or output_format == "audio":
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
|
|
|
| 185 |
|
| 186 |
# معالجة الصور
|
| 187 |
if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data:
|
|
@@ -203,7 +204,8 @@ if model_name == TTS_MODEL or output_format == "audio":
|
|
| 203 |
audio_file = io.BytesIO()
|
| 204 |
torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
|
| 205 |
audio_file.seek(0)
|
| 206 |
-
|
|
|
|
| 207 |
else:
|
| 208 |
yield result
|
| 209 |
cache[cache_key] = [result]
|
|
@@ -353,8 +355,9 @@ if model_name == TTS_MODEL or output_format == "audio":
|
|
| 353 |
audio_file = io.BytesIO()
|
| 354 |
torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
|
| 355 |
audio_file.seek(0)
|
| 356 |
-
|
| 357 |
-
|
|
|
|
| 358 |
except Exception as e:
|
| 359 |
logger.error(f"Text-to-speech conversion failed: {e}")
|
| 360 |
yield f"Error: Text-to-speech conversion failed: {e}"
|
|
@@ -460,8 +463,9 @@ if model_name == TTS_MODEL or output_format == "audio":
|
|
| 460 |
audio_file = io.BytesIO()
|
| 461 |
torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
|
| 462 |
audio_file.seek(0)
|
| 463 |
-
|
| 464 |
-
|
|
|
|
| 465 |
except Exception as e:
|
| 466 |
logger.error(f"Text-to-speech conversion failed: {e}")
|
| 467 |
yield f"Error: Text-to-speech conversion failed: {e}"
|
|
@@ -519,8 +523,9 @@ if model_name == TTS_MODEL or output_format == "audio":
|
|
| 519 |
audio_file = io.BytesIO()
|
| 520 |
torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
|
| 521 |
audio_file.seek(0)
|
| 522 |
-
|
| 523 |
-
|
|
|
|
| 524 |
except Exception as e:
|
| 525 |
logger.error(f"Text-to-speech conversion failed: {e}")
|
| 526 |
yield f"Error: Text-to-speech conversion failed: {e}"
|
|
|
|
| 165 |
return
|
| 166 |
|
| 167 |
# معالجة تحويل النص إلى صوت (TTS)
|
| 168 |
+
if model_name == TTS_MODEL or output_format == "audio":
|
| 169 |
+
task_type = "text_to_speech"
|
| 170 |
+
try:
|
| 171 |
+
model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
|
| 172 |
+
processor = AutoProcessor.from_pretrained(TTS_MODEL)
|
| 173 |
+
inputs = processor(text=message, return_tensors="pt")
|
| 174 |
+
audio = model.generate(**inputs)
|
| 175 |
+
audio_file = io.BytesIO()
|
| 176 |
+
torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
|
| 177 |
+
audio_file.seek(0)
|
| 178 |
+
audio_data = audio_file.read()
|
| 179 |
+
yield audio_data # ← تصحيح: استخدام yield مع البيانات مباشرة
|
| 180 |
+
cache[cache_key] = [audio_data]
|
| 181 |
+
return
|
| 182 |
+
except Exception as e:
|
| 183 |
+
logger.error(f"Text-to-speech failed: {e}")
|
| 184 |
+
yield f"Error: Text-to-speech failed: {e}"
|
| 185 |
+
return
|
| 186 |
|
| 187 |
# معالجة الصور
|
| 188 |
if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data:
|
|
|
|
| 204 |
audio_file = io.BytesIO()
|
| 205 |
torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
|
| 206 |
audio_file.seek(0)
|
| 207 |
+
audio_data = audio_file.read()
|
| 208 |
+
yield audio_data
|
| 209 |
else:
|
| 210 |
yield result
|
| 211 |
cache[cache_key] = [result]
|
|
|
|
| 355 |
audio_file = io.BytesIO()
|
| 356 |
torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
|
| 357 |
audio_file.seek(0)
|
| 358 |
+
audio_data = audio_file.read()
|
| 359 |
+
cached_chunks.append(audio_data)
|
| 360 |
+
yield audio_data
|
| 361 |
except Exception as e:
|
| 362 |
logger.error(f"Text-to-speech conversion failed: {e}")
|
| 363 |
yield f"Error: Text-to-speech conversion failed: {e}"
|
|
|
|
| 463 |
audio_file = io.BytesIO()
|
| 464 |
torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
|
| 465 |
audio_file.seek(0)
|
| 466 |
+
audio_data = audio_file.read()
|
| 467 |
+
cached_chunks.append(audio_data)
|
| 468 |
+
yield audio_data
|
| 469 |
except Exception as e:
|
| 470 |
logger.error(f"Text-to-speech conversion failed: {e}")
|
| 471 |
yield f"Error: Text-to-speech conversion failed: {e}"
|
|
|
|
| 523 |
audio_file = io.BytesIO()
|
| 524 |
torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
|
| 525 |
audio_file.seek(0)
|
| 526 |
+
audio_data = audio_file.read()
|
| 527 |
+
cached_chunks.append(audio_data)
|
| 528 |
+
yield audio_data
|
| 529 |
except Exception as e:
|
| 530 |
logger.error(f"Text-to-speech conversion failed: {e}")
|
| 531 |
yield f"Error: Text-to-speech conversion failed: {e}"
|