Update app.py
Browse files
app.py
CHANGED
|
@@ -115,11 +115,11 @@ def get_user_ip():
|
|
| 115 |
return request.headers.getlist("X-Forwarded-For")[0].split(',')[0].strip()
|
| 116 |
return request.remote_addr
|
| 117 |
|
| 118 |
-
# --- TTS HELPER FUNCTIONS
|
| 119 |
def call_worker(index, chunk_payload):
|
| 120 |
raw_text = chunk_payload.get("text", "")
|
| 121 |
|
| 122 |
-
#
|
| 123 |
clean_text = re.sub(r'[*_`~#]', '', raw_text)
|
| 124 |
clean_text = re.sub(r'\[.*?\]|\(.*?\)', '', clean_text)
|
| 125 |
clean_text = clean_text.strip()
|
|
@@ -142,8 +142,7 @@ def call_worker(index, chunk_payload):
|
|
| 142 |
"fallback_to_live": True
|
| 143 |
}
|
| 144 |
|
| 145 |
-
|
| 146 |
-
max_attempts = 50
|
| 147 |
|
| 148 |
for attempt in range(max_attempts):
|
| 149 |
workers = list(WORKER_URLS)
|
|
@@ -172,8 +171,7 @@ def call_worker(index, chunk_payload):
|
|
| 172 |
logging.warning(f"🔄 [قطعه {index+1}] کارگرها شلوغ/خطا دادند. استراحت به مدت {sleep_time:.1f} ثانیه...")
|
| 173 |
time.sleep(sleep_time)
|
| 174 |
|
| 175 |
-
|
| 176 |
-
raise ValueError(f"قطعه {index+1} پس از 50 بار تلاش ساخته نشد.")
|
| 177 |
|
| 178 |
# --- AI PODCAST SCRIPT LOGIC (Cohere Labs Space) ---
|
| 179 |
def generate_podcast_in_background(task_id, system_prompt, safety_settings):
|
|
@@ -246,30 +244,30 @@ def generate_podcast_in_background(task_id, system_prompt, safety_settings):
|
|
| 246 |
except Exception as e:
|
| 247 |
with tasks_lock: tasks[task_id].update({'status': 'failed', 'error': str(e)})
|
| 248 |
|
| 249 |
-
# --- FULL AUTO PODCAST LOGIC
|
| 250 |
def generate_full_podcast_audio_background(task_id, prompt, speakers):
|
| 251 |
try:
|
| 252 |
logging.info(f"🚀 [پروژه {task_id}] عملیات ساخت پادکست آغاز شد.")
|
| 253 |
with tasks_lock:
|
| 254 |
tasks[task_id] = {'status': 'writing_script', 'progress': 'در حال نگارش سناریو...'}
|
| 255 |
|
|
|
|
|
|
|
|
|
|
| 256 |
spk_text = "\n".join([f"- {s['id']}: {s['name']}" for s in speakers])
|
| 257 |
|
| 258 |
-
# 🔴 پرامپت دقیقاً منطبق با سایت شد + دستور اکید برای نوشتن فقط به زبان فارسی تا کارگر ارور نده
|
| 259 |
system_prompt = f"""Act as a Professional Podcast Producer.
|
| 260 |
Topic: "{prompt}"
|
| 261 |
Speakers Available:
|
| 262 |
{spk_text}
|
| 263 |
|
| 264 |
-
CRITICAL
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
- 🔴 IMPORTANT: Write the ENTIRE script ONLY in Persian (Farsi) language. Do NOT use English characters.
|
| 269 |
|
| 270 |
Output ONLY valid JSON.
|
| 271 |
-
Format: {{"selected_speakers": ["id1", "id2"], "script": [{{"speaker_id": "id1", "dialogue": "..."}}]}}
|
| 272 |
-
Dialogue rules: No stage directions like [laugh], (sigh). Just spoken words."""
|
| 273 |
|
| 274 |
AYA_SPACE_URL = "https://coherelabs-aya-expanse.hf.space/gradio_api"
|
| 275 |
MAX_ATTEMPTS = 50
|
|
@@ -320,7 +318,6 @@ Dialogue rules: No stage directions like [laugh], (sigh). Just spoken words."""
|
|
| 320 |
for t in data["script"]:
|
| 321 |
if "dialogue" in t: t["dialogue"] = re.sub(r'\[.*?\]|\(.*?\)', '', t["dialogue"]).strip()
|
| 322 |
|
| 323 |
-
# فقط نوبتهایی که متن دارن رو پردازش میکنیم
|
| 324 |
script_turns = [t for t in data.get("script", []) if str(t.get("dialogue", "")).strip()]
|
| 325 |
total_turns = len(script_turns)
|
| 326 |
|
|
@@ -333,15 +330,28 @@ Dialogue rules: No stage directions like [laugh], (sigh). Just spoken words."""
|
|
| 333 |
completed_count = 0
|
| 334 |
|
| 335 |
def process_single_chunk(index, turn_data):
|
| 336 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
dialogue = turn_data.get("dialogue")
|
| 338 |
-
payload = {"text": dialogue, "speaker":
|
| 339 |
idx, audio_seg = call_worker(index, payload)
|
| 340 |
if audio_seg is None:
|
| 341 |
raise ValueError(f"خطا در تولید صدای نوبت {index+1}")
|
| 342 |
return idx, audio_seg
|
| 343 |
|
| 344 |
-
# 🌟 استراتژی طلایی دستهبندی (7 تا 7 تا) که پیشنهاد خودت بود 🌟
|
| 345 |
batch_size = 7
|
| 346 |
batches = [script_turns[i:i + batch_size] for i in range(0, total_turns, batch_size)]
|
| 347 |
|
|
@@ -502,7 +512,7 @@ def create_full_podcast():
|
|
| 502 |
safety = [{"category": c, "threshold": "BLOCK_NONE"} for c in ["HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH", "HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_DANGEROUS_CONTENT"]]
|
| 503 |
spk_text = "\n".join([f"- {s['id']}: {s['name']}" for s in speakers])
|
| 504 |
|
| 505 |
-
sys_prompt = f"""Act as a Professional Podcast Producer.\nTopic: "{prompt}"\nSpeakers Available:\n{spk_text}\nCRITICAL
|
| 506 |
|
| 507 |
threading.Thread(target=generate_podcast_in_background, args=(task_id, sys_prompt, safety)).start()
|
| 508 |
return jsonify({"task_id": task_id}), 202
|
|
|
|
| 115 |
return request.headers.getlist("X-Forwarded-For")[0].split(',')[0].strip()
|
| 116 |
return request.remote_addr
|
| 117 |
|
| 118 |
+
# --- TTS HELPER FUNCTIONS ---
|
| 119 |
def call_worker(index, chunk_payload):
|
| 120 |
raw_text = chunk_payload.get("text", "")
|
| 121 |
|
| 122 |
+
# پاکسازی متن از براکتها و کاراکترهای مخرب
|
| 123 |
clean_text = re.sub(r'[*_`~#]', '', raw_text)
|
| 124 |
clean_text = re.sub(r'\[.*?\]|\(.*?\)', '', clean_text)
|
| 125 |
clean_text = clean_text.strip()
|
|
|
|
| 142 |
"fallback_to_live": True
|
| 143 |
}
|
| 144 |
|
| 145 |
+
max_attempts = 40
|
|
|
|
| 146 |
|
| 147 |
for attempt in range(max_attempts):
|
| 148 |
workers = list(WORKER_URLS)
|
|
|
|
| 171 |
logging.warning(f"🔄 [قطعه {index+1}] کارگرها شلوغ/خطا دادند. استراحت به مدت {sleep_time:.1f} ثانیه...")
|
| 172 |
time.sleep(sleep_time)
|
| 173 |
|
| 174 |
+
raise ValueError(f"قطعه {index+1} پس از 40 بار تلاش ساخته نشد.")
|
|
|
|
| 175 |
|
| 176 |
# --- AI PODCAST SCRIPT LOGIC (Cohere Labs Space) ---
|
| 177 |
def generate_podcast_in_background(task_id, system_prompt, safety_settings):
|
|
|
|
| 244 |
except Exception as e:
|
| 245 |
with tasks_lock: tasks[task_id].update({'status': 'failed', 'error': str(e)})
|
| 246 |
|
| 247 |
+
# --- FULL AUTO PODCAST LOGIC ---
|
| 248 |
def generate_full_podcast_audio_background(task_id, prompt, speakers):
|
| 249 |
try:
|
| 250 |
logging.info(f"🚀 [پروژه {task_id}] عملیات ساخت پادکست آغاز شد.")
|
| 251 |
with tasks_lock:
|
| 252 |
tasks[task_id] = {'status': 'writing_script', 'progress': 'در حال نگارش سناریو...'}
|
| 253 |
|
| 254 |
+
valid_speaker_ids = [str(s['id']).strip() for s in speakers]
|
| 255 |
+
default_speaker_id = valid_speaker_ids[0] if valid_speaker_ids else "Charon"
|
| 256 |
+
|
| 257 |
spk_text = "\n".join([f"- {s['id']}: {s['name']}" for s in speakers])
|
| 258 |
|
|
|
|
| 259 |
system_prompt = f"""Act as a Professional Podcast Producer.
|
| 260 |
Topic: "{prompt}"
|
| 261 |
Speakers Available:
|
| 262 |
{spk_text}
|
| 263 |
|
| 264 |
+
CRITICAL INSTRUCTIONS:
|
| 265 |
+
1. Create a VERY LONG, in-depth, and highly detailed podcast script.
|
| 266 |
+
2. STRICT RULE: Keep EVERY SINGLE dialogue line SHORT. If a speaker has a lot to say, BREAK their speech into multiple separate, consecutive turns for that SAME speaker.
|
| 267 |
+
3. NO stage directions, NO emojis, NO brackets like [laugh] or (sigh). Plain spoken text ONLY.
|
|
|
|
| 268 |
|
| 269 |
Output ONLY valid JSON.
|
| 270 |
+
Format: {{"selected_speakers": ["id1", "id2"], "script": [{{"speaker_id": "id1", "dialogue": "..."}}]}}"""
|
|
|
|
| 271 |
|
| 272 |
AYA_SPACE_URL = "https://coherelabs-aya-expanse.hf.space/gradio_api"
|
| 273 |
MAX_ATTEMPTS = 50
|
|
|
|
| 318 |
for t in data["script"]:
|
| 319 |
if "dialogue" in t: t["dialogue"] = re.sub(r'\[.*?\]|\(.*?\)', '', t["dialogue"]).strip()
|
| 320 |
|
|
|
|
| 321 |
script_turns = [t for t in data.get("script", []) if str(t.get("dialogue", "")).strip()]
|
| 322 |
total_turns = len(script_turns)
|
| 323 |
|
|
|
|
| 330 |
completed_count = 0
|
| 331 |
|
| 332 |
def process_single_chunk(index, turn_data):
    """Synthesize audio for one script turn and return ``(idx, audio_seg)``.

    The turn's ``speaker_id`` is resolved against ``valid_speaker_ids``
    (taken from the enclosing scope) in three steps:

    1. an exact match is used as-is;
    2. otherwise the first entry that matches case-insensitively is used;
    3. otherwise a warning is logged and ``default_speaker_id`` is used.

    Args:
        index: Zero-based position of the turn; forwarded to ``call_worker``
            and used (as ``index + 1``) in the error message.
        turn_data: Mapping for one script turn; the ``"speaker_id"`` and
            ``"dialogue"`` keys are read with ``.get``.

    Returns:
        The ``(idx, audio_seg)`` pair produced by ``call_worker``.

    Raises:
        ValueError: If ``call_worker`` hands back ``None`` as the audio
            segment.
    """
    # Normalise whatever the script generator emitted into a stripped string.
    raw_speaker_id = str(turn_data.get("speaker_id", "")).strip()

    if raw_speaker_id in valid_speaker_ids:
        voice = raw_speaker_id
    else:
        # Tolerate casing differences before giving up on the requested id.
        wanted = raw_speaker_id.lower()
        voice = next(
            (known for known in valid_speaker_ids if known.lower() == wanted),
            None,
        )
        if voice is None:
            logging.warning(f"⚠️ آیدی گوینده نامعتبر '{raw_speaker_id}' تشخیص داده شد. جایگزین شد با '{default_speaker_id}'.")
            voice = default_speaker_id

    payload = {
        "text": turn_data.get("dialogue"),
        "speaker": voice,
        "temperature": 0.9,
        "is_custom": False,
    }

    idx, audio_seg = call_worker(index, payload)
    if audio_seg is not None:
        return idx, audio_seg
    raise ValueError(f"خطا در تولید صدای نوبت {index+1}")
|
| 354 |
|
|
|
|
| 355 |
batch_size = 7
|
| 356 |
batches = [script_turns[i:i + batch_size] for i in range(0, total_turns, batch_size)]
|
| 357 |
|
|
|
|
| 512 |
safety = [{"category": c, "threshold": "BLOCK_NONE"} for c in ["HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH", "HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_DANGEROUS_CONTENT"]]
|
| 513 |
spk_text = "\n".join([f"- {s['id']}: {s['name']}" for s in speakers])
|
| 514 |
|
| 515 |
+
sys_prompt = f"""Act as a Professional Podcast Producer.\nTopic: "{prompt}"\nSpeakers Available:\n{spk_text}\nCRITICAL INSTRUCTIONS:\n1. Create a VERY LONG, in-depth podcast script.\n2. Keep EVERY dialogue line SHORT. Break long speeches into multiple consecutive turns for the same speaker.\n3. NO stage directions, NO emojis, NO brackets. Plain text ONLY.\n\nOutput ONLY valid JSON.\nFormat: {{"selected_speakers": ["id1", "id2"], "script": [{{"speaker_id": "id1", "dialogue": "..."}}]}}"""
|
| 516 |
|
| 517 |
threading.Thread(target=generate_podcast_in_background, args=(task_id, sys_prompt, safety)).start()
|
| 518 |
return jsonify({"task_id": task_id}), 202
|