import os
import re
import tempfile
import traceback
from io import BytesIO

import google.generativeai as genai
import gradio as gr
from dotenv import load_dotenv
from gtts import gTTS
from PIL import Image
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image as RLImage

# Load API keys
load_dotenv()
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    raise EnvironmentError("Missing API keys. Check GOOGLE_API_KEY in .env.")

# Configure models
genai.configure(api_key=api_key)
text_model = genai.GenerativeModel("gemini-1.5-flash")
image_model = genai.GenerativeModel(
    model_name="gemini-2.0-flash-preview-image-generation",
    generation_config={"response_modalities": ["TEXT", "IMAGE"]},
)


# -------------------- Utility Functions --------------------
def generate_image_from_text(prompt):
    """Ask the image model to illustrate *prompt*.

    Args:
        prompt: Text description sent to ``image_model.generate_content``.

    Returns:
        A PIL image opened from the first response part whose inline data has
        an ``image/*`` MIME type, or ``None`` when the response carries no
        image or the request raises.
    """
    try:
        response = image_model.generate_content(prompt)
        for part in response.candidates[0].content.parts:
            inline = getattr(part, "inline_data", None)
            if inline is not None and inline.mime_type.startswith("image/"):
                return Image.open(BytesIO(inline.data))
    except Exception as e:
        # Best effort: the UI treats None as "no image for this scene".
        print("Image generation error:", e)
    return None


def summarize_scene(scene_text):
    """Return a one-sentence summary of *scene_text* from the text model.

    Args:
        scene_text: The scene description (the caller passes the image prompt).

    Returns:
        The stripped model response, or ``"Summary unavailable."`` when the
        request raises.
    """
    try:
        response = text_model.generate_content(
            f"Summarize this scene in one sentence: {scene_text}"
        )
        return response.text.strip()
    except Exception as e:
        print("Summary error:", e)
        return "Summary unavailable."


def explain_scene(image):
    """Return a detailed description of *image* from the text model.

    Args:
        image: The image object passed to the model alongside the instruction.

    Returns:
        The stripped model response, or ``"Explanation unavailable."`` when
        the request raises.
    """
    try:
        response = text_model.generate_content(
            [image, "Explain this image scene in detail."]
        )
        return response.text.strip()
    except Exception as e:
        # Was a bare ``except:`` -- that also trapped KeyboardInterrupt /
        # SystemExit and hid the cause. Log like the sibling helpers do.
        print("Explanation error:", e)
        return "Explanation unavailable."
def text_to_speech(text):
    """Convert *text* to speech and return the path of the generated MP3.

    Args:
        text: The text to read aloud.

    Returns:
        Path to a temporary ``.mp3`` file, or ``None`` when *text* is blank
        or synthesis fails.
    """
    if not text or not str(text).strip():
        # Nothing to read aloud; skip the request and the temp file.
        return None
    try:
        tts = gTTS(text)
        # Reserve a path, then close the handle before saving: the original
        # left the descriptor open (leak) and wrote to the file by name while
        # it was still held open, which fails on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            audio_path = tmp.name
        tts.save(audio_path)
        return audio_path
    except Exception as e:
        print("TTS error:", e)
        return None


def generate_pdf(images, explanations, title="AI-Generated Story Scenes"):
    """Build a PDF with one image + explanation section per scene.

    Args:
        images: Sequence of scene images (objects with ``.save(path)``);
            falsy entries are skipped.
        explanations: Sequence of explanation strings, indexed like *images*.
        title: Heading placed at the top of the document.

    Returns:
        Path to the temporary ``.pdf`` file that was written.
    """
    # Local import so this block does not depend on the file-level imports.
    from xml.sax.saxutils import escape

    # Reserve the output path and close the handle before reportlab writes it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        pdf_path = tmp.name

    doc = SimpleDocTemplate(pdf_path, pagesize=letter)
    styles = getSampleStyleSheet()
    # Paragraph text is parsed as markup, so user/AI text containing "&" or
    # "<" must be escaped or the build can fail.
    story = [Paragraph(escape(str(title)), styles["Title"]), Spacer(1, 12)]

    image_paths = []
    try:
        for i, img in enumerate(images):
            if not img:
                continue
            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as img_tmp:
                image_path = img_tmp.name
            image_paths.append(image_path)
            img.save(image_path)
            # Tolerate a shorter explanations list instead of raising IndexError.
            explanation = explanations[i] if i < len(explanations) else ""
            story += [
                RLImage(image_path, width=400, height=300),
                Spacer(1, 12),
                Paragraph(f"Scene {i + 1} Explanation", styles["Heading3"]),
                Paragraph(escape(str(explanation or "")), styles["BodyText"]),
                Spacer(1, 24),
            ]
        doc.build(story)
    finally:
        # The PNGs are only needed while the document is being built.
        for path in image_paths:
            try:
                os.remove(path)
            except OSError:
                pass
    return pdf_path


def reset_fields():
    """Return blank values for the six scene-description inputs.

    Returns:
        Five empty strings followed by ``"Fantasy"`` (the background style).
    """
    return "", "", "", "", "", "Fantasy"


def recreate_scene_handler(
        num_scenes, theme, char_count, character_names,
        dialogue, dialogue_speaker, char_styles, char_moods, bg_style,
        images, summaries, explanations, scene_number_to_recreate):
    """Regenerate an already-generated scene chosen by its 1-based number.

    Args:
        num_scenes .. explanations: Forwarded unchanged to ``generate_scene``.
        scene_number_to_recreate: 1-based scene number entered by the user.

    Returns:
        The same 8-tuple shape as ``generate_scene``. When the number does not
        refer to an existing scene, the image is cleared, the explanation slot
        carries an error message and the stored lists are returned untouched.
    """
    try:
        index = int(scene_number_to_recreate) - 1
    except (TypeError, ValueError):
        # A cleared or non-numeric box: treat as an invalid scene number
        # instead of crashing the handler.
        index = -1

    if index < 0 or index >= len(images):
        return (
            None,
            "",
            "❌ Invalid input. You have not generated Scene {} yet.".format(scene_number_to_recreate),
            images,
            summaries,
            explanations,
            f"⚠️ Scene {scene_number_to_recreate} is not available. Generate it first.",
            # Leave the Done button as it is; a mistyped number should not
            # hide it once every scene has been generated.
            gr.update(),
        )

    return generate_scene(
        num_scenes, theme, char_count, character_names,
        dialogue, dialogue_speaker, char_styles, char_moods, bg_style,
        images, summaries, explanations,
        recreate_mode=True, current_index=index
    )


# -------------------- Core Logic --------------------
def generate_scene(num_scenes, theme, char_count, character_names,
                   dialogue, dialogue_speaker, char_styles, char_moods, bg_style,
                   images, summaries, explanations, recreate_mode=False, current_index=0):
    """Generate (or regenerate) one scene: image, summary and explanation.

    Args:
        num_scenes: Total number of scenes planned for the story.
        theme, char_count, character_names, dialogue, dialogue_speaker,
            char_styles, char_moods, bg_style: Values interpolated into the
            image prompt.
        images, summaries, explanations: Per-scene lists; updated in place.
        recreate_mode: When true, overwrite the entry at *current_index*
            instead of appending a new scene.
        current_index: 0-based scene index. Only honoured in recreate mode;
            when appending, the index is the next free slot.

    Returns:
        ``(image, summary, explanation, images, summaries, explanations,
        status, done_button_update)``. When nothing is generated the first
        three items are no-op ``gr.update()`` values.
    """
    try:
        total_scenes = int(num_scenes)
    except (TypeError, ValueError):
        # Empty scene-count box: report it rather than raising.
        return (gr.update(), gr.update(), gr.update(), images, summaries, explanations,
                "⚠️ Please enter a valid number of scenes.", gr.update())

    if not recreate_mode:
        # Check the limit before calling the models so no request is wasted.
        if len(images) >= total_scenes:
            return (gr.update(), gr.update(), gr.update(), images, summaries, explanations,
                    f"✅ All {num_scenes} scenes have been generated.", gr.update(visible=True))
        # The UI passes a state that is never advanced, so derive the scene
        # number from how many scenes already exist.
        current_index = len(images)

    prompt = (
        f"A {bg_style}-style illustration for Scene {current_index + 1} with {char_count} characters in a '{theme}' setting. "
        f"Characters: {character_names}. They are dressed as: {char_styles}. Current mood: {char_moods}. "
    )
    if dialogue:
        prompt += f'The character "{dialogue_speaker}" says: "{dialogue}". Display this in a speech bubble.'
        prompt += f' Please visualize this as a speech bubble above {dialogue_speaker}, like in a cartoon.'

    image = generate_image_from_text(prompt)
    summary = summarize_scene(prompt)
    explanation = explain_scene(image) if image else "Explanation unavailable."

    if recreate_mode:
        images[current_index] = image
        summaries[current_index] = summary
        explanations[current_index] = explanation
    else:
        images.append(image)
        summaries.append(summary)
        explanations.append(explanation)

    status = f"✅ Scene {current_index + 1} {'recreated' if recreate_mode else 'generated'}."
    # ">=" keeps the Done button visible if the planned total is lowered later.
    done_visible = len(images) >= total_scenes
    return (image, summary, explanation, images, summaries, explanations,
            status, gr.update(visible=done_visible))


def finalize_story(images, explanations, title):
    """Export the story as a text file of explanations and a PDF.

    Args:
        images: Generated scene images.
        explanations: Explanation text per scene.
        title: Story title for the PDF; a blank title falls back to
            ``generate_pdf``'s default heading.

    Returns:
        ``(txt_path, pdf_path)``, or ``(None, None)`` when there is nothing
        to export.
    """
    if not images or not explanations:
        return None, None

    if title and str(title).strip():
        pdf = generate_pdf(images, explanations, title)
    else:
        pdf = generate_pdf(images, explanations)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="w", encoding="utf-8") as txt:
        for i, exp in enumerate(explanations):
            txt.write(f"Scene {i+1}:\n{exp}\n\n")
        txt_path = txt.name
    return txt_path, pdf


def ai_write_scene(theme, total_scenes, scene_summaries):
    """Ask the text model to draft the inputs for the next scene.

    Args:
        theme: Global story theme.
        total_scenes: Planned number of scenes.
        scene_summaries: Summaries of the scenes generated so far.

    Returns:
        ``(character_names, dialogue, speaker, outfits, moods, bg_style,
        status)``. Missing fields are empty strings (``"Fantasy"`` for the
        background style); on any error a failure status is returned.
    """
    try:
        scene_index = len(scene_summaries) + 1
        if scene_index > int(total_scenes):
            return "", "", "", "", "", "Fantasy", f"✅ All {total_scenes} scenes completed."

        story_so_far = "\n".join(
            f"Scene {i+1}: {s}" for i, s in enumerate(scene_summaries)
        ) if scene_summaries else ""

        prompt = f"""
        You are co-writing a story set in this theme: "{theme}".
        Generate Scene {scene_index} of {total_scenes}.
        Continue the story logically based on previous scenes (if any):
        {story_so_far}

        Return:
        1. Character names (existing or new),
        2. A single dialogue line,
        3. The speaker of that dialogue,
        4. Outfits worn,
        5. Characters' emotional moods,
        6. Background style (choose from: Realistic, Cartoon, Fantasy, Dark Fantasy).
        Only provide the raw values, no headers.
        """

        response = text_model.generate_content(prompt).text.strip()
        # One value per non-empty line; drop any leading "1. " style numbering.
        parts = [
            re.sub(r"^\d+\.\s*", "", line.strip())
            for line in response.split("\n")
            if line.strip()
        ]

        char_names = parts[0] if len(parts) > 0 else ""
        dialogue = parts[1] if len(parts) > 1 else ""
        speaker = parts[2] if len(parts) > 2 else ""
        outfits = parts[3] if len(parts) > 3 else ""
        moods = parts[4] if len(parts) > 4 else ""
        bg_style = parts[5] if len(parts) > 5 else "Fantasy"

        return char_names, dialogue, speaker, outfits, moods, bg_style, f"📝 Scene {scene_index} ready to generate."
    except Exception as e:
        print("AI write error:", e)
        return "", "", "", "", "", "Fantasy", "⚠️ AI scene generation failed."
# -------------------- UI --------------------
# Stylesheet handed to gr.Blocks(css=...). Selectors target the elem_id /
# elem_classes values assigned to the components below.
CUSTOM_CSS = """
body { background-color: #ffffff; }
.gradio-container { max-width: 1000px; margin: 2rem auto; padding: 32px; background: #d5f2ee; border-radius: 20px; box-shadow: 0 10px 40px rgba(0, 0, 0, 0.08); font-family: 'Segoe UI', sans-serif; }
.gr-markdown h2, .gr-markdown h3 { color: #2e4053; }
.gr-button { border-radius: 10px; font-weight: bold; padding: 12px 24px; transition: all 0.4s ease; box-shadow: 0 4px 10px rgba(0,0,0,0.15); }
#generate-btn, #ai-write-btn, #recreate-btn, #reset-btn, #done-btn, #recreate-scene-index { background-color: #66b2b2; color: white; border: 1px solid #000000; border-radius:10px; margin:12px; }
#generate-btn, #ai-write-btn{ display: flex; justify-content: center; gap: 10px; }
#recreate-btn,#reset-btn, #done-btn { display: flex; justify-content: center; gap: 10px; }
#tts-btn{ background-color: #66b2b2; color: white; border: 1px solid #000000; border-radius:10px; width:700px; height:80px; }
#generate-btn:hover, #ai-write-btn:hover, #recreate-btn:hover, #reset-btn:hover, #done-btn:hover, #tts-btn:hover{ background: #008080 ; transform: scale(1.05); color: #000; cursor: pointer; border-radius:10px; }
input, textarea, select { border-radius: 8px ; border: 1px solid #004c4c; padding: 10px !important; background-color: #d5f2ee; box-shadow: inset 0 1px 3px rgba(0,0,0,0.05); transition: all 0.2s ease-in-out; }
input:focus, textarea:focus, select:focus { border-color: #004c4c !important; box-shadow: 0 0 6px rgba(255, 105, 180, 0.3); outline: none; }
.custom-rows{ background: #e1f5f5; padding: 10px; border-radius: 8px; }
/* 🎆 Scene Image Border */
.gr-image img { border: 4px dashed #004c4c; border-radius: 16px; padding: 4px; }
.gr-accordion { background-color: #d4a373; border: 1px solid #004c4c; border-radius: 12px; padding: 12px; margin-bottom: 12px; }
.gr-accordion .gr-box { background-color: #d4a373; border-radius: 10px; padding: 12px; }
.gr-image, .gr-audio, .gr-file { border: 1px solid #d6eaf8; border-radius: 12px; background-color: #d1e8e8; padding: 12px; }
.animated-title { font-size: 3rem; font-weight: bold; text-align: center; color: #147d5a; animation: float 3s ease-in-out infinite; text-shadow: 2px 2px 4px rgba(0,0,0,0.2); margin-bottom: 1.5rem; font-family: 'Comic Sans MS', cursive, sans-serif; }
#accordion { background: linear-gradient(to right, #ffffff, #66b2b2); border: 2px solid #004c4c; border-radius: 12px; padding: 16px; box-shadow: 0 4px 12px rgba(0,0,0,0.1); font-family: 'Segoe UI', sans-serif; transition: all 0.3s ease-in-out; }
#accordion:hover { box-shadow: 0 6px 20px rgba(0,0,0,0.2); transform: scale(1.01); }
#accordion h2 { color: #343a40; font-weight: 600; font-size: 1.2rem; }
#accordion1{ background: linear-gradient(to right, #ffffff, #66b2b2); border: 2px solid #004c4c; border-radius: 12px; padding: 16px; box-shadow: 0 4px 12px rgba(0,0,0,0.1); font-family: 'Segoe UI', sans-serif; transition: all 0.3s ease-in-out; }
#accordion1:hover { box-shadow: 0 6px 20px rgba(0,0,0,0.2); transform: scale(1.01); }
#accordion1 h2 { color: #343a40; font-weight: 600; font-size: 1.2rem; }
#accordion2{ background: linear-gradient(to right, #ffffff, #66b2b2); border: 2px solid #004c4c; border-radius: 12px; padding: 16px; box-shadow: 0 4px 12px rgba(0,0,0,0.1); font-family: 'Segoe UI', sans-serif; transition: all 0.3s ease-in-out; }
#accordion2:hover { box-shadow: 0 6px 20px rgba(0,0,0,0.2); transform: scale(1.01); }
#accordion2 h2 { color: #343a40; font-weight: 600; font-size: 1.2rem; }
#custom-dropdown select { background-color: #d5f2ee; border: 2px solid #008080; border-radius: 10px; padding: 10px 12px; font-size: 1rem; color: #004c4c; font-weight: 600; transition: all 0.3s ease-in-out; box-shadow: inset 0 1px 3px rgba(0,0,0,0.08); }
#custom-dropdown select:focus { outline: none; border-color: #00a3a3; box-shadow: 0 0 8px rgba(0, 163, 163, 0.3); background-color: #d5f2ee; }
#custom-dropdown label { font-weight: bold; color: #2e4053; margin-bottom: 6px; }
#output { background: #fff0f5; border: 2px dashed #004c4c; border-radius: 14px; padding: 16px; font-size: 1.1rem; color: #4a235a; animation: floatX 4s ease-in-out infinite; box-shadow: 0 4px 8px rgba(214, 51, 132, 0.15); transition: all 0.3s ease-in-out; }
#output:hover { background: #ffe0ec; transform: scale(1.01); cursor: default; }
@keyframes float { 0% { transform: translateY(0px); } 50% { transform: translateY(-10px); } 100% { transform: translateY(0px); } }
@keyframes floatX { 0% { transform: translateX(0); } 50% { transform: translateX(15px); } 100% { transform: translateX(0); } }
@keyframes pulseColor { 0% { color: #ff4081; } 50% { color: #7e57c2; } 100% { color: #42a5f5; } }
@keyframes backgroundMove { 0% { background-position: 0% 50%; } 100% { background-position: 100% 50%; } }
@keyframes bounce { 0% { transform: scale(1); } 50% { transform: scale(1.1); } 100% { transform: scale(1); } }
.gr-file label[for^=component-] { background-color: #ffe6f0; border: 2px solid #006666; border-radius: 12px; padding: 10px; transition: all 0.3s ease-in-out; box-shadow: 0 4px 10px rgba(255, 105, 180, 0.2); font-weight: bold; color: #4a235a; font-size: 1rem; text-align: center; }
.gr-file label[for^=component-]:hover { background-color: #f8bbd0; transform: scale(1.03); box-shadow: 0 6px 12px rgba(255, 105, 180, 0.3); cursor: pointer; }
.gr-audio { border: 2px solid #006666; background-color: #b2d8d8; border-radius: 16px; padding-left: 12px; animation: pulseColor 3s infinite; transition: transform 0.3s ease; }
.gr-audio:hover { transform: scale(1.02); box-shadow: 0 6px 12px rgba(171, 71, 188, 0.2); }
"""

# NOTE(review): the nesting of rows/groups below was reconstructed from a
# source whose indentation was lost -- confirm against the intended layout.
with gr.Blocks(title="Comic Creator", css=CUSTOM_CSS) as demo:
    # NOTE(review): the wrapper markup of this heading was missing from the
    # source; it is rebuilt with the otherwise-unused `.animated-title` class
    # defined in the stylesheet. Confirm the original element.
    gr.Markdown('<div class="animated-title">🎬 Comic Generator</div>')
    gr.Markdown("Describe your story one scene at a time, with AI-generated images, summaries, and explanations.\nPut your own GOOGLE API KEY (named as GOOGLE_API_KEY) in the SECRET VARIABLE.")

    # --- Story-wide settings ---
    with gr.Accordion("🧩 Story Setup", open=True, elem_id="accordion"):
        title = gr.Textbox(label="📖 Story Title", placeholder="e.g. The Enchanted Forest", elem_id="textarea")
        scene_total = gr.Number(label="🔢 Number of Scenes", precision=0, value=3, elem_id="input")
        theme = gr.Textbox(label="🌍 Global Theme", placeholder="e.g. A magical forest full of glowing creatures", elem_id="textarea")

    # --- Per-scene description inputs ---
    gr.Markdown("### ✨ Describe Your Next Scene", elem_id="accordion1")
    with gr.Group(elem_id="accordion"):
        with gr.Row(elem_classes="custom-rows"):
            char_count = gr.Number(label="👥 Number of Characters", precision=0, value=2, elem_id="input")
            character_names = gr.Textbox(label="🧙‍♂️ Character Names", elem_id="textarea")
        with gr.Row(elem_classes="custom-rows"):
            dialogue = gr.Textbox(label="💬 Dialogue (optional)", placeholder="e.g. 'Protect the forest!'", elem_id="textarea")
            dialogue_speaker = gr.Textbox(label="🗣️ Who says the dialogue?", placeholder="e.g. Bramble", elem_id="textarea")
        with gr.Row(elem_classes="custom-rows"):
            char_styles = gr.Textbox(label="🧥 Outfit Descriptions", placeholder="e.g. Elora wears a leafy cloak, Bramble has a warrior vest", elem_id="textarea")
            char_moods = gr.Textbox(label="😠 Character Moods", placeholder="e.g. Elora is cautious, Bramble is brave", elem_id="textarea")
        bg_style = gr.Textbox(label="🎨 Background Style", placeholder="e.g. Realistic, Cartoon, Fantasy, Dark Fantasy", elem_id="textarea")

    # --- Action buttons ---
    with gr.Group(elem_id="accordion"):
        with gr.Row(elem_classes="custom-rows"):
            with gr.Row(elem_classes="custom-rows"):
                generate_btn = gr.Button("➕ Generate The Scene", elem_id="generate-btn")
                ai_coauthor_btn = gr.Button("🤖 Let AI Write This One", elem_id="ai-write-btn")
            with gr.Row(elem_classes="custom-rows"):
                recreate_btn = gr.Button("🔄 Recreate The Scene", elem_id="recreate-btn")
                reset_btn = gr.Button("\n⏭️ Reset", elem_id="reset-btn")
        with gr.Row(elem_classes="custom-rows"):
            # elem_id is hyphenated so the `#recreate-scene-index` rule in the
            # stylesheet matches it (it was "recreate_scene_index" before).
            recreate_scene_index = gr.Number(label="🔢 Scene Number to Recreate", precision=0, value=1, elem_id="recreate-scene-index")

    # --- Outputs for the most recent scene ---
    status = gr.Markdown(elem_classes="gr-image")
    image_output = gr.Image(label="🖼️ Scene Image", type="pil", elem_id="output")
    summary_output = gr.Markdown(label="📝 Scene Summary", elem_id="output")
    explanation_output = gr.Textbox(label="📖 Scene Explanation", lines=6, elem_id="output")

    with gr.Group(elem_id="accordion"):
        with gr.Row(elem_classes="custom-rows"):
            tts_btn = gr.Button("🔊 Read Aloud", elem_id="tts-btn")
            tts_audio = gr.Audio(label="Audio", autoplay=False, elem_classes="gr-audio")

    # Hidden until generate_scene reports that every scene exists.
    done_btn = gr.Button("✅ Done", visible=False, elem_id="done-btn")

    with gr.Group(elem_id="accordion2"):
        with gr.Row(elem_classes="custom-rows"):
            txt_file = gr.File(label="📄 Explanations (.txt)")
            pdf_file = gr.File(label="📘 Scene PDF")

    # States
    scene_images = gr.State([])
    scene_explanations = gr.State([])
    scene_summaries = gr.State([])
    # No handler below writes to this state, so it stays at 0.
    current_scene_index = gr.State(0)
    # Not referenced by any handler below.
    recreate_mode = gr.State(True)

    generate_btn.click(
        fn=generate_scene,
        inputs=[
            scene_total, theme, char_count, character_names,
            dialogue, dialogue_speaker, char_styles, char_moods, bg_style,
            scene_images, scene_summaries, scene_explanations,
            gr.State(False),  # recreate_mode: always append from this button
            current_scene_index
        ],
        outputs=[
            image_output, summary_output, explanation_output,
            scene_images, scene_summaries, scene_explanations,
            status, done_btn
        ]
    )

    recreate_btn.click(
        fn=recreate_scene_handler,
        inputs=[
            scene_total, theme, char_count, character_names,
            dialogue, dialogue_speaker, char_styles, char_moods, bg_style,
            scene_images, scene_summaries, scene_explanations,
            recreate_scene_index
        ],
        outputs=[
            image_output, summary_output, explanation_output,
            scene_images, scene_summaries, scene_explanations,
            status, done_btn
        ]
    )

    ai_coauthor_btn.click(
        fn=ai_write_scene,
        inputs=[theme, scene_total, scene_summaries],
        outputs=[character_names, dialogue, dialogue_speaker, char_styles, char_moods, bg_style, status]
    )

    done_btn.click(
        fn=finalize_story,
        inputs=[scene_images, scene_explanations, title],
        outputs=[txt_file, pdf_file]
    )

    reset_btn.click(
        fn=reset_fields,
        inputs=[],
        outputs=[
            character_names, dialogue, dialogue_speaker, char_styles, char_moods, bg_style
        ]
    )

    tts_btn.click(
        fn=text_to_speech,
        inputs=[explanation_output],
        outputs=[tts_audio]
    )

demo.launch()