Spaces:

fizzarif7
/

voice2comic

Sleeping

App Files Files Community

voice2comic / app.py

fizzarif7

Update app.py

e306d8d verified 7 months ago

raw

history blame contribute delete

20.8 kB

	import os
	import gradio as gr
	from PIL import Image
	from io import BytesIO
	from dotenv import load_dotenv
	from gtts import gTTS
	import tempfile
	import traceback
	from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image as RLImage
	from reportlab.lib.pagesizes import letter
	from reportlab.lib.styles import getSampleStyleSheet
	import re
	import google.generativeai as genai


	# Load API keys: pull a local .env file (if any) into the process environment,
	# then read the Gemini key from it.
	load_dotenv()
	api_key = os.environ.get("GOOGLE_API_KEY")

	# Abort at import time when the key is absent or empty; every model call needs it.
	if api_key is None or api_key == "":
	    raise EnvironmentError("Missing API keys. Check GOOGLE_API_KEY in .env.")

	# Configure models: a single client configuration is shared by the text model
	# and the image-capable model.
	genai.configure(api_key=api_key)
	text_model = genai.GenerativeModel("gemini-1.5-flash")
	image_model = genai.GenerativeModel(
	    model_name="gemini-2.0-flash-preview-image-generation",
	    generation_config={"response_modalities": ["TEXT", "IMAGE"]},
	)


	# -------------------- Utility Functions --------------------
	def generate_image_from_text(prompt):
	    """Generate one image for *prompt* with the image model.

	    Args:
	        prompt: Text description sent to ``image_model.generate_content``.

	    Returns:
	        The first response part carrying ``image/*`` inline data, opened as a
	        PIL image; ``None`` when no such part exists or any error occurs
	        (the error is printed, not raised).
	    """
	    try:
	        reply = image_model.generate_content(prompt)
	        for piece in reply.candidates[0].content.parts:
	            if not hasattr(piece, "inline_data"):
	                continue
	            payload = piece.inline_data
	            if payload.mime_type.startswith("image/"):
	                return Image.open(BytesIO(payload.data))
	    except Exception as e:
	        print("Image generation error:", e)
	    return None

	def summarize_scene(scene_text):
	    """Return a one-sentence summary of *scene_text* from the text model.

	    Args:
	        scene_text: The scene description to condense.

	    Returns:
	        The stripped model reply, or ``"Summary unavailable."`` when the
	        request fails (the error is printed, not raised).
	    """
	    try:
	        request = f"Summarize this scene in one sentence: {scene_text}"
	        reply = text_model.generate_content(request)
	        summary = reply.text.strip()
	    except Exception as e:
	        print("Summary error:", e)
	        summary = "Summary unavailable."
	    return summary

	def explain_scene(image):
	    """Ask the text model for a detailed explanation of a scene image.

	    Args:
	        image: The picture to describe; it is sent to the model together with
	            the instruction text.

	    Returns:
	        The stripped model reply, or ``"Explanation unavailable."`` when the
	        request fails.
	    """
	    try:
	        response = text_model.generate_content([image, "Explain this image scene in detail."])
	        return response.text.strip()
	    except Exception as e:
	        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit are
	        # no longer swallowed, and logged like the sibling helpers so failures
	        # are visible in the Space logs.
	        print("Explanation error:", e)
	        return "Explanation unavailable."

	def text_to_speech(text):
	    """Convert *text* to speech and return the path of the resulting MP3.

	    Args:
	        text: The text to read aloud.

	    Returns:
	        Path of a temporary ``.mp3`` file, or ``None`` when *text* is blank or
	        synthesis fails (the error is printed, not raised).
	    """
	    try:
	        # Nothing to speak: bail out before reserving a temp file that would
	        # otherwise be left behind when synthesis rejects the empty input.
	        if not text or not str(text).strip():
	            return None

	        tts = gTTS(text)
	        # Reserve the file name, then close the handle before gTTS writes to
	        # it, so no descriptor is leaked and the path can be reopened on
	        # platforms that forbid a second open of a still-open temp file.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
	            audio_path = tmp.name
	        tts.save(audio_path)
	        return audio_path
	    except Exception as e:
	        print("TTS error:", e)
	        return None

	def generate_pdf(images, explanations, title="AI-Generated Story Scenes"):
	    """Build a PDF with one image-plus-explanation section per scene.

	    Args:
	        images: Scene images in order; falsy entries (failed generations) are
	            skipped. Each image must provide ``save(path)``.
	        explanations: Explanation text per scene, indexed like *images*.
	        title: Heading of the document; a blank title falls back to the
	            default heading.

	    Returns:
	        Path of the generated temporary ``.pdf`` file.
	    """
	    # Local import: Paragraph text is parsed as XML-like markup, so free text
	    # (user title, model output) must be escaped before it is handed over.
	    from xml.sax.saxutils import escape

	    def _markup(text):
	        """Escape markup characters and keep line breaks visible."""
	        return escape(str(text)).replace("\n", "<br/>")

	    # Reserve the output name and close the handle before ReportLab opens it.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
	        pdf_path = tmp.name

	    styles = getSampleStyleSheet()
	    heading = title or "AI-Generated Story Scenes"
	    story = [Paragraph(_markup(heading), styles["Title"]), Spacer(1, 12)]

	    scratch_pngs = []
	    try:
	        for i, img in enumerate(images):
	            if not img:
	                continue
	            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as img_tmp:
	                png_path = img_tmp.name
	            scratch_pngs.append(png_path)
	            img.save(png_path)

	            # Guard against the two lists getting out of step.
	            explanation = explanations[i] if i < len(explanations) else "Explanation unavailable."
	            story += [
	                RLImage(png_path, width=400, height=300),
	                Spacer(1, 12),
	                Paragraph(f"Scene {i + 1} Explanation", styles["Heading3"]),
	                Paragraph(_markup(explanation), styles["BodyText"]),
	                Spacer(1, 24),
	            ]
	        SimpleDocTemplate(pdf_path, pagesize=letter).build(story)
	    finally:
	        # The PNGs are only needed while the document is being built.
	        for png_path in scratch_pngs:
	            try:
	                os.remove(png_path)
	            except OSError:
	                # Best-effort cleanup; a leftover temp file is not fatal.
	                pass
	    return pdf_path
	def reset_fields():
	    """Return the values used to clear the scene form.

	    Returns:
	        A 6-tuple: five empty strings followed by ``"Fantasy"``.
	    """
	    cleared_text_fields = [""] * 5
	    return (*cleared_text_fields, "Fantasy")

	def recreate_scene_handler(
	        num_scenes, theme, char_count, character_names, dialogue,
	        dialogue_speaker, char_styles, char_moods, bg_style,
	        images, summaries, explanations, scene_number_to_recreate):
	    """Regenerate an already-generated scene chosen by its 1-based number.

	    Args:
	        num_scenes: Total number of scenes planned for the story.
	        theme, char_count, character_names, dialogue, dialogue_speaker,
	        char_styles, char_moods, bg_style: Scene description fields,
	            forwarded unchanged to ``generate_scene``.
	        images, summaries, explanations: Per-scene result lists.
	        scene_number_to_recreate: 1-based scene number from the UI; may be
	            ``None`` when the number box is empty.

	    Returns:
	        The 8-tuple produced by ``generate_scene`` in recreate mode, or an
	        error tuple of the same shape when the scene number does not refer to
	        an existing scene.
	    """
	    try:
	        index = int(scene_number_to_recreate) - 1
	    except (TypeError, ValueError):
	        # Empty or non-numeric input: route it through the invalid-input
	        # branch below instead of raising.
	        index = -1

	    if index < 0 or index >= len(images):
	        return (
	            None, "", "❌ Invalid input. You have not generated Scene {} yet.".format(scene_number_to_recreate),
	            images, summaries, explanations,
	            f"⚠️ Scene {scene_number_to_recreate} is not available. Generate it first.",
	            # Leave the Done button as it is: a rejected request must not
	            # hide it once the story is complete.
	            gr.update()
	        )

	    return generate_scene(
	        num_scenes, theme, char_count, character_names, dialogue,
	        dialogue_speaker, char_styles, char_moods, bg_style,
	        images, summaries, explanations,
	        recreate_mode=True, current_index=index
	    )



	# -------------------- Core Logic --------------------

	def generate_scene(num_scenes, theme, char_count, character_names, dialogue,
	                   dialogue_speaker, char_styles, char_moods, bg_style,
	                   images, summaries, explanations, recreate_mode=False, current_index=0):
	    """Generate (or regenerate) one scene: image, summary and explanation.

	    Args:
	        num_scenes: Total number of scenes planned for the story.
	        theme: Global story setting used in the image prompt.
	        char_count: Number of characters in the scene.
	        character_names: Names of the characters.
	        dialogue: Optional line of dialogue; when non-empty a speech bubble is
	            requested in the prompt.
	        dialogue_speaker: Character who speaks the dialogue.
	        char_styles: Outfit descriptions.
	        char_moods: Character moods.
	        bg_style: Illustration style.
	        images, summaries, explanations: Per-scene result lists; they are
	            updated in place and also returned.
	        recreate_mode: When true, overwrite the scene at *current_index*
	            instead of appending a new one.
	        current_index: 0-based scene to overwrite. Only honoured in recreate
	            mode; a new scene always takes the next free position.

	    Returns:
	        An 8-tuple ``(image, summary, explanation, images, summaries,
	        explanations, status_message, done_button_update)``.
	    """
	    total_scenes = int(num_scenes)

	    if not recreate_mode:
	        # Check the limit before calling any model so a surplus click costs
	        # no API requests.
	        if len(images) >= total_scenes:
	            return (gr.update(), gr.update(), gr.update(), images, summaries, explanations,
	                    f"✅ All {num_scenes} scenes have been generated.", gr.update(visible=True))
	        # The UI passes a state that stays at 0, so derive the scene number
	        # from what has been generated; otherwise every new scene would be
	        # labelled "Scene 1" in both the prompt and the status message.
	        current_index = len(images)

	    prompt = (
	        f"A {bg_style}-style illustration for Scene {current_index + 1} with {char_count} characters in a '{theme}' setting. "
	        f"Characters: {character_names}. They are dressed as: {char_styles}. Current mood: {char_moods}. "
	    )
	    if dialogue:
	        prompt += f'The character "{dialogue_speaker}" says: "{dialogue}". Display this in a speech bubble.'
	        prompt += f' Please visualize this as a speech bubble above {dialogue_speaker}, like in a cartoon.'

	    image = generate_image_from_text(prompt)
	    summary = summarize_scene(prompt)
	    explanation = explain_scene(image) if image is not None else "Explanation unavailable."

	    if recreate_mode:
	        images[current_index] = image
	        summaries[current_index] = summary
	        explanations[current_index] = explanation
	    else:
	        images.append(image)
	        summaries.append(summary)
	        explanations.append(explanation)

	    status = f"✅ Scene {current_index + 1} {'recreated' if recreate_mode else 'generated'}."
	    # ">=" keeps the Done button visible if the planned total was lowered
	    # after scenes had already been generated.
	    done_visible = len(images) >= total_scenes

	    return image, summary, explanation, images, summaries, explanations, status, gr.update(visible=done_visible)

	def finalize_story(images, explanations, title):
	    """Export the finished story as a text file and a PDF.

	    Args:
	        images: Scene images, passed on to ``generate_pdf``.
	        explanations: Scene explanations, written to both exports.
	        title: Story title, passed on to ``generate_pdf``.

	    Returns:
	        ``(txt_path, pdf_path)``, or ``(None, None)`` when either list is
	        empty.
	    """
	    if not (images and explanations):
	        return None, None

	    pdf = generate_pdf(images, explanations, title)

	    export = tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="w", encoding="utf-8")
	    with export as txt:
	        for number, text in enumerate(explanations, start=1):
	            txt.write(f"Scene {number}:\n{text}\n\n")
	        txt_path = txt.name

	    return txt_path, pdf

	def ai_write_scene(theme, total_scenes, scene_summaries):
	    """Let the text model draft the form values for the next scene.

	    Args:
	        theme: Global story theme.
	        total_scenes: Planned number of scenes.
	        scene_summaries: Summaries of the scenes generated so far; the next
	            scene number is ``len(scene_summaries) + 1``.

	    Returns:
	        A 7-tuple ``(character_names, dialogue, speaker, outfits, moods,
	        background_style, status_message)``. Values the reply does not
	        provide stay empty (the style defaults to ``"Fantasy"``); the same
	        blank values are returned when all scenes are done or on any error.
	    """
	    blank_form = ("", "", "", "", "", "Fantasy")
	    try:
	        scene_index = len(scene_summaries) + 1
	        if scene_index > int(total_scenes):
	            return (*blank_form, f"✅ All {total_scenes} scenes completed.")

	        story_so_far = "\n".join(
	            f"Scene {number}: {summary}"
	            for number, summary in enumerate(scene_summaries, start=1)
	        )
	        prompt = f"""
	You are co-writing a story set in this theme: "{theme}".
	Generate Scene {scene_index} of {total_scenes}.
	Continue the story logically based on previous scenes (if any):

	{story_so_far}

	Return:
	1. Character names (existing or new),
	2. A single dialogue line,
	3. The speaker of that dialogue,
	4. Outfits worn,
	5. Characters' emotional moods,
	6. Background style (choose from: Realistic, Cartoon, Fantasy, Dark Fantasy).
	Only provide the raw values, no headers.
	"""
	        response = text_model.generate_content(prompt).text.strip()

	        # One value per non-empty line, with any leading "N." numbering removed.
	        answers = []
	        for raw_line in response.split("\n"):
	            cleaned = raw_line.strip()
	            if cleaned:
	                answers.append(re.sub(r"^\d+\.\s*", "", cleaned))

	        # Use at most six answers and pad the rest with the blank defaults.
	        answers = answers[:6]
	        filled = answers + list(blank_form[len(answers):])
	        char_names, dialogue, speaker, outfits, moods, bg_style = filled

	        return char_names, dialogue, speaker, outfits, moods, bg_style, f"📝 Scene {scene_index} ready to generate."
	    except Exception as e:
	        print("AI write error:", e)
	        return (*blank_form, "⚠️ AI scene generation failed.")


	# -------------------- UI --------------------


	# Gradio UI: page styling, input form, output widgets, per-session state and
	# event wiring.
	# CSS fixes relative to the previous revision: "##b2d8d8" -> "#b2d8d8",
	# "padding: left 12px" -> "padding-left: 12px", and the invalid "font-color"
	# declaration was dropped (the rule already sets "color").
	# NOTE(review): the nesting of the layout containers below was reconstructed;
	# confirm that each Row/Group holds the intended widgets.
	with gr.Blocks(
	    title="Comic Creator", css="""
	body {
	background-color: #ffffff;
	}

	.gradio-container {
	max-width: 1000px;
	margin: 2rem auto;
	padding: 32px;
	background: #d5f2ee;
	border-radius: 20px;
	box-shadow: 0 10px 40px rgba(0, 0, 0, 0.08);
	font-family: 'Segoe UI', sans-serif;
	}

	.gr-markdown h2, .gr-markdown h3 {
	color: #2e4053;
	}

	.gr-button {
	border-radius: 10px;
	font-weight: bold;
	padding: 12px 24px;
	transition: all 0.4s ease;
	box-shadow: 0 4px 10px rgba(0,0,0,0.15);
	}

	#generate-btn,
	#ai-write-btn,
	#recreate-btn,
	#reset-btn,
	#done-btn,
	#recreate-scene-index {
	background-color: #66b2b2;
	color: white;
	border: 1px solid #000000;
	border-radius:10px;
	margin:12px;
	}

	#generate-btn, #ai-write-btn{
	display: flex;
	justify-content: center;
	gap: 10px;
	}

	#recreate-btn,#reset-btn, #done-btn {
	display: flex;
	justify-content: center;
	gap: 10px;
	}
	#tts-btn{
	background-color: #66b2b2;
	color: white;
	border: 1px solid #000000;
	border-radius:10px;
	width:700px;
	height:80px;
	}


	#generate-btn:hover,
	#ai-write-btn:hover,
	#recreate-btn:hover,
	#reset-btn:hover,
	#done-btn:hover,
	#tts-btn:hover{
	background: #008080 ;
	transform: scale(1.05);
	color: #000;
	cursor: pointer;
	border-radius:10px;
	}


	input, textarea, select {
	border-radius: 8px ;
	border: 1px solid #004c4c;
	padding: 10px !important;
	background-color: #d5f2ee;
	box-shadow: inset 0 1px 3px rgba(0,0,0,0.05);
	transition: all 0.2s ease-in-out;
	}
	input:focus, textarea:focus, select:focus {
	border-color: #004c4c !important;
	box-shadow: 0 0 6px rgba(255, 105, 180, 0.3);
	outline: none;
	}

	.custom-rows{
	background: #e1f5f5;
	padding: 10px;
	border-radius: 8px;
	}

	/* 🎆 Scene Image Border */
	.gr-image img {
	border: 4px dashed #004c4c;
	border-radius: 16px;
	padding: 4px;
	}

	.gr-accordion {
	background-color: #d4a373;
	border: 1px solid #004c4c;
	border-radius: 12px;
	padding: 12px;
	margin-bottom: 12px;
	}

	.gr-accordion .gr-box {
	background-color: #d4a373;
	border-radius: 10px;
	padding: 12px;
	}

	.gr-image, .gr-audio, .gr-file {
	border: 1px solid #d6eaf8;
	border-radius: 12px;
	background-color: #d1e8e8;
	padding: 12px;
	}

	.animated-title {
	font-size: 3rem;
	font-weight: bold;
	text-align: center;
	color: #147d5a;
	animation: float 3s ease-in-out infinite;
	text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
	margin-bottom: 1.5rem;
	font-family: 'Comic Sans MS', cursive, sans-serif;
	}

	#accordion {
	background: linear-gradient(to right, #ffffff, #66b2b2);
	border: 2px solid #004c4c;
	border-radius: 12px;
	padding: 16px;
	box-shadow: 0 4px 12px rgba(0,0,0,0.1);
	font-family: 'Segoe UI', sans-serif;
	transition: all 0.3s ease-in-out;
	}

	#accordion:hover {
	box-shadow: 0 6px 20px rgba(0,0,0,0.2);
	transform: scale(1.01);
	}

	#accordion h2 {
	color: #343a40;
	font-weight: 600;
	font-size: 1.2rem;
	}
	#accordion1{
	background: linear-gradient(to right, #ffffff, #66b2b2);
	border: 2px solid #004c4c;
	border-radius: 12px;
	padding: 16px;
	box-shadow: 0 4px 12px rgba(0,0,0,0.1);
	font-family: 'Segoe UI', sans-serif;
	transition: all 0.3s ease-in-out;
	}

	#accordion1:hover {
	box-shadow: 0 6px 20px rgba(0,0,0,0.2);
	transform: scale(1.01);
	}

	#accordion1 h2 {
	color: #343a40;
	font-weight: 600;
	font-size: 1.2rem;

	}
	#accordion2{
	background: linear-gradient(to right, #ffffff, #66b2b2);
	border: 2px solid #004c4c;
	border-radius: 12px;
	padding: 16px;
	box-shadow: 0 4px 12px rgba(0,0,0,0.1);
	font-family: 'Segoe UI', sans-serif;
	transition: all 0.3s ease-in-out;
	}

	#accordion2:hover {
	box-shadow: 0 6px 20px rgba(0,0,0,0.2);
	transform: scale(1.01);
	}

	#accordion2 h2 {
	color: #343a40;
	font-weight: 600;
	font-size: 1.2rem;
	}
	#custom-dropdown select {
	background-color: #d5f2ee;
	border: 2px solid #008080;
	border-radius: 10px;
	padding: 10px 12px;
	font-size: 1rem;
	color: #004c4c;
	font-weight: 600;
	transition: all 0.3s ease-in-out;
	box-shadow: inset 0 1px 3px rgba(0,0,0,0.08);
	}

	#custom-dropdown select:focus {
	outline: none;
	border-color: #00a3a3;
	box-shadow: 0 0 8px rgba(0, 163, 163, 0.3);
	background-color: #d5f2ee;
	}

	#custom-dropdown label {
	font-weight: bold;
	color: #2e4053;
	margin-bottom: 6px;
	}

	#output {
	background: #fff0f5;
	border: 2px dashed #004c4c;
	border-radius: 14px;
	padding: 16px;
	font-size: 1.1rem;
	color: #4a235a;
	animation: floatX 4s ease-in-out infinite;
	box-shadow: 0 4px 8px rgba(214, 51, 132, 0.15);
	transition: all 0.3s ease-in-out;
	}

	#output:hover {
	background: #ffe0ec;
	transform: scale(1.01);
	cursor: default;
	}



	@keyframes float {
	0% { transform: translateY(0px); }
	50% { transform: translateY(-10px); }
	100% { transform: translateY(0px); }
	}

	@keyframes floatX {
	0% { transform: translateX(0); }
	50% { transform: translateX(15px); }
	100% { transform: translateX(0); }
	}

	@keyframes pulseColor {
	0% { color: #ff4081; }
	50% { color: #7e57c2; }
	100% { color: #42a5f5; }
	}

	@keyframes backgroundMove {
	0% { background-position: 0% 50%; }
	100% { background-position: 100% 50%; }
	}
	@keyframes bounce {
	0% { transform: scale(1); }
	50% { transform: scale(1.1); }
	100% { transform: scale(1); }
	}

	.gr-file label[for^=component-] {
	background-color: #ffe6f0;
	border: 2px solid #006666;
	border-radius: 12px;
	padding: 10px;
	transition: all 0.3s ease-in-out;
	box-shadow: 0 4px 10px rgba(255, 105, 180, 0.2);
	font-weight: bold;
	color: #4a235a;
	font-size: 1rem;
	text-align: center;
	}

	.gr-file label[for^=component-]:hover {
	background-color: #f8bbd0;
	transform: scale(1.03);
	box-shadow: 0 6px 12px rgba(255, 105, 180, 0.3);
	cursor: pointer;
	}

	.gr-audio {
	border: 2px solid #006666;
	background-color: #b2d8d8;
	border-radius: 16px;
	padding-left: 12px;
	animation: pulseColor 3s infinite;
	transition: transform 0.3s ease;

	}

	.gr-audio:hover {
	transform: scale(1.02);
	box-shadow: 0 6px 12px rgba(171, 71, 188, 0.2);
	}



	""") as demo:
	    gr.Markdown('<h1 class="animated-title">🎬 Comic Generator</h1>')
	    gr.Markdown("Describe your story one scene at a time, with AI-generated images, summaries, and explanations.\nPut your own GOOGLE API KEY (named as GOOGLE_API_KEY) in the SECRET VARIABLE.")

	    # Story-wide settings.
	    with gr.Accordion("🧩 Story Setup", open=True, elem_id="accordion"):
	        title = gr.Textbox(label="📖 Story Title", placeholder="e.g. The Enchanted Forest",elem_id="textarea")
	        scene_total = gr.Number(label="🔢 Number of Scenes", precision=0, value=3, elem_id="input")
	        theme = gr.Textbox(label="🌍 Global Theme", placeholder="e.g. A magical forest full of glowing creatures", elem_id="textarea")

	    gr.Markdown("### ✨ Describe Your Next Scene", elem_id="accordion1")

	    # Per-scene description form.
	    with gr.Group(elem_id = "accordion"):
	        with gr.Row(elem_classes="custom-rows"):
	            char_count = gr.Number(label="👥 Number of Characters", precision=0, value=2, elem_id="input")
	            character_names = gr.Textbox(label="🧙‍♂️ Character Names", elem_id="textarea")
	        with gr.Row(elem_classes="custom-rows"):
	            dialogue = gr.Textbox(label="💬 Dialogue (optional)", placeholder="e.g. 'Protect the forest!'", elem_id="textarea")
	            dialogue_speaker = gr.Textbox(label="🗣️ Who says the dialogue?", placeholder="e.g. Bramble", elem_id = "textarea")

	        # NOTE(review): bg_style is assumed to share this row; confirm.
	        with gr.Row(elem_classes="custom-rows"):
	            char_styles = gr.Textbox(label="🧥 Outfit Descriptions", placeholder="e.g. Elora wears a leafy cloak, Bramble has a warrior vest", elem_id="textarea")
	            char_moods = gr.Textbox(label="😠 Character Moods", placeholder="e.g. Elora is cautious, Bramble is brave", elem_id="textarea")
	            bg_style = gr.Textbox(label="🎨 Background Style", placeholder="e.g. Realistic, Cartoon, Fantasy, Dark Fantasy" ,elem_id="textarea")

	    # Action buttons.
	    # NOTE(review): the two button rows are assumed to sit inside the outer
	    # row and the scene-number row beside it; confirm.
	    with gr.Group(elem_id="accordion"):
	        with gr.Row(elem_classes="custom-rows"):
	            with gr.Row(elem_classes="custom-rows"):
	                generate_btn = gr.Button("➕ Generate The Scene", elem_id="generate-btn")
	                ai_coauthor_btn = gr.Button("🤖 Let AI Write This One", elem_id="ai-write-btn")
	            with gr.Row(elem_classes="custom-rows"):
	                recreate_btn = gr.Button("🔄 Recreate The Scene", elem_id="recreate-btn")
	                reset_btn = gr.Button("\n⏭️ Reset", elem_id="reset-btn")
	        with gr.Row(elem_classes="custom-rows"):
	            # The id now matches the "#recreate-scene-index" selector in the
	            # stylesheet; the old underscore spelling was never styled.
	            recreate_scene_index = gr.Number(label="🔢 Scene Number to Recreate", precision=0, value=1, elem_id="recreate-scene-index")

	    # Outputs of the most recent generation.
	    status = gr.Markdown(elem_classes="gr-image")
	    image_output = gr.Image(label="🖼️ Scene Image", type="pil", elem_id="output")
	    summary_output = gr.Markdown(label="📝 Scene Summary", elem_id = "output")
	    explanation_output = gr.Textbox(label="📖 Scene Explanation", lines=6, elem_id="output")
	    with gr.Group(elem_id="accordion"):
	        with gr.Row(elem_classes="custom-rows"):
	            tts_btn = gr.Button("🔊 Read Aloud", elem_id="tts-btn")
	            tts_audio = gr.Audio(label="Audio", autoplay=False,elem_classes="gr-audio")

	    # Hidden until generate_scene reports that every planned scene exists.
	    done_btn = gr.Button("✅ Done", visible = False, elem_id="done-btn")

	    with gr.Group(elem_id="accordion2"):
	        with gr.Row(elem_classes="custom-rows"):
	            txt_file = gr.File(label="📄 Explanations (.txt)")
	            pdf_file = gr.File(label="📘 Scene PDF")

	    # States
	    scene_images = gr.State([])
	    scene_explanations = gr.State([])
	    scene_summaries = gr.State([])
	    current_scene_index = gr.State(0)
	    recreate_mode = gr.State(True)

	    # New scene: the literal gr.State(False) fills generate_scene's
	    # recreate_mode parameter.
	    generate_btn.click(
	        fn=generate_scene,
	        inputs=[
	            scene_total, theme, char_count, character_names, dialogue,
	            dialogue_speaker, char_styles, char_moods, bg_style,
	            scene_images, scene_summaries, scene_explanations,
	            gr.State(False), current_scene_index
	        ],
	        outputs=[
	            image_output, summary_output, explanation_output,
	            scene_images, scene_summaries, scene_explanations,
	            status, done_btn
	        ]
	    )

	    # Overwrite an existing scene selected by its 1-based number.
	    recreate_btn.click(
	        fn=recreate_scene_handler,
	        inputs=[
	            scene_total, theme, char_count, character_names, dialogue,
	            dialogue_speaker, char_styles, char_moods, bg_style,
	            scene_images, scene_summaries, scene_explanations,
	            recreate_scene_index
	        ],
	        outputs=[
	            image_output, summary_output, explanation_output,
	            scene_images, scene_summaries, scene_explanations,
	            status, done_btn
	        ]
	    )

	    # Fill the scene form with model-written values.
	    ai_coauthor_btn.click(
	        fn=ai_write_scene,
	        inputs=[theme, scene_total, scene_summaries],
	        outputs=[character_names, dialogue, dialogue_speaker, char_styles, char_moods, bg_style, status]
	    )

	    # Export the explanations as text and the scenes as a PDF.
	    done_btn.click(
	        fn=finalize_story,
	        inputs=[scene_images, scene_explanations, title],
	        outputs=[txt_file, pdf_file]
	    )

	    # Clear the scene form only; generated scenes are kept.
	    reset_btn.click(
	        fn=reset_fields,
	        inputs=[],
	        outputs=[
	            character_names,
	            dialogue,
	            dialogue_speaker,
	            char_styles,
	            char_moods,
	            bg_style
	        ]
	    )

	    # Read the current explanation aloud.
	    tts_btn.click(
	        fn=text_to_speech,
	        inputs=[explanation_output],
	        outputs=[tts_audio]
	    )



	demo.launch()