fizzarif7 committed on
Commit
19b0461
Β·
verified Β·
1 Parent(s): 8e59407

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +186 -0
app.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import math
4
+ import gradio as gr
5
+ from dotenv import load_dotenv
6
+ from PIL import Image
7
+ from io import BytesIO
8
+ from gtts import gTTS
9
+ import tempfile
10
+ import traceback
11
+
12
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image as RLImage
13
+ from reportlab.lib.pagesizes import letter
14
+ from reportlab.lib.styles import getSampleStyleSheet
15
+
16
+ import google.generativeai as genai
17
+ from huggingface_hub import InferenceClient
18
+
19
# Load environment variables (from a local .env file if present).
load_dotenv()
api_key = os.getenv("GOOGLE_API_KEY")
# The error message below tells users to set HF_TOKEN, but the code only read
# the lower-case "HF_token" variable. Accept the conventional upper-case name
# first and fall back to the legacy spelling so existing deployments keep working.
hf_token = os.getenv("HF_TOKEN") or os.getenv("HF_token")

# Fail fast at startup: every feature below needs both credentials.
if api_key is None or hf_token is None:
    raise ValueError("Missing API keys: Check GOOGLE_API_KEY and HF_TOKEN in secrets.")
26
+
27
# Configure APIs
genai.configure(api_key=api_key)

# Text-only Gemini model: used for scene summaries and image explanations.
text_model = genai.GenerativeModel(model_name="gemini-1.5-flash")

# Image-capable Gemini model. response_modalities requests both TEXT and
# IMAGE parts in responses, so generated images can be pulled from
# part.inline_data (see generate_image_from_text below).
image_model = genai.GenerativeModel(
    model_name="gemini-2.0-flash-preview-image-generation",
    generation_config={"response_modalities": ["TEXT", "IMAGE"]}
)

# Hugging Face Inference client — captioning fallback used by explain_scene.
hf_client = InferenceClient(token=hf_token)
35
+
36
# Utility Functions

def simple_sentence_tokenize(text):
    """Split *text* into sentences on '.', '!' or '?' followed by whitespace.

    Returns a list of non-empty sentence strings; empty or whitespace-only
    input yields an empty list.
    """
    return [s for s in re.split(r'(?<=[.!?])\s+', text.strip()) if s]


def divide_into_scenes(paragraph):
    """Group the paragraph's sentences into at most 4 roughly-equal scenes.

    Returns a list of scene strings (fewer than 4 when there are fewer
    sentences). An empty paragraph returns [] — previously per_scene was 0
    and range(0, 0, 0) raised ValueError.
    """
    sentences = simple_sentence_tokenize(paragraph)
    if not sentences:
        # Guard against the empty-input crash (range step of 0).
        return []
    per_scene = math.ceil(len(sentences) / 4)
    return [
        ' '.join(sentences[i:i + per_scene])
        for i in range(0, len(sentences), per_scene)
    ]
45
+
46
def generate_image_from_text(prompt):
    """Ask the Gemini image model to render *prompt*.

    Returns a PIL.Image on success, or None when generation fails or the
    response carries no image part.
    """
    try:
        response = image_model.generate_content(prompt)
        parts = response.candidates[0].content.parts
        # Pick the first part that carries inline image data, if any.
        image_part = next(
            (p for p in parts
             if hasattr(p, 'inline_data')
             and p.inline_data.mime_type.startswith("image/")),
            None,
        )
        if image_part is not None:
            return Image.open(BytesIO(image_part.inline_data.data))
    except Exception as exc:
        print(f"Image gen error: {exc}")
    return None
55
+
56
def summarize_scene(scene_text):
    """Produce a one-sentence caption for *scene_text* via the text model.

    Falls back to a fixed placeholder string when the API call fails.
    """
    prompt = f"Summarize this scene in one sentence: {scene_text}"
    try:
        return text_model.generate_content(prompt).text.strip()
    except Exception as err:
        print(f"Summary error: {err}")
        return "Scene summary unavailable."
63
+
64
def explain_scene(image):
    """Describe a PIL *image* in detail.

    First attempt: the Gemini text model with the image inline. On failure,
    fall back to the Hugging Face BLIP captioning model. Returns a fixed
    placeholder string when both attempts fail.
    """
    try:
        reply = text_model.generate_content([image, "Explain this image scene in detail."])
        return reply.text.strip()
    except Exception:
        pass  # fall through to the BLIP fallback below

    # Fallback: re-encode the image as PNG bytes and caption it with BLIP.
    try:
        png_buffer = BytesIO()
        image.save(png_buffer, format="PNG")
        png_buffer.seek(0)
        caption = hf_client.image_to_text(
            model="Salesforce/blip-image-captioning-base",
            image=png_buffer
        )
        return caption.strip()
    except Exception:
        traceback.print_exc()
        return "Explanation unavailable."
81
+
82
def text_to_speech(text):
    """Render *text* to an MP3 file via gTTS (network call to Google TTS).

    Returns the path of a temporary .mp3 file, or None on failure. The
    caller owns the file's eventual cleanup.
    """
    try:
        tts = gTTS(text)
        # Create the temp file, then CLOSE our handle before gTTS writes to
        # the same path: the original kept it open, leaking a descriptor and
        # failing on Windows, where an open file can't be reopened for write.
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        tmp.close()
        tts.save(tmp.name)
        return tmp.name
    except Exception as e:
        print(f"TTS error: {e}")
        return None
91
+
92
def generate_pdf(images, explanations):
    """Build a PDF of scene images paired with their explanations.

    images: list of PIL images; None entries are skipped.
    explanations: strings aligned index-for-index with *images*.
    Returns the path of a temporary .pdf file; the caller owns its cleanup.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        doc = SimpleDocTemplate(tmp.name, pagesize=letter)
        styles = getSampleStyleSheet()
        story = [Paragraph("AI Story Scenes", styles["Title"]), Spacer(1, 12)]

        # reportlab reads the image files at build() time, so the scratch
        # PNGs must survive the loop. Track them and delete after building —
        # the original leaked one PNG per scene on every call.
        scratch_paths = []
        for i, img in enumerate(images):
            if img:
                img_tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
                img_tmp.close()  # close before PIL writes to the path (Windows-safe)
                img.save(img_tmp.name)
                scratch_paths.append(img_tmp.name)
                story.extend([
                    RLImage(img_tmp.name, width=400, height=300),
                    Spacer(1, 12),
                    Paragraph(f"Scene {i+1} Explanation", styles["Heading3"]),
                    Paragraph(explanations[i], styles["BodyText"]),
                    Spacer(1, 24),
                ])
        doc.build(story)
        # Best-effort cleanup of the scratch PNGs now that the PDF is written.
        for path in scratch_paths:
            try:
                os.remove(path)
            except OSError:
                pass
        return tmp.name
111
+
112
def generate_story_outputs(story_text):
    """Run the full pipeline over *story_text*.

    Splits the story into scenes, then generates an image, a one-line
    caption, and a detailed explanation for each.
    Returns a list of (image, caption, explanation) tuples.
    """
    outputs = []
    for idx, scene in enumerate(divide_into_scenes(story_text), start=1):
        print(f"Generating Scene {idx}")
        scene_image = generate_image_from_text(f"Scene {idx}: {scene}")
        scene_caption = summarize_scene(scene)
        if scene_image:
            scene_explanation = explain_scene(scene_image)
        else:
            scene_explanation = "Explanation unavailable."
        outputs.append((scene_image, scene_caption, scene_explanation))
    return outputs
124
+
125
# Gradio Interface
def generate_for_gradio(story_text):
    """Adapter between the pipeline and the fixed 4-column Gradio layout.

    Returns exactly 14 values: 4 images, 4 captions, 4 explanations, then
    the .txt and .pdf download paths. The UI wires 14 output components to
    the generate button, but divide_into_scenes can yield fewer (or, in
    principle, more) than 4 scenes — the original then returned the wrong
    number of values and the click handler errored. Pad with None/"" and
    truncate to 4 so the output count always matches.
    """
    results = generate_story_outputs(story_text)
    scene_images = [img for img, _, _ in results]
    scene_captions = [cap for _, cap, _ in results]
    scene_explanations = [expl for _, _, expl in results]

    # Save explanations as TXT (real scenes only, before padding)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="w", encoding="utf-8") as txtfile:
        for i, expl in enumerate(scene_explanations):
            txtfile.write(f"Scene {i+1}:\n{expl}\n\n")
        txt_path = txtfile.name

    # Create PDF with images and explanations
    pdf_path = generate_pdf(scene_images, scene_explanations)

    def _fit(values, filler):
        # Truncate/pad to the 4 slots the UI declares.
        fitted = list(values[:4])
        fitted += [filler] * (4 - len(fitted))
        return fitted

    all_images = _fit(scene_images, None)
    all_captions = _fit(scene_captions, "")
    all_explanations = _fit(scene_explanations, "")

    return all_images + all_captions + all_explanations + [txt_path, pdf_path]
145
+
146
def play_tts(text):
    """Gradio callback: synthesize *text* and return the MP3 path (or None)."""
    return text_to_speech(text)
149
+
150
# Build the Gradio UI. Component creation order inside the context managers
# determines the on-page layout, so the statement order below is load-bearing.
with gr.Blocks(title="Story to AI Scene Images") as demo:
    gr.Markdown("## πŸ“–πŸ–ΌοΈ AI Story Scene Generator\nEnter a short story and get AI-generated scenes, captions, explanations, audio, and downloads.")

    input_text = gr.Textbox(lines=10, label="Enter your story")
    generate_btn = gr.Button("πŸš€ Generate Scenes")

    # Parallel per-scene component lists: index i corresponds to scene i+1.
    image_blocks = []
    captions, explanations, tts_buttons, tts_outputs = [], [], [], []

    # Four fixed scene columns — generate_for_gradio must return values for
    # every one of these components.
    with gr.Row():
        for i in range(4):
            with gr.Column():
                img = gr.Image(label=f"Scene {i+1}", show_label=False, type="pil")
                cap = gr.Markdown()
                expl = gr.Textbox(label="Detailed Explanation", lines=8, interactive=False)
                tts_btn = gr.Button("πŸ”Š Read Aloud")
                tts_audio = gr.Audio(label="Audio", autoplay=False)

                image_blocks.append(img)
                captions.append(cap)
                explanations.append(expl)
                tts_buttons.append(tts_btn)
                tts_outputs.append(tts_audio)

    txt_download = gr.File(label="πŸ“„ Download Explanations (.txt)")
    pdf_download = gr.File(label="πŸ“˜ Download Story PDF")

    # One click fans out to every scene component plus both downloads; the
    # outputs order must match the flat list generate_for_gradio returns.
    generate_btn.click(
        fn=generate_for_gradio,
        inputs=[input_text],
        outputs=image_blocks + captions + explanations + [txt_download, pdf_download]
    )

    # Wire each "Read Aloud" button to its own explanation box and audio player.
    for btn, txt, audio in zip(tts_buttons, explanations, tts_outputs):
        btn.click(fn=play_tts, inputs=[txt], outputs=[audio])

demo.launch()