J1mb0o committed on
Commit
0dd2051
·
verified ·
1 Parent(s): f2da41d

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +123 -0
  2. generator.py +177 -0
  3. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import time
4
+ from dotenv import load_dotenv
5
+ from PIL import Image
6
+ from generator import caption_generator, generate_story, generate_image, decode_image
7
+
8
# Load environment variables (python-dotenv).
load_dotenv()

# Placeholder illustration shown by main() when debug mode is on.
DEBUG_IMAGE_URL = "https://picsum.photos/1024"

# Page title, icon and layout for the Streamlit app.
st.set_page_config(page_title="AI Story Co-Creation", page_icon="📚", layout="centered")
20
+
21
def main():
    """Render the Streamlit page: upload an image, caption it, co-create a story.

    Flow, top to bottom:
      1. The sidebar offers a mood for the story.
      2. An uploaded image is displayed and can be captioned through
         ``caption_generator``; the caption is kept in
         ``st.session_state['caption']``.
      3. The (editable) caption and the lower-cased mood are passed to
         ``generate_story``; the story is kept in ``st.session_state['story']``.
      4. The story is illustrated through ``generate_image`` and
         ``decode_image``.
      5. A feedback link is shown at the bottom of the page.

    Errors raised while captioning or generating are reported with
    ``st.error`` instead of propagating.
    """
    # Title and description
    st.title("📚 AI Interactive Story Co-Creation")
    st.markdown("Upload an image and let's create a story together!")
    st.warning("⚠️ Please ensure your uploaded image is appropriate and safe for work (SFW). NSFW content is not permitted. Examples of prohibited content include: nudity, violence, gore, abuse, drugs, explicit content, or other inappropriate material.")

    # Sidebar for mood selection
    with st.sidebar:
        st.header("Story Settings")
        mood = st.selectbox(
            "Choose the mood for your story:",
            [
                "Adventure",
                "Dark fantasy",
                "Dystopian",
                "Fantasy",
                "Science fiction",
                "Romance",
                "Horror",
                "Thriller",
                "Post-apocalyptic",
            ],
        )

    # Debug mode replaces every API call with canned output; it is hard-wired
    # off here (set to True for local testing without API access).
    debug_mode = False

    # Image upload section
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

    if uploaded_file is not None:
        # Display uploaded image
        image = Image.open(uploaded_file)
        st.image(image, caption="Uploaded Image", use_container_width=True)

        # Generate caption button
        if st.button("Generate Caption"):
            with st.spinner("Generating caption..."):
                try:
                    if debug_mode:
                        time.sleep(2)  # Simulate API call
                        caption = "Debug mode: This is a sample caption for the uploaded image."
                    else:
                        # caption_generator reads the image from disk, so the
                        # upload is written to a temporary file first.
                        image_path = f"temp_{uploaded_file.name}"
                        try:
                            image.save(image_path)
                            caption = caption_generator(image_path)
                        finally:
                            # Remove the temp file even when saving or the
                            # API call fails, so failed runs do not leak files.
                            if os.path.exists(image_path):
                                os.remove(image_path)

                    if caption:
                        st.session_state['caption'] = caption
                        st.success("Caption generated!")
                except Exception as e:
                    st.error(f"Error generating caption: {str(e)}")

        # Caption editing section
        if 'caption' in st.session_state:
            st.markdown("### Edit Caption")
            edited_caption = st.text_area(
                "You can edit the caption before generating the story:",
                value=st.session_state['caption'],
                height=100,
            )
            st.session_state['caption'] = edited_caption

            # Story generation
            if st.button("Generate Story"):
                with st.spinner("Creating your story and image..."):
                    try:
                        if debug_mode:
                            time.sleep(3)  # Simulate API call
                            story = f"Debug mode: This is a sample story based on the caption: '{edited_caption}' with {mood} mood."
                        else:
                            story = generate_story(edited_caption, mood.lower())

                        if story:
                            st.session_state['story'] = story
                            st.markdown("### Your Story")
                            st.write(story)

                            # Generate and display story image
                            if debug_mode:
                                time.sleep(2)  # Simulate API call
                                st.info("Debug mode: Using placeholder image")
                                st.image(DEBUG_IMAGE_URL, caption="Debug Story Illustration", use_container_width=True)
                            else:
                                # NOTE(review): the full story is used as the
                                # image prompt. generator.py also defines
                                # summarize_story, which its own __main__
                                # pipeline runs before generate_image; confirm
                                # that skipping it here is intentional.
                                image_b64 = generate_image(story)
                                # Decode only when a payload came back:
                                # decoding a missing payload would raise.
                                if image_b64:
                                    illustration = decode_image(image_b64)
                                    st.image(image=illustration, caption="Story Illustration", use_container_width=True)
                    except Exception as e:
                        st.error(f"Error generating story: {str(e)}")

    # Feedback section
    st.markdown("### Share Your Feedback")
    st.markdown("""
    We'd love to hear your thoughts on the Story Generator!
    Please take a moment to fill out our feedback form.
    """)
    st.markdown("[📝 Share Your Feedback](https://docs.google.com/forms/d/e/1FAIpQLScgAXLTfVUQyMm4EjDXYE0Kw7XdcdajKX3L4FPWeGF2X54b_A/viewform?usp=header)", unsafe_allow_html=True)


if __name__ == "__main__":
    main()
generator.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+ import base64
3
+ from PIL import Image
4
+ from io import BytesIO
5
+ import streamlit as st
6
+
7
+
8
# Read the API key from Streamlit secrets and pass it to the client
# explicitly; a bare OpenAI() never sees this value and relies on the
# OPENAI_API_KEY environment variable instead.
openai_api_key = st.secrets['openai']["OPENAI_API_KEY"]
client = OpenAI(api_key=openai_api_key)
10
+
11
+
12
def encode_image(image_path):
    """Return the bytes of the file at ``image_path`` as a base64 text string."""
    with open(image_path, "rb") as source:
        raw_bytes = source.read()
    return base64.b64encode(raw_bytes).decode("utf-8")
16
+
17
+
18
def decode_image(encoded_string, debug=False):
    """Decode a base64-encoded image and open it with PIL.

    When ``debug`` is true, the decoded bytes are also written to
    ``decoded_image.jpg`` in the current working directory.

    Returns the object produced by ``Image.open`` on the decoded bytes.
    """
    raw_bytes = base64.b64decode(encoded_string)
    if debug:
        with open("decoded_image.jpg", "wb") as dump:
            dump.write(raw_bytes)
    return Image.open(BytesIO(raw_bytes))
27
+
28
+
29
def caption_generator(image_path, debug=False):
    """Ask the ``gpt-4o-mini`` chat model for a short caption of an image file.

    The image is sent inline as a base64 data URL. Its MIME type is guessed
    from the file extension and falls back to ``image/jpeg`` when the
    extension is unknown or not an image type.

    Args:
        image_path: Path of the image file to caption.
        debug: When true, also write the caption to
            ``tests/caption_generator.md``.

    Returns:
        The message content of the first completion choice.
    """
    import mimetypes
    import os

    # Label the payload with the file's real type instead of always claiming
    # JPEG, since callers may pass PNG files as well.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": "Analyze an image to create a descriptive caption that accurately conveys the essence and details, using no more than 20 words. \n\n# Steps\n\n1. Examine the image thoroughly, identifying key elements, subjects, actions, and emotions.\n2. Determine the central theme or story conveyed by the image.\n3. Select the most notable and characteristic elements to include in the caption.\n4. Construct a concise, vibrant caption that visually describes the scene.\n\n# Output Format\n\n- A single text sentence with a maximum of 20 words.\n\n# Notes\n\n- Focus on clarity and vivid imagery in the wording.\n- The caption should reflect the most significant or interesting aspect of the image.",
                    }
                ],
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{encode_image(image_path)}"
                        },
                    }
                ],
            },
        ],
        response_format={"type": "text"},
        temperature=1,
        max_completion_tokens=2048,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    caption = response.choices[0].message.content
    if debug:
        # Create the dump directory on demand and write UTF-8 explicitly so
        # the dump does not depend on the platform's default encoding.
        os.makedirs("tests", exist_ok=True)
        with open("tests/caption_generator.md", "w", encoding="utf-8") as f:
            f.write(caption or "")
    return caption
65
+
66
+
67
def generate_story(caption, mood, debug=False):
    """Ask the ``gpt-4o-mini`` chat model for a Markdown short story.

    Args:
        caption: Caption text the story should be based on.
        mood: Name of the mood. It is matched case-insensitively (ignoring
            surrounding whitespace) against the built-in mood table; a mood
            that is not in the table is sent to the model verbatim as a
            free-form mood description instead of raising ``KeyError``.
        debug: When true, also write the story to ``tests/generate_story.md``.

    Returns:
        The message content of the first completion choice.
    """
    import os

    mood_dictionary = {
        "fantasy": "A high-fantasy setting inspired by works like The Lord of the Rings or The Witcher, featuring epic quests, mythical creatures, ancient magic, and richly detailed worlds.",
        "science fiction": "A futuristic and imaginative setting with advanced technology, space exploration, alien encounters, and themes like artificial intelligence and intergalactic conflict.",
        "romance": "A heartfelt and emotional atmosphere centered on love and relationships, often with elements of passion, conflict, and ultimate connection.",
        "horror": "An eerie and suspenseful mood focused on fear, dread, and the supernatural, often with haunted settings, dangerous creatures, or psychological terror.",
        "adventure": "An exciting and action-packed mood that involves exploration, daring feats, and encounters with challenges or enemies in exotic locales.",
        "dystopian": "A bleak and oppressive setting characterized by societal collapse, authoritarian regimes, and the struggle for survival in a harsh world.",
        "thriller": "A tense and gripping mood filled with suspense, high stakes, and fast-paced action, often involving espionage, crime, or danger.",
        "dark fantasy": "A blend of fantasy elements with a grim and sinister tone, featuring morally ambiguous characters, dangerous magic, and foreboding settings.",
        "post-apocalyptic": "A setting where civilization has collapsed due to events like nuclear war, pandemics, or environmental disaster, with themes of survival and rebuilding.",
    }

    # Known moods expand to their detailed description; anything else is
    # passed through unchanged so a free-form mood still works.
    mood_description = mood_dictionary.get(mood.strip().lower(), mood)

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": 'Create a short story based on a given caption and mood, formatted in Markdown.\n\nIncorporate elements from the caption and maintain the desired mood throughout the narrative.\n\n# Steps\n\n1. **Understand the Caption and Mood**: \n - Identify key elements and themes from the provided caption.\n - Recognize the specific mood that needs to be reflected in the story.\n\n2. **Story Development**:\n - Develop a plot that is engaging and consistent with the themes from the caption.\n - Integrate characters, setting, and conflict that align with the mood.\n\n3. **Conclusion**:\n - Provide a resolution that enhances or concludes the mood and plot effectively.\n\n# Output Format\n\n- Use markdown for formatting the story.\n - Begin with a suitable **Title** for the story.\n - Use normal text for the body of the story.\n - Employ **bold** or *italic* for emphasis where necessary.\n - Utilize bullet points or numbered lists if required within the story.\n\n# Examples\n\n**Input**:\n- Caption: "A secret door within the library leads to a world of magic."\n- Mood: "Mysterious and whimsical"\n\n**Output**:\n**Title**: The Enchanted Passage\n\nIn the dim glow of the library\'s ancient lamps, [character name] stumbled upon an unassuming wooden panel nestled between dusty tomes. **Curious** and somewhat skeptical, [he/she/they] pressed against the panel, feeling a *soft click* under [his/her/their] fingertips. The room shimmered briefly, and before [him/her/them] unfolded a world bathed in luminescent colors and whispered secrets—a realm only accessible through that **secret door**, where every shadow seemed to hold a story, and the air tingled with anticipation of magic yet to be revealed...',
                    }
                ],
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"Caption: {caption} \n Mood: {mood_description} ",
                    }
                ],
            },
        ],
        response_format={"type": "text"},
        temperature=1,
        max_completion_tokens=2048,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    story = response.choices[0].message.content

    if debug:
        # Create the dump directory on demand and write UTF-8 explicitly so
        # the dump does not depend on the platform's default encoding.
        os.makedirs("tests", exist_ok=True)
        with open("tests/generate_story.md", "w", encoding="utf-8") as f:
            f.write(story or "")

    return story
116
+
117
+
118
+ # Step: summarize the story
119
+
120
+
121
def summarize_story(story, debug=False):
    """Ask the ``gpt-4o`` chat model to condense a story into one visual scene.

    The system prompt asks for a description of under 1000 characters that
    names no characters or places, intended as input for image generation.

    Args:
        story: Full story text to summarize.
        debug: When true, also write the summary to
            ``tests/summarize_story.md``.

    Returns:
        The message content of the first completion choice.
    """
    import os

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        # The length limit is stated in characters throughout;
                        # mixing in a word limit would contradict it.
                        "text": "Summarize a story into a concise and vivid description of less than 1000 characters, focusing on a single scene that would be most suitable for generating an image. Ensure the summary evokes visual imagery without including any text elements that identify characters or settings explicitly.\n\n# Steps\n\n1. **Identify Key Scenes**: Read the story and identify crucial scenes that are rich in visual detail and pivotal to the narrative.\n2. **Select One Scene**: Choose one scene that encapsulates significant action, emotion, or a transformative moment.\n3. **Describe the Scene**: Write a vivid description of this scene, focusing on elements that can be visually depicted, such as actions, emotions, settings, and dynamics.\n4. **Ensure Clarity and Brevity**: Keep the description under 1000 characters, ensuring it is clear and easy to visualize.\n\n# Output Format\n\n- A descriptive summary in paragraph form, less than 1000 characters.\n- The output must exclude any textual identifiers such as character names, dialogue, or specific place names.\n\n# Examples\n\n**Example 1:**\n\n**Input:** \nA fairy tale about a princess who embarks on a dangerous journey to save her kingdom.\n\n**Output:** \nIn the heart of the enchanted forest, a gleaming castle rises under a twilight sky. Near the edge of a cascading waterfall, the princess, cloaked in emerald-green, stands defiant against the swirling mist. Her hand grips the hilt of a shimmering sword, its silver blade catching the last rays of the setting sun, while ethereal creatures hover silently, watching her every move. \n\n(The actual output should contain similar vivid imagery, yet respect the 1000-character limit and avoid explicit character or place names.)\n\n# Notes\n\n- Focus on visual-rich segments of the story for better image generation.\n- Avoid giving too much background information; instead, bring the scene to life.\n- Ensure no explicit identifiers like names or dialogues are included in the description.",
                    }
                ],
            },
            {"role": "user", "content": [{"type": "text", "text": f"{story}"}]},
        ],
        response_format={"type": "text"},
        temperature=1,
        max_completion_tokens=2048,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    summary = response.choices[0].message.content
    if debug:
        # Create the dump directory on demand and write UTF-8 explicitly so
        # the dump does not depend on the platform's default encoding.
        os.makedirs("tests", exist_ok=True)
        with open("tests/summarize_story.md", "w", encoding="utf-8") as f:
            f.write(summary or "")
    return summary
148
+
149
+
150
def generate_image(prompt, debug=False):
    """Request one 1024x1024 ``dall-e-3`` image for ``prompt``.

    A fixed instruction not to render any text is appended to the prompt.
    When ``debug`` is true, the decoded image bytes are also written to
    ``tests/decoded_image.jpg``.

    Returns the ``b64_json`` payload of the first image in the response.
    """
    full_prompt = f"{prompt}\n\n DO NOT INCLUDE ANY TEXT IN THE IMAGE. JUST THE IMAGE."
    result = client.images.generate(
        model="dall-e-3",
        prompt=full_prompt,
        size="1024x1024",
        quality="standard",
        response_format="b64_json",
        n=1,
    )
    image_b64 = result.data[0].b64_json
    if debug:
        with open("tests/decoded_image.jpg", "wb") as dump:
            dump.write(base64.b64decode(image_b64))
    return image_b64
165
+
166
+
167
if __name__ == "__main__":
    # Manual end-to-end run: caption -> story -> summary -> image, with every
    # stage's debug dump enabled.
    print("Generating Caption")
    caption = caption_generator("dog.jpg", debug=True)

    print("Generating Story")
    story = generate_story(caption, mood="adventure", debug=True)

    print("Summarizing Story")
    summary = summarize_story(story, debug=True)

    print("Generating Image")
    image = generate_image(summary, debug=True)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
# NOTE(review): minimums raised to match keyword arguments the code relies on
# (max_completion_tokens in generator.py, st.image(use_container_width=...) in
# app.py); confirm the exact versions against each project's release notes.
openai>=1.45.0
streamlit>=1.40.0
python-dotenv>=0.19.0
Pillow>=8.0.0