CR7CAD committed on
Commit
f006a50
·
verified ·
1 Parent(s): c62c780

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -118
app.py CHANGED
@@ -1,13 +1,7 @@
1
  # import part
2
  import streamlit as st
3
  from transformers import pipeline
4
- import torch
5
- from PIL import Image
6
- import io
7
  import os
8
- from huggingface_hub import InferenceClient
9
- import numpy as np
10
- import base64
11
 
12
  # function part
13
  # img2text
@@ -18,37 +12,37 @@ def img2text(image_path):
18
 
19
  # text2story
20
  def text2story(text):
21
- # Using Llama model through API to avoid GGUF format complexities in Streamlit
22
- client = InferenceClient(model="MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF")
23
 
24
  # Create a prompt for the story generation
25
- prompt = f"""Write a fun, engaging children's story of about 100 words based on this caption:
26
- "{text}"
27
-
28
- The story should be suitable for kids aged 3-10 years old, with simple language, positive themes, and a clear beginning, middle, and end.
29
- """
30
 
31
  # Generate the story
32
- story_text = client.text_generation(
33
  prompt,
34
- max_new_tokens=250,
35
- temperature=0.7,
36
- top_p=0.9,
37
- repetition_penalty=1.2
 
 
38
  )
39
 
 
 
 
 
 
 
 
 
40
  return story_text
41
 
42
  # text2audio
43
  def text2audio(story_text):
44
- # Using Bark text-to-speech model
45
- tts = pipeline("text-to-speech", model="suno/bark")
46
-
47
- # Generate audio with a voice suitable for children's stories
48
- audio_output = tts(
49
- text=story_text,
50
- forward_params={"speaker": "v2/en_speaker_6", "text_temp": 0.7}
51
- )
52
 
53
  return {
54
  "audio": audio_output["audio"],
@@ -57,110 +51,53 @@ def text2audio(story_text):
57
 
58
  # Function to save temporary image file
59
  def save_uploaded_image(uploaded_file):
60
- # Create a temp directory if it doesn't exist
61
  if not os.path.exists("temp"):
62
  os.makedirs("temp")
63
 
64
- # Define the path to save the image
65
  image_path = os.path.join("temp", uploaded_file.name)
66
 
67
- # Save the image
68
  with open(image_path, "wb") as f:
69
  f.write(uploaded_file.getvalue())
70
 
71
  return image_path
72
 
73
  # main part
74
- st.set_page_config(
75
- page_title="Kids Storytelling Magic",
76
- page_icon="📚",
77
- layout="centered"
78
- )
79
-
80
- # Add some CSS for a child-friendly interface
81
- st.markdown("""
82
- <style>
83
- .main {
84
- background-color: #f0f8ff;
85
- }
86
- h1, h2, h3 {
87
- color: #1e90ff;
88
- }
89
- .stButton>button {
90
- background-color: #ff6b6b;
91
- color: white;
92
- font-size: 1.2rem;
93
- border-radius: 10px;
94
- padding: 0.5rem 1rem;
95
- }
96
- </style>
97
- """, unsafe_allow_html=True)
98
-
99
- st.title("🧸 Kids Storytelling Magic 🦄")
100
- st.subheader("Upload a picture and hear a magical story!")
101
-
102
- uploaded_file = st.file_uploader("Choose a fun picture...", type=["jpg", "jpeg", "png"])
103
 
104
  if uploaded_file is not None:
105
- # Display a loading spinner
106
- with st.spinner("Working on your magical story..."):
107
- # Display the uploaded image
108
- st.image(uploaded_file, caption="Your magical picture", use_column_width=True)
109
-
110
- # Save the image temporarily
111
- image_path = save_uploaded_image(uploaded_file)
112
-
113
- # Stage 1: Image to Text
114
- with st.spinner("Looking at your picture..."):
115
- caption = img2text(image_path)
116
- st.markdown("### 📝 I see...")
117
- st.write(caption)
118
-
119
- # Stage 2: Text to Story
120
- with st.spinner("Creating your story..."):
121
- story = text2story(caption)
122
- st.markdown("### 📖 Your Story")
123
- st.write(story)
124
-
125
- # Stage 3: Story to Audio data
126
- with st.spinner("Making your story speak..."):
127
- try:
128
- audio_data = text2audio(story)
129
-
130
- # Add a play button with cute icon
131
- st.markdown("### 🔊 Listen to your story")
132
- if st.button("🎵 Play Story"):
133
- st.audio(
134
- audio_data["audio"],
135
- format="audio/wav",
136
- start_time=0,
137
- sample_rate=audio_data["sampling_rate"]
138
- )
139
- except Exception as e:
140
- st.error(f"Oops! Something went wrong with the audio: {str(e)}")
141
- st.write("But you can still read the story above!")
142
-
143
- # Clean up - delete the temporary image
144
- try:
145
- os.remove(image_path)
146
- except:
147
- pass
148
-
149
- else:
150
- # Show instructions with a friendly message
151
- st.markdown("""
152
- ### How to use:
153
- 1. Click the button above to upload a picture
154
- 2. Wait for the magical story to appear
155
- 3. Press play to hear your story!
156
 
157
- Try pictures of animals, nature, toys, or anything fun!
158
- """)
159
 
160
- # Show a placeholder image
161
- st.image("https://placehold.co/600x400/9370db/ffffff?text=Upload+an+image+to+start+the+magic!",
162
- caption="Ready for your picture!", use_column_width=True)
163
-
164
- # Add a footer
165
- st.markdown("---")
166
- st.markdown("Made for kids to enjoy the stories")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # import part
2
  import streamlit as st
3
  from transformers import pipeline
 
 
 
4
  import os
 
 
 
5
 
6
  # function part
7
  # img2text
 
12
 
13
  # text2story
14
  def text2story(text):
15
+ # Using a smaller text generation model
16
+ generator = pipeline('text-generation', model='gpt2')
17
 
18
  # Create a prompt for the story generation
19
+ prompt = f"Write a fun children's story based on this: {text}. Once upon a time, "
 
 
 
 
20
 
21
  # Generate the story
22
+ story_result = generator(
23
  prompt,
24
+ max_length=200,
25
+ num_return_sequences=1,
26
+ temperature=0.8,
27
+ top_k=50,
28
+ top_p=0.95,
29
+ do_sample=True
30
  )
31
 
32
+ # Extract the generated text
33
+ story_text = story_result[0]['generated_text']
34
+ story_text = story_text.replace(prompt, "Once upon a time, ")
35
+
36
+ # Make sure the story is at least 100 words
37
+ if len(story_text.split()) < 100:
38
+ story_text += " The children had a wonderful adventure and learned that imagination can take you anywhere. They returned home with smiles, eager to share their magical story with family and friends. And they lived happily ever after."
39
+
40
  return story_text
41
 
42
  # text2audio
43
  def text2audio(story_text):
44
+ tts = pipeline("text-to-speech", model="espnet/kan-bayashi_ljspeech_vits")
45
+ audio_output = tts(story_text)
 
 
 
 
 
 
46
 
47
  return {
48
  "audio": audio_output["audio"],
 
51
 
52
  # Function to save temporary image file
53
  def save_uploaded_image(uploaded_file):
 
54
  if not os.path.exists("temp"):
55
  os.makedirs("temp")
56
 
 
57
  image_path = os.path.join("temp", uploaded_file.name)
58
 
 
59
  with open(image_path, "wb") as f:
60
  f.write(uploaded_file.getvalue())
61
 
62
  return image_path
63
 
64
  # main part
65
+ st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
66
+ st.header("Turn Your Image to Audio Story")
67
+ uploaded_file = st.file_uploader("Select an Image...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  if uploaded_file is not None:
70
+ # Display the uploaded image
71
+ st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
+ # Save the image temporarily
74
+ image_path = save_uploaded_image(uploaded_file)
75
 
76
+ # Stage 1: Image to Text
77
+ st.text('Processing img2text...')
78
+ caption = img2text(image_path)
79
+ st.write(caption)
80
+
81
+ # Stage 2: Text to Story
82
+ st.text('Generating a story...')
83
+ story = text2story(caption)
84
+ st.write(story)
85
+
86
+ # Stage 3: Story to Audio data
87
+ st.text('Generating audio data...')
88
+ audio_data = text2audio(story)
89
+
90
+ # Play button
91
+ if st.button("Play Audio"):
92
+ st.audio(
93
+ audio_data["audio"],
94
+ format="audio/wav",
95
+ start_time=0,
96
+ sample_rate=audio_data["sampling_rate"]
97
+ )
98
+
99
+ # Clean up the temporary file
100
+ try:
101
+ os.remove(image_path)
102
+ except:
103
+ pass