Spaces:

CR7CAD
/

Assignment1

Sleeping

App Files Files Community

CR7CAD committed on Mar 8

Commit

e77741a

verified ·

1 Parent(s): fbad1e8

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -45

app.py CHANGED Viewed

@@ -1,32 +1,31 @@
-# import part - only using the two requested imports
 import streamlit as st
 from transformers import pipeline
 from PIL import Image
-import io
 # function part
-# img2text
 def img2text(image):
-    image_to_text = pipeline("image-to-text", model="sooh-j/blip-image-captioning-base")
-    text = image_to_text(image)[0]["generated_text"]
     return text
-# text2story - IMPROVED to end naturally
 def text2story(text):
-    # Using a smaller text generation model
-    generator = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
-    # Create a prompt for the story generation
-    prompt = f"Write a fun children's story based on this: {text}. The story should be short and end naturally with a conclusion. Once upon a time, "
-    # Generate the story
     story_result = generator(
         prompt,
-        max_length=250,  # Increased to allow for a complete story
         num_return_sequences=1,
         temperature=0.7,
         top_k=50,
-        top_p=0.95,
         do_sample=True
     )
@@ -34,33 +33,25 @@ def text2story(text):
     story_text = story_result[0]['generated_text']
     story_text = story_text.replace(prompt, "Once upon a time, ")
-    # Find a natural ending point (end of sentence) before 100 words
-    words = story_text.split()
-    if len(words) > 100:
-        # Join the first 100 words
-        shortened_text = " ".join(words[:100])
-        # Find the last complete sentence
-        last_period = shortened_text.rfind('.')
-        last_question = shortened_text.rfind('?')
-        last_exclamation = shortened_text.rfind('!')
-        # Find the last sentence ending punctuation
-        last_end = max(last_period, last_question, last_exclamation)
-        if last_end > 0:
-            # Truncate at the end of the last complete sentence
-            story_text = shortened_text[:last_end + 1]
-        else:
-            # If no sentence ending found, just use the shortened text
-            story_text = shortened_text
     return story_text
 # text2audio - Using HelpingAI-TTS-v1 model
 def text2audio(story_text):
     try:
-        synthesizer = pipeline("text-to-speech", model="umarigan/speecht5_tts_tr_v1.0")
         # Limit text length to avoid timeouts
         max_chars = 500
@@ -74,9 +65,6 @@ def text2audio(story_text):
         # Generate speech
         speech = synthesizer(story_text)
-        # Get output information
-        st.write(f"Speech output keys: {list(speech.keys())}")
         return speech
     except Exception as e:
@@ -95,19 +83,25 @@ if uploaded_file is not None:
     # Convert the file to a PIL Image
     image = Image.open(uploaded_file)
     # Stage 1: Image to Text
-    st.text('Processing img2text...')
-    caption = img2text(image)  # Pass PIL image to pipeline
-    st.write(caption)
     # Stage 2: Text to Story
-    st.text('Generating a story...')
-    story = text2story(caption)
-    st.write(story)
     # Stage 3: Story to Audio data
-    st.text('Generating audio data...')
-    speech_output = text2audio(story)
     # Play button
     if st.button("Play Audio"):

+# import part
 import streamlit as st
 from transformers import pipeline
 from PIL import Image
 # function part
+# img2text - Using a lighter model
 @st.cache_resource
 def _load_captioner():
     """Build the BLIP image-captioning pipeline once and reuse it across reruns."""
     return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


 def img2text(image):
     """Return a short caption for *image*.

     Args:
         image: Image to caption; the caller passes a PIL image opened from
             the uploaded file.

     Returns:
         The ``generated_text`` field of the first result produced by the
         image-to-text pipeline.
     """
     # Streamlit re-executes the whole script on every widget interaction,
     # so the model comes from the resource cache instead of being rebuilt
     # on each call.
     image_to_text = _load_captioner()
     # max_new_tokens=20 caps the caption length.
     return image_to_text(image, max_new_tokens=20)[0]["generated_text"]
+# text2story - Using a much faster model with constraints
 @st.cache_resource
 def _load_story_generator():
     """Build the distilgpt2 text-generation pipeline once and reuse it across reruns."""
     return pipeline("text-generation", model="distilgpt2")


 def text2story(text):
     """Generate a short children's story seeded by *text*.

     Args:
         text: Subject of the story; the caller passes the image caption.

     Returns:
         The generated story, beginning with "Once upon a time, " and cut
         after the last '.', '?' or '!' when one is found past index 0.
         Sampling is enabled, so the output differs between calls.
     """
     # Streamlit re-executes the whole script on every widget interaction,
     # so the model comes from the resource cache instead of being rebuilt
     # on each call.
     generator = _load_story_generator()

     prompt = f"A short children's story about {text}: Once upon a time, "
     story_result = generator(
         prompt,
         max_new_tokens=100,  # Limit token generation
         num_return_sequences=1,
         temperature=0.7,
         top_k=50,
         do_sample=True,
     )
     story_text = story_result[0]['generated_text']
     # Swap the instruction prompt, wherever it appears in the output, for
     # the plain story opening.
     story_text = story_text.replace(prompt, "Once upon a time, ")

     # Index of the last sentence-ending mark; rfind yields -1 when a mark
     # is absent, so the text is left untouched if none is found.
     last_end = max(story_text.rfind(mark) for mark in ".?!")
     if last_end > 0:
         # Drop the trailing, unfinished sentence.
         story_text = story_text[:last_end + 1]
     return story_text
 # text2audio - Using HelpingAI-TTS-v1 model
 def text2audio(story_text):
     try:
+        # Use the HelpingAI TTS model as requested
+        synthesizer = pipeline("text-to-speech", model="HelpingAI/HelpingAI-TTS-v1")
         # Limit text length to avoid timeouts
         max_chars = 500
         # Generate speech
         speech = synthesizer(story_text)
         return speech
     except Exception as e:
     # Convert the file to a PIL Image
     image = Image.open(uploaded_file)
+    # Progress indicator
+    progress_bar = st.progress(0)
     # Stage 1: Image to Text
+    with st.spinner('Processing image caption...'):
+        caption = img2text(image)
+        progress_bar.progress(33)
+    st.write(f"**Image caption:** {caption}")
     # Stage 2: Text to Story
+    with st.spinner('Creating story...'):
+        story = text2story(caption)
+        progress_bar.progress(66)
+    st.write(f"**Story:** {story}")
     # Stage 3: Story to Audio data
+    with st.spinner('Generating audio...'):
+        speech_output = text2audio(story)
+        progress_bar.progress(100)
     # Play button
     if st.button("Play Audio"):