Spaces:

CR7CAD
/

Assignment1

Sleeping

App Files Community

CR7CAD committed on Mar 8

Commit

e1ee436

verified ·

1 Parent(s): d0e7248

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -60

app.py CHANGED Viewed

@@ -1,62 +1,36 @@
-# import part
 import streamlit as st
 from transformers import pipeline
-# function part
-# img2text
-def img2text(url):
-    image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
-    text = image_to_text_model(url)[0]["generated_text"]
-    return text # key output
-# text2story
-def text2story(text):
-    story_text = ""   # to be completed
-    return story_text
-# text2audio
-def text2audio(story_text):
-    audio_data = ""     # to be completed
-    return audio_data
-# main part, also where the project code begins if you want to add/revise sth
-st.set_page_config(page_title="Your Image to Audio Story",
-                   page_icon="🦜")
-st.header("Turn Your Image to Audio Story")
-uploaded_file = st.file_uploader("Select an Image...")
-if uploaded_file is not None: # if user didn't upload an image, this would be skipped, codes below will not be executed, then rerun the codes and if the file(image) is uploaded, the part would be executed
-    # some kind of looping, not for or while loop
-    print(uploaded_file)
-    bytes_data = uploaded_file.getvalue()
-    with open(uploaded_file.name, "wb") as file:
-        file.write(bytes_data)
-    st.image(uploaded_file, caption="Uploaded Image",
-             use_column_width=True)
-    #Stage 1: Image to Text
-    st.text('Processing img2text...')
-    scenario = img2text(uploaded_file.name)
-    st.write(scenario)
-    #Stage 2: Text to Story
-    st.text('Generating a story...')
-    #story = text2story(scenario)
-    #st.write(story)
-    #Stage 3: Story to Audio data
-    #st.text('Generating audio data...')
-    #audio_data =text2audio(story)
-    # Play button
-    if st.button("Play Audio"):
-        #st.audio(audio_data['audio'],
-        #            format="audio/wav",
-        #            start_time=0,
-        #            sample_rate = audio_data['sampling_rate'])
-        st.audio("kids_playing_audio.wav")

 import streamlit as st
 from transformers import pipeline
+from PIL import Image
+# Set the title of the app
+st.title("Image-to-Text Converter using Donut")
+# Description of the app
+st.write("Upload an image to extract text using the Donut model (naver-clova-ix/donut-base).")
+# Create a file uploader for image files
+uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
+# Initialize the pipeline
+@st.cache_resource(show_spinner=False)
+def load_pipeline():
+    return pipeline("image-to-text", model="naver-clova-ix/donut-base")
+pipe = load_pipeline()
+if uploaded_file is not None:
+    try:
+        # Open the image file and convert to RGB (if necessary)
+        image = Image.open(uploaded_file).convert("RGB")
+        st.image(image, caption="Uploaded Image", use_column_width=True)
+        # Process the image through the pipeline
+        result = pipe(image)
+        # Extract generated text from the result list
+        generated_text = result[0].get("generated_text", "No text generated.")
+        st.subheader("Extracted Text")
+        st.text_area("Result", generated_text, height=200)
+    except Exception as e:
+        st.error(f"An error occurred: {e}")