Spaces:

wavesoumen
/

WAVE_AI

Build error

App Files Community

wavesoumen committed on Jun 5, 2024

Commit

a80511b

verified ·

1 Parent(s): 7c7cb02

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -26

app.py CHANGED Viewed

@@ -1,17 +1,40 @@
 import streamlit as st
-from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 import nltk
 from youtube_transcript_api import YouTubeTranscriptApi
 # Download NLTK data
 nltk.download('punkt')
-# Initialize the image captioning pipeline
-captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
-# Load the tokenizer and model for tag generation
-tokenizer = AutoTokenizer.from_pretrained("fabiochiu/t5-base-tag-generation")
-model = AutoModelForSeq2SeqLM.from_pretrained("fabiochiu/t5-base-tag-generation")
 # Function to fetch YouTube transcript
 def fetch_transcript(url):
@@ -34,22 +57,21 @@ with tab1:
     st.header("Image Captioning")
     # Input for image URL
-    image_url = st.text_input("Enter the URL of the image:")
     # If an image URL is provided
-    if image_url:
-        try:
-            # Display the image
-            st.image(image_url, caption="Provided Image", use_column_width=True)
-            # Generate the caption
-            caption = captioner(image_url)
-            # Display the caption
-            st.write("**Generated Caption:**")
-            st.write(caption[0]['generated_text'])
-        except Exception as e:
-            st.error(f"An error occurred: {e}")
 # Text Tag Generation Tab
 with tab2:
@@ -59,17 +81,17 @@ with tab2:
     text = st.text_area("Enter the text for tag extraction:", height=200)
     # Button to generate tags
-    if st.button("Generate Tags"):
         if text:
             try:
                 # Tokenize and encode the input text
-                inputs = tokenizer([text], max_length=512, truncation=True, return_tensors="pt")
                 # Generate tags
-                output = model.generate(**inputs, num_beams=8, do_sample=True, min_length=10, max_length=64)
                 # Decode the output
-                decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
                 # Extract unique tags
                 tags = list(set(decoded_output.strip().split(", ")))
@@ -90,7 +112,7 @@ with tab3:
     youtube_url = st.text_input("Enter YouTube URL:")
     # Button to get transcript
-    if st.button("Get Transcript"):
         if youtube_url:
             transcript = fetch_transcript(youtube_url)
             if "error" not in transcript.lower():
@@ -100,4 +122,3 @@ with tab3:
                 st.error(f"An error occurred: {transcript}")
         else:
             st.warning("Please enter a URL.")

 import streamlit as st
+import requests
+from PIL import Image
+from transformers import BlipProcessor, BlipForConditionalGeneration, pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 import nltk
 from youtube_transcript_api import YouTubeTranscriptApi
 # Download NLTK data
 nltk.download('punkt')
+# Initialize the image captioning processor and model
+caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+# Initialize the tokenizer and model for tag generation
+tag_tokenizer = AutoTokenizer.from_pretrained("fabiochiu/t5-base-tag-generation")
+tag_model = AutoModelForSeq2SeqLM.from_pretrained("fabiochiu/t5-base-tag-generation")
+# Function to generate captions for an image
+def generate_caption(img_url, text="a photography of"):
+    try:
+        raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
+    except Exception as e:
+        st.error(f"Error loading image: {e}")
+        return None, None
+    # Conditional image captioning
+    inputs_conditional = caption_processor(raw_image, text, return_tensors="pt")
+    out_conditional = caption_model.generate(**inputs_conditional)
+    caption_conditional = caption_processor.decode(out_conditional[0], skip_special_tokens=True)
+    # Unconditional image captioning
+    inputs_unconditional = caption_processor(raw_image, return_tensors="pt")
+    out_unconditional = caption_model.generate(**inputs_unconditional)
+    caption_unconditional = caption_processor.decode(out_unconditional[0], skip_special_tokens=True)
+    return caption_conditional, caption_unconditional
 # Function to fetch YouTube transcript
 def fetch_transcript(url):
     st.header("Image Captioning")
     # Input for image URL
+    img_url = st.text_input("Enter Image URL:")
     # If an image URL is provided
+    if st.button("Generate Captions", key='caption_button'):
+        if img_url:
+            caption_conditional, caption_unconditional = generate_caption(img_url)
+            if caption_conditional and caption_unconditional:
+                st.success("Captions successfully generated!")
+                st.image(img_url, caption="Input Image", use_column_width=True)
+                st.write("### Conditional Caption")
+                st.write(caption_conditional)
+                st.write("### Unconditional Caption")
+                st.write(caption_unconditional)
+        else:
+            st.warning("Please enter an image URL.")
 # Text Tag Generation Tab
 with tab2:
     text = st.text_area("Enter the text for tag extraction:", height=200)
     # Button to generate tags
+    if st.button("Generate Tags", key='tag_button'):
         if text:
             try:
                 # Tokenize and encode the input text
+                inputs = tag_tokenizer([text], max_length=512, truncation=True, return_tensors="pt")
                 # Generate tags
+                output = tag_model.generate(**inputs, num_beams=8, do_sample=True, min_length=10, max_length=64)
                 # Decode the output
+                decoded_output = tag_tokenizer.batch_decode(output, skip_special_tokens=True)[0]
                 # Extract unique tags
                 tags = list(set(decoded_output.strip().split(", ")))
     youtube_url = st.text_input("Enter YouTube URL:")
     # Button to get transcript
+    if st.button("Get Transcript", key='transcript_button'):
         if youtube_url:
             transcript = fetch_transcript(youtube_url)
             if "error" not in transcript.lower():
                 st.error(f"An error occurred: {transcript}")
         else:
             st.warning("Please enter a URL.")