Spaces:

ProfessorLeVesseur
/

VisionTexts

Sleeping

App Files Files Community

ProfessorLeVesseur committed on Nov 20, 2024

Commit

7956ee3

verified ·

1 Parent(s): bbec170

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -62

app.py CHANGED Viewed

@@ -1,29 +1,38 @@
 import streamlit as st
 import requests
 # Streamlit page setup
-st.set_page_config(
-    page_title="MTSS Image Accessibility Alt Text Generator",
-    layout="centered",
-    initial_sidebar_state="auto"
-)
 # Add the image with a specified width
-image_width = 300  # Set the desired width in pixels
 st.image('MTSS.ai_Logo.png', width=image_width)
 st.header('VisionTexts™ | Accessibility')
 st.subheader('Image Alt Text Creator')
-# Initialize the API key from Streamlit secrets
-api_key = st.secrets["huggingface_api_key"]
 # File uploader
 uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
 if uploaded_file:
-    # Display the uploaded image with specified width
-    image_width = 200  # Set the desired width in pixels
     with st.expander("Image", expanded=True):
         st.image(uploaded_file, caption=uploaded_file.name, width=image_width, use_column_width=False)
@@ -33,83 +42,69 @@ show_details = st.checkbox("Add details about the image.", value=False)
 if show_details:
     # Text input for additional details about the image
     additional_details = st.text_area(
-        "Include any specific information that is important to include in the alt text or reflect why the image is being used:",
     )
 # Toggle for modifying the prompt for complex images
 complex_image = st.checkbox("Is this a complex image?", value=False)
 if complex_image:
-    # Caption explaining the impact of the complex image toggle
     st.caption(
-        "By selecting this option, the app will create a detailed description that may exceed the typical 125-character limit for alt text."
     )
 # Button to trigger the analysis
 analyze_button = st.button("Analyze the Image")
-# Check if an image has been uploaded and if the analyze button has been pressed
 if uploaded_file is not None and analyze_button:
     with st.spinner("Analyzing the image ..."):
-        # Read the image bytes
         image_bytes = uploaded_file.read()
-        # Decide on the model to use
-        model_id = "Salesforce/blip-image-captioning-base"  # You can choose another model if desired
-        # Prepare headers and endpoint
-        headers = {
-            "Authorization": f"Bearer {api_key}",
-            "Content-Type": "application/octet-stream"
-        }
-        api_url = f"https://api-inference.huggingface.co/models/{model_id}"
-        # Prepare the parameters
-        parameters = {
-            # "max_length": 50,  # Adjust as needed
-            # "num_return_sequences": 1,
-        }
-        # Include additional details in the prompt if provided
         if show_details and additional_details:
-            prompt_text = f"{additional_details}"
-            parameters["inputs"] = prompt_text
-        # Make the request to the Hugging Face API
         try:
-            # Send the request with the image bytes
-            response = requests.post(
-                api_url,
-                headers=headers,
-                data=image_bytes,
-                params=parameters,
-                timeout=60  # Optional: increase timeout if needed
-            )
-            # Check for errors
-            response.raise_for_status()
-            # Parse the response
-            completion = response.json()
-            # Extract the generated description
-            if isinstance(completion, list) and "generated_text" in completion[0]:
-                assistant_response = completion[0]["generated_text"]
-                # Adjust the description based on complexity
-                if not complex_image and len(assistant_response) > 125:
-                    assistant_response = assistant_response[:125] + "..."
-                # Display the response
-                st.markdown(assistant_response)
-                st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
             else:
-                st.error("Unexpected response format from the API.")
-        except requests.exceptions.HTTPError as http_err:
-            st.error(f"HTTP error occurred: {http_err}")
         except Exception as e:
             st.error(f"An error occurred: {e}")
 else:
     # Warning for user action required
     if not uploaded_file and analyze_button:

 import streamlit as st
 import requests
+# Hugging Face API setup
+API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct"
+headers = {"Authorization": f"Bearer {st.secrets['hf_api_key']}"}
+# Function to query the model
+def query_image(image_data, prompt_text):
+    # Prepare the payload
+    payload = {
+        "inputs": {
+            "image": image_data,
+            "text": prompt_text
+        }
+    }
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
 # Streamlit page setup
+st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered", initial_sidebar_state="auto")
 # Add the image with a specified width
+image_width = 300  # Desired width in pixels
 st.image('MTSS.ai_Logo.png', width=image_width)
 st.header('VisionTexts™ | Accessibility')
 st.subheader('Image Alt Text Creator')
 # File uploader
 uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
 if uploaded_file:
+    # Display the uploaded image
+    image_width = 200  # Desired width in pixels
     with st.expander("Image", expanded=True):
         st.image(uploaded_file, caption=uploaded_file.name, width=image_width, use_column_width=False)
 if show_details:
     # Text input for additional details about the image
     additional_details = st.text_area(
+        "Include specific information important for the alt text or reflect why the image is being used:"
     )
 # Toggle for modifying the prompt for complex images
 complex_image = st.checkbox("Is this a complex image?", value=False)
 if complex_image:
     st.caption(
+        "By selecting this, the app will create a description exceeding the 125-character limit. "
+        "Add the description in a placeholder behind the image and 'Description in the content placeholder' in the alt text box."
     )
 # Button to trigger the analysis
 analyze_button = st.button("Analyze the Image")
+# Optimized prompt for complex images
+complex_image_prompt_text = (
+    "As an expert in image accessibility and alternative text, thoroughly describe the image provided. "
+    "Provide a brief description using not more than 500 characters that convey the essential information conveyed by the image in eight or fewer clear and concise sentences. "
+    "Skip phrases like 'image of' or 'picture of.' "
+    "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points, focusing on creating a seamless narrative."
+)
+# Check if an image has been uploaded and if the button has been pressed
 if uploaded_file is not None and analyze_button:
     with st.spinner("Analyzing the image ..."):
+        # Read the image file
         image_bytes = uploaded_file.read()
+        # Determine which prompt to use based on the complexity of the image
+        if complex_image:
+            prompt_text = complex_image_prompt_text
+        else:
+            prompt_text = (
+                "As an expert in image accessibility and alternative text, succinctly describe the image provided in less than 125 characters. "
+                "Provide a brief description using not more than 125 characters that convey the essential information conveyed by the image in three or fewer clear and concise sentences for use as alt text. "
+                "Skip phrases like 'image of' or 'picture of.' "
+                "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points and newlines, focusing on creating a seamless narrative that serves as effective alternative text for accessibility purposes."
+            )
         if show_details and additional_details:
+            prompt_text += (
+                f"\n\nInclude the additional context provided by the user in your description:\n{additional_details}"
+            )
+        # Query the model
         try:
+            response = query_image(image_bytes, prompt_text)
+            # Extract the generated text from the response
+            if isinstance(response, dict) and 'generated_text' in response:
+                alt_text = response['generated_text']
+            elif isinstance(response, list) and 'generated_text' in response[0]:
+                alt_text = response[0]['generated_text']
             else:
+                alt_text = "No description generated."
+            # Display the generated alt text
+            st.markdown(f"**Generated Alt Text:** {alt_text}")
+            st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
         except Exception as e:
             st.error(f"An error occurred: {e}")
 else:
     # Warning for user action required
     if not uploaded_file and analyze_button: