Spaces:

ProfessorLeVesseur
/

VisionTexts

Sleeping

App Files Files Community

ProfessorLeVesseur committed on Nov 20, 2024

Commit

ee54df8

verified ·

1 Parent(s): 1e06fb8

Update app.py

Browse files

Files changed (1) hide show

app.py +172 -36

app.py CHANGED Viewed

@@ -1,10 +1,139 @@
 import streamlit as st
 import requests
 from PIL import Image
 import io
 # Streamlit page setup
-st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered", initial_sidebar_state="auto")
 # Add the logo image with a specified width
 image_width = 300  # Set the desired width in pixels
@@ -16,14 +145,8 @@ st.subheader('Image Alt Text Creator')
 # Retrieve the Hugging Face API Key from secrets
 huggingface_api_key = st.secrets["huggingface_api_key"]
-# API endpoints
-API_URL_CAPTION = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
-API_URL_LLM = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf"
-headers = {
-    "Authorization": f"Bearer {huggingface_api_key}",
-    "Content-Type": "application/json"
-}
 # File uploader allows user to add their own image
 uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
@@ -60,32 +183,48 @@ complex_image_prompt_text = (
 )
 # Functions to query the Hugging Face Inference API
 def query_image_caption(image):
     # Convert PIL image to bytes
     buffered = io.BytesIO()
     image.save(buffered, format="JPEG")
     image_bytes = buffered.getvalue()
-    response = requests.post(API_URL_CAPTION, headers={"Authorization": f"Bearer {huggingface_api_key}"}, data=image_bytes)
-    return response.json()
 def query_llm(prompt):
-    payload = {
-        "inputs": prompt,
-        "parameters": {
-            "max_new_tokens": 500,
-            "return_full_text": False,
-            "do_sample": True,
-            "temperature": 0.7,
-            "top_p": 0.9
-        },
-        "options": {
-            "wait_for_model": True
-        }
-    }
-    response = requests.post(API_URL_LLM, headers=headers, json=payload)
-    return response.json()
 # Check if an image has been uploaded and if the button has been pressed
 if uploaded_file is not None and analyze_button:
@@ -97,6 +236,7 @@ if uploaded_file is not None and analyze_button:
         if isinstance(caption_response, dict) and caption_response.get("error"):
             st.error(f"Error with image captioning model: {caption_response['error']}")
         else:
             image_caption = caption_response[0]['generated_text']
             # Use the complex image prompt text
@@ -106,20 +246,16 @@ if uploaded_file is not None and analyze_button:
             if additional_details:
                 prompt_text += f"\n\nAdditional context provided by the user:\n{additional_details}"
-            # Create the prompt for the language model
             full_prompt = f"{prompt_text}\n\nImage Caption: {image_caption}"
             # Use the language model to generate the alt text description
             llm_response = query_llm(full_prompt)
-            # Handle potential errors from the API
-            if isinstance(llm_response, dict) and llm_response.get("error"):
-                st.error(f"Error with language model: {llm_response['error']}")
-            else:
-                generated_text = llm_response[0]['generated_text'].strip()
-                st.markdown("### Generated Alt Text:")
-                st.write(generated_text)
-                st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
 else:
     st.write("Please upload an image and click 'Analyze the Image' to generate alt text.")

+# import streamlit as st
+# import requests
+# from PIL import Image
+# import io
+# # Streamlit page setup
+# st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered", initial_sidebar_state="auto")
+# # Add the logo image with a specified width
+# image_width = 300  # Set the desired width in pixels
+# st.image('MTSS.ai_Logo.png', width=image_width)
+# st.header('VisionTexts™ | Accessibility')
+# st.subheader('Image Alt Text Creator')
+# # Retrieve the Hugging Face API Key from secrets
+# huggingface_api_key = st.secrets["huggingface_api_key"]
+# # API endpoints
+# # API_URL_CAPTION = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
+# API_URL_CAPTION = "https://api-inference.huggingface.co/models/nlpconnect/vit-gpt2-image-captioning"
+# API_URL_LLM = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf"
+# headers = {
+#     "Authorization": f"Bearer {huggingface_api_key}",
+#     "Content-Type": "application/json"
+# }
+# # File uploader allows user to add their own image
+# uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
+# if uploaded_file:
+#     # Display the uploaded image
+#     image = Image.open(uploaded_file).convert('RGB')
+#     image_width = 200  # Set the desired width in pixels
+#     with st.expander("Image", expanded=True):
+#         st.image(image, caption=uploaded_file.name, width=image_width, use_column_width=False)
+# else:
+#     st.warning("Please upload an image.")
+# # Option for adding additional details
+# show_details = st.checkbox("Add additional details about the image.", value=False)
+# if show_details:
+#     # Text input for additional details about the image
+#     additional_details = st.text_area(
+#         "Provide specific information that is important to include in the alt text or reflect why the image is being used:"
+#     )
+# else:
+#     additional_details = ""
+# # Button to trigger the analysis
+# analyze_button = st.button("Analyze the Image", type="secondary")
+# # Prompt for complex image description
+# complex_image_prompt_text = (
+#     "As an expert in image accessibility and alternative text, thoroughly describe the image caption provided. "
+#     "Provide a detailed description using not more than 500 characters that conveys the essential information in eight or fewer clear and concise sentences. "
+#     "Skip phrases like 'image of' or 'picture of.' "
+#     "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points, focusing on creating a seamless narrative."
+# )
+# # Functions to query the Hugging Face Inference API
+# def query_image_caption(image):
+#     # Convert PIL image to bytes
+#     buffered = io.BytesIO()
+#     image.save(buffered, format="JPEG")
+#     image_bytes = buffered.getvalue()
+#     response = requests.post(API_URL_CAPTION, headers={"Authorization": f"Bearer {huggingface_api_key}"}, data=image_bytes)
+#     return response.json()
+# def query_llm(prompt):
+#     payload = {
+#         "inputs": prompt,
+#         "parameters": {
+#             "max_new_tokens": 500,
+#             "return_full_text": False,
+#             "do_sample": True,
+#             "temperature": 0.7,
+#             "top_p": 0.9
+#         },
+#         "options": {
+#             "wait_for_model": True
+#         }
+#     }
+#     response = requests.post(API_URL_LLM, headers=headers, json=payload)
+#     return response.json()
+# # Check if an image has been uploaded and if the button has been pressed
+# if uploaded_file is not None and analyze_button:
+#     with st.spinner("Analyzing the image..."):
+#         # Get the caption from the image using the image captioning API
+#         caption_response = query_image_caption(image)
+#         # Handle potential errors from the API
+#         if isinstance(caption_response, dict) and caption_response.get("error"):
+#             st.error(f"Error with image captioning model: {caption_response['error']}")
+#         else:
+#             image_caption = caption_response[0]['generated_text']
+#             # Use the complex image prompt text
+#             prompt_text = complex_image_prompt_text
+#             # Include additional details if provided
+#             if additional_details:
+#                 prompt_text += f"\n\nAdditional context provided by the user:\n{additional_details}"
+#             # Create the prompt for the language model
+#             full_prompt = f"{prompt_text}\n\nImage Caption: {image_caption}"
+#             # Use the language model to generate the alt text description
+#             llm_response = query_llm(full_prompt)
+#             # Handle potential errors from the API
+#             if isinstance(llm_response, dict) and llm_response.get("error"):
+#                 st.error(f"Error with language model: {llm_response['error']}")
+#             else:
+#                 generated_text = llm_response[0]['generated_text'].strip()
+#                 st.markdown("### Generated Alt Text:")
+#                 st.write(generated_text)
+#                 st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
+# else:
+#     st.write("Please upload an image and click 'Analyze the Image' to generate alt text.")
 import streamlit as st
 import requests
 from PIL import Image
 import io
+from huggingface_hub import InferenceClient
 # Streamlit page setup
+st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered")
 # Add the logo image with a specified width
 image_width = 300  # Set the desired width in pixels
 # Retrieve the Hugging Face API Key from secrets
 huggingface_api_key = st.secrets["huggingface_api_key"]
+# Initialize the Hugging Face inference client
+client = InferenceClient(api_token=huggingface_api_key)
 # File uploader allows user to add their own image
 uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
 )
 # Functions to query the Hugging Face Inference API
 def query_image_caption(image):
     """Caption *image* with the hosted BLIP image-captioning model.

     Args:
         image: A PIL image. It is re-encoded as JPEG before upload, so it
             must be in a mode JPEG supports (e.g. RGB).

     Returns:
         A one-element list ``[{"generated_text": caption}]`` -- the shape
         the calling code indexes as ``caption_response[0]['generated_text']``.

     NOTE(review): failures are expected to surface as exceptions raised by
     the client rather than as an ``{"error": ...}`` dict -- confirm against
     the installed huggingface_hub version.
     """
     # Convert PIL image to JPEG bytes for upload
     buffered = io.BytesIO()
     image.save(buffered, format="JPEG")
     image_bytes = buffered.getvalue()

     # Use the documented task method instead of the low-level `post`, which
     # does not accept a `headers` argument and returns undecoded bytes.
     result = client.image_to_text(
         image_bytes,
         model="Salesforce/blip-image-captioning-large",
     )

     # Recent client versions return an object exposing `.generated_text`;
     # fall back to the value itself in case a plain string is returned.
     caption = getattr(result, "generated_text", result)
     return [{"generated_text": caption}]
 def query_llm(prompt):
     """Generate alt text for *prompt* with the hosted Llama 2 chat model.

     Sends a fixed system message followed by *prompt* as the user message,
     consumes the streamed reply, and returns the concatenated text.

     Args:
         prompt: The full user prompt (instructions plus image caption).

     Returns:
         The generated text with leading/trailing whitespace stripped; an
         empty string if the stream carried no content.
     """
     # System prompt (optional)
     system_prompt = "You are an expert in image accessibility and alternative text."

     # `chat_completion` is the InferenceClient chat method; the client has
     # no `chat_completions` attribute.
     stream = client.chat_completion(
         messages=[
             {"role": "system", "content": system_prompt},
             {"role": "user", "content": prompt},
         ],
         model="meta-llama/Llama-2-7b-chat-hf",
         stream=True,
         temperature=0.5,
         max_tokens=1024,
         top_p=0.7,
     )

     # Collect the streamed response. Chunks may carry no choices, and a
     # delta's content may be None, so skip anything empty.
     pieces = []
     for chunk in stream:
         if not chunk.choices:
             continue
         content = chunk.choices[0].delta.content
         if content:
             pieces.append(content)
     return "".join(pieces).strip()
 # Check if an image has been uploaded and if the button has been pressed
 if uploaded_file is not None and analyze_button:
         if isinstance(caption_response, dict) and caption_response.get("error"):
             st.error(f"Error with image captioning model: {caption_response['error']}")
         else:
+            # Extract the generated caption
             image_caption = caption_response[0]['generated_text']
             # Use the complex image prompt text
             if additional_details:
                 prompt_text += f"\n\nAdditional context provided by the user:\n{additional_details}"
+            # Create the full prompt
             full_prompt = f"{prompt_text}\n\nImage Caption: {image_caption}"
             # Use the language model to generate the alt text description
             llm_response = query_llm(full_prompt)
+            # Display the generated alt text
+            st.markdown("### Generated Alt Text:")
+            st.write(llm_response)
+            st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
 else:
     st.write("Please upload an image and click 'Analyze the Image' to generate alt text.")