Spaces:

ProfessorLeVesseur
/

VisionTexts

Sleeping

App Files Community

ProfessorLeVesseur committed on Nov 26, 2024

Commit

7f1c702

verified ·

1 Parent(s): 2418a7f

Update app.py

Browse files

Files changed (1) hide show

app.py +1 -129

app.py CHANGED Viewed

@@ -1,131 +1,3 @@
-# import streamlit as st
-# import requests
-# from PIL import Image
-# import io
-# # Streamlit page setup
-# st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered", initial_sidebar_state="auto")
-# # Add the logo image with a specified width
-# image_width = 300  # Set the desired width in pixels
-# st.image('MTSS.ai_Logo.png', width=image_width)
-# st.header('VisionTexts™ | Accessibility')
-# st.subheader('Image Alt Text Creator')
-# # Retrieve the Hugging Face API Key from secrets
-# huggingface_api_key = st.secrets["huggingface_api_key"]
-# # API endpoints
-# # API_URL_CAPTION = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
-# API_URL_CAPTION = "https://api-inference.huggingface.co/models/nlpconnect/vit-gpt2-image-captioning"
-# API_URL_LLM = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf"
-# headers = {
-#     "Authorization": f"Bearer {huggingface_api_key}",
-#     "Content-Type": "application/json"
-# }
-# # File uploader allows user to add their own image
-# uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
-# if uploaded_file:
-#     # Display the uploaded image
-#     image = Image.open(uploaded_file).convert('RGB')
-#     image_width = 200  # Set the desired width in pixels
-#     with st.expander("Image", expanded=True):
-#         st.image(image, caption=uploaded_file.name, width=image_width, use_column_width=False)
-# else:
-#     st.warning("Please upload an image.")
-# # Option for adding additional details
-# show_details = st.checkbox("Add additional details about the image.", value=False)
-# if show_details:
-#     # Text input for additional details about the image
-#     additional_details = st.text_area(
-#         "Provide specific information that is important to include in the alt text or reflect why the image is being used:"
-#     )
-# else:
-#     additional_details = ""
-# # Button to trigger the analysis
-# analyze_button = st.button("Analyze the Image", type="secondary")
-# # Prompt for complex image description
-# complex_image_prompt_text = (
-#     "As an expert in image accessibility and alternative text, thoroughly describe the image caption provided. "
-#     "Provide a detailed description using not more than 500 characters that conveys the essential information in eight or fewer clear and concise sentences. "
-#     "Skip phrases like 'image of' or 'picture of.' "
-#     "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points, focusing on creating a seamless narrative."
-# )
-# # Functions to query the Hugging Face Inference API
-# def query_image_caption(image):
-#     # Convert PIL image to bytes
-#     buffered = io.BytesIO()
-#     image.save(buffered, format="JPEG")
-#     image_bytes = buffered.getvalue()
-#     response = requests.post(API_URL_CAPTION, headers={"Authorization": f"Bearer {huggingface_api_key}"}, data=image_bytes)
-#     return response.json()
-# def query_llm(prompt):
-#     payload = {
-#         "inputs": prompt,
-#         "parameters": {
-#             "max_new_tokens": 500,
-#             "return_full_text": False,
-#             "do_sample": True,
-#             "temperature": 0.7,
-#             "top_p": 0.9
-#         },
-#         "options": {
-#             "wait_for_model": True
-#         }
-#     }
-#     response = requests.post(API_URL_LLM, headers=headers, json=payload)
-#     return response.json()
-# # Check if an image has been uploaded and if the button has been pressed
-# if uploaded_file is not None and analyze_button:
-#     with st.spinner("Analyzing the image..."):
-#         # Get the caption from the image using the image captioning API
-#         caption_response = query_image_caption(image)
-#         # Handle potential errors from the API
-#         if isinstance(caption_response, dict) and caption_response.get("error"):
-#             st.error(f"Error with image captioning model: {caption_response['error']}")
-#         else:
-#             image_caption = caption_response[0]['generated_text']
-#             # Use the complex image prompt text
-#             prompt_text = complex_image_prompt_text
-#             # Include additional details if provided
-#             if additional_details:
-#                 prompt_text += f"\n\nAdditional context provided by the user:\n{additional_details}"
-#             # Create the prompt for the language model
-#             full_prompt = f"{prompt_text}\n\nImage Caption: {image_caption}"
-#             # Use the language model to generate the alt text description
-#             llm_response = query_llm(full_prompt)
-#             # Handle potential errors from the API
-#             if isinstance(llm_response, dict) and llm_response.get("error"):
-#                 st.error(f"Error with language model: {llm_response['error']}")
-#             else:
-#                 generated_text = llm_response[0]['generated_text'].strip()
-#                 st.markdown("### Generated Alt Text:")
-#                 st.write(generated_text)
-#                 st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
-# else:
-#     st.write("Please upload an image and click 'Analyze the Image' to generate alt text.")
 import streamlit as st
 import requests
 from PIL import Image
@@ -193,7 +65,7 @@ def query_image_caption(image):
     # Use the InferenceClient's image_to_text method
     response = client.image_to_text(
         # model="Salesforce/blip-image-captioning-large",
-        model="microsoft/Florence-2-base-ft",
         image=image_bytes,
     )
     return response

 import streamlit as st
 import requests
 from PIL import Image
     # Use the InferenceClient's image_to_text method
     response = client.image_to_text(
         # model="Salesforce/blip-image-captioning-large",
+        model="nlpconnect/vit-gpt2-image-captioning",
         image=image_bytes,
     )
     return response