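# Page-header residue kept for provenance (not part of the program):
# author: James040 | commit message: "Update app.py" | commit: fbfeb7c (verified)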
import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
# One checkpoint id shared by the processor and the model so the two
# always load from the same place.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"

print("Loading BLIP Processor and Model...")
# The BLIP classes are instantiated directly rather than through a
# transformers pipeline (the original author's note says the pipeline
# names were buggy).
processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)
print("Model loaded successfully!")
def generate_prompt(input_img):
    """Caption an uploaded image with the module-level BLIP model.

    Args:
        input_img: The uploaded image, or ``None`` when nothing was
            uploaded. It must provide ``convert("RGB")``; the UI in this
            file supplies it through ``gr.Image(type="pil")``.

    Returns:
        The decoded caption on success, ``"Please upload an image."``
        when ``input_img`` is ``None``, or a ``"System Error: ..."``
        string carrying the exception message when any step fails.
        Failures are reported as text; nothing is raised to the caller.
    """
    if input_img is None:
        return "Please upload an image."

    try:
        # Normalise to RGB first; per the original author's note this
        # avoids crashes on transparent PNGs.
        clean_image = input_img.convert("RGB")
        # Turn the image into the tensor inputs the model consumes.
        inputs = processor(clean_image, return_tensors="pt")
        # max_new_tokens is only an upper bound on the caption length;
        # it does not force the model to produce a longer description.
        output = model.generate(**inputs, max_new_tokens=75)
        # Decode the first generated sequence back to text, dropping
        # special tokens.
        return processor.decode(output[0], skip_special_tokens=True)
    except Exception as e:
        # This is the UI boundary: log the failure and show it in the
        # output textbox instead of letting the event handler raise.
        print(f"Error processing image: {e}")
        return f"System Error: {str(e)}"
# Build the User Interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Heading and one-line description. The text is assembled from
    # explicit pieces so its value does not depend on how this source
    # file is indented.
    gr.Markdown(
        "\n"
        "# 🔍 Image to Simple Text Interrogator\n"
        "Upload an image to reverse-engineer its contents into a simple "
        "Single line Text.\n"
    )
    # Image upload and read-only result box, side by side.
    with gr.Row():
        img_input = gr.Image(type="pil", label="Upload Image")
        text_output = gr.Textbox(
            label="Generated AI Prompt",
            lines=4,
            interactive=False,
            show_copy_button=True,
        )
    # NOTE(review): the original indentation was lost, so the button is
    # assumed to sit below the row rather than inside it -- confirm.
    btn = gr.Button("Analyze Image", variant="primary")
    # Connect the button and declare the API name for the blog
    btn.click(
        fn=generate_prompt,
        inputs=img_input,
        outputs=text_output,
        api_name="get_prompt",
    )
# Launch with strict queuing to protect the CPU
if __name__ == "__main__":
    # default_concurrency_limit=1 sets the default per-event concurrency
    # to one. The server binds to all interfaces on port 7860 with
    # server-side rendering switched off.
    demo.queue(default_concurrency_limit=1).launch(
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False,
    )