"""Gradio app that captions an uploaded image with the Salesforce BLIP model.

The processor and model are loaded once at import time, so every request
reuses the same in-memory weights.
"""

import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image

print("Loading BLIP Processor and Model...")

# 1. Load the specific components directly (bypasses the buggy pipeline names)
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

print("Model loaded successfully!")


def generate_prompt(input_img) -> str:
    """Return a BLIP-generated caption for ``input_img``.

    Args:
        input_img: The PIL image supplied by the ``gr.Image(type="pil")``
            component, or ``None`` when nothing has been uploaded.

    Returns:
        The decoded caption; ``"Please upload an image."`` when no image was
        given; or a ``"System Error: ..."`` message when processing fails.
        Failures are reported as text rather than raised so the UI textbox
        always receives a string.
    """
    if input_img is None:
        return "Please upload an image."

    try:
        # 2. Convert to RGB to prevent transparent PNG crashes
        clean_image = input_img.convert('RGB')

        # 3. Process the image into the tensors the model expects
        inputs = processor(clean_image, return_tensors="pt")

        # 4. Generate the text. max_new_tokens is only an upper bound on the
        #    caption length; it does not force a longer description.
        output = model.generate(**inputs, max_new_tokens=75)

        # 5. Decode the token ids back into human-readable text
        generated_text = processor.decode(output[0], skip_special_tokens=True)
        return generated_text
    except Exception as e:
        # UI boundary: log the failure and show it to the user instead of crashing.
        print(f"Error processing image: {e}")
        return f"System Error: {str(e)}"


# Build the User Interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🔍 Image to Simple Text Interrogator
        Upload an image to reverse-engineer its contents into a simple Single line Text.
        """
    )

    with gr.Row():
        img_input = gr.Image(type="pil", label="Upload Image")
        text_output = gr.Textbox(
            label="Generated AI Prompt",
            lines=4,
            interactive=False,
            show_copy_button=True,
        )

    btn = gr.Button("Analyze Image", variant="primary")

    # Connect the button and declare the API name for the blog
    btn.click(
        fn=generate_prompt,
        inputs=img_input,
        outputs=text_output,
        api_name="get_prompt",
    )

# Launch with strict queuing to protect the CPU: one request is processed at a time
if __name__ == "__main__":
    demo.queue(default_concurrency_limit=1).launch(
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False,
    )