Spaces:

gowshiselva
/

image-description

Runtime error

App Files Files Community

gowshiselva committed on Mar 23, 2025

Commit

6ec3614

verified ·

1 Parent(s): 9cc23d2

Create app.py

Browse files

Files changed (1) hide show

app.py +149 -0

app.py ADDED Viewed

	@@ -0,0 +1,149 @@

+import gradio as gr
+import torch
+from PIL import Image
+from transformers import AutoProcessor, AutoModelForCausalLM, BlipForConditionalGeneration, Blip2ForConditionalGeneration
# --- Model setup -----------------------------------------------------------
# Hub identifiers of the two captioning checkpoints used by the app.
BLIP2_CHECKPOINT = "Salesforce/blip2-opt-2.7b"
BLIP_LARGE_CHECKPOINT = "Salesforce/blip-image-captioning-large"

print("Loading models...")

# BLIP-2 is the main model: it writes the long, detailed description.
blip2_model = Blip2ForConditionalGeneration.from_pretrained(BLIP2_CHECKPOINT)
blip2_processor = AutoProcessor.from_pretrained(BLIP2_CHECKPOINT)

# BLIP large is the secondary model: it supplies the short basic caption.
blip_large = BlipForConditionalGeneration.from_pretrained(BLIP_LARGE_CHECKPOINT)
blip_processor = AutoProcessor.from_pretrained(BLIP_LARGE_CHECKPOINT)

# Prefer the GPU when one is visible to torch; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
for _model in (blip2_model, blip_large):
    _model.to(device)

print(f"Models loaded. Using device: {device}")
def _build_prompt(detail_level, emotion_focus, style_focus):
    """Assemble the BLIP-2 text prompt, including only the enabled focus areas."""
    scope = "all elements including tiny details" if detail_level > 3 else "main elements"
    parts = [f"Describe this image with extreme detail, focus on {scope}"]
    if emotion_focus > 2:
        parts.append("Describe the mood, emotions, and atmosphere conveyed in this image")
    if style_focus > 2:
        parts.append("Describe the artistic style, lighting, colors, and composition")
    # Joining only the active parts avoids the stray ". ." fragments that an
    # empty emotion/style prompt would otherwise leave behind.
    return ". ".join(parts)


def _detect_elements(description):
    """Return the structured-element names whose keywords occur in the description."""
    text = description.lower()  # lower-case once instead of once per keyword group
    keyword_groups = (
        ("subjects", ("person", "people")),
        ("setting", ("background", "scene", "setting")),
        ("lighting", ("light", "shadow", "bright", "dark")),
        ("colors", ("color", "red", "blue", "green", "yellow", "tone")),
    )
    # Substring matching is intentional here: "lighting" or "colorful" should count.
    return [name for name, words in keyword_groups if any(word in text for word in words)]


def _format_result(basic_caption, detailed_description, detail_level, emotion_focus, style_focus):
    """Render the captions (and, for high detail levels, the extras) as Markdown-style text."""
    sections = [
        f"## Basic Caption:\n{basic_caption}\n\n",
        f"## Detailed Description for AI Image Recreation:\n{detailed_description}\n\n",
    ]

    if detail_level >= 4:
        # Structured breakdown: one placeholder bullet per detected element.
        sections.append("## Structured Elements:\n")
        sections.extend(
            f"- {element.capitalize()}: "
            f"[Extract relevant details about {element} from the description]\n"
            for element in _detect_elements(detailed_description)
        )

        # Prompt suggestion: caption + first three sentence fragments + tags.
        fragments = [piece.strip() for piece in detailed_description.split(".")[:3]]
        # This branch only runs for detail_level >= 4, so the tag is always "high detail".
        prompt_parts = [basic_caption, *fragments, "high detail"]
        if emotion_focus > 3:
            prompt_parts.append("emotional")
        if style_focus > 3:
            prompt_parts.append("artistic")
        sections.append("\n## Suggested AI Image Prompt:\n")
        # Drop empty pieces so the prompt never contains ", ," runs or a trailing comma.
        sections.append(", ".join(part for part in prompt_parts if part))

    return "".join(sections)


def generate_advanced_description(image, detail_level, emotion_focus, style_focus):
    """
    Generate an advanced description of the image with varying levels of detail.

    Args:
        image: Input image (the UI passes a PIL image, or None when nothing is uploaded)
        detail_level: Level of detail (1-5)
        emotion_focus: Focus on emotions (0-5)
        style_focus: Focus on artistic style (0-5)

    Returns:
        A formatted string with the basic caption and the detailed description.
        For detail_level >= 4 it also contains a structured element list and a
        suggested image-generation prompt. If no image is given, or if
        preprocessing/generation fails, a human-readable message is returned
        instead of raising.
    """
    if image is None:
        return "Please upload an image to generate a description."

    # Slider values may arrive as floats; generation lengths must be integers.
    detail_level = int(detail_level)
    combined_prompt = _build_prompt(detail_level, emotion_focus, style_focus)

    try:
        # Process image for both models. The text prompt is handed to the BLIP-2
        # processor: `generate` has no `prompt` argument, and passing one makes
        # the call fail on every request.
        blip_inputs = blip_processor(images=image, return_tensors="pt").to(device)
        blip2_inputs = blip2_processor(
            images=image, text=combined_prompt, return_tensors="pt"
        ).to(device)

        with torch.no_grad():
            # Get basic caption from BLIP large
            basic_outputs = blip_large.generate(**blip_inputs, max_length=50)
            basic_caption = blip_processor.decode(basic_outputs[0], skip_special_tokens=True)

            # Get detailed description from BLIP-2.
            # top_p is not passed: it only applies when sampling is enabled,
            # and this call uses beam search.
            outputs = blip2_model.generate(
                **blip2_inputs,
                max_length=150 + (detail_level * 50),
                num_beams=5,
                min_length=50,
                repetition_penalty=1.5,
                length_penalty=1.0,
            )
            detailed_description = blip2_processor.decode(
                outputs[0], skip_special_tokens=True
            ).strip()
    except Exception as e:
        # UI boundary: report the failure in the output box rather than crashing.
        return f"Error generating description: {str(e)}"

    # NOTE(review): depending on the installed transformers version the decoded
    # text may start with the prompt itself - strip it if so; confirm on upgrade.
    if detailed_description.startswith(combined_prompt):
        detailed_description = detailed_description[len(combined_prompt):].strip()

    return _format_result(
        basic_caption, detailed_description, detail_level, emotion_focus, style_focus
    )
# --- Gradio interface ------------------------------------------------------
APP_TITLE = "Advanced Image Description Generator"

# Usage/about text rendered below the controls.
USAGE_NOTES = """
    ## How to Use
    1. Upload an image
    2. Adjust the sliders to control description detail:
       - Detail Level: How comprehensive the description should be
       - Emotion Focus: Emphasis on mood and feelings
       - Style Focus: Emphasis on artistic elements
    3. Click "Generate Description"
    4. Use the generated text to prompt AI image generators
    ## About
    This app uses BLIP-2 and BLIP large models to analyze images and generate detailed descriptions
    suitable for recreating similar images with AI image generators like Stable Diffusion, DALL-E, or Midjourney.
    """


def _focus_slider(label, lowest):
    """Build one of the integer-step sliders; all share max 5 and default 3."""
    return gr.Slider(label=label, minimum=lowest, maximum=5, step=1, value=3)


with gr.Blocks(title=APP_TITLE) as demo:
    # Header
    gr.Markdown("# Advanced Image Description Generator for AI Image Recreation")
    gr.Markdown("Upload an image to generate a detailed description that can help AI image generators recreate similar images.")

    with gr.Row():
        # Left column: image upload, the three sliders, and the trigger button.
        with gr.Column(scale=1):
            input_image = gr.Image(type="pil", label="Upload Image")
            with gr.Row():
                detail_slider = _focus_slider("Detail Level", lowest=1)
                emotion_slider = _focus_slider("Emotion Focus", lowest=0)
                style_slider = _focus_slider("Style/Artistic Focus", lowest=0)
            submit_btn = gr.Button("Generate Description")

        # Right column: the generated text.
        with gr.Column(scale=1):
            output_text = gr.Textbox(lines=20, label="Image Description")

    # Wire the button to the description generator.
    control_inputs = [input_image, detail_slider, emotion_slider, style_slider]
    submit_btn.click(
        fn=generate_advanced_description,
        inputs=control_inputs,
        outputs=output_text,
    )

    gr.Markdown(USAGE_NOTES)

# Start the server only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()