gowshiselva commited on
Commit
6ec3614
·
verified ·
1 Parent(s): 9cc23d2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +149 -0
app.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import torch
from transformers import AutoProcessor, BlipForConditionalGeneration, Blip2ForConditionalGeneration

# Initial setup.
# NOTE: the previously imported AutoModelForCausalLM and PIL.Image were never
# used anywhere in this file, so they have been dropped.
print("Loading models...")

# Main model for detailed captions (BLIP-2, OPT-2.7b backbone).
blip2_model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b")
blip2_processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")

# Secondary model for emotion and detail detection (BLIP large captioner).
blip_large = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
blip_processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")

# Move models to GPU if available and switch to inference mode
# (disables dropout; this app never trains).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
blip2_model.to(device)
blip2_model.eval()
blip_large.to(device)
blip_large.eval()

print(f"Models loaded. Using device: {device}")
24
def generate_advanced_description(image, detail_level, emotion_focus, style_focus):
    """
    Generate an advanced description of the image with varying levels of detail.

    Args:
        image: Input PIL image, or None when nothing was uploaded.
        detail_level: Level of detail (1-5); > 3 asks for tiny details and
            lengthens the BLIP-2 output budget.
        emotion_focus: Focus on emotions (0-5); > 2 adds a mood/atmosphere prompt.
        style_focus: Focus on artistic style (0-5); > 2 adds a style prompt.

    Returns:
        A markdown-formatted string with a basic caption, a detailed
        description, optional structured elements, and a suggested AI image
        prompt — or an error message string if generation fails.
    """
    if image is None:
        return "Please upload an image to generate a description."

    # Basic prompts for different aspects.
    detail_prompt = f"Describe this image with extreme detail, focus on {'all elements including tiny details' if detail_level > 3 else 'main elements'}"
    emotion_prompt = "Describe the mood, emotions, and atmosphere conveyed in this image" if emotion_focus > 2 else ""
    style_prompt = "Describe the artistic style, lighting, colors, and composition" if style_focus > 2 else ""

    # Combine prompts based on focus areas.
    combined_prompt = f"{detail_prompt}. {emotion_prompt}. {style_prompt}"

    # Process image for both models. BUG FIX: BLIP-2 is conditioned on text by
    # passing the prompt through the processor; generate() has no `prompt`
    # kwarg, and passing one makes transformers raise
    # "model_kwargs are not used by the model: ['prompt']".
    blip2_inputs = blip2_processor(images=image, text=combined_prompt, return_tensors="pt").to(device)
    blip_inputs = blip_processor(images=image, return_tensors="pt").to(device)

    try:
        # Generate both basic and detailed descriptions.
        with torch.no_grad():
            # Get basic caption from BLIP large.
            basic_outputs = blip_large.generate(**blip_inputs, max_length=50)
            basic_caption = blip_processor.decode(basic_outputs[0], skip_special_tokens=True)

            # Get detailed description from BLIP-2. int() guards against
            # Gradio delivering the slider value as a float.
            outputs = blip2_model.generate(
                **blip2_inputs,
                max_length=150 + (int(detail_level) * 50),
                num_beams=5,
                min_length=50,
                top_p=0.9,
                repetition_penalty=1.5,
                length_penalty=1.0,
            )
            detailed_description = blip2_processor.decode(outputs[0], skip_special_tokens=True)

        # Format results for AI image generation.
        formatted_result = ""

        # Add basic subject identification.
        formatted_result += f"## Basic Caption:\n{basic_caption}\n\n"

        # Add detailed description.
        formatted_result += f"## Detailed Description for AI Image Recreation:\n{detailed_description}\n\n"

        # Add formatting guide based on detail level.
        if detail_level >= 4:
            # Extract potential elements for structured description via
            # simple keyword spotting on the generated text.
            lowered = detailed_description.lower()
            elements = []
            if "person" in lowered or "people" in lowered:
                elements.append("subjects")
            if any(word in lowered for word in ["background", "scene", "setting"]):
                elements.append("setting")
            if any(word in lowered for word in ["light", "shadow", "bright", "dark"]):
                elements.append("lighting")
            if any(word in lowered for word in ["color", "red", "blue", "green", "yellow", "tone"]):
                elements.append("colors")

            # Create a structured breakdown.
            formatted_result += "## Structured Elements:\n"
            for element in elements:
                formatted_result += f"- {element.capitalize()}: " + \
                                    f"[Extract relevant details about {element} from the description]\n"

        # Add prompt suggestion (first three sentences of the detail text).
        formatted_result += "\n## Suggested AI Image Prompt:\n"
        formatted_result += f"{basic_caption}, {', '.join(detailed_description.split('.')[:3])}, " + \
                            f"{'high detail' if detail_level > 3 else 'moderate detail'}, " + \
                            f"{'emotional' if emotion_focus > 3 else ''}, " + \
                            f"{'artistic' if style_focus > 3 else ''}"

        return formatted_result

    except Exception as e:
        # Surface the failure to the UI instead of crashing the app.
        return f"Error generating description: {str(e)}"
109
# Create Gradio interface.
# Layout: two equal columns — inputs (image + sliders + button) on the left,
# the generated markdown description on the right.
with gr.Blocks(title="Advanced Image Description Generator") as demo:
    gr.Markdown("# Advanced Image Description Generator for AI Image Recreation")
    gr.Markdown("Upload an image to generate a detailed description that can help AI image generators recreate similar images.")

    with gr.Row():
        with gr.Column(scale=1):
            # type="pil" delivers a PIL image to generate_advanced_description.
            input_image = gr.Image(label="Upload Image", type="pil")
            with gr.Row():
                # Slider ranges mirror the thresholds used inside
                # generate_advanced_description (e.g. detail > 3, focus > 2).
                detail_slider = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Detail Level")
                emotion_slider = gr.Slider(minimum=0, maximum=5, value=3, step=1, label="Emotion Focus")
                style_slider = gr.Slider(minimum=0, maximum=5, value=3, step=1, label="Style/Artistic Focus")
            submit_btn = gr.Button("Generate Description")

        with gr.Column(scale=1):
            output_text = gr.Textbox(label="Image Description", lines=20)

    # Wire the button to the generator function defined above.
    submit_btn.click(
        fn=generate_advanced_description,
        inputs=[input_image, detail_slider, emotion_slider, style_slider],
        outputs=output_text
    )

    # Static usage / about text rendered below the main interface.
    gr.Markdown("""
    ## How to Use
    1. Upload an image
    2. Adjust the sliders to control description detail:
       - Detail Level: How comprehensive the description should be
       - Emotion Focus: Emphasis on mood and feelings
       - Style Focus: Emphasis on artistic elements
    3. Click "Generate Description"
    4. Use the generated text to prompt AI image generators

    ## About
    This app uses BLIP-2 and BLIP large models to analyze images and generate detailed descriptions
    suitable for recreating similar images with AI image generators like Stable Diffusion, DALL-E, or Midjourney.
    """)

# Launch the app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()