File size: 6,358 Bytes
a49af3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# -*- coding: utf-8 -*-
"""Untitled4.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1LWcAEqZ865KCYhK-crQ3RKcFJEcH4xhD
"""

import os

from huggingface_hub import login

# Authenticate with the Hugging Face Hub using a token read from the
# HF_TOKEN environment variable instead of a literal committed to the file.
# When the variable is unset the login step is skipped.
# NOTE(review): a later comment in this file calls the checkpoint a
# "public model", so anonymous access is presumably sufficient - confirm.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)

import torch
from transformers import Blip2Processor, Blip2ForConditionalGeneration
from PIL import Image
import gradio as gr
from io import BytesIO
import requests

# -----------------------------
# 1️⃣ Device and model setup
# -----------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Half precision on GPU, full precision on CPU. The inference function casts
# its inputs with the same rule, so the weights have to be loaded in the
# matching dtype; otherwise generation on CUDA fails with a Half/float
# type mismatch.
model_dtype = torch.float16 if device == "cuda" else torch.float32

# BLIP-2 checkpoint with a FLAN-T5-XL language model (COCO fine-tuned)
model_name = "Salesforce/blip2-flan-t5-xl-coco"
processor = Blip2Processor.from_pretrained(model_name)
model = Blip2ForConditionalGeneration.from_pretrained(
    model_name, torch_dtype=model_dtype
)
model.to(device)

# -----------------------------
# 2️⃣ Inference function
# -----------------------------
def analyze_eo_image(image, question):
    """
    Answer a natural-language question about an Earth Observation (EO) image.

    Args:
        image: PIL image to analyze (the Gradio input is declared with
            type="pil"), or None when nothing was uploaded.
        question: Question text; None or whitespace-only counts as missing.

    Returns:
        The decoded model answer, a prompt asking for input when the image
        or question is missing, or an "Error: ..." string if anything fails.
        This function never raises.
    """
    try:
        # Treat a None question the same as an empty one instead of letting
        # None.strip() surface as a raw AttributeError message.
        if image is None or not (question or "").strip():
            return "Please upload an EO image and ask a question."

        # Convert image to RGB if needed
        if image.mode != "RGB":
            image = image.convert("RGB")

        # Cast floating-point inputs to the dtype the model was actually
        # loaded in. Hard-coding float16 on CUDA breaks when the weights
        # are float32 (the from_pretrained default).
        inputs = processor(image, question, return_tensors="pt").to(
            device, model.dtype
        )

        # Generation needs no gradients; skip autograd bookkeeping.
        with torch.inference_mode():
            out = model.generate(
                **inputs,
                max_new_tokens=80,  # allow slightly longer explanations
                do_sample=True,
                temperature=0.7,
            )

        # Decode and return
        return processor.decode(out[0], skip_special_tokens=True)

    except Exception as e:
        # UI boundary: report the failure as text rather than crashing
        return f"Error: {e}"

# -----------------------------
# 3️⃣ Optional: Analyze image from URL
# -----------------------------
def analyze_eo_url(url, question):
    """
    Download an image from a URL and answer a question about it.

    Args:
        url: Address of the image to fetch; None or blank counts as missing.
        question: Question text forwarded to analyze_eo_image.

    Returns:
        Whatever analyze_eo_image returns for the downloaded image, or an
        "Error loading image: ..." string when the URL is missing or the
        download/decoding fails.
    """
    if not url or not url.strip():
        return "Error loading image: no URL provided."

    try:
        # Bound the wait so an unresponsive host cannot hang the request
        response = requests.get(url.strip(), timeout=30)
        # Fail on 4xx/5xx instead of trying to decode an error page as an image
        response.raise_for_status()
        image = Image.open(BytesIO(response.content)).convert("RGB")
        return analyze_eo_image(image, question)
    except Exception as e:
        return f"Error loading image: {e}"

# -----------------------------
# 4️⃣ Gradio Interface
# -----------------------------
# Single-page UI: one image upload plus one free-text question, answered by
# analyze_eo_image and shown as plain text. Flagging is turned off.
interface = gr.Interface(
    title="🌍 EO Image Analysis with Multimodal GPT-OSS",
    description="""
    Ask questions about Earth Observation (EO) images.
    Powered by BLIP-2 + FLAN-T5 (small, memory-efficient).
    Examples: "Identify land cover types", "Where is the river?",
    "Has urban area expanded?".
    """,
    fn=analyze_eo_image,
    inputs=[
        gr.Image(label="Upload EO Image", type="pil"),
        gr.Textbox(label="Ask a Question about the EO Image"),
    ],
    outputs="text",
    allow_flagging="never",
)

# Start the app and request a shareable link
interface.launch(share=True)

import torch
from transformers import Blip2Processor, Blip2ForConditionalGeneration
from PIL import Image
import gradio as gr
from io import BytesIO
import requests

# -----------------------------
# Device and Model Setup
# -----------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Half precision on GPU, full precision on CPU. The inference function casts
# its inputs with the same rule, so the weights have to be loaded in the
# matching dtype; otherwise generation on CUDA fails with a Half/float
# type mismatch.
model_dtype = torch.float16 if device == "cuda" else torch.float32

model_name = "Salesforce/blip2-flan-t5-xl-coco"  # public model
processor = Blip2Processor.from_pretrained(model_name)
model = Blip2ForConditionalGeneration.from_pretrained(
    model_name, torch_dtype=model_dtype
)
model.to(device)

# -----------------------------
# Inference Function
# -----------------------------
def analyze_eo_image(image, question):
    """
    Answer a natural-language question about an Earth Observation (EO) image.

    Args:
        image: PIL image to analyze (the Gradio input is declared with
            type="pil"), or None when nothing was uploaded.
        question: Question text; None or whitespace-only counts as missing.

    Returns:
        The decoded model answer, or a prompt asking for input when the
        image or question is missing. Errors raised during preprocessing
        or generation are not caught here and propagate to the caller.
    """
    # Treat a None question the same as an empty one instead of crashing
    # on None.strip().
    if image is None or not (question or "").strip():
        return "Please upload an EO image and ask a question."

    if image.mode != "RGB":
        image = image.convert("RGB")

    # Cast floating-point inputs to the dtype the model was actually loaded
    # in. Hard-coding float16 on CUDA breaks when the weights are float32
    # (the from_pretrained default).
    inputs = processor(image, question, return_tensors="pt").to(
        device, model.dtype
    )

    # Generation needs no gradients; skip autograd bookkeeping.
    with torch.inference_mode():
        out = model.generate(
            **inputs,
            max_new_tokens=80,
            do_sample=True,
            temperature=0.7,
        )

    return processor.decode(out[0], skip_special_tokens=True)

# -----------------------------
# Optional: URL input
# -----------------------------
def analyze_eo_url(url, question):
    """
    Download an image from a URL and answer a question about it.

    Args:
        url: Address of the image to fetch; None or blank counts as missing.
        question: Question text forwarded to analyze_eo_image.

    Returns:
        The answer from analyze_eo_image, an "Error loading image: ..."
        string when the URL is missing or the download/decoding fails, or
        an "Error: ..." string when the analysis itself fails. This
        function never raises.
    """
    if not url or not url.strip():
        return "Error loading image: no URL provided."

    try:
        # Bound the wait so an unresponsive host cannot hang the request
        response = requests.get(url.strip(), timeout=30)
        # Fail on 4xx/5xx instead of trying to decode an error page as an image
        response.raise_for_status()
        image = Image.open(BytesIO(response.content)).convert("RGB")
    except Exception as e:
        return f"Error loading image: {e}"

    # Kept separate so an inference failure is not reported as a load failure
    try:
        return analyze_eo_image(image, question)
    except Exception as e:
        return f"Error: {e}"

# -----------------------------
# Beautiful Gradio Layout
# -----------------------------
# Two-tab UI: one tab for an uploaded image, one for an image URL. Each tab
# has its own question box, button and read-only answer box.
with gr.Blocks(title="🌍 EO Image Analysis") as demo:

    # Page header with a short description and example questions
    gr.Markdown(
        """
        # 🌍 Earth Observation Image Analysis
        Ask questions about EO images using a multimodal AI model.
        Powered by BLIP-2 + FLAN-T5.
        **Examples:** "Identify land cover types", "Where is the river?", "Has urban area expanded?"
        """
    )

    with gr.Tabs():
        # Tab 1: analyze an uploaded image
        with gr.Tab("Upload Image"):
            with gr.Row():
                with gr.Column(scale=1):
                    img_input = gr.Image(type="pil", label="Upload EO Image")
                    question_input = gr.Textbox(label="Ask a question about the image", placeholder="E.g. Where is the river?")
                    submit_btn = gr.Button("Analyze 🌟")
                with gr.Column(scale=1):
                    output_text = gr.Textbox(label="AI Answer", interactive=False)

            submit_btn.click(analyze_eo_image, inputs=[img_input, question_input], outputs=output_text)

        # Tab 2: analyze an image fetched from a URL
        with gr.Tab("Use Image URL"):
            with gr.Row():
                with gr.Column(scale=1):
                    url_input = gr.Textbox(label="Enter Image URL")
                    url_question = gr.Textbox(label="Ask a question about the image")
                    url_btn = gr.Button("Analyze 🌟")
                with gr.Column(scale=1):
                    url_output = gr.Textbox(label="AI Answer", interactive=False)

            url_btn.click(analyze_eo_url, inputs=[url_input, url_question], outputs=url_output)

    # Footer tip; the leading emoji was stored as mis-encoded bytes and is
    # restored here.
    gr.Markdown(
        "💡 Tip: Use clear, simple questions for best results. Supports natural language queries about EO images."
    )

# Start the app and request a shareable link
demo.launch(share=True)

import os

from huggingface_hub import login

# Authenticate with the Hugging Face Hub using a token read from the
# HF_TOKEN environment variable instead of a literal committed to the file.
# When the variable is unset the login step is skipped.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)