File size: 12,429 Bytes
9bfda2f
 
 
 
 
 
 
 
d1e470d
9bfda2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
001552b
 
ac823bb
001552b
 
 
 
 
 
 
 
 
 
 
 
ac823bb
 
001552b
9bfda2f
 
 
 
 
ac823bb
 
9bfda2f
ac823bb
 
 
9bfda2f
 
001552b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9bfda2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac823bb
9bfda2f
 
 
 
 
 
 
 
 
 
ac823bb
9bfda2f
 
96239e5
9bfda2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac823bb
9bfda2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
import gradio as gr
import json
from mistralai import Mistral
from pydantic import BaseModel, Field
from datetime import datetime
import base64
from io import BytesIO
from PIL import Image
import os

# Schema for document-level climate metadata. `extract_climate_data` converts
# this model with `response_format_from_pydantic_model` and sends it as the
# `document_annotation_format` of the OCR request, so each field's
# `description` doubles as the extraction instruction for that field.
# NOTE(review): documented with `#` comments rather than a class docstring on
# purpose; pydantic presumably surfaces a docstring in the generated schema,
# which would change what is sent to the API -- confirm before adding one.
class ClimateData(BaseModel):
    # --- Document identification: required (declared with `...`) ---
    document_type: str = Field(..., description="Type of document: report, policy, research, assessment, etc.")
    title: str = Field(..., description="Document title or main heading")
    organization: str = Field(..., description="Publishing organization or agency")
    publication_date: str = Field(..., description="Publication or release date")
    # --- Everything below is optional and defaults to an empty list ---
    # Physical measurements and observed events.
    temperature_data: list[str] = Field(default=[], description="Temperature readings, anomalies, projections")
    precipitation_data: list[str] = Field(default=[], description="Precipitation measurements and forecasts")
    co2_levels: list[str] = Field(default=[], description="CO2 concentration data and emissions")
    sea_level_data: list[str] = Field(default=[], description="Sea level rise measurements")
    extreme_events: list[str] = Field(default=[], description="Extreme weather events and frequencies")
    # Time frames referenced by the document.
    year_ranges: list[str] = Field(default=[], description="Time periods and date ranges covered")
    baseline_periods: list[str] = Field(default=[], description="Reference or baseline periods used")
    projection_periods: list[str] = Field(default=[], description="Future projection timeframes")
    # Policy content.
    policy_recommendations: list[str] = Field(default=[], description="Policy recommendations and actions")
    targets_goals: list[str] = Field(default=[], description="Climate targets, goals, and commitments")
    mitigation_strategies: list[str] = Field(default=[], description="Mitigation approaches and strategies")
    adaptation_measures: list[str] = Field(default=[], description="Adaptation measures and plans")
    # Geographic and sectoral scope.
    regions_covered: list[str] = Field(default=[], description="Geographical regions or countries covered")
    sectors_affected: list[str] = Field(default=[], description="Economic sectors or systems affected")
    # Findings, risk and confidence.
    main_conclusions: list[str] = Field(default=[], description="Primary conclusions and findings")
    risk_assessments: list[str] = Field(default=[], description="Risk levels and assessments")
    uncertainty_levels: list[str] = Field(default=[], description="Uncertainty ranges and confidence levels")

# Schema describing a single chart/figure/table found in a document.
# `extract_climate_data` converts this model with
# `response_format_from_pydantic_model` and sends it as the
# `bbox_annotation_format` of the OCR request. All six fields are required
# strings (declared with `...`); the `description` texts are part of the
# schema handed to the API, so they are left untouched.
class ChartDescription(BaseModel):
    # What kind of visual this is and what quantity it shows.
    chart_type: str = Field(..., description="Type of visualization: line chart, bar chart, map, table, etc.")
    data_type: str = Field(..., description="Type of data shown: temperature, emissions, policy timeline, etc.")
    # Free-text interpretation of the visual.
    trend_description: str = Field(..., description="Description of trends, patterns, and changes")
    key_insights: str = Field(..., description="Important findings and takeaways from the visualization")
    # Temporal and spatial coverage of the visual.
    time_period: str = Field(..., description="Time period or range covered in the chart")
    geographical_scope: str = Field(..., description="Geographical area or regions shown")

def initialize_client(api_key):
    """Create a Mistral API client for the given key.

    Args:
        api_key: Mistral API key. Leading/trailing whitespace is ignored.

    Returns:
        A ``Mistral`` client constructed with the cleaned key.

    Raises:
        ValueError: If the key is missing, empty, or whitespace only.
    """
    # Keys pasted into the UI textbox often carry stray spaces or a trailing
    # newline. Strip them so a blank-looking key is rejected here instead of
    # failing later as an opaque authentication error.
    if isinstance(api_key, str):
        api_key = api_key.strip()
    if not api_key:
        raise ValueError("Please provide a valid Mistral API key")
    return Mistral(api_key=api_key)

def _collect_ocr_page_content(response):
    """Gather per-page markdown and image annotations from an OCR response.

    Every attribute is read defensively with ``getattr``, so a response that
    has no ``pages`` simply yields empty results.

    Returns:
        A ``(text, annotations)`` tuple: the non-empty page ``markdown``
        values joined by blank lines, and a list of the non-empty
        ``image_annotation`` values found on the pages' images.
    """
    page_texts = []
    annotations = []
    for page in getattr(response, "pages", None) or []:
        markdown = getattr(page, "markdown", None)
        if markdown:
            page_texts.append(markdown)
        for image in getattr(page, "images", None) or []:
            annotation = getattr(image, "image_annotation", None)
            if annotation:
                annotations.append(annotation)
    return "\n\n".join(page_texts), annotations


def extract_climate_data(api_key, file_path=None, url=None):
    """Run Mistral OCR with structured annotations on a document.

    The document is taken from ``file_path`` (uploaded first, then referenced
    through a signed URL) or, when no file is given, from ``url``.

    Args:
        api_key: Mistral API key passed to ``initialize_client``.
        file_path: Path of a local document to upload. Takes precedence
            over ``url``.
        url: URL of a document to process directly.

    Returns:
        On success, a dict with the keys ``success``, ``extracted_text``,
        ``climate_data``, ``chart_descriptions`` and ``raw_response``.
        On any failure, ``{"error": <message>}``; exceptions are not
        propagated to the caller.
    """
    # Validate the input first so a missing document is reported as such,
    # without constructing a client or touching the network.
    if not file_path and not url:
        return {"error": "No file or URL provided"}
    try:
        client = initialize_client(api_key)
        from mistralai.extra import response_format_from_pydantic_model
        if file_path:
            # The context manager closes the handle once the upload call
            # returns; the original left the file open.
            with open(file_path, "rb") as document:
                uploaded_file = client.files.upload(
                    file={"file_name": os.path.basename(file_path), "content": document},
                    purpose="ocr"
                )
            signed_url = client.files.get_signed_url(file_id=uploaded_file.id)
            document_url = signed_url.url
        else:
            document_url = url
        response = client.ocr.process(
            model="mistral-ocr-latest",
            document={"type": "document_url", "document_url": document_url},
            bbox_annotation_format=response_format_from_pydantic_model(ChartDescription),
            document_annotation_format=response_format_from_pydantic_model(ClimateData),
            include_image_base64=True
        )
        # Prefer the top-level attributes the original code read. When they
        # are absent or empty, fall back to the per-page content.
        # NOTE(review): the SDK's OCR response appears to expose text as
        # pages[*].markdown and chart annotations as
        # pages[*].images[*].image_annotation -- confirm against the
        # installed mistralai version.
        page_text, image_annotations = _collect_ocr_page_content(response)
        extracted_text = getattr(response, "text", None) or page_text
        bbox_annotations = getattr(response, "bbox_annotations", None) or image_annotations
        doc_annotations = getattr(response, "document_annotation", {})
        return {
            "success": True,
            "extracted_text": extracted_text,
            "climate_data": doc_annotations,
            "chart_descriptions": bbox_annotations,
            "raw_response": str(response)
        }
    except Exception as e:
        # Deliberate catch-all boundary: the UI expects an error dict, never
        # an exception.
        return {"error": f"OCR processing failed: {str(e)}"}

def process_climate_document(api_key, file, url_input):
    """Extract structured climate data from an uploaded document or a document URL.

    An uploaded file takes precedence over the URL when both are supplied.

    Args:
        api_key: Mistral API key used to authenticate the OCR request.
        file: Uploaded document, given either as an object exposing the local
            path in its ``name`` attribute or as a plain path string. May be
            None when a URL is used instead.
        url_input: URL of the document to analyze. Only used when no file is
            given. Surrounding whitespace is ignored.

    Returns:
        On success, the dict produced by ``extract_climate_data`` (keys
        ``success``, ``extracted_text``, ``climate_data``,
        ``chart_descriptions``, ``raw_response``). On failure,
        ``{"error": <message>}``.
    """
    # The URL may arrive as None (e.g. an API caller omitting the argument),
    # so normalise it before calling .strip().
    document_url = (url_input or "").strip()

    if file:
        # Accept both an upload object with a `.name` path and a bare path
        # string.
        file_path = getattr(file, "name", file)
        result = extract_climate_data(api_key, file_path=file_path)
    elif document_url:
        result = extract_climate_data(api_key, url=document_url)
    else:
        return {"error": "Please provide either a file or URL"}

    # On failure, return only the error message and drop any other keys.
    if "error" in result:
        return {"error": result["error"]}

    return result

def _fallback_image_result(image_type, placeholder, description):
    """Build the fixed-shape result returned when analysis cannot be completed.

    Args:
        image_type: Value for the ``image_type`` field.
        placeholder: Value used for both ``climate_feature`` and ``location``.
        description: Human-readable explanation of what went wrong.

    Returns:
        A dict with the same keys the model is asked to produce, with
        ``date_captured`` set to today's date in YYYY-MM-DD form.
    """
    return {
        "image_type": image_type,
        "climate_feature": placeholder,
        "location": placeholder,
        # "%d" is the day-of-month directive; the previous "%Y-%m-DD" format
        # emitted a literal "DD" instead of the day.
        "date_captured": datetime.now().strftime("%Y-%m-%d"),
        "cloud_density": 0.0,
        "temperature_anomaly": 0.0,
        "description": description,
    }


def analyze_image(api_key, image):
    """Analyze a climate-related image with the Pixtral vision model.

    The image is PNG-encoded, sent to the chat model as a base64 data URL
    together with a prompt requesting a JSON description, and the reply is
    parsed into a dict.

    Args:
        api_key: Mistral API key used to authenticate the request.
        image: Image to analyze; it must provide a PIL-style
            ``save(buffer, format=...)`` method.

    Returns:
        The parsed JSON reply from the model, which is asked for the fields
        image_type, climate_feature, location, date_captured, cloud_density,
        temperature_anomaly and description. If the reply is not valid JSON,
        or any other error occurs, a dict with those same keys and
        placeholder values is returned; its ``description`` explains the
        failure. Exceptions are not propagated.
    """
    try:
        client = initialize_client(api_key)
        if image is None:
            # Report a missing upload explicitly instead of surfacing an
            # AttributeError from `None.save`.
            return _fallback_image_result(
                "error", "none", "Error processing image: No image provided"
            )
        buffered = BytesIO()
        image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()
        prompt = """Analyze this image and provide a JSON output with the following fields:
        - image_type: Type of image (e.g., satellite, ground, aerial)
        - climate_feature: Primary climate feature observed (e.g., cloud_cover, precipitation)
        - location: Estimated or general location (e.g., Pacific Ocean, Sahara Desert)
        - date_captured: Current date in YYYY-MM-DD format
        - cloud_density: Estimated cloud density (0.0 to 1.0) if applicable
        - temperature_anomaly: Estimated temperature anomaly in Celsius (e.g., 1.2)
        - description: Brief description of the image content
        """
        response = client.chat.complete(
            model="pixtral-large-latest",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": f"data:image/png;base64,{img_str}"}
                    ]
                }
            ]
        )
        response_text = response.choices[0].message.content
        try:
            # The model may wrap its JSON in a Markdown code fence; strip the
            # fence markers before parsing.
            response_text = response_text.replace("```json", "").replace("```", "").strip()
            return json.loads(response_text)
        except json.JSONDecodeError:
            return _fallback_image_result(
                "unknown", "unknown", "Error parsing model output."
            )
    except Exception as e:
        # Deliberate catch-all boundary: the UI expects a result dict, never
        # an exception.
        return _fallback_image_result(
            "error", "none", f"Error processing image: {str(e)}"
        )

# Build the Gradio interface: one shared API-key field followed by two tabs,
# "Document Analysis" (wired to `process_climate_document`) and
# "Image Analysis" (wired to `analyze_image`). Components are laid out in the
# order they are created inside the nested context managers.
with gr.Blocks(title="Climate Data and Image Analyzer") as demo:
    gr.Markdown("# Climate Data and Image Analysis Tool\nAnalyze climate documents or images using Mistral OCR and Pixtral models")
    # Shared by both tabs; `type="password"` masks the key as it is typed.
    api_key_input = gr.Textbox(
        label="Mistral API Key",
        placeholder="Enter your Mistral API key here",
        type="password"
    )
    with gr.Tabs():
        with gr.Tab(label="Document Analysis"):
            gr.Markdown("## Document Analysis\nExtract data from climate reports, policies, or research papers")
            with gr.Row():
                # Left column: inputs (file upload or URL) and the action button.
                with gr.Column():
                    file_input = gr.File(
                        label="Upload Climate Document",
                        file_types=[".pdf", ".png", ".jpg", ".jpeg", ".docx", ".pptx"]
                    )
                    url_input = gr.Textbox(
                        label="Or Enter Document URL",
                        placeholder="https://example.com/climate-policy.pdf"
                    )
                    process_btn = gr.Button("Analyze Document", variant="primary")
                # Right column: JSON view of the analysis result.
                with gr.Column():
                    doc_output = gr.JSON(label="Document Analysis Results")
            # Input order must match the handler signature:
            # (api_key, file, url_input).
            process_btn.click(
                fn=process_climate_document,
                inputs=[api_key_input, file_input, url_input],
                outputs=doc_output
            )
            # Each example row is [file, url], matching `inputs` below; None
            # leaves the file input empty so the URL is used.
            gr.Examples(
                examples=[
                    [None, "https://static.pib.gov.in/WriteReadData/specificdocs/documents/2021/dec/doc202112101.pdf"],
                    [None, "https://www.ipcc.ch/site/assets/uploads/2018/02/WG1AR5_Chapter02_FINAL.pdf"],
                    [None, "https://unfccc.int/sites/default/files/resource/parisagreement_publication.pdf"]
                ],
                inputs=[file_input, url_input]
            )
        with gr.Tab(label="Image Analysis"):
            gr.Markdown("## Image Analysis\nAnalyze climate-related images for features like cloud cover or temperature anomalies")
            # `type="pil"` hands the handler a PIL image, which `analyze_image`
            # needs for its `image.save(...)` call.
            image_input = gr.Image(type="pil", label="Upload Image")
            image_btn = gr.Button("Analyze Image", variant="primary")
            image_output = gr.JSON(label="Image Analysis Result")
            # Input order must match the handler signature: (api_key, image).
            image_btn.click(
                fn=analyze_image,
                inputs=[api_key_input, image_input],
                outputs=image_output
            )

# Launch only when run as a script, not on import.
# NOTE(review): `mcp_server=True` presumably also serves the app's functions
# as MCP tools via Gradio -- confirm against the installed Gradio version.
if __name__ == "__main__":
    demo.launch(mcp_server=True)