File size: 12,429 Bytes
9bfda2f d1e470d 9bfda2f 001552b ac823bb 001552b ac823bb 001552b 9bfda2f ac823bb 9bfda2f ac823bb 9bfda2f 001552b 9bfda2f ac823bb 9bfda2f ac823bb 9bfda2f 96239e5 9bfda2f ac823bb 9bfda2f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 |
import gradio as gr
import json
from mistralai import Mistral
from pydantic import BaseModel, Field
from datetime import datetime
import base64
from io import BytesIO
from PIL import Image
import os
class ClimateData(BaseModel):
    # Schema for the document-level annotation requested from the OCR call:
    # four required identity fields followed by optional lists of extracted
    # strings, each defaulting to an empty list.
    # NOTE(review): kept as `#` comments on purpose -- pydantic is understood
    # to copy a model docstring into the generated JSON schema, which would
    # alter the schema sent with the request. Confirm before converting.

    # --- Document identity (required) ---
    document_type: str = Field(
        ...,
        description="Type of document: report, policy, research, assessment, etc.",
    )
    title: str = Field(
        ...,
        description="Document title or main heading",
    )
    organization: str = Field(
        ...,
        description="Publishing organization or agency",
    )
    publication_date: str = Field(
        ...,
        description="Publication or release date",
    )

    # --- Observed / projected climate measurements ---
    temperature_data: list[str] = Field(
        default=[],
        description="Temperature readings, anomalies, projections",
    )
    precipitation_data: list[str] = Field(
        default=[],
        description="Precipitation measurements and forecasts",
    )
    co2_levels: list[str] = Field(
        default=[],
        description="CO2 concentration data and emissions",
    )
    sea_level_data: list[str] = Field(
        default=[],
        description="Sea level rise measurements",
    )
    extreme_events: list[str] = Field(
        default=[],
        description="Extreme weather events and frequencies",
    )

    # --- Time coverage ---
    year_ranges: list[str] = Field(
        default=[],
        description="Time periods and date ranges covered",
    )
    baseline_periods: list[str] = Field(
        default=[],
        description="Reference or baseline periods used",
    )
    projection_periods: list[str] = Field(
        default=[],
        description="Future projection timeframes",
    )

    # --- Policy content ---
    policy_recommendations: list[str] = Field(
        default=[],
        description="Policy recommendations and actions",
    )
    targets_goals: list[str] = Field(
        default=[],
        description="Climate targets, goals, and commitments",
    )
    mitigation_strategies: list[str] = Field(
        default=[],
        description="Mitigation approaches and strategies",
    )
    adaptation_measures: list[str] = Field(
        default=[],
        description="Adaptation measures and plans",
    )

    # --- Scope ---
    regions_covered: list[str] = Field(
        default=[],
        description="Geographical regions or countries covered",
    )
    sectors_affected: list[str] = Field(
        default=[],
        description="Economic sectors or systems affected",
    )

    # --- Findings ---
    main_conclusions: list[str] = Field(
        default=[],
        description="Primary conclusions and findings",
    )
    risk_assessments: list[str] = Field(
        default=[],
        description="Risk levels and assessments",
    )
    uncertainty_levels: list[str] = Field(
        default=[],
        description="Uncertainty ranges and confidence levels",
    )
class ChartDescription(BaseModel):
    # Schema for the per-region (bounding-box) annotation requested from the
    # OCR call; every field is a required string.
    # NOTE(review): kept as `#` comments on purpose -- pydantic is understood
    # to copy a model docstring into the generated JSON schema, which would
    # alter the schema sent with the request. Confirm before converting.

    # What kind of visual this is and what it plots.
    chart_type: str = Field(
        ...,
        description="Type of visualization: line chart, bar chart, map, table, etc.",
    )
    data_type: str = Field(
        ...,
        description="Type of data shown: temperature, emissions, policy timeline, etc.",
    )

    # Narrative reading of the visual.
    trend_description: str = Field(
        ...,
        description="Description of trends, patterns, and changes",
    )
    key_insights: str = Field(
        ...,
        description="Important findings and takeaways from the visualization",
    )

    # Temporal and spatial extent.
    time_period: str = Field(
        ...,
        description="Time period or range covered in the chart",
    )
    geographical_scope: str = Field(
        ...,
        description="Geographical area or regions shown",
    )
def initialize_client(api_key):
    """Build a Mistral API client from a user-supplied key.

    Args:
        api_key: API key string. Leading/trailing whitespace (common when a
            key is pasted into a text box) is stripped before use.

    Returns:
        A ``Mistral`` client constructed with the cleaned key.

    Raises:
        ValueError: If ``api_key`` is ``None``, empty, or whitespace-only.
    """
    # Only strings can be stripped; any other falsy value (e.g. None) falls
    # straight through to the validation below.
    cleaned_key = api_key.strip() if isinstance(api_key, str) else api_key
    if not cleaned_key:
        raise ValueError("Please provide a valid Mistral API key")
    return Mistral(api_key=cleaned_key)
def extract_climate_data(api_key, file_path=None, url=None):
    """Run Mistral OCR with structured annotations on a document.

    The document is taken from ``file_path`` (uploaded to Mistral and
    referenced through a signed URL) or, if no file is given, from ``url``.

    Args:
        api_key: Mistral API key, passed to ``initialize_client``.
        file_path: Local path of a document to upload. Takes precedence
            over ``url``.
        url: URL of a document to process when ``file_path`` is not given.

    Returns:
        dict: On success, a dict with keys ``success``, ``extracted_text``,
        ``climate_data``, ``chart_descriptions`` and ``raw_response``.
        On any failure (including a missing input or an invalid key), a
        dict with the single key ``error``. This function does not raise.
    """
    try:
        client = initialize_client(api_key)
        from mistralai.extra import response_format_from_pydantic_model

        if file_path:
            # The context manager guarantees the handle is closed even when
            # the upload raises; the original left it open.
            with open(file_path, "rb") as handle:
                uploaded_file = client.files.upload(
                    file={
                        "file_name": os.path.basename(file_path),
                        "content": handle,
                    },
                    purpose="ocr",
                )
            signed_url = client.files.get_signed_url(file_id=uploaded_file.id)
            document_url = signed_url.url
        elif url:
            document_url = url
        else:
            return {"error": "No file or URL provided"}

        response = client.ocr.process(
            model="mistral-ocr-latest",
            document={"type": "document_url", "document_url": document_url},
            bbox_annotation_format=response_format_from_pydantic_model(ChartDescription),
            document_annotation_format=response_format_from_pydantic_model(ClimateData),
            include_image_base64=True,
        )

        # NOTE(review): per the Mistral OCR docs the response carries its
        # content per page (`pages[i].markdown`, `pages[i].images[j]
        # .image_annotation`) rather than in top-level `text` /
        # `bbox_annotations` attributes. The top-level attributes are still
        # honoured first; confirm the shape against the installed SDK.
        pages = getattr(response, "pages", None) or []

        extracted_text = getattr(response, "text", None)
        if not extracted_text:
            extracted_text = "\n\n".join(
                getattr(page, "markdown", None) or "" for page in pages
            )

        bbox_annotations = getattr(response, "bbox_annotations", None)
        if not bbox_annotations:
            bbox_annotations = [
                image.image_annotation
                for page in pages
                for image in (getattr(page, "images", None) or [])
                if getattr(image, "image_annotation", None)
            ]

        doc_annotations = getattr(response, "document_annotation", {})

        return {
            "success": True,
            "extracted_text": extracted_text,
            "climate_data": doc_annotations,
            "chart_descriptions": bbox_annotations,
            "raw_response": str(response),
        }
    except Exception as e:
        # Boundary handler: the UI expects a dict, never an exception.
        return {"error": f"OCR processing failed: {str(e)}"}
def process_climate_document(api_key, file, url_input):
    """Analyze a climate document supplied as an uploaded file or a URL.

    An uploaded file takes precedence over the URL when both are given.

    Args:
        api_key: Mistral API key forwarded to ``extract_climate_data``.
        file: Uploaded document, or ``None``. May be an object exposing the
            local path as ``.name`` or a plain path string.
        url_input: URL of the document to analyze, used only when no file
            is given. ``None`` and blank strings count as not provided.

    Returns:
        dict: The dict produced by ``extract_climate_data`` on success, or
        a dict with the single key ``error`` describing what went wrong.
    """
    if file:
        # Accept both upload objects (path in `.name`) and bare path strings.
        file_path = getattr(file, "name", file)
        result = extract_climate_data(api_key, file_path=file_path)
    else:
        # `url_input` can be None when the handler is invoked without the
        # textbox value; treat that the same as an empty string.
        url = (url_input or "").strip()
        if not url:
            return {"error": "Please provide either a file or URL"}
        result = extract_climate_data(api_key, url=url)

    if "error" in result:
        return {"error": result["error"]}
    return result
def analyze_image(api_key, image):
    """Describe a climate-related image using the Pixtral chat model.

    The image is PNG-encoded, sent to the model together with a prompt
    asking for a JSON description, and the model's reply is parsed.

    Args:
        api_key: Mistral API key, passed to ``initialize_client``.
        image: Image object exposing ``save(buffer, format=...)`` (the UI
            supplies a PIL image). ``None`` is reported as an error result.

    Returns:
        The parsed JSON reply from the model (expected fields: image_type,
        climate_feature, location, date_captured, cloud_density,
        temperature_anomaly, description). If the reply is not valid JSON,
        or any step fails, a placeholder dict with those same fields is
        returned instead. This function does not raise.
    """
    try:
        client = initialize_client(api_key)
        if image is None:
            # Surface a readable message instead of a NoneType attribute error.
            raise ValueError("No image provided")

        buffered = BytesIO()
        image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()

        prompt = (
            "Analyze this image and provide a JSON output with the following fields:\n"
            "- image_type: Type of image (e.g., satellite, ground, aerial)\n"
            "- climate_feature: Primary climate feature observed (e.g., cloud_cover, precipitation)\n"
            "- location: Estimated or general location (e.g., Pacific Ocean, Sahara Desert)\n"
            "- date_captured: Current date in YYYY-MM-DD format\n"
            "- cloud_density: Estimated cloud density (0.0 to 1.0) if applicable\n"
            "- temperature_anomaly: Estimated temperature anomaly in Celsius (e.g., 1.2)\n"
            "- description: Brief description of the image content\n"
        )

        response = client.chat.complete(
            model="pixtral-large-latest",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": f"data:image/png;base64,{img_str}"},
                    ],
                }
            ],
        )
        response_text = response.choices[0].message.content

        try:
            # The reply may be wrapped in a Markdown code fence; strip the
            # fence markers before parsing.
            response_text = response_text.replace("```json", "").replace("```", "").strip()
            return json.loads(response_text)
        except json.JSONDecodeError:
            return _image_analysis_fallback(
                "unknown", "unknown", "Error parsing model output."
            )
    except Exception as e:
        # Boundary handler: the UI expects a dict, never an exception.
        return _image_analysis_fallback(
            "error", "none", f"Error processing image: {str(e)}"
        )


def _image_analysis_fallback(image_type, placeholder, description):
    """Build a placeholder result carrying every field of a normal reply."""
    return {
        "image_type": image_type,
        "climate_feature": placeholder,
        "location": placeholder,
        # "%d" is the day-of-month code; the previous "%Y-%m-DD" emitted a
        # literal "DD" (e.g. "2024-05-DD").
        "date_captured": datetime.now().strftime("%Y-%m-%d"),
        "cloud_density": 0.0,
        "temperature_anomaly": 0.0,
        "description": description,
    }
with gr.Blocks(title="Climate Data and Image Analyzer") as demo:
    # UI definition: one shared API-key field above two tabs, one wired to
    # `process_climate_document` and one to `analyze_image`.
    # NOTE(review): the nesting below was reconstructed from the component
    # order; in particular, whether the click wiring and `gr.Examples` sit
    # directly in the tab or inside the row could not be determined --
    # confirm against the rendered layout.
    gr.Markdown("# Climate Data and Image Analysis Tool\nAnalyze climate documents or images using Mistral OCR and Pixtral models")
    # The key is entered once and passed as the first input to both handlers.
    api_key_input = gr.Textbox(
        label="Mistral API Key",
        placeholder="Enter your Mistral API key here",
        type="password"
    )
    with gr.Tabs():
        with gr.Tab(label="Document Analysis"):
            gr.Markdown("## Document Analysis\nExtract data from climate reports, policies, or research papers")
            with gr.Row():
                # Left column: inputs. Right column: JSON result.
                with gr.Column():
                    file_input = gr.File(
                        label="Upload Climate Document",
                        file_types=[".pdf", ".png", ".jpg", ".jpeg", ".docx", ".pptx"]
                    )
                    url_input = gr.Textbox(
                        label="Or Enter Document URL",
                        placeholder="https://example.com/climate-policy.pdf"
                    )
                    process_btn = gr.Button("Analyze Document", variant="primary")
                with gr.Column():
                    doc_output = gr.JSON(label="Document Analysis Results")
            # Input order must match process_climate_document(api_key, file, url_input).
            process_btn.click(
                fn=process_climate_document,
                inputs=[api_key_input, file_input, url_input],
                outputs=doc_output
            )
            # Each example row is [file, url]; the file slot is left empty so
            # the example only fills the URL box.
            gr.Examples(
                examples=[
                    [None, "https://static.pib.gov.in/WriteReadData/specificdocs/documents/2021/dec/doc202112101.pdf"],
                    [None, "https://www.ipcc.ch/site/assets/uploads/2018/02/WG1AR5_Chapter02_FINAL.pdf"],
                    [None, "https://unfccc.int/sites/default/files/resource/parisagreement_publication.pdf"]
                ],
                inputs=[file_input, url_input]
            )
        with gr.Tab(label="Image Analysis"):
            gr.Markdown("## Image Analysis\nAnalyze climate-related images for features like cloud cover or temperature anomalies")
            # type="pil" hands the handler a PIL image, which analyze_image
            # encodes via its `.save(...)` method.
            image_input = gr.Image(type="pil", label="Upload Image")
            image_btn = gr.Button("Analyze Image", variant="primary")
            image_output = gr.JSON(label="Image Analysis Result")
            # Input order must match analyze_image(api_key, image).
            image_btn.click(
                fn=analyze_image,
                inputs=[api_key_input, image_input],
                outputs=image_output
            )
if __name__ == "__main__":
    # Start the app only when run as a script. The stray trailing "|" that
    # followed this call was a dangling operator (syntax error) and is removed.
    # NOTE(review): mcp_server=True is understood to also expose the handlers
    # as MCP tools -- confirm against the installed Gradio version.
    demo.launch(mcp_server=True)