# File size: 4,407 Bytes | 83721a5
# === Standard Library ===
import os
import re
import json
import base64
import mimetypes
from pathlib import Path
# === Third-Party ===
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from dotenv import load_dotenv
from openai import OpenAI
from anthropic import Anthropic
from html import escape
# === Env & Clients ===
# Run python-dotenv's loader before reading the API keys from the environment.
load_dotenv()

openai_api_key = os.getenv("OPENAI_API_KEY")
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")

# Pass the key explicitly when one was found; otherwise construct the client
# with no arguments and let it fall back to its own default key lookup.
if openai_api_key:
    openai_client = OpenAI(api_key=openai_api_key)
else:
    openai_client = OpenAI()

if anthropic_api_key:
    anthropic_client = Anthropic(api_key=anthropic_api_key)
else:
    anthropic_client = Anthropic()
def get_response(model: str, prompt: str) -> str:
    """Send a text-only prompt to an LLM and return the text of its reply.

    The provider is chosen from the model name: names containing "claude" or
    "anthropic" (case-insensitive) are sent through ``anthropic_client``;
    every other name is sent through ``openai_client``.

    Args:
        model: Model identifier, forwarded unchanged to the chosen client.
        prompt: The user prompt text.

    Returns:
        For Anthropic models, the concatenation of every text block in the
        reply (an empty string if the reply has no text blocks). For all
        other models, the response's ``output_text``.
    """
    model_key = model.lower()
    if "claude" in model_key or "anthropic" in model_key:
        # Anthropic Claude format
        message = anthropic_client.messages.create(
            model=model,
            max_tokens=1000,
            messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
        )
        # The reply is a list of content blocks. Join all text blocks rather
        # than indexing content[0], which raises on an empty list or a
        # non-text first block and drops any later text. This mirrors how
        # image_anthropic_call collects its text.
        return "".join(
            block.text
            for block in (message.content or [])
            if getattr(block, "type", None) == "text"
        )

    # Default to OpenAI format for all other models (gpt-4, o3-mini, o1, etc.)
    response = openai_client.responses.create(
        model=model,
        input=prompt,
    )
    return response.output_text
# === Data Loading ===
def load_and_prepare_data(csv_path: str) -> pd.DataFrame:
    """Read a CSV file and, when it has a ``date`` column, add date parts.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        The loaded DataFrame. If a ``date`` column is present it is converted
        with ``pd.to_datetime(..., errors="coerce")`` and ``quarter``,
        ``month`` and ``year`` columns derived from it are added. Without a
        ``date`` column the frame is returned as read.
    """
    frame = pd.read_csv(csv_path)

    # Nothing to derive when the file carries no 'date' column.
    if "date" not in frame.columns:
        return frame

    parsed = pd.to_datetime(frame["date"], errors="coerce")
    frame["date"] = parsed
    # Same insertion order as before: quarter, month, year.
    for part in ("quarter", "month", "year"):
        frame[part] = getattr(parsed.dt, part)
    return frame
# === Helpers ===
def make_schema_text(df: pd.DataFrame) -> str:
    """Describe a DataFrame's columns as a bulleted ``- name: dtype`` list.

    Args:
        df: The DataFrame to describe.

    Returns:
        One ``- <column>: <dtype>`` line per column, joined with newlines, in
        the order given by ``df.dtypes``. Empty string for a frame with no
        columns.
    """
    lines = []
    for column, dtype in df.dtypes.items():
        lines.append(f"- {column}: {dtype}")
    return "\n".join(lines)
def ensure_execute_python_tags(text: str) -> str:
    """Return *text* wrapped in ``<execute_python>`` tags, minus code fences.

    Surrounding whitespace is trimmed, a leading ```` ``` ```` or
    ```` ```python ```` fence and a trailing ```` ``` ```` fence are removed,
    and the result is wrapped in ``<execute_python>...</execute_python>``
    unless it already contains an opening ``<execute_python>`` tag.

    Args:
        text: Raw code text, possibly fenced and/or already tagged.

    Returns:
        The normalized, tag-wrapped code string.
    """
    # One pattern covers both the opening fence (anchored at the start) and
    # the closing fence (anchored at the end).
    fence_pattern = re.compile(r"^```(?:python)?\s*|\s*```$")
    body = fence_pattern.sub("", text.strip()).strip()

    if "<execute_python>" in body:
        return body
    return f"<execute_python>\n{body}\n</execute_python>"
def encode_image_b64(path: str) -> tuple[str, str]:
    """Read an image file and return its media type and base64 payload.

    Args:
        path: Filesystem path of the image.

    Returns:
        ``(media_type, base64_str)``. The media type is guessed from the
        path with ``mimetypes.guess_type`` and falls back to ``"image/png"``
        when no type can be guessed. The payload is the file's bytes,
        base64-encoded and decoded to a ``str``.
    """
    guessed_type = mimetypes.guess_type(path)[0]
    raw_bytes = Path(path).read_bytes()
    encoded = base64.b64encode(raw_bytes).decode("utf-8")
    return (guessed_type if guessed_type else "image/png"), encoded
def image_anthropic_call(model_name: str, prompt: str, media_type: str, b64: str) -> str:
    """Send a text prompt plus one base64 image through ``anthropic_client``.

    The request carries a system prompt instructing the model to answer with
    a single JSON object, and uses ``temperature=0`` and ``max_tokens=2000``.

    Args:
        model_name: Model identifier passed to ``messages.create``.
        prompt: Text part of the user message.
        media_type: Media type of the image, e.g. ``"image/png"``.
        b64: Base64-encoded image data.

    Returns:
        All text blocks of the reply concatenated and stripped of surrounding
        whitespace; empty string when the reply has no text blocks.
    """
    system_prompt = (
        "You are a careful assistant. Respond with a single valid JSON object only. "
        "Do not include markdown, code fences, or commentary outside JSON."
    )
    text_part = {"type": "text", "text": prompt}
    image_part = {
        "type": "image",
        "source": {"type": "base64", "media_type": media_type, "data": b64},
    }

    reply = anthropic_client.messages.create(
        model=model_name,
        max_tokens=2000,
        temperature=0,
        system=system_prompt,
        messages=[{"role": "user", "content": [text_part, image_part]}],
    )

    # The reply content is a list of blocks; keep only the text ones.
    text_chunks = [
        block.text
        for block in (reply.content or [])
        if getattr(block, "type", None) == "text"
    ]
    return "".join(text_chunks).strip()
def image_openai_call(model_name: str, prompt: str, media_type: str, b64: str) -> str:
    """Send a text prompt plus one base64 image through ``openai_client``.

    The image is passed inline as a ``data:`` URL built from *media_type*
    and *b64*.

    Args:
        model_name: Model identifier passed to ``responses.create``.
        prompt: Text part of the user message.
        media_type: Media type of the image, e.g. ``"image/png"``.
        b64: Base64-encoded image data.

    Returns:
        The response's ``output_text`` stripped of surrounding whitespace,
        or an empty string when that text is empty or missing.
    """
    user_message = {
        "role": "user",
        "content": [
            {"type": "input_text", "text": prompt},
            {"type": "input_image", "image_url": f"data:{media_type};base64,{b64}"},
        ],
    }
    reply = openai_client.responses.create(model=model_name, input=[user_message])

    text = reply.output_text
    if not text:
        return ""
    return text.strip()
|