#!/usr/bin/env python3
"""Cicero Cola - Design Studio Screen Analysis Platform"""
import os
import asyncio
import tempfile
import base64
from pathlib import Path
from typing import Optional
from PIL import Image
from loguru import logger
import uvicorn
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from fastapi.staticfiles import StaticFiles
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic_ai import Agent
from pydantic_ai.models.gemini import GeminiModel
from pydantic_ai.messages import BinaryContent
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
def setup_environment():
    """Report which API credentials are present and return them.

    Looks up ``HF_TOKEN`` and ``GEMINI_API_KEY`` in the process environment
    and prints one status line per variable. A variable that is unset or
    empty is reported as not found.

    Returns:
        A ``(hf_token, gemini_key)`` tuple holding the raw values from
        ``os.environ.get`` (``None`` when a variable is not set).
    """
    found = {}
    for var_name in ("HF_TOKEN", "GEMINI_API_KEY"):
        value = os.environ.get(var_name)
        found[var_name] = value
        # Only presence is reported; the secret value itself is never printed.
        if value:
            print(f"✅ {var_name} loaded...")
        else:
            print(f"⚠️ {var_name} not found in environment")
    return found["HF_TOKEN"], found["GEMINI_API_KEY"]
# Prompt sent to the model when the caller does not supply a question.
_DEFAULT_QUESTION = """
Analyze the provided image containing a multiple-choice question.
Generate a highly structured, accurate, and succinct response in Markdown.
Minimize token usage.
Markdown.Include a confidence score (0.00-1.00).
1. Explanation for each incorrect alternative (A, B, C, etc.).
2. Explanation for the correct alternative.
3. The final correct alternative.
Maximum of 10 words per explanation.
Based on the image, identify the question and the correct alternative.
Provide the answer using the following strict structure:
1. **Confidence:** (e.g., `Confidence: 0.98`)
2. **Incorrect Rationale:** For each option that is not the answer, state why it is wrong.
3. **Correct Rationale:** State why the correct option is the answer.
4. **Answer:** The letter corresponding to the correct alternative.
"""


def _sniff_image_media_type(image_data: bytes) -> str:
    """Return the MIME type implied by the leading magic bytes of *image_data*.

    Falls back to ``image/png`` (the value that used to be hard-coded) when
    the signature is not recognized.
    """
    if image_data.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    if image_data.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    if image_data.startswith((b"GIF87a", b"GIF89a")):
        return "image/gif"
    if image_data[:4] == b"RIFF" and image_data[8:12] == b"WEBP":
        return "image/webp"
    return "image/png"


async def analyze_image_with_ai(image_data: bytes, question: str | None, api_key: str) -> str:
    """Send an image and a question to a Gemini agent and return its answer.

    Args:
        image_data: Raw bytes of the image to analyze.
        question: Prompt to send with the image. ``None``, an empty string or
            a whitespace-only string selects the built-in multiple-choice
            analysis prompt.
        api_key: Accepted for interface compatibility. NOTE: it is currently
            not used by this function; the agent is created from the model
            name alone.

    Returns:
        The agent's ``result.output`` on success. On any exception the error
        is logged and the string ``"Analysis failed: <error>"`` is returned
        instead of raising.
    """
    try:
        # `is ""` compared identity, not equality; use truthiness so every
        # empty question falls back to the default prompt.
        if not question or not question.strip():
            logger.info(f"Question is {question}. Enforcing the default.")
            question = _DEFAULT_QUESTION

        # Create agent
        agent = Agent('gemini-2.5-flash')
        logger.info("Agent created with Gemini model")

        # Label the payload with its detected type rather than always
        # claiming PNG, so JPEG/GIF uploads are described correctly.
        image_content = BinaryContent(
            data=image_data,
            media_type=_sniff_image_media_type(image_data),
        )

        # Run the agent with image and question
        result = await agent.run([question, image_content])
        logger.info("Analysis completed successfully")
        return result.output
    except Exception as e:
        # Best-effort contract: callers receive the failure as text.
        logger.error(f"Analysis failed: {str(e)}")
        return f"Analysis failed: {str(e)}"
# Initialize FastAPI app
app = FastAPI(title="Cicero Passa a Cola", description="Screen Analysis Platform")

# Configure CORS.
# `allow_origins` entries are compared literally against the request's Origin
# header, so this list holds exact origins only.
origins = [
    "http://localhost",
    "https://localhost",
    "http://localhost:7864",
    "https://localhost:7864",
    "http://127.0.0.1:7864",
    "https://127.0.0.1:7864",
    "http://localhost:8864",
    "https://localhost:8864",
    "http://127.0.0.1:8864",
    "https://127.0.0.1:8864",
    "https://huggingface.co",
    "http://huggingface.co",
]

# Subdomain wildcards ("https://*.cicero.im", "https://*.huggingface.co", and
# their http variants) never matched as literal list entries; they are
# expressed here as a regex, which the middleware matches against the whole
# Origin value.
origin_regex = r"https?://([A-Za-z0-9-]+\.)+(cicero\.im|huggingface\.co)"

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_origin_regex=origin_regex,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Setup environment
hf_token, gemini_key = setup_environment()
@app.get("/", response_class=HTMLResponse)
async def get_main_page():
    """Return the landing page for the Cicero Cola design studio interface.

    The page is a static literal with no interpolated values; the route
    declares ``HTMLResponse`` as its response class.
    """
    # Content lines stay at column 0 so the served text is unchanged.
    return """
Cicero Passa a Cola
CICERO
Capture. Analyze. Cole.
Ready to start recording
Analysis Results
Live Recording Studio
Capture your screen in real-time with professional-grade recording capabilities
AI Analysis Lab
Upload images for intelligent design and content analysis
Click to upload or drag and drop
PNG, JPG, GIF up to 10MB
"""
@app.get("/get-env-key")
async def get_env_key():
    """Tell the client whether a Gemini API key is configured on the server.

    The real key is never returned. When ``GEMINI_API_KEY`` is set to a
    non-empty value the response is ``{"api_key": "uga"}`` (a fixed
    placeholder); otherwise it is ``{"api_key": null}`` with status 404.

    Raises:
        HTTPException: status 500 if anything unexpected fails.
    """
    try:
        configured_key = os.environ.get("GEMINI_API_KEY")
        # Guard clause: missing or empty key is reported as 404.
        if not configured_key:
            return JSONResponse(content={"api_key": None}, status_code=404)
        return JSONResponse(content={"api_key": "uga"})
    except Exception as e:
        logger.error(f"Error getting environment key: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/analyze-image")
async def analyze_image_endpoint(
    image: UploadFile = File(...),
    question: Optional[str] = Form(None),
    api_key: str = Form(...),
):
    """Analyze an uploaded image with AI and return the result as JSON.

    Args:
        image: The uploaded file. A declared content type that does not start
            with ``image/`` is rejected.
        question: Optional prompt. A required form field rejects requests
            that omit it, which made the default prompt in
            ``analyze_image_with_ai`` unreachable; when omitted, ``None`` is
            now passed through.
        api_key: Form field forwarded unchanged to ``analyze_image_with_ai``.

    Returns:
        ``JSONResponse`` with body ``{"analysis": <result>}``.

    Raises:
        HTTPException: 400 for a non-image content type or an empty upload;
            500 for any other unexpected failure.
    """
    # Validate before reading so a rejected upload is never loaded.
    if image.content_type and not image.content_type.startswith('image/'):
        raise HTTPException(status_code=400, detail=f"Invalid file type: {image.content_type}")

    try:
        # Read image data
        image_data = await image.read()
        if not image_data:
            raise HTTPException(status_code=400, detail="Uploaded image is empty")

        # Analyze with AI
        result = await analyze_image_with_ai(image_data, question, api_key)
        return JSONResponse(content={"analysis": result})
    except HTTPException:
        # Keep deliberate client errors (4xx) from being rewritten as 500.
        raise
    except Exception as e:
        logger.error(f"Error analyzing image: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e
if __name__ == "__main__":
    # Script entry point: report which credentials are configured, announce
    # startup, then serve the app with uvicorn on all interfaces, port 7860.
    hf_token, gemini_key = setup_environment()
    print("🚀 Starting Cicero Passa a Cola...")
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )