#!/usr/bin/env python3
"""Cicero Cola - Design Studio Screen Analysis Platform"""

import asyncio
import base64
import os
import tempfile
from pathlib import Path
from typing import Optional

import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from loguru import logger
from PIL import Image
from pydantic_ai import Agent
from pydantic_ai.messages import BinaryContent
from pydantic_ai.models.gemini import GeminiModel

# Load environment variables
load_dotenv()

# Prompt used when the caller does not supply a question. The text (including
# its leading/trailing space and the single embedded newline) is kept exactly
# as it was; it is only split across literals for readability.
DEFAULT_QUESTION = (
    " Analyze the provided image containing a multiple-choice question."
    " Generate a highly structured, accurate, and succinct response in Markdown."
    " Minimize token usage. Markdown."
    " Include a confidence score (0.00-1.00)."
    " 1. Explanation for each incorrect alternative (A, B, C, etc.)."
    " 2. Explanation for the correct alternative."
    " 3. The final correct alternative."
    " Maximum of 10 words per explanation."
    " Based on the image, identify the question and the correct alternative."
    " Provide the answer using the following strict structure:"
    " 1. **Confidence:** (e.g., `Confidence: 0.98`)"
    " 2. \n"
    "**Incorrect Rationale:** For each option that is not the answer,"
    " state why it is wrong."
    " 3. **Correct Rationale:** State why the correct option is the answer."
    " 4. **Answer:** The letter corresponding to the correct alternative. "
)


def setup_environment():
    """Report which credentials are present in the environment.

    Returns:
        A ``(hf_token, gemini_key)`` tuple holding the values of the
        ``HF_TOKEN`` and ``GEMINI_API_KEY`` environment variables; either
        element is ``None`` when the variable is not set.
    """
    hf_token = os.environ.get("HF_TOKEN")
    gemini_key = os.environ.get("GEMINI_API_KEY")

    # Only presence is reported; the secret values are never printed.
    if hf_token:
        print("✅ HF_TOKEN loaded...")
    else:
        print("⚠️ HF_TOKEN not found in environment")

    if gemini_key:
        print("✅ GEMINI_API_KEY loaded...")
    else:
        print("⚠️ GEMINI_API_KEY not found in environment")

    return hf_token, gemini_key


async def analyze_image_with_ai(
    image_data: bytes,
    question: str | None,
    api_key: str,
    media_type: str = "image/png",
) -> str:
    """Analyze image using pydantic_ai with BinaryContent.

    Args:
        image_data: Raw bytes of the image to analyze.
        question: Prompt sent alongside the image. ``None`` or an empty
            string selects ``DEFAULT_QUESTION``.
        api_key: Accepted for interface compatibility but not used here.
            NOTE(review): the agent is created without an explicit key, so
            it presumably relies on credentials from the environment
            (e.g. ``GEMINI_API_KEY``) - confirm before relying on
            per-request keys.
        media_type: MIME type declared for ``image_data``. Defaults to
            ``"image/png"``, the value that used to be hard-coded.

    Returns:
        The agent's output on success. On any failure the error is logged
        and a string of the form ``"Analysis failed: <error>"`` is returned
        instead of raising.
    """
    try:
        # Truthiness covers both None and "" (the original compared with
        # `is ""`, an identity test against a literal).
        if not question:
            logger.info(f"Question is {question}. Enforcing the default.")
            question = DEFAULT_QUESTION

        # Create agent
        agent = Agent('gemini-2.5-flash')
        logger.info("Agent created with Gemini model")

        # Create binary content for the image, labelled with its real type
        image_content = BinaryContent(data=image_data, media_type=media_type)

        # Run the agent with image and question
        result = await agent.run([question, image_content])
        logger.info("Analysis completed successfully")
        return result.output
    except Exception as e:
        # Best-effort contract: report the failure as text to the caller.
        logger.error(f"Analysis failed: {str(e)}")
        return f"Analysis failed: {str(e)}"


# Initialize FastAPI app
app = FastAPI(title="Cicero Passa a Cola", description="Screen Analysis Platform")

# Configure CORS
# Exact origins. Entries in allow_origins are compared literally, so
# wildcard sub-domain patterns cannot live in this list.
origins = [
    "http://localhost",
    "https://localhost",
    "http://localhost:7864",
    "https://localhost:7864",
    "http://127.0.0.1:7864",
    "https://127.0.0.1:7864",
    "http://localhost:8864",
    "https://localhost:8864",
    "http://127.0.0.1:8864",
    "https://127.0.0.1:8864",
    "https://huggingface.co",
    "http://huggingface.co",
]

# Replaces the former "http(s)://*.cicero.im" and "http(s)://*.huggingface.co"
# entries: any sub-domain of either site, over http or https.
origin_regex = r"https?://([A-Za-z0-9-]+\.)+(cicero\.im|huggingface\.co)"

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_origin_regex=origin_regex,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Setup environment
hf_token, gemini_key = setup_environment()


@app.get("/", response_class=HTMLResponse)
async def get_main_page():
    """Serve the main HTML page with Cicero Cola design studio interface."""
    # The literal is left-aligned on purpose: indenting it would change the
    # bytes sent to the browser.
    html_content = """ Cicero Passa a Cola

Capture. Analyze. Cole.

Ready to start recording

Live Recording Studio

Capture your screen in real-time with professional-grade recording capabilities

AI Analysis Lab

Upload images for intelligent design and content analysis

Gemini API Key

Analysis Question

Click to upload or drag and drop
PNG, JPG, GIF up to 10MB

"""
    return html_content


@app.get("/get-env-key")
async def get_env_key():
    """Tell the client whether a Gemini API key is configured server-side.

    Returns:
        A JSON body ``{"api_key": "uga"}`` with status 200 when
        ``GEMINI_API_KEY`` is set, or ``{"api_key": null}`` with status 404
        when it is not.
    """
    api_key = os.environ.get("GEMINI_API_KEY")
    if api_key:
        # NOTE(review): a fixed placeholder is returned rather than the real
        # key - presumably so the secret never leaves the server; confirm
        # this is intended.
        return JSONResponse(content={"api_key": "uga"})
    return JSONResponse(content={"api_key": None}, status_code=404)


@app.post("/analyze-image")
async def analyze_image_endpoint(
    image: UploadFile = File(...),
    question: str | None = Form(None),
    api_key: str = Form(...),
):
    """Analyze uploaded image with AI.

    Args:
        image: Uploaded file; its declared content type, when present, must
            start with ``image/``.
        question: Optional prompt. When omitted, the analyzer falls back to
            its default prompt.
        api_key: Forwarded to ``analyze_image_with_ai``.

    Returns:
        A JSON body ``{"analysis": <text>}``.

    Raises:
        HTTPException: 400 for a non-image content type or an empty upload;
            500 for any unexpected error.
    """
    try:
        # Validate image type before reading the body
        if image.content_type and not image.content_type.startswith('image/'):
            raise HTTPException(status_code=400, detail=f"Invalid file type: {image.content_type}")

        # Read image data
        image_data = await image.read()
        if not image_data:
            raise HTTPException(status_code=400, detail="Uploaded image is empty")

        # Analyze with AI, declaring the upload's own type when it is known
        result = await analyze_image_with_ai(
            image_data,
            question,
            api_key,
            media_type=image.content_type or "image/png",
        )

        return JSONResponse(content={"analysis": result})
    except HTTPException:
        # Deliberate client errors must keep their status code instead of
        # being rewrapped as a 500 by the handler below.
        raise
    except Exception as e:
        logger.error(f"Error analyzing image: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e


if __name__ == "__main__":
    # setup_environment() has already run at import time above.
    print("🚀 Starting Cicero Passa a Cola...")
    uvicorn.run(app, host="0.0.0.0", port=7860)