Spaces:
Sleeping
Sleeping
"""
Cost-tracking utility for the Invoice OCR system.

Tracks Document AI and Gemini API usage costs.
"""
import json
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional
class CostTracker:
    """Track and calculate costs for Document AI + Gemini usage.

    The tracker keeps an in-memory ``usage_log`` list that is loaded from,
    and can be written back to, a JSON file on disk. Persistence is
    best-effort: I/O or (de)serialization failures are reported with a
    printed warning and never raised to the caller.
    """

    # Pricing (as of 2025)
    # Document AI OCR: $1.50 per 1,000 pages = $0.0015 per page
    DOCUMENT_AI_PER_PAGE = 0.0015

    # Gemini 2.0 Flash (includes text, images, and videos)
    GEMINI_INPUT_PER_TOKEN = 0.10 / 1_000_000  # $0.10 per 1M tokens (input)
    GEMINI_OUTPUT_PER_TOKEN = 0.40 / 1_000_000  # $0.40 per 1M tokens (output)

    # Approximate number of input tokens added when an image is sent to Gemini.
    IMAGE_TOKENS = 258

    # Fallback token counts used when neither a count nor a text is supplied
    # (invoices typically carry a lot of text and many line items).
    DEFAULT_INPUT_TOKENS = 2000
    DEFAULT_OUTPUT_TOKENS = 800

    def __init__(self, log_file: Path = Path("usage_costs.json")):
        """Create a tracker and load any previously saved usage log.

        Args:
            log_file: Location of the JSON usage log. Defaults to
                ``usage_costs.json`` relative to the current working
                directory. A plain string path is accepted as well.
        """
        self.usage_log = []
        self.log_file = Path(log_file)
        self.load_usage()

    def estimate_tokens(self, text: str) -> int:
        """Estimate token count (roughly 1 token per 4 characters)."""
        return len(text) // 4

    def calculate_invoice_cost(
        self,
        input_tokens: Optional[int] = None,
        output_tokens: Optional[int] = None,
        input_text: Optional[str] = None,
        output_text: Optional[Any] = None,
        includes_image: bool = True,
    ) -> Dict[str, Any]:
        """Calculate the cost of processing one invoice (one Document AI page).

        Token counts are resolved in this order: an explicit count wins; else
        a non-empty text is used to estimate the count; else a default is
        used. An explicit or estimated count of ``0`` is honoured as-is and is
        not replaced by the default.

        Args:
            input_tokens: Number of input tokens (if known).
            output_tokens: Number of output tokens (if known).
            input_text: Input text to estimate tokens from.
            output_text: Output to estimate tokens from. It is passed through
                ``json.dumps`` before estimating, so any JSON-serializable
                value works (a plain string gains its JSON quotes/escapes).
            includes_image: Whether an image is sent to Gemini. When the
                input count is estimated from ``input_text`` this adds
                ``IMAGE_TOKENS`` (~258). Explicit ``input_tokens`` are used
                unchanged.

        Returns:
            Dictionary with the cost breakdown in dollars (``document_ai``,
            ``gemini_input``, ``gemini_output``, ``gemini_total``, ``total``)
            plus a nested ``tokens`` dict (``input``, ``output``, ``total``).
        """
        # Estimate tokens if not provided.
        if input_tokens is None and input_text:
            input_tokens = self.estimate_tokens(input_text)
            if includes_image:
                input_tokens += self.IMAGE_TOKENS

        if output_tokens is None and output_text:
            output_tokens = self.estimate_tokens(json.dumps(output_text))

        # Fall back to defaults only when nothing was provided at all.
        # ``is None`` (not ``or``) so that a real count of 0 is not
        # overwritten by the default.
        if input_tokens is None:
            input_tokens = self.DEFAULT_INPUT_TOKENS
        if output_tokens is None:
            output_tokens = self.DEFAULT_OUTPUT_TOKENS

        docai_cost = self.DOCUMENT_AI_PER_PAGE
        gemini_input_cost = input_tokens * self.GEMINI_INPUT_PER_TOKEN
        gemini_output_cost = output_tokens * self.GEMINI_OUTPUT_PER_TOKEN
        gemini_total = gemini_input_cost + gemini_output_cost

        return {
            "document_ai": docai_cost,
            "gemini_input": gemini_input_cost,
            "gemini_output": gemini_output_cost,
            "gemini_total": gemini_total,
            "total": docai_cost + gemini_total,
            "tokens": {
                "input": input_tokens,
                "output": output_tokens,
                "total": input_tokens + output_tokens,
            },
        }

    def print_invoice_cost(self, costs: Dict[str, Any]) -> None:
        """Pretty print the cost breakdown for an invoice.

        Args:
            costs: A breakdown as returned by ``calculate_invoice_cost``.
        """
        tokens = costs["tokens"]
        print("\n" + "=" * 70)
        print("💰 INVOICE PROCESSING COST")
        print("=" * 70)
        print(f"Document AI OCR: ${costs['document_ai']:.6f}")
        print(f"Gemini Input: ${costs['gemini_input']:.6f} ({tokens['input']:,} tokens)")
        print(f"Gemini Output: ${costs['gemini_output']:.6f} ({tokens['output']:,} tokens)")
        print("-" * 70)
        print(f"TOTAL COST: ${costs['total']:.6f} ({tokens['total']:,} tokens)")
        print("=" * 70 + "\n")

    def save_usage(self) -> None:
        """Save the usage log to ``self.log_file`` (best-effort).

        Failures are reported with a printed warning rather than raised.
        """
        try:
            # Serialize before touching the file: opening in write mode
            # truncates it, so a serialization error must not be able to
            # wipe the previously saved log.
            payload = json.dumps(self.usage_log, indent=2)
            self.log_file.write_text(payload, encoding="utf-8")
        except (OSError, TypeError, ValueError) as e:
            print(f"Warning: Could not save usage log: {e}")

    def load_usage(self) -> None:
        """Load the usage log from ``self.log_file`` (best-effort).

        A missing file leaves ``usage_log`` untouched. An unreadable file,
        invalid JSON, or JSON that is not a list prints a warning and resets
        ``usage_log`` to an empty list.
        """
        try:
            with open(self.log_file, "r", encoding="utf-8") as f:
                loaded = json.load(f)
            if not isinstance(loaded, list):
                raise ValueError(
                    f"expected a JSON list, got {type(loaded).__name__}"
                )
        except FileNotFoundError:
            # No log saved yet: nothing to load, and not worth a warning.
            return
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            print(f"Warning: Could not load usage log: {e}")
            self.usage_log = []
        else:
            self.usage_log = loaded