Spaces:
Sleeping
Sleeping
"""
Cost tracking utility for the OCR system.

Tracks Document AI and Gemini API usage costs.
"""
| from typing import Dict, Any | |
| from datetime import datetime | |
| import json | |
| from pathlib import Path | |
class CostTracker:
    """Track and calculate costs for Document AI + Gemini usage.

    Every logged receipt is appended to ``self.usage_log`` (a list of dicts)
    and the whole list is persisted as JSON to ``self.log_file`` after each
    call to :meth:`log_usage`. Persistence is best-effort: I/O or JSON
    problems are reported with a printed warning and never raised.
    """

    # Pricing (as of 2025)
    # Document AI Receipt Parser: $0.10 per document (1-10 pages).
    # Receipts are typically one page, so each receipt is charged $0.10.
    DOCUMENT_AI_PER_PAGE = 0.10

    # Gemini 2.5 Flash Lite
    GEMINI_INPUT_PER_TOKEN = 0.10 / 1_000_000  # $0.10 per 1M tokens (text/image/video)
    GEMINI_OUTPUT_PER_TOKEN = 0.40 / 1_000_000  # $0.40 per 1M tokens (including thinking)

    # Estimation constants used when exact token counts are not supplied.
    IMAGE_TOKENS = 258  # approximate tokens charged for one image
    DEFAULT_INPUT_TOKENS = 1408  # average input size, image included
    DEFAULT_OUTPUT_TOKENS = 600  # average output size

    def __init__(self, log_file=None):
        """Create a tracker and load any previously saved usage log.

        Args:
            log_file: Optional path (``str`` or ``Path``) of the JSON log.
                Defaults to ``usage_costs.json`` in the current directory.
        """
        self.usage_log = []
        self.log_file = Path("usage_costs.json") if log_file is None else Path(log_file)
        self.load_usage()

    def estimate_tokens(self, text: str) -> int:
        """Estimate token count (roughly 1 token per 4 characters)."""
        return len(text) // 4

    def calculate_receipt_cost(
        self,
        input_tokens: int = None,
        output_tokens: int = None,
        input_text: str = None,
        output_text: str = None,
        includes_image: bool = True,
    ) -> Dict[str, Any]:
        """Calculate the cost of processing one receipt.

        Args:
            input_tokens: Number of input tokens (if known). Used as-is;
                an explicit ``0`` is respected.
            output_tokens: Number of output tokens (if known). Used as-is;
                an explicit ``0`` is respected.
            input_text: Input text to estimate tokens from when
                ``input_tokens`` is not given.
            output_text: Output to estimate tokens from when
                ``output_tokens`` is not given. A string is measured
                directly; any other value is measured by its JSON encoding.
            includes_image: Whether an image is sent to Gemini. Adds
                ``IMAGE_TOKENS`` to an estimate derived from ``input_text``.

        Returns:
            Dictionary with the cost breakdown (``document_ai``,
            ``gemini_input``, ``gemini_output``, ``gemini_total``, ``total``)
            and a nested ``tokens`` dict (``input``, ``output``, ``total``).
        """
        if input_tokens is None:
            if input_text:
                input_tokens = self.estimate_tokens(input_text)
                if includes_image:
                    input_tokens += self.IMAGE_TOKENS
            else:
                # Nothing to estimate from: fall back to the average.
                input_tokens = self.DEFAULT_INPUT_TOKENS

        if output_tokens is None:
            if output_text:
                # Do not JSON-encode a string: the added quotes and escape
                # characters would inflate the estimate.
                if isinstance(output_text, str):
                    measured = output_text
                else:
                    measured = json.dumps(output_text)
                output_tokens = self.estimate_tokens(measured)
            else:
                output_tokens = self.DEFAULT_OUTPUT_TOKENS

        docai_cost = self.DOCUMENT_AI_PER_PAGE
        gemini_input_cost = input_tokens * self.GEMINI_INPUT_PER_TOKEN
        gemini_output_cost = output_tokens * self.GEMINI_OUTPUT_PER_TOKEN
        gemini_total = gemini_input_cost + gemini_output_cost

        return {
            "document_ai": docai_cost,
            "gemini_input": gemini_input_cost,
            "gemini_output": gemini_output_cost,
            "gemini_total": gemini_total,
            "total": docai_cost + gemini_total,
            "tokens": {
                "input": input_tokens,
                "output": output_tokens,
                "total": input_tokens + output_tokens,
            },
        }

    def log_usage(
        self,
        receipt_id: int,
        filename: str,
        input_tokens: int = None,
        output_tokens: int = None,
        input_text: str = None,
        output_text: str = None,
        includes_image: bool = True,
    ):
        """Record the cost of one receipt and persist the log.

        Args:
            receipt_id: Identifier stored with the entry.
            filename: File name stored with the entry.
            input_tokens: See :meth:`calculate_receipt_cost`.
            output_tokens: See :meth:`calculate_receipt_cost`.
            input_text: See :meth:`calculate_receipt_cost`.
            output_text: See :meth:`calculate_receipt_cost`.
            includes_image: See :meth:`calculate_receipt_cost`.

        Returns:
            The cost breakdown dict produced by :meth:`calculate_receipt_cost`.
        """
        # Function-scope import: the module only imports ``datetime`` itself.
        from datetime import timezone

        costs = self.calculate_receipt_cost(
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            input_text=input_text,
            output_text=output_text,
            includes_image=includes_image,
        )

        # ``datetime.utcnow()`` is deprecated; take an aware UTC "now" and
        # drop tzinfo so the stored format matches existing log entries.
        timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

        entry = {
            "timestamp": timestamp,
            "receipt_id": receipt_id,
            "filename": filename,
            **costs,
        }
        self.usage_log.append(entry)
        self.save_usage()
        return costs

    def get_total_costs(self) -> Dict[str, Any]:
        """Aggregate costs across all logged receipts.

        Returns:
            Dictionary with ``total_receipts``, ``total_cost``,
            ``document_ai_cost``, ``gemini_cost``, ``total_tokens``,
            ``avg_cost_per_receipt`` and ``avg_tokens_per_receipt``.
            The same keys are returned for an empty log, all zero.
        """
        entries = self.usage_log
        total_receipts = len(entries)

        # Start the sums at 0.0 so an empty log yields floats, not ints.
        total_cost = sum((entry["total"] for entry in entries), 0.0)
        docai_cost = sum((entry["document_ai"] for entry in entries), 0.0)
        gemini_cost = sum((entry["gemini_total"] for entry in entries), 0.0)
        total_tokens = sum(entry["tokens"]["total"] for entry in entries)

        if total_receipts:
            avg_cost = round(total_cost / total_receipts, 6)
            avg_tokens = round(total_tokens / total_receipts, 0)
        else:
            avg_cost = 0.0
            avg_tokens = 0.0

        return {
            "total_receipts": total_receipts,
            "total_cost": round(total_cost, 6),
            "document_ai_cost": round(docai_cost, 6),
            "gemini_cost": round(gemini_cost, 6),
            "total_tokens": total_tokens,
            "avg_cost_per_receipt": avg_cost,
            "avg_tokens_per_receipt": avg_tokens,
        }

    def print_receipt_cost(self, costs: Dict[str, Any]):
        """Pretty-print a cost breakdown returned by calculate_receipt_cost."""
        tokens = costs["tokens"]
        print("\n" + "=" * 60)
        print("💰 COST BREAKDOWN")
        print("=" * 60)
        print(f"Document AI: ${costs['document_ai']:.6f}")
        print(f"Gemini Input: ${costs['gemini_input']:.6f} ({tokens['input']:,} tokens)")
        print(f"Gemini Output: ${costs['gemini_output']:.6f} ({tokens['output']:,} tokens)")
        print("-" * 60)
        print(f"TOTAL COST: ${costs['total']:.6f} ({tokens['total']:,} tokens)")
        print("=" * 60 + "\n")

    def save_usage(self):
        """Write the usage log to ``self.log_file`` (best-effort).

        Failures are reported with a printed warning and not raised.
        """
        try:
            # Serialise before touching the file so an unserialisable entry
            # cannot leave a truncated or half-written log behind.
            payload = json.dumps(self.usage_log, indent=2)
            self.log_file.write_text(payload, encoding="utf-8")
        except (OSError, TypeError, ValueError) as e:
            print(f"Warning: Could not save usage log: {e}")

    def load_usage(self):
        """Load the usage log from ``self.log_file`` if it exists.

        A missing file leaves ``self.usage_log`` untouched. An unreadable
        file, invalid JSON, or a payload that is not a list resets the log
        to empty and prints a warning.
        """
        try:
            with open(self.log_file, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except FileNotFoundError:
            # First run: nothing has been saved yet.
            return
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Warning: Could not load usage log: {e}")
            self.usage_log = []
            return

        if not isinstance(loaded, list):
            # log_usage appends to the log, so anything but a list is unusable.
            print("Warning: Could not load usage log: expected a JSON list")
            self.usage_log = []
            return

        self.usage_log = loaded
# Example usage: price one sample receipt, then summarise the stored log.
if __name__ == "__main__":
    cost_tracker = CostTracker()
    divider = "=" * 60

    # Price a single sample receipt; calculating a cost does not log it.
    sample_costs = cost_tracker.calculate_receipt_cost(
        input_text="Sample receipt text with 709 characters...",
        output_text='{"merchant_name": "Test", "items": [...]}',
    )
    cost_tracker.print_receipt_cost(sample_costs)

    # Aggregate statistics over every entry loaded from the usage log.
    print("\n📊 TOTAL USAGE STATISTICS")
    print(divider)
    for stat_name, stat_value in cost_tracker.get_total_costs().items():
        label = stat_name.replace("_", " ").title()
        print(f"{label}: {stat_value}")
    print(divider)