Spaces:

chefcode
/

chefcodeocr

Sleeping

App Files Files Community

chefcodeocr / cost_tracker.py

Mariem-Daha

Upload 22 files

8c33e8e verified 6 months ago

raw

history blame

4.46 kB

	"""
	Cost tracking utility for Invoice OCR system
	Tracks Document AI and Gemini API usage costs
	"""

	import json
	from datetime import datetime
	from pathlib import Path
	from typing import Any, Dict, Optional


	class CostTracker:
	    """Track and calculate costs for Document AI + Gemini usage.

	    Per-invoice costs are derived from the class-level pricing constants.
	    ``usage_log`` is loaded from ``log_file`` when the tracker is created
	    and written back to the same file by ``save_usage``.
	    """

	    # Pricing (as of 2025), in USD.
	    # Document AI OCR: $1.50 per 1,000 pages = $0.0015 per page.
	    DOCUMENT_AI_PER_PAGE = 0.0015
	    # Gemini 2.0 Flash (includes text, images, and videos).
	    GEMINI_INPUT_PER_TOKEN = 0.10 / 1_000_000  # $0.10 per 1M tokens (input)
	    GEMINI_OUTPUT_PER_TOKEN = 0.40 / 1_000_000  # $0.40 per 1M tokens (output)

	    # Approximate input tokens added for one image sent to Gemini.
	    GEMINI_IMAGE_TOKENS = 258
	    # Fallback token counts used when neither a count nor a text is given.
	    DEFAULT_INPUT_TOKENS = 2000  # Invoices have more text
	    DEFAULT_OUTPUT_TOKENS = 800  # More line items
	    # Default usage-log location, relative to the current working directory.
	    DEFAULT_LOG_FILE = Path("usage_costs.json")

	    def __init__(self, log_file: Optional[Path] = None):
	        """Create a tracker and load any previously saved usage log.

	        Args:
	            log_file: Location of the JSON usage log (``str`` or ``Path``).
	                When omitted, ``usage_costs.json`` in the current working
	                directory is used.
	        """
	        self.usage_log = []
	        self.log_file = Path(
	            self.DEFAULT_LOG_FILE if log_file is None else log_file
	        )
	        self.load_usage()

	    def estimate_tokens(self, text: str) -> int:
	        """Estimate token count (roughly 1 token per 4 characters)."""
	        return len(text) // 4

	    def calculate_invoice_cost(
	        self,
	        input_tokens: Optional[int] = None,
	        output_tokens: Optional[int] = None,
	        input_text: Optional[str] = None,
	        output_text: Any = None,
	        includes_image: bool = True
	    ) -> Dict[str, Any]:
	        """Calculate cost for processing one invoice.

	        Args:
	            input_tokens: Number of input tokens (if known). Used as given;
	                ``includes_image`` does not modify an explicit count.
	            output_tokens: Number of output tokens (if known).
	            input_text: Input text to estimate tokens from when
	                ``input_tokens`` is not given.
	            output_text: Output to estimate tokens from when
	                ``output_tokens`` is not given. It is JSON-encoded before
	                its length is measured, so it must be JSON-serializable.
	            includes_image: Whether image is sent to Gemini (adds ~258
	                tokens to an input count estimated from ``input_text``).

	        Returns:
	            Dictionary with the cost breakdown: ``document_ai``,
	            ``gemini_input``, ``gemini_output``, ``gemini_total`` and
	            ``total`` (floats), plus ``tokens``, a nested dictionary with
	            the ``input``, ``output`` and ``total`` token counts.

	        Raises:
	            TypeError: If ``output_text`` has to be used for estimation and
	                cannot be serialized to JSON.
	        """
	        # Estimate tokens if not provided
	        if input_tokens is None and input_text:
	            input_tokens = self.estimate_tokens(input_text)
	            # Add image tokens if image is included
	            if includes_image:
	                input_tokens += self.GEMINI_IMAGE_TOKENS
	        if output_tokens is None and output_text:
	            output_tokens = self.estimate_tokens(json.dumps(output_text))

	        # Fall back to the defaults only when a count is really missing.
	        # Checking "is None" (not truthiness) keeps an explicit or estimated
	        # count of 0 from being replaced by the default.
	        if input_tokens is None:
	            input_tokens = self.DEFAULT_INPUT_TOKENS
	        if output_tokens is None:
	            output_tokens = self.DEFAULT_OUTPUT_TOKENS

	        # Calculate costs (Document AI is charged for a single page)
	        docai_cost = self.DOCUMENT_AI_PER_PAGE
	        gemini_input_cost = input_tokens * self.GEMINI_INPUT_PER_TOKEN
	        gemini_output_cost = output_tokens * self.GEMINI_OUTPUT_PER_TOKEN
	        gemini_total = gemini_input_cost + gemini_output_cost
	        total_cost = docai_cost + gemini_total

	        return {
	            "document_ai": docai_cost,
	            "gemini_input": gemini_input_cost,
	            "gemini_output": gemini_output_cost,
	            "gemini_total": gemini_total,
	            "total": total_cost,
	            "tokens": {
	                "input": input_tokens,
	                "output": output_tokens,
	                "total": input_tokens + output_tokens,
	            },
	        }

	    def print_invoice_cost(self, costs: Dict[str, Any]) -> None:
	        """Pretty print cost for an invoice.

	        Args:
	            costs: Cost breakdown in the shape returned by
	                ``calculate_invoice_cost``.
	        """
	        print("\n" + "="*70)
	        print("💰 INVOICE PROCESSING COST")
	        print("="*70)
	        print(f"Document AI OCR: ${costs['document_ai']:.6f}")
	        print(f"Gemini Input: ${costs['gemini_input']:.6f} ({costs['tokens']['input']:,} tokens)")
	        print(f"Gemini Output: ${costs['gemini_output']:.6f} ({costs['tokens']['output']:,} tokens)")
	        print("-" * 70)
	        print(f"TOTAL COST: ${costs['total']:.6f} ({costs['tokens']['total']:,} tokens)")
	        print("="*70 + "\n")

	    def save_usage(self) -> None:
	        """Save usage log to file.

	        Best effort: a failure is reported as a printed warning and is not
	        raised to the caller.
	        """
	        try:
	            # Serialize before opening the file: opening with "w" truncates
	            # it, so a non-serializable entry would otherwise leave a
	            # partial, unreadable log behind.
	            payload = json.dumps(self.usage_log, indent=2)
	            with open(self.log_file, "w", encoding="utf-8") as f:
	                f.write(payload)
	        except (OSError, TypeError, ValueError) as e:
	            print(f"Warning: Could not save usage log: {e}")

	    def load_usage(self) -> None:
	        """Load usage log from file.

	        A missing file leaves ``usage_log`` untouched. An unreadable file,
	        invalid JSON, or JSON that is not a list prints a warning and
	        resets ``usage_log`` to an empty list.
	        """
	        try:
	            if not self.log_file.exists():
	                return
	            with open(self.log_file, "r", encoding="utf-8") as f:
	                loaded = json.load(f)
	            if not isinstance(loaded, list):
	                # The log is always written as a list; anything else would
	                # break callers that append to usage_log.
	                raise ValueError(
	                    f"expected a JSON list, got {type(loaded).__name__}"
	                )
	            self.usage_log = loaded
	        except (OSError, ValueError) as e:
	            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
	            print(f"Warning: Could not load usage log: {e}")
	            self.usage_log = []