# Token Counter — Gradio app (Hugging Face Space)
| import gradio as gr | |
| import json | |
| import pandas as pd | |
| import tiktoken | |
| import anthropic | |
| def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthropic_model): | |
| # Check if file is uploaded | |
| if file is None: | |
| return "Please upload a CSV file." | |
| # Read the CSV file | |
| try: | |
| df = pd.read_csv(file)#.name) | |
| except Exception as e: | |
| return f"Error reading CSV file: {e}" | |
| # Initialize output string | |
| output = "" | |
| if calculate_openai: | |
| # Get the OpenAI tokenizer for the selected model | |
| try: | |
| openai_encoding = tiktoken.encoding_for_model(openai_model) | |
| except KeyError: | |
| # Default encoding if model is not found | |
| openai_encoding = tiktoken.get_encoding("cl100k_base") | |
| token_counts_openai = {} | |
| try: | |
| total_tokens_openai = len(openai_encoding.encode(df.to_csv(index=False))) | |
| except Exception as e: | |
| return f"Error counting tokens with OpenAI model: {e}" | |
| # Iterate over columns | |
| for col in df.columns: | |
| #tokens_col_openai = 0 | |
| try: | |
| tokens_openai = openai_encoding.encode('\n'.join([col]+list(df[col].astype(str).values))) | |
| except Exception as e: | |
| return f"Error counting tokens with OpenAI model: {e}" | |
| # for cell in df[col].astype(str): | |
| # tokens_openai = openai_encoding.encode(cell) | |
| # tokens_col_openai += len(tokens_openai) | |
| token_counts_openai[col] = len(tokens_openai) | |
| #total_tokens_openai += tokens_openai | |
| # Prepare OpenAI output | |
| output += f"\n**Total OpenAI Tokens ({openai_model}): {total_tokens_openai}**\n" | |
| output += f"\n**OpenAI Token Counts per Column ({openai_model}):**\n\n" | |
| for col, count in token_counts_openai.items(): | |
| output += f"- {col}: {count} tokens\n" | |
| if calculate_anthropic: | |
| # Get the Anthropic API key from environment variables | |
| #anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY") | |
| #if not anthropic_api_key: | |
| # return "Please set the ANTHROPIC_API_KEY environment variable." | |
| # Initialize the Anthropic client | |
| #client = anthropic.Anthropic(api_key=anthropic_api_key) | |
| client = anthropic.Anthropic() | |
| token_counts_anthropic = {} | |
| #total_tokens_anthropic = client.count_tokens(df.to_csv(index=False)) | |
| try: | |
| response = client.beta.messages.count_tokens( | |
| betas=["token-counting-2024-11-01"], | |
| model=anthropic_model, #"claude-3-5-sonnet-20241022", | |
| #system="You are a scientist", | |
| messages=[{ | |
| "role": "user", | |
| "content": df.to_csv(index=False) | |
| }], | |
| ) | |
| total_tokens_anthropic = json.loads(response.json())['input_tokens'] | |
| except Exception as e: | |
| return f"Error counting tokens with Anthropic model: {e}" | |
| # Iterate over columns | |
| for col in df.columns: | |
| #tokens_col_anthropic = 0 | |
| try: | |
| #tokens_anthropic = client.count_tokens('\n'.join([col]+list(df[col].astype(str).values))) #0.37.1 version | |
| response = client.beta.messages.count_tokens( | |
| betas=["token-counting-2024-11-01"], | |
| model=anthropic_model, | |
| messages=[{ | |
| "role": "user", | |
| "content": '\n'.join([col]+list(df[col].astype(str).values)) | |
| }], | |
| ) | |
| tokens_anthropic = json.loads(response.json())['input_tokens'] | |
| except Exception as e: | |
| return f"Error counting tokens with Anthropic model: {e}" | |
| # for cell in df[col].astype(str): | |
| # try: | |
| # tokens_anthropic = client.count_tokens(cell) | |
| # except Exception as e: | |
| # return f"Error counting tokens with Anthropic model: {e}" | |
| # tokens_col_anthropic += tokens_anthropic | |
| token_counts_anthropic[col] = tokens_anthropic | |
| #total_tokens_anthropic += tokens_anthropic | |
| # Prepare Anthropic output | |
| output += f"\n**Total Anthropic Tokens ({anthropic_model}): {total_tokens_anthropic}**\n" | |
| output += f"\n**Anthropic Token Counts per Column ({anthropic_model}):**\n" | |
| for col, count in token_counts_anthropic.items(): | |
| output += f"- {col}: {count} tokens\n" | |
| if not calculate_openai and not calculate_anthropic: | |
| output = "Please select at least one model to calculate tokens." | |
| return output | |
def main():
    """Assemble the Gradio interface and start the local server."""

    def _toggle(checked):
        # Show a model dropdown only while its checkbox is ticked.
        return gr.update(visible=checked)

    with gr.Blocks() as app:
        gr.Markdown("# Token Counter")
        gr.Markdown("Upload a CSV file to see token counts per column and total tokens.")
        gr.Markdown("""
For OpenAI models Python package `tiktoken` is used.
For Anthropic models beta version of [Token counting](https://docs.anthropic.com/en/docs/build-with-claude/token-counting) is used.
""")
        with gr.Row():
            csv_file = gr.File(label="Upload CSV File", type="filepath")
        with gr.Row():
            use_openai = gr.Checkbox(label="Calculate tokens for OpenAI models")
            use_anthropic = gr.Checkbox(label="Calculate tokens for Anthropic models")
        with gr.Row():
            openai_choice = gr.Dropdown(
                choices=['gpt-4o', 'gpt-4o-mini', 'gpt-4'],
                label="Select OpenAI Model",
                visible=False,
            )
            anthropic_choice = gr.Dropdown(
                choices=['claude-3-5-sonnet-latest', 'claude-3-5-haiku-latest', 'claude-3-opus-latest', 'claude-3-haiku-20240307'],
                label="Select Anthropic Model",
                visible=False,
            )

        # One shared callback drives both dropdowns' visibility.
        use_openai.change(fn=_toggle, inputs=use_openai, outputs=openai_choice)
        use_anthropic.change(fn=_toggle, inputs=use_anthropic, outputs=anthropic_choice)

        run_btn = gr.Button("Calculate Tokens")
        result = gr.Markdown()
        run_btn.click(
            fn=process_csv,
            inputs=[csv_file, use_openai, openai_choice, use_anthropic, anthropic_choice],
            outputs=result,
        )

    app.launch()
# Script entry point: build and launch the Gradio app.
if __name__ == "__main__":
    main()