Spaces:

Aivis
/

Token_counter

Sleeping

App Files Files Community

Aivis committed on Nov 8, 2024

Commit

400e980

verified ·

1 Parent(s): d49c2cc

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -15

app.py CHANGED Viewed

@@ -1,8 +1,8 @@
 import gradio as gr
 import pandas as pd
 import tiktoken
 import anthropic
-#import os
 def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthropic_model):
     # Check if file is uploaded
@@ -27,7 +27,10 @@ def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthr
             openai_encoding = tiktoken.get_encoding("cl100k_base")
         token_counts_openai = {}
-        total_tokens_openai = len(openai_encoding.encode(df.to_csv(index=False)))
         # Iterate over columns
         for col in df.columns:
@@ -43,7 +46,7 @@ def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthr
             #total_tokens_openai += tokens_openai
         # Prepare OpenAI output
-        output += f"**Total OpenAI Tokens ({openai_model}): {total_tokens_openai}**\n"
         output += f"\n**OpenAI Token Counts per Column ({openai_model}):**\n\n"
         for col, count in token_counts_openai.items():
             output += f"- {col}: {count} tokens\n"
@@ -57,24 +60,39 @@ def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthr
         # Initialize the Anthropic client
         #client = anthropic.Anthropic(api_key=anthropic_api_key)
-        #client = anthropic.Anthropic()
-        try:
-            client = anthropic.Anthropic()
-            print("Anthropic client initialized successfully")
-        except Exception as e:
-            return f"Error initializing Anthropic client: {e}"
         token_counts_anthropic = {}
         try:
-            total_tokens_anthropic = client.count_tokens(df.to_csv(index=False))
         except Exception as e:
-            return f"Error counting tokens with Anthropic model ({anthropic.__version__}): {e}"
         # Iterate over columns
         for col in df.columns:
             #tokens_col_anthropic = 0
             try:
-                tokens_anthropic = client.count_tokens('\n'.join([col]+list(df[col].astype(str).values)))
             except Exception as e:
                 return f"Error counting tokens with Anthropic model: {e}"
             # for cell in df[col].astype(str):
@@ -87,7 +105,7 @@ def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthr
             #total_tokens_anthropic += tokens_anthropic
         # Prepare Anthropic output
-        output += f"**Total Anthropic Tokens ({anthropic_model}): {total_tokens_anthropic}**\n"
         output += f"\n**Anthropic Token Counts per Column ({anthropic_model}):**\n"
         for col, count in token_counts_anthropic.items():
             output += f"- {col}: {count} tokens\n"
@@ -102,6 +120,10 @@ def main():
     with gr.Blocks() as demo:
         gr.Markdown("# Token Counter")
         gr.Markdown("Upload a CSV file to see token counts per column and total tokens.")
         with gr.Row():
             file_input = gr.File(label="Upload CSV File", type="filepath")
@@ -117,7 +139,7 @@ def main():
                 visible=False
             )
             anthropic_model = gr.Dropdown(
-                choices=['claude-3-5-sonnet-latest', 'claude-3-5-haiku-latest', 'claude-3-opus-latest'],
                 label="Select Anthropic Model",
                 visible=False
             )
@@ -137,7 +159,8 @@ def main():
         inputs = [file_input, calculate_openai, openai_model, calculate_anthropic, anthropic_model]
         submit_button.click(fn=process_csv, inputs=inputs, outputs=output)
-    demo.launch(share=True)
 if __name__ == "__main__":
     main()

 import gradio as gr
+import json
 import pandas as pd
 import tiktoken
 import anthropic
 def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthropic_model):
     # Check if file is uploaded
             openai_encoding = tiktoken.get_encoding("cl100k_base")
         token_counts_openai = {}
+        try:
+            total_tokens_openai = len(openai_encoding.encode(df.to_csv(index=False)))
+        except Exception as e:
+            return f"Error counting tokens with OpenAI model: {e}"
         # Iterate over columns
         for col in df.columns:
             #total_tokens_openai += tokens_openai
         # Prepare OpenAI output
+        output += f"\n**Total OpenAI Tokens ({openai_model}): {total_tokens_openai}**\n"
         output += f"\n**OpenAI Token Counts per Column ({openai_model}):**\n\n"
         for col, count in token_counts_openai.items():
             output += f"- {col}: {count} tokens\n"
         # Initialize the Anthropic client
         #client = anthropic.Anthropic(api_key=anthropic_api_key)
+        client = anthropic.Anthropic()
         token_counts_anthropic = {}
+        #total_tokens_anthropic = client.count_tokens(df.to_csv(index=False))
         try:
+            response = client.beta.messages.count_tokens(
+                betas=["token-counting-2024-11-01"],
+                model=anthropic_model, #"claude-3-5-sonnet-20241022",
+                #system="You are a scientist",
+                messages=[{
+                    "role": "user",
+                    "content": df.to_csv(index=False)
+                    }],
+                    )
+            total_tokens_anthropic = json.loads(response.json())['input_tokens']
         except Exception as e:
+            return f"Error counting tokens with Anthropic model: {e}"
         # Iterate over columns
         for col in df.columns:
             #tokens_col_anthropic = 0
             try:
+                #tokens_anthropic = client.count_tokens('\n'.join([col]+list(df[col].astype(str).values))) #0.37.1 version
+                response = client.beta.messages.count_tokens(
+                    betas=["token-counting-2024-11-01"],
+                    model=anthropic_model,
+                    messages=[{
+                        "role": "user",
+                        "content": '\n'.join([col]+list(df[col].astype(str).values))
+                        }],
+                        )
+                tokens_anthropic = json.loads(response.json())['input_tokens']
             except Exception as e:
                 return f"Error counting tokens with Anthropic model: {e}"
             # for cell in df[col].astype(str):
             #total_tokens_anthropic += tokens_anthropic
         # Prepare Anthropic output
+        output += f"\n**Total Anthropic Tokens ({anthropic_model}): {total_tokens_anthropic}**\n"
         output += f"\n**Anthropic Token Counts per Column ({anthropic_model}):**\n"
         for col, count in token_counts_anthropic.items():
             output += f"- {col}: {count} tokens\n"
     with gr.Blocks() as demo:
         gr.Markdown("# Token Counter")
         gr.Markdown("Upload a CSV file to see token counts per column and total tokens.")
+        gr.Markdown("""
+                    For OpenAI models Python package `tiktoken` is used.
+                    For Anthropic models beta version of [Token counting](https://docs.anthropic.com/en/docs/build-with-claude/token-counting) is used.
+                    """)
         with gr.Row():
             file_input = gr.File(label="Upload CSV File", type="filepath")
                 visible=False
             )
             anthropic_model = gr.Dropdown(
+                choices=['claude-3-5-sonnet-latest', 'claude-3-5-haiku-latest', 'claude-3-opus-latest', 'claude-3-haiku-20240307'],
                 label="Select Anthropic Model",
                 visible=False
             )
         inputs = [file_input, calculate_openai, openai_model, calculate_anthropic, anthropic_model]
         submit_button.click(fn=process_csv, inputs=inputs, outputs=output)
+    #demo.launch(share=True)
+    demo.launch()
 if __name__ == "__main__":
     main()