Spaces:

luminoussg
/

token_counter

Sleeping

luminoussg committed on Oct 16, 2024

Commit

0cc0797

verified ·

1 Parent(s): ee3031b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,13 +6,9 @@ import json
 def count_tokens(json_file, encoding_name):
     encoding = tiktoken.get_encoding(encoding_name)
-    # Validate that the file is a .jsonl file
-    if not json_file.name.endswith('.jsonl'):
-        return {"error": "Please upload a valid .jsonl file."}, 0
-    # Load the JSONL data
     with open(json_file.name, 'r') as f:
-        data = [json.loads(line) for line in f.readlines()]
     total_token_count = 0
     token_counts = []
@@ -56,10 +52,10 @@ encoding_options = [
 # Gradio UI setup
 with gr.Blocks() as app:
-    gr.Markdown("# Token Counter for JSONL Datasets (OpenAI Fine-Tuning)")
     with gr.Row():
-        json_input = gr.File(label="Upload .jsonl File", type="file")  # Accept only file uploads
         encoding_dropdown = gr.Dropdown(choices=encoding_options, label="Select Encoding", value="o200k_base (gpt-4o, gpt-4o-mini)")
     # Output for individual conversation token counts

 def count_tokens(json_file, encoding_name):
     encoding = tiktoken.get_encoding(encoding_name)
+    # Load the JSON or JSONL data
     with open(json_file.name, 'r') as f:
+        data = json.load(f) if json_file.name.endswith('.json') else [json.loads(line) for line in f.readlines()]
     total_token_count = 0
     token_counts = []
 # Gradio UI setup
 with gr.Blocks() as app:
+    gr.Markdown("# Token Counter for JSON/JSONL Datasets")
     with gr.Row():
+        json_input = gr.File(label="Upload JSON/JSONL File")
         encoding_dropdown = gr.Dropdown(choices=encoding_options, label="Select Encoding", value="o200k_base (gpt-4o, gpt-4o-mini)")
     # Output for individual conversation token counts