Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,13 +6,9 @@ import json
|
|
| 6 |
def count_tokens(json_file, encoding_name):
|
| 7 |
encoding = tiktoken.get_encoding(encoding_name)
|
| 8 |
|
| 9 |
-
#
|
| 10 |
-
if not json_file.name.endswith('.jsonl'):
|
| 11 |
-
return {"error": "Please upload a valid .jsonl file."}, 0
|
| 12 |
-
|
| 13 |
-
# Load the JSONL data
|
| 14 |
with open(json_file.name, 'r') as f:
|
| 15 |
-
data = [json.loads(line) for line in f.readlines()]
|
| 16 |
|
| 17 |
total_token_count = 0
|
| 18 |
token_counts = []
|
|
@@ -56,10 +52,10 @@ encoding_options = [
|
|
| 56 |
|
| 57 |
# Gradio UI setup
|
| 58 |
with gr.Blocks() as app:
|
| 59 |
-
gr.Markdown("# Token Counter for JSONL Datasets
|
| 60 |
|
| 61 |
with gr.Row():
|
| 62 |
-
json_input = gr.File(label="Upload
|
| 63 |
encoding_dropdown = gr.Dropdown(choices=encoding_options, label="Select Encoding", value="o200k_base (gpt-4o, gpt-4o-mini)")
|
| 64 |
|
| 65 |
# Output for individual conversation token counts
|
|
|
|
| 6 |
def count_tokens(json_file, encoding_name):
|
| 7 |
encoding = tiktoken.get_encoding(encoding_name)
|
| 8 |
|
| 9 |
+
# Load the JSON or JSONL data
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
with open(json_file.name, 'r') as f:
|
| 11 |
+
data = json.load(f) if json_file.name.endswith('.json') else [json.loads(line) for line in f.readlines()]
|
| 12 |
|
| 13 |
total_token_count = 0
|
| 14 |
token_counts = []
|
|
|
|
| 52 |
|
| 53 |
# Gradio UI setup
|
| 54 |
with gr.Blocks() as app:
|
| 55 |
+
gr.Markdown("# Token Counter for JSON/JSONL Datasets")
|
| 56 |
|
| 57 |
with gr.Row():
|
| 58 |
+
json_input = gr.File(label="Upload JSON/JSONL File")
|
| 59 |
encoding_dropdown = gr.Dropdown(choices=encoding_options, label="Select Encoding", value="o200k_base (gpt-4o, gpt-4o-mini)")
|
| 60 |
|
| 61 |
# Output for individual conversation token counts
|