tokenCounter / app.py
namaai's picture
Update app.py
a3a2a4b verified
import streamlit as st
import pandas as pd
import tiktoken
import PyPDF2
import os
import logging
# Configure logging: only records at ERROR level or above are emitted, each
# formatted as "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s')
# Initialize encoding for the model: the tiktoken "cl100k_base" encoding,
# created at module level and used by count_tokens() for every tokenization.
encoding = tiktoken.get_encoding("cl100k_base")
# Function to count tokens in a text
def count_tokens(text):
    """Return the number of tokens in ``text`` under the module-level ``encoding``.

    Special-token markers that appear literally in the input (for example
    ``<|endoftext|>``) are tokenized as ordinary text. tiktoken's default is
    to raise ``ValueError`` when it meets one, which would make perfectly
    valid user text report an error and a count of 0.

    Args:
        text: The string to tokenize.

    Returns:
        The token count, or 0 if tokenization fails. A failure is logged and
        also shown to the user via ``st.error``.
    """
    try:
        # disallowed_special=() switches off the special-token guard so that
        # user-supplied text is always treated as plain text.
        tokens = encoding.encode(text, disallowed_special=())
        return len(tokens)
    except Exception as e:
        logging.error(f"Tokenization error: {e}")
        st.error("Error counting tokens.")
        return 0
# Function to load CSV and count tokens
def count_tokens_in_csv(file):
    """Display an uploaded CSV and return the token count of its cell values.

    The file is parsed with ``pd.read_csv`` and shown on the Streamlit page.
    Every non-missing cell is converted to a string, the strings are joined
    with single spaces in row-major order, and the result is passed to
    ``count_tokens``. The header row and the delimiters are not part of the
    counted text.

    Args:
        file: A path or file-like object accepted by ``pd.read_csv``.

    Returns:
        The token count of the joined cell text, or 0 if the file could not
        be read or processed. Errors are logged and shown via ``st.error``.
    """
    try:
        # Load CSV file into a DataFrame
        df = pd.read_csv(file)
        st.write("Uploaded CSV Data:")
        st.dataframe(df)
        # Blank cells are parsed as NaN; stringifying them would add a
        # spurious "nan" to the text for every empty cell, so mask them out.
        present = df.notna().to_numpy()
        cells = df.astype(str).to_numpy()[present]
        # Concatenate all remaining cell text from the DataFrame
        text = " ".join(cells)
        # Calculate the number of tokens
        return count_tokens(text)
    except pd.errors.EmptyDataError:
        logging.error("CSV file is empty.")
        st.error("The CSV file is empty or cannot be read.")
    except pd.errors.ParserError:
        logging.error("CSV file parsing error.")
        st.error("Error parsing the CSV file.")
    except Exception as e:
        logging.error(f"Error processing CSV file: {e}")
        st.error("An error occurred while processing the CSV file.")
    # Reached only after one of the handlers above has reported an error.
    return 0
# Function to load TXT and count tokens
def count_tokens_in_txt(file):
    """Return the token count of an uploaded UTF-8 text file.

    Args:
        file: A binary file-like object whose ``read()`` returns ``bytes``.

    Returns:
        The token count of the decoded text, or 0 if the bytes are not valid
        UTF-8 or another error occurs. Errors are logged and shown via
        ``st.error``.
    """
    try:
        # "utf-8-sig" decodes ordinary UTF-8 and additionally drops a leading
        # byte-order mark (written by some Windows editors), which would
        # otherwise survive as U+FEFF and be counted as part of the text.
        text = file.read().decode('utf-8-sig')
        return count_tokens(text)
    except UnicodeDecodeError:
        logging.error("TXT file decoding error.")
        st.error("Error decoding the TXT file. Ensure it's in UTF-8 format.")
    except Exception as e:
        logging.error(f"Error processing TXT file: {e}")
        st.error("An error occurred while processing the TXT file.")
    # Reached only after one of the handlers above has reported an error.
    return 0
# Function to load PDF and count tokens
def count_tokens_in_pdf(file):
    """Return the token count of the text extracted from an uploaded PDF.

    The text of every page is extracted with PyPDF2 and concatenated with no
    separator between pages before being passed to ``count_tokens``.

    Args:
        file: A path or binary file-like object accepted by
            ``PyPDF2.PdfReader``.

    Returns:
        The token count of the extracted text, or 0 if the PDF could not be
        read or processed. Errors are logged and shown via ``st.error``.
    """
    try:
        reader = PyPDF2.PdfReader(file)
        # A page whose extraction yields nothing contributes an empty string.
        page_texts = [(page.extract_text() or "") for page in reader.pages]
        combined = "".join(page_texts)
        return count_tokens(combined)
    except PyPDF2.errors.PdfReadError:
        logging.error("PDF file reading error.")
        st.error("Error reading the PDF file. Ensure it's not corrupted.")
    except Exception as e:
        logging.error(f"Error processing PDF file: {e}")
        st.error("An error occurred while processing the PDF file.")
    # Reached only after one of the handlers above has reported an error.
    return 0
# Streamlit app
def main():
    """Render the Token Counter page.

    The page offers two independent inputs: a text area whose token count is
    shown when the "Count" button is pressed, and a file uploader (CSV, TXT
    or PDF) whose token count is shown as soon as a file is provided.
    """
    st.title('Token Counter')
    st.write('This product belongs to: NAMA AI')

    # Text input
    text_input = st.text_area("Enter text to calculate tokens:")
    if text_input:
        token_count = count_tokens(text_input)
        if st.button("Count"):
            st.write(f'The input text contains {token_count} tokens.')

    # File uploader
    uploaded_file = st.file_uploader("Choose a file", type=["csv", "txt", "pdf"])
    if uploaded_file is None:
        return

    # Map each supported extension to its display label and counting function.
    counters = {
        ".csv": ("CSV", count_tokens_in_csv),
        ".txt": ("TXT", count_tokens_in_txt),
        ".pdf": ("PDF", count_tokens_in_pdf),
    }
    _, suffix = os.path.splitext(uploaded_file.name)
    entry = counters.get(suffix.lower())
    if entry is None:
        st.error("Unsupported file type.")
        return

    label, counter = entry
    token_count = counter(uploaded_file)
    st.write(f'The {label} file contains {token_count} tokens.')


if __name__ == "__main__":
    main()