Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import pandas as pd | |
| import tiktoken | |
| import PyPDF2 | |
| import os | |
| import logging | |
# Emit only ERROR-and-above records, each stamped with time and severity.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.ERROR,
)
# Tokenizer for the "cl100k_base" vocabulary, created once at import time.
encoding = tiktoken.get_encoding("cl100k_base")
| # Function to count tokens in a text | |
def count_tokens(text):
    """Return the number of tokens the module-level ``encoding`` yields for ``text``.

    Args:
        text: The string to tokenize.

    Returns:
        The token count, or 0 if tokenization fails. A failure is logged and
        also reported in the Streamlit UI.
    """
    try:
        # tiktoken's default (disallowed_special="all") raises when the text
        # contains a special-token marker such as "<|endoftext|>". User text
        # may legitimately contain one, so encode such markers as plain text
        # instead of failing and reporting 0 tokens.
        return len(encoding.encode(text, disallowed_special=()))
    except Exception as e:
        logging.error(f"Tokenization error: {e}")
        st.error("Error counting tokens.")
        return 0
| # Function to load CSV and count tokens | |
def count_tokens_in_csv(file):
    """Show an uploaded CSV in the UI and count the tokens in its cell values.

    The file is parsed with ``pd.read_csv`` and displayed; every non-missing
    cell is converted to ``str`` and the cells are joined row by row with
    single spaces before being passed to ``count_tokens``. Column headers are
    not part of the counted text.

    Args:
        file: A path or file-like object accepted by ``pd.read_csv``.

    Returns:
        The token count, or 0 if the file is empty, cannot be parsed, or any
        other error occurs (each case is logged and reported in the UI).
    """
    try:
        df = pd.read_csv(file)
        st.write("Uploaded CSV Data:")
        st.dataframe(df)

        # astype(str) renders missing cells as the literal "nan", which would
        # be counted as real text. Mask those cells out before joining; the
        # boolean mask keeps the remaining cells in row-major order.
        cell_text = df.astype(str).to_numpy()
        present = df.notna().to_numpy()
        text = " ".join(cell_text[present])

        return count_tokens(text)
    except pd.errors.EmptyDataError:
        logging.error("CSV file is empty.")
        st.error("The CSV file is empty or cannot be read.")
    except pd.errors.ParserError:
        logging.error("CSV file parsing error.")
        st.error("Error parsing the CSV file.")
    except Exception as e:
        logging.error(f"Error processing CSV file: {e}")
        st.error("An error occurred while processing the CSV file.")
    # Reached only after one of the handlers above has reported a failure.
    return 0
| # Function to load TXT and count tokens | |
def count_tokens_in_txt(file):
    """Count the tokens in an uploaded UTF-8 text file.

    Args:
        file: A binary file-like object whose ``read()`` returns ``bytes``.

    Returns:
        The token count, or 0 if the bytes are not valid UTF-8 or any other
        error occurs (each case is logged and reported in the UI).
    """
    try:
        # "utf-8-sig" decodes ordinary UTF-8 unchanged but strips a leading
        # byte-order mark, so a BOM written by some editors is not tokenized
        # as if it were part of the text.
        text = file.read().decode('utf-8-sig')
        return count_tokens(text)
    except UnicodeDecodeError:
        logging.error("TXT file decoding error.")
        st.error("Error decoding the TXT file. Ensure it's in UTF-8 format.")
    except Exception as e:
        logging.error(f"Error processing TXT file: {e}")
        st.error("An error occurred while processing the TXT file.")
    # Reached only after one of the handlers above has reported a failure.
    return 0
| # Function to load PDF and count tokens | |
def count_tokens_in_pdf(file):
    """Count the tokens in the extractable text of an uploaded PDF.

    Args:
        file: A path or binary file-like object accepted by
            ``PyPDF2.PdfReader``.

    Returns:
        The token count of the text extracted from all pages, or 0 if the PDF
        cannot be read or any other error occurs (each case is logged and
        reported in the UI).
    """
    try:
        pdf_reader = PyPDF2.PdfReader(file)
        # extract_text() may yield nothing for a page (hence the `or ""`).
        # Pages are joined with a newline so the last word of one page is not
        # fused with the first word of the next before tokenization.
        text = "\n".join(
            page.extract_text() or "" for page in pdf_reader.pages
        )
        return count_tokens(text)
    except PyPDF2.errors.PdfReadError:
        logging.error("PDF file reading error.")
        st.error("Error reading the PDF file. Ensure it's not corrupted.")
    except Exception as e:
        logging.error(f"Error processing PDF file: {e}")
        st.error("An error occurred while processing the PDF file.")
    # Reached only after one of the handlers above has reported a failure.
    return 0
| # Streamlit app | |
def main():
    """Render the Token Counter page.

    The page has two independent inputs: a text area whose token count is
    shown when the "Count" button is pressed, and a file uploader (CSV, TXT
    or PDF) whose token count is shown as soon as a file is provided.
    """
    st.title('Token Counter')
    st.write('This product belongs to: NAMA AI')

    # --- Free-text input -------------------------------------------------
    text_input = st.text_area("Enter text to calculate tokens:")
    if text_input:
        # The count is computed as soon as text is present; the button only
        # controls whether the result is displayed.
        token_count = count_tokens(text_input)
        if st.button("Count"):
            st.write(f'The input text contains {token_count} tokens.')

    # --- File upload -----------------------------------------------------
    uploaded_file = st.file_uploader(
        "Choose a file",
        type=["csv", "txt", "pdf"],
    )
    if uploaded_file is None:
        return

    # Pick the counter from the (case-insensitive) file extension.
    _, raw_extension = os.path.splitext(uploaded_file.name)
    file_extension = raw_extension.lower()

    if file_extension == ".csv":
        token_count = count_tokens_in_csv(uploaded_file)
        st.write(f'The CSV file contains {token_count} tokens.')
    elif file_extension == ".txt":
        token_count = count_tokens_in_txt(uploaded_file)
        st.write(f'The TXT file contains {token_count} tokens.')
    elif file_extension == ".pdf":
        token_count = count_tokens_in_pdf(uploaded_file)
        st.write(f'The PDF file contains {token_count} tokens.')
    else:
        st.error("Unsupported file type.")
# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()