Spaces:
Configuration error
Configuration error
Commit ·
2b127d5
1
Parent(s): e48753b
Upload 4 files
Browse files- README.md +35 -13
- app.py +35 -0
- requirements.txt +5 -0
- token_counter.log +1 -0
README.md
CHANGED
|
@@ -1,13 +1,35 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Token Counter
|
| 2 |
+
|
| 3 |
+

|
| 4 |
+

|
| 5 |
+

|
| 6 |
+

|
| 7 |
+

|
| 8 |
+
|
| 9 |
+
Token Counter is a simple Python script that counts the number of tokens in a Markdown file. It's useful for analyzing and processing text data in natural language processing tasks.
|
| 10 |
+
|
| 11 |
+
## Installation
|
| 12 |
+
|
| 13 |
+
To use Token Counter, simply clone the repository:
|
| 14 |
+
|
| 15 |
+
```bash
|
| 16 |
+
git clone https://github.com/LightningRalf/token_counter.git
|
| 17 |
+
```
|
| 18 |
+
|
| 19 |
+
## Usage
|
| 20 |
+
|
| 21 |
+
To count the tokens in a Markdown file, run the `token_counter.py` script with the file path as an argument:
|
| 22 |
+
|
| 23 |
+
```bash
|
| 24 |
+
python token_counter.py path/to/your/markdown_file.md
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
The script will print the token count and also log the results in a log file.
|
| 28 |
+
|
| 29 |
+
## Contributing
|
| 30 |
+
|
| 31 |
+
We welcome contributions to improve Token Counter! Please feel free to open an issue or submit a pull request if you have any suggestions or improvements.
|
| 32 |
+
|
| 33 |
+
## License
|
| 34 |
+
|
| 35 |
+
This project is licensed under the CC0-1.0 License - see the [LICENSE](LICENSE) file for details.
|
app.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from transformers import AutoTokenizer
|
| 3 |
+
import requests
|
| 4 |
+
import datetime
|
| 5 |
+
from dateutil.relativedelta import relativedelta
|
| 6 |
+
|
| 7 |
+
# Count tokens in a text string using a specified language model.
def count_tokens_text(text, model_name='gpt4'):
    """Tokenize *text* with the Hugging Face tokenizer for *model_name*.

    Returns a ``(token_count, error)`` pair:
      * ``(int, None)`` on success, or
      * ``(None, str)`` if the tokenizer could not be loaded or applied.
    The Streamlit caller unpacks exactly this two-tuple, so the original
    placeholder body ("same as before") was a SyntaxError and broke the app.
    """
    try:
        # NOTE(review): the default 'gpt4' is not a valid Hub repo id and will
        # fail to load — callers are expected to pass a real model id from the
        # selectbox / manual entry; verify the intended default.
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        return len(tokenizer.encode(text)), None
    except Exception as exc:
        # Report the failure to the UI instead of crashing the script.
        return None, str(exc)
|
| 10 |
+
|
| 11 |
+
# Fetch the most popular models from the last month
def get_popular_models():
    """Return model ids from the Hugging Face Hub, most-downloaded first.

    Raises ``requests.RequestException`` on network/HTTP failure (or
    ``ValueError`` on malformed JSON) so callers can decide how to degrade.

    NOTE(review): ``start_date`` is not a documented filter of the
    ``/api/models`` endpoint — the "last month" restriction may be silently
    ignored by the server; confirm against the Hub API docs.
    """
    one_month_ago = (datetime.datetime.now() - relativedelta(months=1)).strftime("%Y-%m-%d")
    api_url = f"https://huggingface.co/api/models?sort=downloads&direction=desc&start_date={one_month_ago}"
    # Timeout so a slow API cannot hang the UI; raise on HTTP error status
    # instead of trying to parse an error page as JSON.
    response = requests.get(api_url, timeout=10)
    response.raise_for_status()
    data = response.json()
    # Bug fix: the endpoint returns a JSON *array* of model objects, not an
    # object with a "results" key — ``data["results"]`` raised TypeError.
    # Older payloads use "modelId"; newer ones use "id". Accept both.
    return [model.get("modelId") or model.get("id") for model in data]
|
| 19 |
+
|
| 20 |
+
# Streamlit app: text input, model picker, and on-demand token counting.
st.title("Token Counter")
text = st.text_area("Text:", value="", height=200)

# Robustness fix: the model-list fetch hits the network at every script rerun;
# if the Hub API is unreachable the original code crashed the whole app before
# anything rendered. Fall back to an empty list and let the user type a model.
try:
    popular_models = get_popular_models()
except Exception:
    popular_models = []

model_name = st.selectbox("Model:", options=popular_models, index=0)
# A manually entered model id always overrides the dropdown selection.
manual_entry = st.text_input("Or enter a model manually:", value="")
if manual_entry:
    model_name = manual_entry

if st.button("Count Tokens"):
    # count_tokens_text returns (count, None) on success, (None, message) on error.
    token_count, error = count_tokens_text(text, model_name)
    if token_count is not None:
        st.success(f"Token count: {token_count}")
    elif error is not None:
        st.error(f"Error: {error}")
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
transformers
|
| 3 |
+
requests
|
| 4 |
+
# datetime is part of the Python standard library; installing the PyPI
# package of the same name pulls in an unrelated, broken project.
|
| 5 |
+
python-dateutil
|
token_counter.log
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
2023-04-29 10:16:15,841 - INFO - Token count for C:\Users\mjpa\Documents\Obsidian\20-29_Projekte\21_jPAw\21.96_MultiAgentSystem\OBJECTIVE-MAS-GITHUB-basedOnAgentLLM.md: 227
|