| import requests | |
| from bs4 import BeautifulSoup | |
| import tiktoken | |
| tokenizer = tiktoken.get_encoding('cl100k_base') | |
| def process_input(text): | |
| r = requests.get(text, verify=False) | |
| soup = BeautifulSoup(r.text, "html.parser") | |
| print(soup) | |
| list_text = str(soup).split('parts":["') | |
| #print(list_text) | |
| s = '' | |
| for item in list_text[1:int(len(list_text)/2)]: | |
| if list_text.index(item)%2 == 1: | |
| s = s + item.split('"]')[0] | |
| amout_token = tiktoken_len(s) | |
| return amout_token | |
| def tiktoken_len(text): | |
| tokens = tokenizer.encode( | |
| text, | |
| disallowed_special=() | |
| ) | |
| return len(tokens) | |
| answer = process_input('https://chatgpt.com/share/6737b9b5-56fc-8002-a212-35339f5b1d5a') | |
| print(answer) |