File size: 1,135 Bytes
5143a4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9174d8
5143a4d
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import gradio as gr
import base64
import hashlib
import sqlite3

# Create the SQLite database and the tokens table on startup so that
# tokenize_file() always has somewhere to persist chunk hashes.
conn = sqlite3.connect('tokens.db')
try:
    # Single-column schema: one row per 40-character-chunk hash.
    conn.execute('''CREATE TABLE IF NOT EXISTS tokens
                 (token TEXT)''')
    conn.commit()
finally:
    # Close even if table creation fails so the handle is never leaked.
    conn.close()

def tokenize_file(file):
    """Tokenize an uploaded file into a list of per-chunk hex digests.

    The file's bytes are Base64-encoded, the encoded text is split into
    40-character chunks, and each chunk is hashed with SHA3-256.

    NOTE: ``hashlib.sha3_256`` is FIPS-202 SHA3-256, *not* the pre-standard
    Keccak-256 the original comment claimed (they produce different digests).

    Args:
        file: A binary file-like object supporting ``read()``.

    Returns:
        list[str]: Hex digests, one per chunk; empty list for an empty file.
        The digests are also inserted into the ``tokens`` table of tokens.db.
    """
    # Base64-encode the raw bytes into an ASCII-safe string.
    encoded_file = base64.b64encode(file.read()).decode('utf-8')

    # Split into 40-character chunks (the last chunk may be shorter).
    chunks = [encoded_file[i:i + 40] for i in range(0, len(encoded_file), 40)]

    # Hash every chunk up front; nothing touches the DB if this raises.
    tokens = [hashlib.sha3_256(chunk.encode()).hexdigest() for chunk in chunks]

    # Persist all tokens through ONE connection and ONE transaction instead
    # of opening/committing/closing the database once per chunk.
    conn = sqlite3.connect('tokens.db')
    try:
        # Ensure the table exists so the function works standalone too.
        conn.execute('CREATE TABLE IF NOT EXISTS tokens (token TEXT)')
        conn.executemany('INSERT INTO tokens (token) VALUES (?)',
                         ((t,) for t in tokens))
        conn.commit()
    finally:
        # Guarantee the connection is released even if an insert fails.
        conn.close()

    return tokens

# Gradio 3 deprecated (and Gradio 4 removed) the gr.inputs / gr.outputs
# namespaces; component classes now live directly on the top-level module.
file_input = gr.File(label="Upload a file")
output_text = gr.Textbox(label="Tokens")

# Wire the tokenizer to the UI and start the local web server.
gr.Interface(fn=tokenize_file, inputs=file_input, outputs=output_text).launch()