File size: 950 Bytes
4fd8982
d86b6b2
a2ed616
d86b6b2
 
a2ed616
 
 
 
d86b6b2
 
f4b2eac
a2ed616
d8089fd
 
 
a2ed616
f4b2eac
d8089fd
d86b6b2
f4b2eac
d86b6b2
 
a2ed616
 
d86b6b2
a2ed616
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import gradio as gr
import torch
import sys
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hub repository id of the checkpoint. `from_pretrained` expects a repo id
# ("user/name"), NOT a full URL — passing the URL form
# "https://huggingface.co/imsuprtwo2/NanoBit-300M" fails with a
# repo-not-found error, so the bare id is used here.
model_id = "imsuprtwo2/NanoBit-300M"

print("Starting MASA Boot Sequence...")
sys.stdout.flush()  # Forces the logs to actually show up

tokenizer = AutoTokenizer.from_pretrained(model_id)

# low_cpu_mem_usage avoids materializing a second full copy of the weights
# during load — mandatory for a ~1.4GB checkpoint on a free Space's RAM.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float32,
    trust_remote_code=True,
)

def chat(message, history):
    """Generate a one-turn reply to *message*.

    *history* is accepted for the gr.ChatInterface contract but ignored —
    only the current message is fed to the model.

    Returns the newly generated text only (prompt excluded), stripped of
    special tokens and surrounding whitespace.
    """
    inputs = tokenizer(message, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=50)
    # Slice off the prompt tokens before decoding instead of doing
    # `.replace(message, "")` on the decoded string — the replace approach
    # also deletes any legitimate repetition of the prompt inside the reply
    # (and misfires when the tokenizer re-spells the prompt differently).
    prompt_len = inputs["input_ids"].shape[1]
    new_tokens = outputs[0][prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

# Hugging Face Spaces routes external traffic to 0.0.0.0:7860, so the server
# must bind that host/port explicitly or the Space shows a connection error.
demo = gr.ChatInterface(chat)
demo.launch(server_name="0.0.0.0", server_port=7860)