imsuprtwo2 committed on
Commit
d8089fd
·
verified ·
1 Parent(s): 5d6845b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -7
app.py CHANGED
@@ -2,17 +2,27 @@ import gradio as gr
2
  import torch
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
 
5
# Hub repository that holds the checkpoint and tokenizer files.
model_id = "imsuprtwo2/NanoBit-300M"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load the causal-LM weights in full precision (CPU-friendly default).
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float32,
)
 
 
 
 
 
 
9
 
10
def predict(message, history):
    """Generate one reply for gr.ChatInterface.

    Parameters:
        message (str): the user's latest message.
        history (list): prior chat turns supplied by Gradio; currently
            unused, so the model has no conversational memory.
            NOTE(review): consider feeding it through a chat template if
            the model defines one — confirm against the model card.

    Returns:
        str: the newly generated text, whitespace-stripped.
    """
    inputs = tokenizer(message, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=100, do_sample=True, temperature=0.7)
    # Decode ONLY the newly generated tokens. The previous code decoded the
    # full sequence and sliced the string by len(message), which breaks
    # whenever decode() does not reproduce the prompt byte-for-byte
    # (special tokens, whitespace normalization).
    prompt_len = inputs["input_ids"].shape[1]
    return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()
17
 
18
# Build the chat UI around predict() and start the Gradio server.
demo = gr.ChatInterface(predict)
demo.launch()
 
 
 
 
 
2
  import torch
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
 
5
# Hub repository that holds the checkpoint and tokenizer files.
model_id = "imsuprtwo2/FastBit-450M-DeepCoder"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# SECURITY: trust_remote_code=True executes arbitrary Python shipped in the
# Hub repo at load time — keep only for a repository you control and audit.
# NOTE(review): `weight_files` is not a documented from_pretrained kwarg in
# transformers (weight selection normally uses `variant`/`subfolder`); it may
# only be consumed by the repo's remote code — confirm, else it is ignored.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    weight_files=["nanorons.safetensors"],
    # `dtype=` is accepted only by recent transformers releases; on older
    # versions the unknown kwarg is silently dropped and the model loads in
    # its default dtype. `torch_dtype=` works across versions.
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)
15
 
16
def chat_response(message, history):
    """Generate one reply for gr.ChatInterface.

    Parameters:
        message (str): the user's latest message.
        history (list): prior chat turns supplied by Gradio; currently
            unused, so the model has no conversational memory.
            NOTE(review): consider feeding it through a chat template if
            the model defines one — confirm against the model card.

    Returns:
        str: the newly generated text, whitespace-stripped.
    """
    inputs = tokenizer(message, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=128, do_sample=True, temperature=0.8)
    # Decode ONLY the newly generated tokens. Decoding the full sequence and
    # slicing the string by len(message) breaks whenever decode() does not
    # reproduce the prompt byte-for-byte (special tokens, whitespace
    # normalization).
    prompt_len = inputs["input_ids"].shape[1]
    return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()
23
 
24
# Build the chat UI around chat_response() and start the Gradio server.
demo = gr.ChatInterface(
    fn=chat_response,
    title="FastBit-450M DeepCoder",
    description="The official project coding assistant.",
)
demo.launch()