Datangtang committed on
Commit
4eb395f
·
verified ·
1 Parent(s): 519cb0a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -8
app.py CHANGED
@@ -1,17 +1,24 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForCausalLM
3
- import torch
4
 
5
  model_id = "Datangtang/GGUF_New_1B"
6
 
7
- tokenizer = AutoTokenizer.from_pretrained(model_id)
8
- model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cpu")
 
 
 
 
9
 
10
  def chat_fn(message):
11
- inputs = tokenizer(message, return_tensors="pt")
12
- outputs = model.generate(**inputs, max_new_tokens=150)
13
- return tokenizer.decode(outputs[0], skip_special_tokens=True)
14
 
15
- demo = gr.Interface(fn=chat_fn, inputs="text", outputs="text", title="My Finetuned LLM Chat")
 
 
 
 
 
16
 
17
  demo.launch()
 
1
  import gradio as gr
2
+ from ctransformers import AutoModelForCausalLM
 
3
 
4
  model_id = "Datangtang/GGUF_New_1B"
5
 
6
+ model = AutoModelForCausalLM.from_pretrained(
7
+ model_id,
8
+ model_file="your_model.gguf", # change this to your GGUF file name
9
+ model_type="llama",
10
+ gpu_layers=0
11
+ )
12
 
13
  def chat_fn(message):
14
+ response = model(message, max_new_tokens=200)
15
+ return response
 
16
 
17
+ demo = gr.Interface(
18
+ fn=chat_fn,
19
+ inputs="text",
20
+ outputs="text",
21
+ title="My GGUF Model"
22
+ )
23
 
24
  demo.launch()