felya97 committed on
Commit
8e2427e
·
verified ·
1 Parent(s): eca12d4

updated app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -55
app.py CHANGED
@@ -1,57 +1,4 @@
1
  import gradio as gr
2
- import torch
3
- from threading import Thread
4
- from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
5
 
6
- MODEL_ID = "TildeAI/TildeOpen-30b"
7
-
8
- # Tokenizer MUST be slow version per model card
9
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=False)
10
-
11
- # Load model on GPU with BF16
12
- model = AutoModelForCausalLM.from_pretrained(
13
- MODEL_ID,
14
- torch_dtype=torch.bfloat16,
15
- device_map="auto",
16
- )
17
-
18
- SYSTEM = (
19
- "You are a helpful multilingual assistant. "
20
- "The model is base (not instruction-tuned), so follow the user's request precisely."
21
- )
22
-
23
- def format_history(history, user_msg):
24
- prompt = SYSTEM + "\n\n"
25
- for u, a in history:
26
- prompt += f"<|user|>\n{u}\n<|assistant|>\n{a}\n"
27
- prompt += f"<|user|>\n{user_msg}\n<|assistant|>\n"
28
- return prompt
29
-
30
- def chat_fn(message, history):
31
- prompt = format_history(history, message)
32
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
33
-
34
- streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
35
- gen_kwargs = dict(
36
- **inputs,
37
- max_new_tokens=512,
38
- do_sample=True,
39
- temperature=0.7,
40
- top_p=0.9,
41
- repetition_penalty=1.1,
42
- streamer=streamer,
43
- )
44
-
45
- thread = Thread(target=model.generate, kwargs=gen_kwargs)
46
- thread.start()
47
- partial = ""
48
- for new_text in streamer:
49
- partial += new_text
50
- yield partial
51
-
52
- demo = gr.ChatInterface(
53
- fn=chat_fn,
54
- title="TildeOpen-30B (Transformers, BF16)",
55
- description="Base model (not instruction-tuned). Multilingual; context length 8192.",
56
- )
57
- demo.queue().launch()
 
1
  import gradio as gr
 
 
 
2
 
3
+ def greet(name): return "Hello " + name + "!!"
4
+ gr.Interface(fn=greet, inputs="text", outputs="text", title="Hello test").launch()