jonathanjordan21 committed
Commit b232ee4 · verified · 1 Parent(s): fd2ed4e

Update app.py

Files changed (1): app.py (+17 -4)
app.py CHANGED
@@ -6,13 +6,15 @@ from huggingface_hub import InferenceClient
 # """
 # client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
-from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM#, MambaForCausalLM
+from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, TextStreamer
 from peft import PeftConfig, PeftModel
 
 config = PeftConfig.from_pretrained("jonathanjordan21/mos-mamba-6x130m-trainer")
 
 tokenizer = AutoTokenizer.from_pretrained("jonathanjordan21/mos-mamba-6x130m-trainer", trust_remote_code=True)
 
+streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+
 model = AutoModelForCausalLM.from_pretrained(
     "jonathanjordan21/mos-mamba-6x130m-trainer",
     eos_token_id=tokenizer.eos_token_id,
@@ -25,6 +27,7 @@ model = model.merge_and_unload()
 print(model.config.eos_token_id)
 
 
+
 def invoke(
     message,
     history: list[tuple[str, str]],
@@ -45,11 +48,21 @@ def invoke(
 
     tokens = tokenizer.apply_chat_template(messages, return_tensors='pt', add_generation_prompt=True)
 
-    out = model.generate(tokens, eos_token_id=model.config.eos_token_id, max_new_tokens=max_tokens, repetition_penalty=1.05, temperature=temperature, top_p=top_p)
+    response = ""
+
+    for res in model.generate(
+        tokens,
+        streamer=streamer,
+        eos_token_id=model.config.eos_token_id,
+        max_new_tokens=max_tokens,
+        temperature=temperature
+    ):
+        response += res
+        yield response
 
-    res = tokenizer.batch_decode(out)
+    # res = tokenizer.batch_decode(out)
 
-    yield res
+    # yield res
 
 
 def respond(
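
A note on the streaming loop introduced here: model.generate() returns a tensor of generated token IDs, and TextStreamer prints decoded text to stdout as it arrives rather than handing it back to the caller, so iterating over model.generate(...) walks tensor rows, not text chunks. A common working pattern for a generator like invoke is TextIteratorStreamer, which exposes the decoded chunks as a Python iterator while generate() runs on a background thread. The sketch below is one way to wire that up, assuming the same repo and chat template as the diff; the PEFT merge step is omitted, the default argument values are placeholders, and the history argument is ignored for brevity.

from threading import Thread
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

repo = "jonathanjordan21/mos-mamba-6x130m-trainer"
tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(repo, trust_remote_code=True)

def invoke(message, history, max_tokens=256, temperature=0.7):
    messages = [{"role": "user", "content": message}]
    tokens = tokenizer.apply_chat_template(
        messages, return_tensors="pt", add_generation_prompt=True
    )

    # TextIteratorStreamer queues decoded text for the caller to iterate,
    # unlike TextStreamer, which writes straight to stdout.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # generate() blocks until completion, so it runs on a worker thread while
    # this generator drains the streamer. do_sample=True is required for
    # temperature to have any effect.
    kwargs = dict(
        inputs=tokens,
        streamer=streamer,
        eos_token_id=model.config.eos_token_id,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
    )
    Thread(target=model.generate, kwargs=kwargs).start()

    response = ""
    for chunk in streamer:
        response += chunk
        yield response  # yield the accumulated reply, as the diff's loop intends

A generator with this shape (yielding the full reply so far on each step) is what Gradio's ChatInterface expects from a streaming callback, which appears to be what the commit is working toward.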