FrostIce committed
Commit 4d21fdb · verified · 1 Parent(s): 98fad21

Update app.py

Files changed (1):
  app.py +7 -29
app.py CHANGED
@@ -5,7 +5,6 @@ import re
 import numpy as np
 import json
 from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
-from transformers_stream_generator import patch_streaming
 import gradio as gr
 
 # Apply the streaming patch
@@ -82,31 +81,16 @@ def find_tool_calls_buffer(buffer: str):
     return blocks, buffer
 
 # === Generation ===
-def generate_stream(prompt, max_new_tokens=256, temperature=0.7, top_p=0.9):
-    if isinstance(prompt, str):
-        messages = [{"role": "user", "content": prompt}]
-    else:
-        messages = prompt
-
-    try:
-        inputs = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
-        inputs = inputs.to(model.device)
-    except Exception as e:
-        yield f"Error: {e}"
-        return
-
+def generate_stream(prompt):
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
     def generate():
         with torch.no_grad():
             model.generate(
-                inputs,
-                max_new_tokens=int(max_new_tokens),
-                temperature=float(temperature),
-                top_p=float(top_p),
-                do_sample=True,
-                pad_token_id=tokenizer.pad_token_id,
-                eos_token_id=tokenizer.eos_token_id,
+                **inputs,
+                max_new_tokens=256,
+                temperature=0.7,
                 streamer=streamer,
                 use_cache=True
             )
@@ -114,14 +98,8 @@ def generate_stream(prompt, max_new_tokens=256, temperature=0.7, top_p=0.9):
     thread = threading.Thread(target=generate)
    thread.start()
 
-    buffer = ""
-    for new_text in streamer:
-        buffer += new_text
-        blocks, _ = find_tool_calls_buffer(buffer)
-        for block in blocks:
-            result = execute_tool_calls([block["data"]])
-            buffer = buffer.replace(block["block"], f"\n\n{result}\n\n")
-        yield buffer
+    for text in streamer:
+        yield text
 
 # === Gradio ===
 with gr.Blocks() as demo:
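
The diff ends where the Gradio UI begins, so for context here is a minimal sketch of how the simplified generate_stream might be wired up. The component names (prompt_box, output_box, send_btn) and the accumulation in respond() are illustrative assumptions, not code from this commit: because the new generator yields individual chunks rather than a growing buffer, the callback rebuilds the running text before handing it to Gradio, which treats a generator callback as a streaming update.

# Hypothetical wiring for the new generate_stream; the component
# names below are illustrative assumptions, not from this commit.
with gr.Blocks() as demo:
    prompt_box = gr.Textbox(label="Prompt")
    output_box = gr.Textbox(label="Output")
    send_btn = gr.Button("Generate")

    def respond(prompt):
        # generate_stream now yields raw chunks, so accumulate them
        # to display the growing completion instead of isolated pieces.
        partial = ""
        for chunk in generate_stream(prompt):
            partial += chunk
            yield partial

    # A generator callback makes Gradio stream updates into output_box.
    send_btn.click(respond, inputs=prompt_box, outputs=output_box)

demo.launch()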