akhaliq HF Staff committed on
Commit
47d913b
·
verified ·
1 Parent(s): c8eabc4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -12
app.py CHANGED
@@ -25,19 +25,32 @@ def respond(message, history):
25
  # Add current message
26
  prompt += f"User: {message}\nAssistant: "
27
 
28
- # Generate response
29
- response = pipe(
30
- prompt,
31
- max_new_tokens=10000,
32
- temperature=0.7,
33
- do_sample=True,
34
- pad_token_id=pipe.tokenizer.eos_token_id,
35
- return_full_text=False,
36
- )
37
 
38
- # Extract and yield the generated text
39
- generated_text = response[0]["generated_text"]
40
- yield generated_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  # Create the chat interface
43
  demo = gr.ChatInterface(
 
25
  # Add current message
26
  prompt += f"User: {message}\nAssistant: "
27
 
28
+ # Generate response with streaming
29
+ streamer = pipe.tokenizer.decode
 
 
 
 
 
 
 
30
 
31
+ # Generate tokens
32
+ inputs = pipe.tokenizer(prompt, return_tensors="pt").to(pipe.model.device)
33
+
34
+ with torch.no_grad():
35
+ outputs = pipe.model.generate(
36
+ **inputs,
37
+ max_new_tokens=10000,
38
+ temperature=0.7,
39
+ do_sample=True,
40
+ pad_token_id=pipe.tokenizer.eos_token_id,
41
+ return_full_text=False,
42
+ )
43
+
44
+ # Decode the generated tokens, skipping the input tokens
45
+ generated_tokens = outputs[0][inputs['input_ids'].shape[-1]:]
46
+
47
+ # Stream the output token by token
48
+ response_text = ""
49
+ for i in range(len(generated_tokens)):
50
+ token = generated_tokens[i:i+1]
51
+ token_text = pipe.tokenizer.decode(token, skip_special_tokens=True)
52
+ response_text += token_text
53
+ yield response_text
54
 
55
  # Create the chat interface
56
  demo = gr.ChatInterface(