akhaliq HF Staff committed on
Commit
f96aa87
·
verified ·
1 Parent(s): 30e360b

Update Gradio app with multiple files

Browse files
Files changed (1) hide show
  1. app.py +27 -17
app.py CHANGED
@@ -16,21 +16,9 @@ print("Model loaded successfully!")
16
 
17
 
18
  @spaces.GPU
19
- def generate_response(messages):
20
- """Generate response using the pipeline."""
21
- response = pipe(
22
- messages,
23
- max_new_tokens=4096,
24
- do_sample=True,
25
- temperature=0.6,
26
- top_p=0.95
27
- )
28
- return response[0]["generated_text"][-1]["content"]
29
-
30
-
31
  def respond(message, history):
32
  """
33
- Generate response for the chatbot.
34
 
35
  Args:
36
  message: The user's current message
@@ -45,10 +33,30 @@ def respond(message, history):
45
  # Add current message
46
  messages.append({"role": "user", "content": message})
47
 
48
- # Generate response
49
- response = generate_response(messages)
50
-
51
- return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
 
54
  # Create the Gradio interface
@@ -74,8 +82,10 @@ with gr.Blocks(
74
  title="",
75
  description="Ask me anything! I'm powered by VibeThinker with ZeroGPU acceleration.",
76
  examples=[
 
77
  "Explain quantum computing in simple terms",
78
  "Write a short poem about artificial intelligence",
 
79
  ],
80
  cache_examples=False,
81
  )
 
16
 
17
 
18
  @spaces.GPU
 
 
 
 
 
 
 
 
 
 
 
 
19
  def respond(message, history):
20
  """
21
+ Generate streaming response for the chatbot.
22
 
23
  Args:
24
  message: The user's current message
 
33
  # Add current message
34
  messages.append({"role": "user", "content": message})
35
 
36
+ # Generate response with streaming
37
+ full_response = ""
38
+ for output in pipe(
39
+ messages,
40
+ max_new_tokens=4096,
41
+ do_sample=True,
42
+ temperature=0.6,
43
+ top_p=0.95,
44
+ return_full_text=False,
45
+ streamer=None
46
+ ):
47
+ # Get the generated text
48
+ generated_text = output[0]["generated_text"]
49
+
50
+ # Extract only the assistant's response
51
+ if isinstance(generated_text, list):
52
+ assistant_response = generated_text[-1]["content"]
53
+ else:
54
+ assistant_response = generated_text
55
+
56
+ # Stream character by character
57
+ for char in assistant_response[len(full_response):]:
58
+ full_response += char
59
+ yield full_response
60
 
61
 
62
  # Create the Gradio interface
 
82
  title="",
83
  description="Ask me anything! I'm powered by VibeThinker with ZeroGPU acceleration.",
84
  examples=[
85
+ "What is the meaning of life?",
86
  "Explain quantum computing in simple terms",
87
  "Write a short poem about artificial intelligence",
88
+ "How can I improve my productivity?",
89
  ],
90
  cache_examples=False,
91
  )