kdevoe committed on
Commit
80d593f
·
1 Parent(s): 157f397

Trying streaming again

Browse files
Files changed (1) hide show
  1. app.py +4 -1
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline
 
3
 
4
  # Load the model and tokenizer
5
  model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
@@ -10,11 +11,13 @@ model = AutoModelForCausalLM.from_pretrained(model_name)
10
  pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)
11
 
12
  # Define the inference function with streaming
13
def generate_text(prompt):
    """Run the text-generation pipeline on *prompt* and yield the accumulated text.

    Args:
        prompt: The input prompt string passed to the pipeline.

    Yields:
        The text generated so far, growing as each returned sequence is appended.
    """
    accumulated = ""
    # NOTE(review): the pipeline call blocks until generation finishes and then
    # returns its sequences; each iteration appends one sequence's text.
    for result in pipeline(prompt, max_length=100, num_return_sequences=1, do_sample=True, top_k=50, top_p=0.95):
        accumulated += result['generated_text']
        yield accumulated  # Stream the output
 
18
 
19
  # Create a Gradio interface
20
  iface = gr.Interface(
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline
3
+ import asyncio
4
 
5
  # Load the model and tokenizer
6
  model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 
11
  pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)
12
 
13
  # Define the inference function with streaming
14
async def generate_text(prompt):
    """Asynchronously generate text for *prompt*, yielding accumulated output.

    Args:
        prompt: The input prompt string passed to the pipeline.

    Yields:
        The text generated so far, growing as each returned sequence is appended.
    """
    # get_running_loop() is the correct call inside a coroutine;
    # get_event_loop() is deprecated in this context.
    loop = asyncio.get_running_loop()
    # The pipeline call is blocking; run it in a thread-pool executor so the
    # event loop (and the UI) stays responsive instead of being blocked.
    results = await loop.run_in_executor(
        None,
        lambda: pipeline(
            prompt,
            max_length=100,
            num_return_sequences=1,
            do_sample=True,
            top_k=50,
            top_p=0.95,
        ),
    )
    output = ""
    for result in results:
        output += result['generated_text']
        yield output  # Stream the output
21
 
22
  # Create a Gradio interface
23
  iface = gr.Interface(