Spaces:
Running
on
Zero
Running
on
Zero
v1
Browse files
app.py
CHANGED
|
@@ -32,7 +32,7 @@ freeze_model(meteor)
|
|
| 32 |
# previous length
|
| 33 |
previous_length = 0
|
| 34 |
|
| 35 |
-
def threading_function(inputs, image_token_number, streamer, device):
|
| 36 |
|
| 37 |
# Meteor Mamba
|
| 38 |
mmamba_inputs = mmamba.eval_process(inputs=inputs, tokenizer=tok_meteor, device=device, img_token_number=image_token_number)
|
|
@@ -50,14 +50,14 @@ def threading_function(inputs, image_token_number, streamer, device):
|
|
| 50 |
generation_kwargs = meteor_inputs
|
| 51 |
generation_kwargs.update({'streamer': streamer})
|
| 52 |
generation_kwargs.update({'do_sample': True})
|
| 53 |
-
generation_kwargs.update({'max_new_tokens':
|
| 54 |
-
generation_kwargs.update({'top_p':
|
| 55 |
-
generation_kwargs.update({'temperature':
|
| 56 |
generation_kwargs.update({'use_cache': True})
|
| 57 |
return meteor.generate(**generation_kwargs)
|
| 58 |
|
| 59 |
@spaces.GPU
|
| 60 |
-
def bot_streaming(message, history):
|
| 61 |
|
| 62 |
# param
|
| 63 |
for param in mmamba.parameters():
|
|
@@ -80,7 +80,13 @@ def bot_streaming(message, history):
|
|
| 80 |
streamer = TextIteratorStreamer(tok_meteor, skip_special_tokens=True)
|
| 81 |
|
| 82 |
# Threading generation
|
| 83 |
-
thread = Thread(target=threading_function, kwargs=dict(inputs=inputs,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
thread.start()
|
| 85 |
|
| 86 |
# generated text
|
|
@@ -98,7 +104,9 @@ def bot_streaming(message, history):
|
|
| 98 |
time.sleep(0.02)
|
| 99 |
yield buffer
|
| 100 |
|
| 101 |
-
demo = gr.ChatInterface(fn=bot_streaming,
|
|
|
|
|
|
|
| 102 |
description="Meteor is efficient 7B size Large Language and Vision Model built on the help of traversal of rationale",
|
| 103 |
stop_btn="Stop Generation", multimodal=True)
|
| 104 |
demo.launch()
|
|
|
|
| 32 |
# previous length
|
| 33 |
previous_length = 0
|
| 34 |
|
| 35 |
+
def threading_function(inputs, image_token_number, streamer, device, temperature, new_max_token, top_p):
|
| 36 |
|
| 37 |
# Meteor Mamba
|
| 38 |
mmamba_inputs = mmamba.eval_process(inputs=inputs, tokenizer=tok_meteor, device=device, img_token_number=image_token_number)
|
|
|
|
| 50 |
generation_kwargs = meteor_inputs
|
| 51 |
generation_kwargs.update({'streamer': streamer})
|
| 52 |
generation_kwargs.update({'do_sample': True})
|
| 53 |
+
generation_kwargs.update({'max_new_tokens': new_max_token})
|
| 54 |
+
generation_kwargs.update({'top_p': top_p})
|
| 55 |
+
generation_kwargs.update({'temperature': temperature})
|
| 56 |
generation_kwargs.update({'use_cache': True})
|
| 57 |
return meteor.generate(**generation_kwargs)
|
| 58 |
|
| 59 |
@spaces.GPU
|
| 60 |
+
def bot_streaming(message, history, temperature, new_max_token, top_p):
|
| 61 |
|
| 62 |
# param
|
| 63 |
for param in mmamba.parameters():
|
|
|
|
| 80 |
streamer = TextIteratorStreamer(tok_meteor, skip_special_tokens=True)
|
| 81 |
|
| 82 |
# Threading generation
|
| 83 |
+
thread = Thread(target=threading_function, kwargs=dict(inputs=inputs,
|
| 84 |
+
image_token_number=image_token_number,
|
| 85 |
+
streamer=streamer,
|
| 86 |
+
device=accel.device,
|
| 87 |
+
temperature=temperature,
|
| 88 |
+
new_max_token=new_max_token,
|
| 89 |
+
top_p=top_p))
|
| 90 |
thread.start()
|
| 91 |
|
| 92 |
# generated text
|
|
|
|
| 104 |
time.sleep(0.02)
|
| 105 |
yield buffer
|
| 106 |
|
| 107 |
+
# Chat UI wiring. The three sliders are passed to bot_streaming, in order, as
# its extra positional arguments: temperature, new_max_token, top_p.
#
# The temperature slider starts at 0.01 rather than 0: generation runs with
# do_sample=True, and Hugging Face sampling requires a strictly positive
# temperature. A value of 0 would make generate() raise in the worker thread
# instead of in the UI callback. step=0.01 keeps the slider granularity
# explicit after the minimum change.
demo = gr.ChatInterface(
    fn=bot_streaming,
    additional_inputs=[
        gr.Slider(0.01, 1, 0.9, step=0.01, label="temperature"),
        gr.Slider(1, 2048, 128, label="new_max_token"),
        gr.Slider(0, 1, 0.95, label="top_p"),
    ],
    title="☄️ Meteor",
    description="Meteor is efficient 7B size Large Language and Vision Model built on the help of traversal of rationale",
    stop_btn="Stop Generation",
    multimodal=True,
)
demo.launch()
|