Removing standard model
Browse files
app.py
CHANGED
|
@@ -17,12 +17,12 @@ freeze(quantized_model)
|
|
| 17 |
# Define the inference function
|
| 18 |
def generate_text(prompt):
|
| 19 |
# Measure time and generate text for the normal model
|
| 20 |
-
start_time_normal = time.time()
|
| 21 |
-
inputs = tokenizer(prompt, return_tensors='pt')
|
| 22 |
-
outputs_normal = model.generate(**inputs, max_length=100, num_return_sequences=1)
|
| 23 |
-
generated_text_normal = tokenizer.decode(outputs_normal[0], skip_special_tokens=True)
|
| 24 |
-
end_time_normal = time.time()
|
| 25 |
-
response_time_normal = end_time_normal - start_time_normal
|
| 26 |
|
| 27 |
# Measure time and generate text for the quantized model
|
| 28 |
start_time_quantized = time.time()
|
|
@@ -31,7 +31,7 @@ def generate_text(prompt):
|
|
| 31 |
end_time_quantized = time.time()
|
| 32 |
response_time_quantized = end_time_quantized - start_time_quantized
|
| 33 |
|
| 34 |
-
return (generated_text_normal, f"{response_time_normal:.2f} seconds",
|
| 35 |
generated_text_quantized, f"{response_time_quantized:.2f} seconds")
|
| 36 |
|
| 37 |
# Create a Gradio interface
|
|
@@ -39,8 +39,8 @@ iface = gr.Interface(
|
|
| 39 |
fn=generate_text,
|
| 40 |
inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
|
| 41 |
outputs=[
|
| 42 |
-
gr.Textbox(label="Generated Text (Normal Model)"),
|
| 43 |
-
gr.Textbox(label="Response Time (Normal Model)"),
|
| 44 |
gr.Textbox(label="Generated Text (Quantized Model)"),
|
| 45 |
gr.Textbox(label="Response Time (Quantized Model)")
|
| 46 |
],
|
|
|
|
| 17 |
# Define the inference function
|
| 18 |
def generate_text(prompt):
|
| 19 |
# Measure time and generate text for the normal model
|
| 20 |
+
# start_time_normal = time.time()
|
| 21 |
+
# inputs = tokenizer(prompt, return_tensors='pt')
|
| 22 |
+
# outputs_normal = model.generate(**inputs, max_length=100, num_return_sequences=1)
|
| 23 |
+
# generated_text_normal = tokenizer.decode(outputs_normal[0], skip_special_tokens=True)
|
| 24 |
+
# end_time_normal = time.time()
|
| 25 |
+
# response_time_normal = end_time_normal - start_time_normal
|
| 26 |
|
| 27 |
# Measure time and generate text for the quantized model
|
| 28 |
start_time_quantized = time.time()
|
|
|
|
| 31 |
end_time_quantized = time.time()
|
| 32 |
response_time_quantized = end_time_quantized - start_time_quantized
|
| 33 |
|
| 34 |
+
return (#generated_text_normal, f"{response_time_normal:.2f} seconds",
|
| 35 |
generated_text_quantized, f"{response_time_quantized:.2f} seconds")
|
| 36 |
|
| 37 |
# Create a Gradio interface
|
|
|
|
| 39 |
fn=generate_text,
|
| 40 |
inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
|
| 41 |
outputs=[
|
| 42 |
+
# gr.Textbox(label="Generated Text (Normal Model)"),
|
| 43 |
+
# gr.Textbox(label="Response Time (Normal Model)"),
|
| 44 |
gr.Textbox(label="Generated Text (Quantized Model)"),
|
| 45 |
gr.Textbox(label="Response Time (Quantized Model)")
|
| 46 |
],
|