Spaces:
Running on Zero
Running on Zero
Address feedback
Browse files
Signed-off-by: SimJeg <sjegou@nvidia.com>
app.py
CHANGED
|
@@ -30,13 +30,13 @@ def process_request(url, question, press_name, compression_ratio):
|
|
| 30 |
""" """
|
| 31 |
|
| 32 |
if press_name not in press_dict:
|
| 33 |
-
return f"Invalid press type selected: {press_name}", -1
|
| 34 |
|
| 35 |
# Fetch the Wikipedia article
|
| 36 |
try:
|
| 37 |
content = requests.get(url).content
|
| 38 |
except requests.exceptions.RequestException as e:
|
| 39 |
-
return f"Error fetching the Wikipedia article: {str(e)}", -1
|
| 40 |
|
| 41 |
try:
|
| 42 |
# Parse the Wikipedia HTML
|
|
@@ -48,12 +48,12 @@ def process_request(url, question, press_name, compression_ratio):
|
|
| 48 |
num_tokens = pipe.tokenizer(context, return_tensors="pt")["input_ids"].shape[1]
|
| 49 |
pred_answer = pipe(context, question=question, press=press)["answer"]
|
| 50 |
|
| 51 |
-
return pred_answer, num_tokens
|
| 52 |
except Exception as e:
|
| 53 |
if "CUDA out of memory" in str(e):
|
| 54 |
return "Error: CUDA out of memory. Try using a smaller article or a lower compression ratio.", -1
|
| 55 |
else:
|
| 56 |
-
return str(e), -1
|
| 57 |
|
| 58 |
|
| 59 |
def gradio_interface():
|
|
@@ -82,10 +82,11 @@ def gradio_interface():
|
|
| 82 |
value="ExpectedAttentionPress",
|
| 83 |
label="Select Press Type",
|
| 84 |
)
|
| 85 |
-
compression_slider = gr.Slider(minimum=0.
|
| 86 |
|
| 87 |
output = gr.Textbox(label="Output", lines=10)
|
| 88 |
-
output_num_tokens = gr.Number(label="Number of
|
|
|
|
| 89 |
|
| 90 |
submit_button = gr.Button("Submit")
|
| 91 |
|
|
@@ -97,6 +98,12 @@ def gradio_interface():
|
|
| 97 |
"ExpectedAttentionPress",
|
| 98 |
0.5,
|
| 99 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
],
|
| 101 |
inputs=[url_input, question_input, press_selector, compression_slider],
|
| 102 |
)
|
|
@@ -104,7 +111,7 @@ def gradio_interface():
|
|
| 104 |
submit_button.click(
|
| 105 |
process_request,
|
| 106 |
inputs=[url_input, question_input, press_selector, compression_slider],
|
| 107 |
-
outputs=[output, output_num_tokens],
|
| 108 |
)
|
| 109 |
|
| 110 |
return demo
|
|
|
|
| 30 |
""" """
|
| 31 |
|
| 32 |
if press_name not in press_dict:
|
| 33 |
+
return f"Invalid press type selected: {press_name}", -1, -1
|
| 34 |
|
| 35 |
# Fetch the Wikipedia article
|
| 36 |
try:
|
| 37 |
content = requests.get(url).content
|
| 38 |
except requests.exceptions.RequestException as e:
|
| 39 |
+
return f"Error fetching the Wikipedia article: {str(e)}", -1, -1
|
| 40 |
|
| 41 |
try:
|
| 42 |
# Parse the Wikipedia HTML
|
|
|
|
| 48 |
num_tokens = pipe.tokenizer(context, return_tensors="pt")["input_ids"].shape[1]
|
| 49 |
pred_answer = pipe(context, question=question, press=press)["answer"]
|
| 50 |
|
| 51 |
+
return pred_answer, num_tokens, int(num_tokens * (1 - compression_ratio))
|
| 52 |
except Exception as e:
|
| 53 |
if "CUDA out of memory" in str(e):
|
| 54 |
return "Error: CUDA out of memory. Try using a smaller article or a lower compression ratio.", -1
|
| 55 |
else:
|
| 56 |
+
return str(e), -1, -1
|
| 57 |
|
| 58 |
|
| 59 |
def gradio_interface():
|
|
|
|
| 82 |
value="ExpectedAttentionPress",
|
| 83 |
label="Select Press Type",
|
| 84 |
)
|
| 85 |
+
compression_slider = gr.Slider(minimum=0.0, maximum=0.9, step=0.1, value=0.5, label="Compression Ratio")
|
| 86 |
|
| 87 |
output = gr.Textbox(label="Output", lines=10)
|
| 88 |
+
output_num_tokens = gr.Number(label="Number of tokens before compression", interactive=False)
|
| 89 |
+
output_compressed_num_tokens = gr.Number(label="Number of tokens after compression", interactive=False)
|
| 90 |
|
| 91 |
submit_button = gr.Button("Submit")
|
| 92 |
|
|
|
|
| 98 |
"ExpectedAttentionPress",
|
| 99 |
0.5,
|
| 100 |
],
|
| 101 |
+
[
|
| 102 |
+
"https://en.wikipedia.org/wiki/Hugging_Face",
|
| 103 |
+
"What was the original name of the transformers library ?",
|
| 104 |
+
"ExpectedAttentionPress",
|
| 105 |
+
0.5,
|
| 106 |
+
],
|
| 107 |
],
|
| 108 |
inputs=[url_input, question_input, press_selector, compression_slider],
|
| 109 |
)
|
|
|
|
| 111 |
submit_button.click(
|
| 112 |
process_request,
|
| 113 |
inputs=[url_input, question_input, press_selector, compression_slider],
|
| 114 |
+
outputs=[output, output_num_tokens, output_compressed_num_tokens],
|
| 115 |
)
|
| 116 |
|
| 117 |
return demo
|