Spaces:
Running
Running
Commit
·
0d3c7d8
1
Parent(s):
e979a07
Add IP-based throttle
Browse files
- app.py +46 -11
- constants.py +2 -0
app.py
CHANGED
|
@@ -3,6 +3,7 @@ import datetime
|
|
| 3 |
import json
|
| 4 |
import os
|
| 5 |
import requests
|
|
|
|
| 6 |
from constants import *
|
| 7 |
|
| 8 |
API_IPADDR = os.environ.get('API_IPADDR', None)
|
|
@@ -11,17 +12,31 @@ max_size = os.environ.get('max_size', 100)
|
|
| 11 |
max_threads = os.environ.get('max_threads', 40)
|
| 12 |
debug = (os.environ.get('debug', 'False') != 'False')
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
corpus = CORPUS_BY_DESC[corpus_desc]
|
| 16 |
query_type = QUERY_TYPE_BY_DESC[query_desc]
|
| 17 |
-
timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
|
| 18 |
data = {
|
| 19 |
'timestamp': timestamp,
|
|
|
|
|
|
|
| 20 |
'corpus': corpus,
|
| 21 |
'query_type': query_type,
|
| 22 |
'query': query,
|
| 23 |
}
|
| 24 |
print(json.dumps(data))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
if API_IPADDR is None:
|
| 26 |
raise ValueError(f'API_IPADDR envvar is not set!')
|
| 27 |
response = requests.post(f'http://{API_IPADDR}:5000/', json=data)
|
|
@@ -33,18 +48,38 @@ def process(corpus_desc, query_desc, query):
|
|
| 33 |
print(result)
|
| 34 |
return result
|
| 35 |
|
| 36 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
corpus = CORPUS_BY_DESC[corpus_desc]
|
| 38 |
query_type = QUERY_TYPE_BY_DESC[query_desc]
|
| 39 |
timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
|
| 40 |
data = {
|
| 41 |
'timestamp': timestamp,
|
|
|
|
|
|
|
| 42 |
'corpus': corpus,
|
| 43 |
'query_type': query_type,
|
| 44 |
'query': query,
|
| 45 |
'maxnum': maxnum,
|
| 46 |
}
|
| 47 |
print(json.dumps(data))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
if API_IPADDR is None:
|
| 49 |
raise ValueError(f'API_IPADDR envvar is not set!')
|
| 50 |
response = requests.post(f'http://{API_IPADDR}:5000/', json=data)
|
|
@@ -60,7 +95,7 @@ def process_ard_cnf_multi(corpus_desc, query_desc, query, maxnum):
|
|
| 60 |
outputs = outputs[:maxnum]
|
| 61 |
while len(outputs) < 10:
|
| 62 |
outputs.append([])
|
| 63 |
-
return
|
| 64 |
|
| 65 |
with gr.Blocks() as demo:
|
| 66 |
with gr.Column():
|
|
@@ -281,14 +316,14 @@ If you find this tool useful, please kindly cite our paper:
|
|
| 281 |
ard_cnf_multi_clear.add([ard_cnf_multi_input, ard_cnf_multi_output_tokens, ard_cnf_multi_output_message, ard_cnf_multi_output_0, ard_cnf_multi_output_1, ard_cnf_multi_output_2, ard_cnf_multi_output_3, ard_cnf_multi_output_4, ard_cnf_multi_output_5, ard_cnf_multi_output_6, ard_cnf_multi_output_7, ard_cnf_multi_output_8, ard_cnf_multi_output_9])
|
| 282 |
doc_analysis_clear.add([doc_analysis_input, doc_analysis_output])
|
| 283 |
|
| 284 |
-
count_submit.click(
|
| 285 |
-
ngram_submit.click(
|
| 286 |
-
ntd_submit.click(
|
| 287 |
-
infgram_submit.click(
|
| 288 |
-
infntd_submit.click(
|
| 289 |
# ard_cnf_submit.click(process, inputs=[corpus_desc, query_desc, ard_cnf_input], outputs=[ard_cnf_output, ard_cnf_output_tokens, ard_cnf_output_message], api_name=False)
|
| 290 |
-
ard_cnf_multi_submit.click(process_ard_cnf_multi, inputs=[corpus_desc, query_desc, ard_cnf_multi_input, ard_cnf_multi_maxnum], outputs=[
|
| 291 |
-
doc_analysis_submit.click(
|
| 292 |
|
| 293 |
def update_query_desc(selection):
|
| 294 |
return {
|
|
|
|
| 3 |
import json
|
| 4 |
import os
|
| 5 |
import requests
|
| 6 |
+
import time
|
| 7 |
from constants import *
|
| 8 |
|
| 9 |
API_IPADDR = os.environ.get('API_IPADDR', None)
|
|
|
|
| 12 |
max_threads = os.environ.get('max_threads', 40)
|
| 13 |
debug = (os.environ.get('debug', 'False') != 'False')
|
| 14 |
|
| 15 |
+
# Throttle state: maps a client IP string to the time.time() value recorded
# for that IP's most recent query that was not blocked. Requests with an
# empty IP are never stored here.
last_query_time_by_ip = {}
|
| 16 |
+
|
| 17 |
+
def process(corpus_desc, query_desc, query, ret_num, request: gr.Request):
|
| 18 |
+
global last_query_time_by_ip
|
| 19 |
+
ip = request.client.host if request else ''
|
| 20 |
+
timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
|
| 21 |
+
t = time.time()
|
| 22 |
+
last_query_time = 0 if ip == '' else last_query_time_by_ip.get(ip, 0)
|
| 23 |
+
blocked = (t - last_query_time < MIN_QUERY_INTERVAL_SECONDS)
|
| 24 |
+
|
| 25 |
corpus = CORPUS_BY_DESC[corpus_desc]
|
| 26 |
query_type = QUERY_TYPE_BY_DESC[query_desc]
|
|
|
|
| 27 |
data = {
|
| 28 |
'timestamp': timestamp,
|
| 29 |
+
'ip': ip,
|
| 30 |
+
'blocked': blocked,
|
| 31 |
'corpus': corpus,
|
| 32 |
'query_type': query_type,
|
| 33 |
'query': query,
|
| 34 |
}
|
| 35 |
print(json.dumps(data))
|
| 36 |
+
if blocked:
|
| 37 |
+
return tuple([f'You queried too frequently. Please try again in {MIN_QUERY_INTERVAL_SECONDS} seconds.'] + [''] * (ret_num - 1))
|
| 38 |
+
if ip != '':
|
| 39 |
+
last_query_time_by_ip[ip] = t
|
| 40 |
if API_IPADDR is None:
|
| 41 |
raise ValueError(f'API_IPADDR envvar is not set!')
|
| 42 |
response = requests.post(f'http://{API_IPADDR}:5000/', json=data)
|
|
|
|
| 48 |
print(result)
|
| 49 |
return result
|
| 50 |
|
| 51 |
+
def process_1(corpus_desc, query_desc, query, request: gr.Request):
    """Forward to `process` with ret_num=1.

    With ret_num=1 a throttled request yields a 1-tuple from `process`,
    i.e. one value for a handler wired to a single output component.
    """
    return process(
        corpus_desc,
        query_desc,
        query,
        ret_num=1,
        request=request,
    )
|
| 53 |
+
def process_2(corpus_desc, query_desc, query, request: gr.Request):
    """Forward to `process` with ret_num=2.

    With ret_num=2 a throttled request yields a 2-tuple from `process`,
    i.e. one value per output component for a handler wired to two outputs.
    """
    return process(
        corpus_desc,
        query_desc,
        query,
        ret_num=2,
        request=request,
    )
|
| 55 |
+
def process_3(corpus_desc, query_desc, query, request: gr.Request):
    """Forward to `process` with ret_num=3.

    With ret_num=3 a throttled request yields a 3-tuple from `process`,
    i.e. one value per output component for a handler wired to three outputs.
    """
    return process(
        corpus_desc,
        query_desc,
        query,
        ret_num=3,
        request=request,
    )
|
| 57 |
+
|
| 58 |
+
def process_ard_cnf_multi(corpus_desc, query_desc, query, maxnum, request: gr.Request):
|
| 59 |
+
global last_query_time_by_ip
|
| 60 |
+
ip = request.client.host if request else ''
|
| 61 |
+
timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
|
| 62 |
+
t = time.time()
|
| 63 |
+
last_query_time = 0 if ip == '' else last_query_time_by_ip.get(ip, 0)
|
| 64 |
+
blocked = (t - last_query_time < MIN_QUERY_INTERVAL_SECONDS)
|
| 65 |
+
|
| 66 |
corpus = CORPUS_BY_DESC[corpus_desc]
|
| 67 |
query_type = QUERY_TYPE_BY_DESC[query_desc]
|
| 68 |
timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
|
| 69 |
data = {
|
| 70 |
'timestamp': timestamp,
|
| 71 |
+
'ip': ip,
|
| 72 |
+
'blocked': blocked,
|
| 73 |
'corpus': corpus,
|
| 74 |
'query_type': query_type,
|
| 75 |
'query': query,
|
| 76 |
'maxnum': maxnum,
|
| 77 |
}
|
| 78 |
print(json.dumps(data))
|
| 79 |
+
if blocked:
|
| 80 |
+
return tuple([f'You queried too frequently. Please try again in {MIN_QUERY_INTERVAL_SECONDS} seconds.'] + [''] * 11)
|
| 81 |
+
if ip != '':
|
| 82 |
+
last_query_time_by_ip[ip] = t
|
| 83 |
if API_IPADDR is None:
|
| 84 |
raise ValueError(f'API_IPADDR envvar is not set!')
|
| 85 |
response = requests.post(f'http://{API_IPADDR}:5000/', json=data)
|
|
|
|
| 95 |
outputs = outputs[:maxnum]
|
| 96 |
while len(outputs) < 10:
|
| 97 |
outputs.append([])
|
| 98 |
+
return message, output_tokens, outputs[0], outputs[1], outputs[2], outputs[3], outputs[4], outputs[5], outputs[6], outputs[7], outputs[8], outputs[9]
|
| 99 |
|
| 100 |
with gr.Blocks() as demo:
|
| 101 |
with gr.Column():
|
|
|
|
| 316 |
ard_cnf_multi_clear.add([ard_cnf_multi_input, ard_cnf_multi_output_tokens, ard_cnf_multi_output_message, ard_cnf_multi_output_0, ard_cnf_multi_output_1, ard_cnf_multi_output_2, ard_cnf_multi_output_3, ard_cnf_multi_output_4, ard_cnf_multi_output_5, ard_cnf_multi_output_6, ard_cnf_multi_output_7, ard_cnf_multi_output_8, ard_cnf_multi_output_9])
|
| 317 |
doc_analysis_clear.add([doc_analysis_input, doc_analysis_output])
|
| 318 |
|
| 319 |
+
count_submit.click(process_2, inputs=[corpus_desc, query_desc, count_input], outputs=[count_output, count_output_tokens], api_name=False)
|
| 320 |
+
ngram_submit.click(process_2, inputs=[corpus_desc, query_desc, ngram_input], outputs=[ngram_output, ngram_output_tokens], api_name=False)
|
| 321 |
+
ntd_submit.click(process_2, inputs=[corpus_desc, query_desc, ntd_input], outputs=[ntd_output, ntd_output_tokens], api_name=False)
|
| 322 |
+
infgram_submit.click(process_3, inputs=[corpus_desc, query_desc, infgram_input], outputs=[infgram_output, infgram_output_tokens, infgram_longest_suffix], api_name=False)
|
| 323 |
+
infntd_submit.click(process_3, inputs=[corpus_desc, query_desc, infntd_input], outputs=[infntd_output, infntd_output_tokens, infntd_longest_suffix], api_name=False)
|
| 324 |
# ard_cnf_submit.click(process, inputs=[corpus_desc, query_desc, ard_cnf_input], outputs=[ard_cnf_output, ard_cnf_output_tokens, ard_cnf_output_message], api_name=False)
|
| 325 |
+
ard_cnf_multi_submit.click(process_ard_cnf_multi, inputs=[corpus_desc, query_desc, ard_cnf_multi_input, ard_cnf_multi_maxnum], outputs=[ard_cnf_multi_output_message, ard_cnf_multi_output_tokens, ard_cnf_multi_output_0, ard_cnf_multi_output_1, ard_cnf_multi_output_2, ard_cnf_multi_output_3, ard_cnf_multi_output_4, ard_cnf_multi_output_5, ard_cnf_multi_output_6, ard_cnf_multi_output_7, ard_cnf_multi_output_8, ard_cnf_multi_output_9], api_name=False)
|
| 326 |
+
doc_analysis_submit.click(process_1, inputs=[corpus_desc, query_desc, doc_analysis_input], outputs=[doc_analysis_output], api_name=False)
|
| 327 |
|
| 328 |
def update_query_desc(selection):
|
| 329 |
return {
|
constants.py
CHANGED
|
@@ -30,3 +30,5 @@ MAX_DIFF_TOKENS = int(os.environ.get('MAX_DIFF_TOKENS', 100))
|
|
| 30 |
MAX_DIFF_BYTES = 2 * MAX_DIFF_TOKENS
|
| 31 |
MAX_CLAUSES_IN_CNF = int(os.environ.get('MAX_CLAUSES_IN_CNF', 4))
|
| 32 |
MAX_TERMS_IN_DISJ_CLAUSE = int(os.environ.get('MAX_TERMS_IN_DISJ_CLAUSE', 4))
|
|
|
|
|
|
|
|
|
| 30 |
MAX_DIFF_BYTES = 2 * MAX_DIFF_TOKENS
|
| 31 |
MAX_CLAUSES_IN_CNF = int(os.environ.get('MAX_CLAUSES_IN_CNF', 4))
|
| 32 |
MAX_TERMS_IN_DISJ_CLAUSE = int(os.environ.get('MAX_TERMS_IN_DISJ_CLAUSE', 4))
|
| 33 |
+
|
| 34 |
+
# Throttle window in seconds: app.py blocks a query when less than this much
# time has passed since the same IP's last non-blocked query. Read from the
# MIN_QUERY_INTERVAL_SECONDS environment variable; defaults to 5.
MIN_QUERY_INTERVAL_SECONDS = int(os.environ.get('MIN_QUERY_INTERVAL_SECONDS', 5))
|