dyxohjl666 committed on
Commit
e8deac5
·
unverified ·
1 Parent(s): 29c9600

Add application file

Browse files
Files changed (4) hide show
  1. app.py +61 -0
  2. description.py +57 -0
  3. reference_string_parsing.py +34 -0
  4. requirements.txt +1 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Gradio demo for SciAssist reference string parsing.

Builds a two-tab UI: one tab parses reference strings typed in a textbox or
uploaded as a file, the other shows the source code of the callbacks.
"""
import gradio as gr

# Explicit imports instead of wildcards, so it is visible where every
# callback and every piece of UI text used below comes from.
from description import gradio_code, rsp_file_md, rsp_str_md
from reference_string_parsing import rsp_for_file, rsp_for_str

# NOTE(review): the nesting of the layout blocks below was reconstructed;
# confirm it against the rendered page.
# The CSS keeps the line breaks contained in the highlighted output; both
# output components share elem_id "htext" so this one rule applies to each.
with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
    gr.Markdown("# Gradio Demo for SciAssist")
    with gr.Tabs():
        with gr.TabItem("Reference String Parsing"):
            # Parse reference strings typed directly into a textbox.
            with gr.Box():
                gr.Markdown(rsp_str_md)
                with gr.Row():
                    with gr.Column():
                        rsp_str = gr.Textbox(label="Input String")
                        rsp_str_dehyphen = gr.Checkbox(label="dehyphen")
                        with gr.Row():
                            rsp_str_btn = gr.Button("Parse")
                    rsp_str_output = gr.HighlightedText(
                        elem_id="htext",
                        label="The Result of Parsing",
                        combine_adjacent=True,
                        adjacent_separator=" ",
                    )
                rsp_str_examples = gr.Examples(examples=[[
                    "Waleed Ammar, Matthew E. Peters, Chandra Bhagavat- ula, and Russell Power. 2017. The ai2 system at semeval-2017 task 10 (scienceie): semi-supervised end-to-end entity and relation extraction. In ACL workshop (SemEval).",
                    True],
                    [
                        "Isabelle Augenstein, Mrinal Das, Sebastian Riedel, Lakshmi Vikraman, and Andrew D. McCallum. 2017. Semeval 2017 task 10 (scienceie): Extracting keyphrases and relations from scientific publications. In ACL workshop (SemEval).",
                        False]], inputs=[rsp_str, rsp_str_dehyphen])
            # Parse reference strings taken from an uploaded file.
            with gr.Box():
                gr.Markdown(rsp_file_md)
                with gr.Row():
                    with gr.Column():
                        rsp_file = gr.File()
                        rsp_file_dehyphen = gr.Checkbox(label="dehyphen")
                        with gr.Row():
                            rsp_file_btn = gr.Button("Parse")

                    rsp_file_output = gr.HighlightedText(
                        elem_id="htext",
                        label="The Result of Parsing",
                        combine_adjacent=True,
                        adjacent_separator=" ",
                    )

        with gr.TabItem("Source Code"):
            gr.Markdown(value=gradio_code)

    # Wire each "Parse" button to its callback.
    rsp_file_btn.click(
        fn=rsp_for_file,
        inputs=[rsp_file, rsp_file_dehyphen],
        outputs=rsp_file_output
    )
    rsp_str_btn.click(
        fn=rsp_for_str,
        inputs=[rsp_str, rsp_str_dehyphen],
        outputs=rsp_str_output
    )

demo.launch(share=True)
description.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Markdown shown in the "Source Code" tab.
# Raw string on purpose: the code sample contains "\n\n", which a normal
# string literal would turn into real line breaks and so corrupt the sample.
gradio_code = r'''
If you'd like to generate a demo like this on your own, please go for [**our GitHub repo**](https://github.com/WING-NUS/SciAssist)
and try the following codes.

This is the command we actually run:
```python

from typing import List, Tuple
from SciAssist import ReferenceStringParsing

rsp_pipeline = ReferenceStringParsing()


def rsp_for_str(input, dehyphen=False) -> List[Tuple[str, str]]:
    results = rsp_pipeline.predict(input, type="str", dehyphen=dehyphen)
    output = []
    for res in results:
        for token, tag in zip(res["tokens"], res["tags"]):
            output.append((token, tag))
        output.append(("\n\n", None))
    return output


def rsp_for_file(input, dehyphen=False) -> List[Tuple[str, str]]:
    if input == None:
        return None
    filename = input.name
    # Identify the format of input and parse reference strings
    if filename[-4:] == ".txt":
        results = rsp_pipeline.predict(filename, type="txt", dehyphen=dehyphen)
    elif filename[-4:] == ".pdf":
        results = rsp_pipeline.predict(filename, dehyphen=dehyphen)
    else:
        return [("File Format Error !", None)]
    # Prepare for the input gradio.HighlightedText accepts.
    output = []
    for res in results:
        for token, tag in zip(res["tokens"], res["tags"]):
            output.append((token, tag))
        output.append(("\n\n", None))
    return output

```
'''

# Markdown shown above the string-input form.
rsp_str_md = '''
To **test on strings**, simply input one or more strings.
'''

# Markdown shown above the file-upload form.
rsp_file_md = '''
To **test on a file**, the input can be either:

- A txt file which contains a reference string in each line.
- A pdf file which contains a whole scientific document without any processing (including title, author...).

'''
57
+
reference_string_parsing.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Tuple
2
+ from SciAssist import ReferenceStringParsing
3
+
4
# Module-level pipeline instance; rsp_for_str and rsp_for_file both call its predict().
rsp_pipeline = ReferenceStringParsing()
5
+
6
+
7
def rsp_for_str(input, dehyphen=False) -> List[Tuple[str, str]]:
    """Parse reference strings given as text.

    Args:
        input: The text handed to ``rsp_pipeline.predict`` with ``type="str"``.
        dehyphen: Forwarded to ``rsp_pipeline.predict`` unchanged.

    Returns:
        A flat list of ``(token, tag)`` pairs. After the pairs of each parsed
        result, a ``(two newlines, None)`` separator entry is appended.
    """
    highlighted = []
    for parsed in rsp_pipeline.predict(input, type="str", dehyphen=dehyphen):
        # Pair every token with its tag, then close the reference with a separator.
        highlighted.extend(zip(parsed["tokens"], parsed["tags"]))
        highlighted.append(("\n\n", None))
    return highlighted
15
+
16
+
17
def rsp_for_file(input, dehyphen=False) -> List[Tuple[str, str]]:
    """Parse the reference strings found in an uploaded ``.txt`` or ``.pdf`` file.

    Args:
        input: Uploaded file object whose ``name`` attribute is the file name
            passed on to the pipeline, or ``None`` when nothing was uploaded.
        dehyphen: Forwarded to ``rsp_pipeline.predict`` unchanged.

    Returns:
        ``None`` when ``input`` is ``None``. A single
        ``("File Format Error !", None)`` entry when the file name ends with
        neither ``.txt`` nor ``.pdf``. Otherwise a flat list of
        ``(token, tag)`` pairs in which the pairs of each parsed result are
        followed by a ``(two newlines, None)`` separator entry.
    """
    if input is None:
        return None
    filename = input.name
    # Identify the format from the extension. The comparison ignores case so
    # that names such as "PAPER.PDF" are accepted as well.
    lowered = filename.lower()
    if lowered.endswith(".txt"):
        results = rsp_pipeline.predict(filename, type="txt", dehyphen=dehyphen)
    elif lowered.endswith(".pdf"):
        results = rsp_pipeline.predict(filename, dehyphen=dehyphen)
    else:
        return [("File Format Error !", None)]
    # Build the list of (text, label) pairs that gradio.HighlightedText accepts.
    output = []
    for res in results:
        output.extend(zip(res["tokens"], res["tags"]))
        output.append(("\n\n", None))
    return output
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ SciAssist==0.0.11