aivolcano committed on
Commit ·
ec7bc6b
1
Parent(s): adeb147
update the md
Browse files- .gitignore +0 -1
- Untitled +0 -1
- app.py +2 -2
- config.yaml +0 -9
- ms_deploy.json +10 -0
- src/analyzers/metadata_comparator.py +1 -1
.gitignore
CHANGED
|
@@ -39,7 +39,6 @@ env/
|
|
| 39 |
.LSOverride
|
| 40 |
|
| 41 |
# Project Specific Outputs
|
| 42 |
-
*.txt
|
| 43 |
*.md
|
| 44 |
!README.md
|
| 45 |
*_only_used_entry.bib
|
|
|
|
| 39 |
.LSOverride
|
| 40 |
|
| 41 |
# Project Specific Outputs
|
|
|
|
| 42 |
*.md
|
| 43 |
!README.md
|
| 44 |
*_only_used_entry.bib
|
Untitled
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
citescan.yaml
|
|
|
|
|
|
app.py
CHANGED
|
@@ -576,7 +576,7 @@ with gr.Blocks(title="CiteScan - Check References, Confirm Truth.", theme=gr.the
|
|
| 576 |
btn_total.click(fn=filter_to_total, inputs=[result_state], outputs=[output_html])
|
| 577 |
|
| 578 |
gr.Markdown("""
|
| 579 |
-
*False positive
|
| 580 |
|
| 581 |
1. **Authors Mismatch**:
|
| 582 |
- *Reason*: Different databases deal with a longer list of authors with different strategies, like truncation.
|
|
@@ -588,7 +588,7 @@ with gr.Blocks(title="CiteScan - Check References, Confirm Truth.", theme=gr.the
|
|
| 588 |
|
| 589 |
3. **Year GAP (±1 Year)**:
|
| 590 |
- *Reason*: Delay between preprint (arXiv) and final version publication
|
| 591 |
-
- *Action*: Verify which version you intend to cite, We recommend you to cite the version from the official press website.
|
| 592 |
|
| 593 |
4. **Non-academic Sources**:
|
| 594 |
- *Reason*: Blogs, and APIs are not indexed in academic databases.
|
|
|
|
| 576 |
btn_total.click(fn=filter_to_total, inputs=[result_state], outputs=[output_html])
|
| 577 |
|
| 578 |
gr.Markdown("""
|
| 579 |
+
*Case study of false positives* in CiteScan:
|
| 580 |
|
| 581 |
1. **Authors Mismatch**:
|
| 582 |
- *Reason*: Different databases deal with a longer list of authors with different strategies, like truncation.
|
|
|
|
| 588 |
|
| 589 |
3. **Year GAP (±1 Year)**:
|
| 590 |
- *Reason*: Delay between preprint (arXiv) and final version publication
|
| 591 |
+
- *Action*: Verify which version you intend to cite. We recommend citing the version from the publisher's official website. Fewer preprint entries in your bibliography will make your submission more convincing.
|
| 592 |
|
| 593 |
4. **Non-academic Sources**:
|
| 594 |
- *Reason*: Blogs, and APIs are not indexed in academic databases.
|
config.yaml
CHANGED
|
@@ -1,7 +1,3 @@
|
|
| 1 |
-
files:
|
| 2 |
-
bib: "paper.bib"
|
| 3 |
-
output_dir: "citescan_output"
|
| 4 |
-
|
| 5 |
bibliography:
|
| 6 |
check_metadata: true
|
| 7 |
check_usage: true
|
|
@@ -23,11 +19,6 @@ submission:
|
|
| 23 |
citation_quality: true
|
| 24 |
anonymization: true
|
| 25 |
|
| 26 |
-
llm:
|
| 27 |
-
backend: "deepseeek"
|
| 28 |
-
model: "deepseek-chat"
|
| 29 |
-
api_key: "sk-d7c87a7386d94879a80282cee7bd3f45"
|
| 30 |
-
|
| 31 |
output:
|
| 32 |
quiet: false
|
| 33 |
minimal_verified: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
bibliography:
|
| 2 |
check_metadata: true
|
| 3 |
check_usage: true
|
|
|
|
| 19 |
citation_quality: true
|
| 20 |
anonymization: true
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
output:
|
| 23 |
quiet: false
|
| 24 |
minimal_verified: false
|
ms_deploy.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sdk_type": "gradio",
|
| 3 |
+
"sdk_version": "6.2.0",
|
| 4 |
+
"resource_configuration": "platform/2v-cpu-16g-mem",
|
| 5 |
+
"base_image": "ubuntu22.04-py311-torch2.3.1-modelscope1.31.0",
|
| 6 |
+
"environment_variables": [
|
| 7 |
+
{"name": "MODEL_NAME", "value": "deepseek-chat"},
|
| 8 |
+
{"name": "API_KEY", "value": "sk-d7c87a7386d94879a80282cee7bd3f45"}
|
| 9 |
+
]
|
| 10 |
+
}
|
src/analyzers/metadata_comparator.py
CHANGED
|
@@ -56,7 +56,7 @@ class MetadataComparator:
|
|
| 56 |
|
| 57 |
# Thresholds for matching
|
| 58 |
TITLE_THRESHOLD = 0.99
|
| 59 |
-
AUTHOR_THRESHOLD = 0.
|
| 60 |
|
| 61 |
def __init__(self):
|
| 62 |
self.normalizer = TextNormalizer
|
|
|
|
| 56 |
|
| 57 |
# Thresholds for matching
|
| 58 |
TITLE_THRESHOLD = 0.99
|
| 59 |
+
AUTHOR_THRESHOLD = 0.65
|
| 60 |
|
| 61 |
def __init__(self):
|
| 62 |
self.normalizer = TextNormalizer
|