aivolcano committed on
Commit
ec7bc6b
·
1 Parent(s): adeb147

update the md

Browse files
Files changed (6) hide show
  1. .gitignore +0 -1
  2. Untitled +0 -1
  3. app.py +2 -2
  4. config.yaml +0 -9
  5. ms_deploy.json +10 -0
  6. src/analyzers/metadata_comparator.py +1 -1
.gitignore CHANGED
@@ -39,7 +39,6 @@ env/
39
  .LSOverride
40
 
41
  # Project Specific Outputs
42
- *.txt
43
  *.md
44
  !README.md
45
  *_only_used_entry.bib
 
39
  .LSOverride
40
 
41
  # Project Specific Outputs
 
42
  *.md
43
  !README.md
44
  *_only_used_entry.bib
Untitled DELETED
@@ -1 +0,0 @@
1
- citescan.yaml
 
 
app.py CHANGED
@@ -576,7 +576,7 @@ with gr.Blocks(title="CiteScan - Check References, Confirm Truth.", theme=gr.the
576
  btn_total.click(fn=filter_to_total, inputs=[result_state], outputs=[output_html])
577
 
578
  gr.Markdown("""
579
- *False positive cases* occur for CiteScan:
580
 
581
  1. **Authors Mismatch**:
582
  - *Reason*: Different databases deal with a longer list of authors with different strategies, like truncation.
@@ -588,7 +588,7 @@ with gr.Blocks(title="CiteScan - Check References, Confirm Truth.", theme=gr.the
588
 
589
  3. **Year GAP (±1 Year)**:
590
  - *Reason*: Delay between preprint (arXiv) and final version publication
591
- - *Action*: Verify which version you intend to cite, We recommend you to cite the version from the official press website. Lower pre-print version bib will make your submission more confidence.
592
 
593
  4. **Non-academic Sources**:
594
  - *Reason*: Blogs, and APIs are not indexed in academic databases.
 
576
  btn_total.click(fn=filter_to_total, inputs=[result_state], outputs=[output_html])
577
 
578
  gr.Markdown("""
579
+ *Case studies of false positives* in CiteScan:
580
 
581
  1. **Authors Mismatch**:
582
  - *Reason*: Different databases deal with a longer list of authors with different strategies, like truncation.
 
588
 
589
  3. **Year GAP (±1 Year)**:
590
  - *Reason*: Delay between preprint (arXiv) and final version publication
591
+ - *Action*: Verify which version you intend to cite. We recommend citing the version from the official publisher's website. Fewer preprint entries in your bibliography will make your submission more convincing.
592
 
593
  4. **Non-academic Sources**:
594
  - *Reason*: Blogs, and APIs are not indexed in academic databases.
config.yaml CHANGED
@@ -1,7 +1,3 @@
1
- files:
2
- bib: "paper.bib"
3
- output_dir: "citescan_output"
4
-
5
  bibliography:
6
  check_metadata: true
7
  check_usage: true
@@ -23,11 +19,6 @@ submission:
23
  citation_quality: true
24
  anonymization: true
25
 
26
- llm:
27
- backend: "deepseeek"
28
- model: "deepseek-chat"
29
- api_key: "sk-d7c87a7386d94879a80282cee7bd3f45"
30
-
31
  output:
32
  quiet: false
33
  minimal_verified: false
 
 
 
 
 
1
  bibliography:
2
  check_metadata: true
3
  check_usage: true
 
19
  citation_quality: true
20
  anonymization: true
21
 
 
 
 
 
 
22
  output:
23
  quiet: false
24
  minimal_verified: false
ms_deploy.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sdk_type": "gradio",
3
+ "sdk_version": "6.2.0",
4
+ "resource_configuration": "platform/2v-cpu-16g-mem",
5
+ "base_image": "ubuntu22.04-py311-torch2.3.1-modelscope1.31.0",
6
+ "environment_variables": [
7
+ {"name": "MODEL_NAME", "value": "deepseek-chat"},
8
+ {"name": "API_KEY", "value": "sk-d7c87a7386d94879a80282cee7bd3f45"}
9
+ ]
10
+ }
src/analyzers/metadata_comparator.py CHANGED
@@ -56,7 +56,7 @@ class MetadataComparator:
56
 
57
  # Thresholds for matching
58
  TITLE_THRESHOLD = 0.99
59
- AUTHOR_THRESHOLD = 0.9
60
 
61
  def __init__(self):
62
  self.normalizer = TextNormalizer
 
56
 
57
  # Thresholds for matching
58
  TITLE_THRESHOLD = 0.99
59
+ AUTHOR_THRESHOLD = 0.65
60
 
61
  def __init__(self):
62
  self.normalizer = TextNormalizer