---
# ==============================================================================
# BibGuard Configuration File
# ==============================================================================
#
# Usage: python main.py --config bibguard.yaml
#        python main.py   (auto-detects bibguard.yaml in the current/parent directories)
#
# All paths are relative to this configuration file's directory.
# ==============================================================================
# File Settings
# ==============================================================================
# Input and output locations. Paths are relative to this configuration
# file's directory.
# NOTE(review): nesting below was restored from the section layout; confirm
# that the loader reads these keys under `files`.
files:
  # Required: Path to your .bib bibliography file
  bib: "test.bib"

  # Required: Path to your .tex LaTeX source file
  tex: "test.tex"

  # Optional: Directory path for recursive scanning (Experimental)
  # When set, BibGuard will recursively search for all .tex and .bib files
  # in this directory.
  # This mode is an alternative to 'bib' and 'tex': use either this OR bib/tex.
  # input_dir: "./paper_project"

  # Output directory for all generated reports and files
  # (default: bibguard_output)
  # All outputs, including reports, the cleaned .bib, and copies of the input
  # files, will be saved here.
  output_dir: "test"
# ==============================================================================
# Conference Template
# ==============================================================================
# Conference template used for venue-specific checks and formatting rules.
# Available templates:
#   acl, emnlp, naacl, cvpr, iccv, eccv, neurips, icml, iclr
# An empty string skips template-specific checks.
template: ''
# ==============================================================================
# Bibliography Checks
# ==============================================================================
# Toggles for checks that operate on the bibliography entries.
bibliography:
  # Metadata Validation - Verify bib entries against online databases
  # (arXiv, CrossRef, etc.)
  # Detects incorrect titles, authors, venues, and publication years.
  # WARNING: This check is slow because it queries multiple online sources.
  # Set it to false if you don't need metadata validation.
  check_metadata: true

  # Usage Check - Detect unused bib entries and missing citations
  # Identifies entries in .bib not cited in .tex, and citations without
  # bib entries.
  check_usage: true

  # Duplicate Detection - Find duplicate entries with different keys
  # Uses fuzzy matching on titles and DOIs to identify the same paper cited
  # multiple times.
  check_duplicates: true

  # Preprint Ratio Check - Warn if too many references are preprints
  # Detects arXiv, bioRxiv, and other preprints. Warns if the ratio exceeds
  # the threshold below.
  check_preprint_ratio: true
  preprint_warning_threshold: 0.50  # Warn if more than 50% of used entries are preprints

  # Relevance Assessment - Use an LLM to evaluate if citations match their
  # context. Requires LLM configuration (see the llm section).
  # Disabled by default due to API costs.
  check_relevance: false
# ==============================================================================
# Submission Quality Checks
# ==============================================================================
# Toggles for checks that operate on the LaTeX source, grouped by theme.
submission:
  # ---------------------------------------------------------------------------
  # Format Checks
  # ---------------------------------------------------------------------------

  # Caption Position - Ensure table captions are above, figure captions below
  # Checks \caption placement relative to \begin{table}/\begin{figure}
  caption: true

  # Cross-References - Verify all figures/tables/sections are referenced in text
  # Detects orphaned floats that are never mentioned
  reference: true

  # Formatting Standards - Check citation format, spacing, special characters
  # Validates \cite{} usage, non-breaking spaces, proper quotation marks, etc.
  formatting: true

  # Equation Checks - Verify equation punctuation and numbering consistency
  # Ensures equations end with proper punctuation and labels are used correctly
  equation: true

  # ---------------------------------------------------------------------------
  # Writing Quality
  # ---------------------------------------------------------------------------

  # AI Artifacts - Detect traces of AI-generated text
  # Flags phrases like "Sure, here is...", "As an AI...",
  # "It's important to note..."
  ai_artifacts: true

  # Sentence Quality - Identify overly long sentences, weak openings,
  # redundant phrases
  # Helps improve readability and academic writing style
  sentence: true

  # Terminology Consistency - Check for inconsistent spelling, hyphenation,
  # US/UK variants
  # Examples: "deep learning" vs "deep-learning", "color" vs "colour"
  consistency: true

  # ---------------------------------------------------------------------------
  # Academic Standards
  # ---------------------------------------------------------------------------

  # Acronym Definitions - Ensure acronyms are defined on first use
  # Example: "Natural Language Processing (NLP)" before using "NLP" alone
  acronym: true

  # Number Formatting - Check percentage formatting consistency
  # Ensures no space before % sign and consistent use of '%' vs 'percent'
  number: true

  # Citation Quality - Flag outdated references and citation formatting issues
  # Warns about papers older than 30 years and checks citation formatting
  # (et al., hardcoded citations)
  citation_quality: true

  # ---------------------------------------------------------------------------
  # Review Compliance
  # ---------------------------------------------------------------------------

  # Anonymization - Check double-blind review compliance
  # Detects GitHub links, acknowledgments, self-citations that may reveal
  # author identity
  anonymization: true
# ==============================================================================
# Metadata Check Workflow
# ==============================================================================
# Define the data sources and order for metadata validation.
# BibGuard will try each enabled source in sequence until a match is found.
# Set enabled: false to skip a particular source.
# Ordered list of metadata sources. Each item is a mapping with:
#   name:        source identifier
#   enabled:     true to query this source, false to skip it
#   description: human-readable note about the source
# Sources are listed in the order they are tried.
workflow:
  - name: arxiv_id
    enabled: true
    description: "Lookup by arXiv ID (fastest, most reliable for preprints)"

  - name: crossref_doi
    enabled: true
    description: "Lookup by DOI via CrossRef (authoritative for published papers)"

  - name: semantic_scholar
    enabled: true
    description: "Semantic Scholar API (good coverage, includes citations)"

  - name: dblp
    enabled: true
    description: "DBLP database (comprehensive for computer science papers)"

  - name: openalex
    enabled: true
    description: "OpenAlex API (broad coverage across disciplines)"

  - name: arxiv_title
    enabled: true
    description: "Search arXiv by title (fallback when ID unavailable)"

  - name: crossref_title
    enabled: true
    description: "Search CrossRef by title (fallback when DOI unavailable)"

  - name: google_scholar
    enabled: false  # May be rate-limited, disabled by default
    description: "Google Scholar web scraping (use as last resort)"
# ==============================================================================
# LLM Configuration (for Relevance Checking)
# ==============================================================================
# LLM settings used for relevance checking.
llm:
  # Backend provider: ollama, vllm, gemini, openai, anthropic, deepseek
  # Each backend requires different setup (API keys, local installation, etc.)
  backend: "gemini"

  # Model name (leave empty to use the backend default)
  # Examples: "gpt-4", "claude-3-opus", "gemini-pro", "llama3"
  model: ""

  # API endpoint (leave empty to use the backend default)
  # Only needed for self-hosted models (vllm, ollama) or custom endpoints
  endpoint: ""

  # API key. Prefer environment variables over storing a key in this file:
  # set GEMINI_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY, etc. in your
  # environment.
  api_key: ""
# ==============================================================================
# Output Settings
# ==============================================================================
# Controls console verbosity and report detail.
output:
  # Quiet mode - Suppress progress messages, only output final reports
  # Useful for CI/CD pipelines or batch processing
  quiet: false

  # Minimal verified entries - Hide detailed info for entries that passed
  # all checks
  # Reduces report size when you only care about issues
  minimal_verified: false