# BibGuard / bibguard.yaml
# thinkwee
# init
# 46df5f0
# ==============================================================================
# BibGuard Configuration File
# ==============================================================================
#
# Usage: python main.py --config bibguard.yaml
#        python main.py   (auto-detects bibguard.yaml in the current or parent directories)
#
# All paths are relative to this configuration file's directory.
# ==============================================================================
# 📁 File Settings
# ==============================================================================
files:
  # Required: Path to your .bib bibliography file
  bib: "test.bib"
  # Required: Path to your .tex LaTeX source file
  tex: "test.tex"
  # Optional: Directory path for recursive scanning (Experimental)
  # When set, BibGuard will recursively search for all .tex and .bib files in this directory.
  # This mode is an alternative to 'bib' and 'tex'. Use either this OR bib/tex.
  # input_dir: "./paper_project"
  # Output directory for all generated reports and files (default: bibguard_output)
  # All outputs including reports, cleaned .bib, and input file copies will be saved here
  output_dir: "test"
# ==============================================================================
# 🎓 Conference Template
# ==============================================================================
# Conference template used for venue-specific checks and formatting rules.
# Available templates: acl, emnlp, naacl, cvpr, iccv, eccv, neurips, icml, iclr
# Leave as the empty string ("") to skip template-specific checks.
template: ""
# ==============================================================================
# 📚 Bibliography Checks
# ==============================================================================
bibliography:
  # Metadata Validation - Verify bib entries against online databases (arXiv, CrossRef, etc.)
  # Detects incorrect titles, authors, venues, and publication years
  # ⚠️ Checking metadata takes some time because it queries multiple online sources.
  # Set this to false if you don't need metadata checks.
  check_metadata: true
  # Usage Check - Detect unused bib entries and missing citations
  # Identifies entries in .bib not cited in .tex, and citations without bib entries
  check_usage: true
  # Duplicate Detection - Find duplicate entries with different keys
  # Uses fuzzy matching on titles and DOIs to identify the same paper cited multiple times
  check_duplicates: true
  # Preprint Ratio Check - Warn if too many references are preprints
  # Detects arXiv, bioRxiv, and other preprints. Warns if ratio exceeds threshold.
  check_preprint_ratio: true
  preprint_warning_threshold: 0.50  # Warn if more than 50% of used entries are preprints
  # Relevance Assessment - Use LLM to evaluate if citations match their context
  # Requires LLM configuration (see llm section below). Disabled by default due to API costs.
  check_relevance: false
# ==============================================================================
# 📋 Submission Quality Checks
# ==============================================================================
submission:
  # ─────────────────────────────────────────────────────────────────────────────
  # Format Checks
  # ─────────────────────────────────────────────────────────────────────────────
  # Caption Position - Ensure table captions are above, figure captions below
  # Checks \caption placement relative to \begin{table}/\begin{figure}
  caption: true
  # Cross-References - Verify all figures/tables/sections are referenced in text
  # Detects orphaned floats that are never mentioned
  reference: true
  # Formatting Standards - Check citation format, spacing, special characters
  # Validates \cite{} usage, non-breaking spaces, proper quotation marks, etc.
  formatting: true
  # Equation Checks - Verify equation punctuation and numbering consistency
  # Ensures equations end with proper punctuation and labels are used correctly
  equation: true
  # ─────────────────────────────────────────────────────────────────────────────
  # Writing Quality
  # ─────────────────────────────────────────────────────────────────────────────
  # AI Artifacts - Detect traces of AI-generated text
  # Flags phrases like "Sure, here is...", "As an AI...", "It's important to note..."
  ai_artifacts: true
  # Sentence Quality - Identify overly long sentences, weak openings, redundant phrases
  # Helps improve readability and academic writing style
  sentence: true
  # Terminology Consistency - Check for inconsistent spelling, hyphenation, US/UK variants
  # Examples: "deep learning" vs "deep-learning", "color" vs "colour"
  consistency: true
  # ─────────────────────────────────────────────────────────────────────────────
  # Academic Standards
  # ─────────────────────────────────────────────────────────────────────────────
  # Acronym Definitions - Ensure acronyms are defined on first use
  # Example: "Natural Language Processing (NLP)" before using "NLP" alone
  acronym: true
  # Number Formatting - Check percentage formatting consistency
  # Ensures no space before % sign and consistent use of '%' vs 'percent'
  number: true
  # Citation Quality - Flag outdated references and citation formatting issues
  # Warns about papers older than 30 years and checks citation formatting (et al., hardcoded citations)
  citation_quality: true
  # ─────────────────────────────────────────────────────────────────────────────
  # Review Compliance
  # ─────────────────────────────────────────────────────────────────────────────
  # Anonymization - Check double-blind review compliance
  # Detects GitHub links, acknowledgments, self-citations that may reveal author identity
  anonymization: true
# ==============================================================================
# 🔍 Metadata Check Workflow
# ==============================================================================
# Define the data sources and order for metadata validation.
# BibGuard will try each enabled source in sequence until a match is found.
# Set enabled: false to skip a particular source.
# Ordered list of metadata sources; each entry is a mapping with
# 'name', 'enabled', and a human-readable 'description'.
workflow:
  - name: arxiv_id
    enabled: true
    description: "Lookup by arXiv ID (fastest, most reliable for preprints)"
  - name: crossref_doi
    enabled: true
    description: "Lookup by DOI via CrossRef (authoritative for published papers)"
  - name: semantic_scholar
    enabled: true
    description: "Semantic Scholar API (good coverage, includes citations)"
  - name: dblp
    enabled: true
    description: "DBLP database (comprehensive for computer science papers)"
  - name: openalex
    enabled: true
    description: "OpenAlex API (broad coverage across disciplines)"
  - name: arxiv_title
    enabled: true
    description: "Search arXiv by title (fallback when ID unavailable)"
  - name: crossref_title
    enabled: true
    description: "Search CrossRef by title (fallback when DOI unavailable)"
  - name: google_scholar
    enabled: false  # May be rate-limited, disabled by default
    description: "Google Scholar web scraping (use as last resort)"
# ==============================================================================
# 🤖 LLM Configuration (for Relevance Checking)
# ==============================================================================
llm:
  # Backend provider: ollama, vllm, gemini, openai, anthropic, deepseek
  # Each backend requires different setup (API keys, local installation, etc.)
  backend: "gemini"
  # Model name (leave empty to use backend default)
  # Examples: "gpt-4", "claude-3-opus", "gemini-pro", "llama3"
  model: ""
  # API endpoint (leave empty to use backend default)
  # Only needed for self-hosted models (vllm, ollama) or custom endpoints
  endpoint: ""
  # API key (recommended to use environment variables instead)
  # Set GEMINI_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY, etc. in your environment
  # Avoid committing a real key in this file to version control.
  api_key: ""
# ==============================================================================
# 📊 Output Settings
# ==============================================================================
output:
  # Quiet mode - Suppress progress messages, only output final reports
  # Useful for CI/CD pipelines or batch processing
  quiet: false
  # Minimal verified entries - Hide detailed info for entries that passed all checks
  # Reduces report size when you only care about issues
  minimal_verified: false