| # ============================================================================== | |
| # BibGuard Configuration File | |
| # ============================================================================== | |
| # | |
| # Usage: python main.py --config bibguard.yaml | |
| # python main.py (auto-detect bibguard.yaml in current/parent directories) | |
| # | |
| # All paths are relative to this configuration file's directory. | |
| # ============================================================================== | |
| # File Settings | |
| # ============================================================================== | |
| files: | |
| # Required: Path to your .bib bibliography file | |
| bib: "test.bib" | |
| # Required: Path to your .tex LaTeX source file | |
| tex: "test.tex" | |
| # Optional: Directory path for recursive scanning (Experimental) | |
| # When set, BibGuard will recursively search for all .tex and .bib files in this directory. | |
| # This mode is an alternative to 'bib' and 'tex': set either input_dir OR bib/tex, not both. | |
| # input_dir: "./paper_project" | |
| # Output directory for all generated reports and files (default: bibguard_output) | |
| # All outputs including reports, cleaned .bib, and input file copies will be saved here | |
| output_dir: "test" | |
| # ============================================================================== | |
| # Conference Template | |
| # ============================================================================== | |
| # Specify a conference template for venue-specific checks and formatting rules. | |
| # Available templates: acl, emnlp, naacl, cvpr, iccv, eccv, neurips, icml, iclr | |
| # Leave empty ("") to skip template-specific checks. | |
| template: "" | |
| # ============================================================================== | |
| # Bibliography Checks | |
| # ============================================================================== | |
| bibliography: | |
| # Metadata Validation - Verify bib entries against online databases (arXiv, CrossRef, etc.) | |
| # Detects incorrect titles, authors, venues, and publication years | |
| # ⚠️ Metadata checks take some time because they query multiple online sources. Set this to false if you don't need to check metadata. | |
| check_metadata: true | |
| # Usage Check - Detect unused bib entries and missing citations | |
| # Identifies entries in .bib not cited in .tex, and citations without bib entries | |
| check_usage: true | |
| # Duplicate Detection - Find duplicate entries with different keys | |
| # Uses fuzzy matching on titles and DOIs to identify the same paper cited multiple times | |
| check_duplicates: true | |
| # Preprint Ratio Check - Warn if too many references are preprints | |
| # Detects arXiv, bioRxiv, and other preprints. Warns if ratio exceeds threshold. | |
| check_preprint_ratio: true | |
| preprint_warning_threshold: 0.50 # Warn if more than 50% of used entries are preprints | |
| # Relevance Assessment - Use LLM to evaluate if citations match their context | |
| # Requires LLM configuration (see llm section below). Disabled by default due to API costs. | |
| check_relevance: false | |
| # ============================================================================== | |
| # Submission Quality Checks | |
| # ============================================================================== | |
| submission: | |
| # ───────────────────────────────────────────────────────────────────────────── | |
| # Format Checks | |
| # ───────────────────────────────────────────────────────────────────────────── | |
| # Caption Position - Ensure table captions are above, figure captions below | |
| # Checks \caption placement relative to \begin{table}/\begin{figure} | |
| caption: true | |
| # Cross-References - Verify all figures/tables/sections are referenced in text | |
| # Detects orphaned floats that are never mentioned | |
| reference: true | |
| # Formatting Standards - Check citation format, spacing, special characters | |
| # Validates \cite{} usage, non-breaking spaces, proper quotation marks, etc. | |
| formatting: true | |
| # Equation Checks - Verify equation punctuation and numbering consistency | |
| # Ensures equations end with proper punctuation and labels are used correctly | |
| equation: true | |
| # ───────────────────────────────────────────────────────────────────────────── | |
| # Writing Quality | |
| # ───────────────────────────────────────────────────────────────────────────── | |
| # AI Artifacts - Detect traces of AI-generated text | |
| # Flags phrases like "Sure, here is...", "As an AI...", "It's important to note..." | |
| ai_artifacts: true | |
| # Sentence Quality - Identify overly long sentences, weak openings, redundant phrases | |
| # Helps improve readability and academic writing style | |
| sentence: true | |
| # Terminology Consistency - Check for inconsistent spelling, hyphenation, US/UK variants | |
| # Examples: "deep learning" vs "deep-learning", "color" vs "colour" | |
| consistency: true | |
| # ───────────────────────────────────────────────────────────────────────────── | |
| # Academic Standards | |
| # ───────────────────────────────────────────────────────────────────────────── | |
| # Acronym Definitions - Ensure acronyms are defined on first use | |
| # Example: "Natural Language Processing (NLP)" before using "NLP" alone | |
| acronym: true | |
| # Number Formatting - Check percentage formatting consistency | |
| # Ensures no space before % sign and consistent use of '%' vs 'percent' | |
| number: true | |
| # Citation Quality - Flag outdated references and citation formatting issues | |
| # Warns about papers older than 30 years and checks citation formatting (et al., hardcoded citations) | |
| citation_quality: true | |
| # ───────────────────────────────────────────────────────────────────────────── | |
| # Review Compliance | |
| # ───────────────────────────────────────────────────────────────────────────── | |
| # Anonymization - Check double-blind review compliance | |
| # Detects GitHub links, acknowledgments, self-citations that may reveal author identity | |
| anonymization: true | |
| # ============================================================================== | |
| # Metadata Check Workflow | |
| # ============================================================================== | |
| # Define the data sources and order for metadata validation. | |
| # BibGuard will try each enabled source in sequence until a match is found. | |
| # Set enabled: false to skip a particular source. | |
| workflow: | |
| - name: arxiv_id | |
| enabled: true | |
| description: "Lookup by arXiv ID (fastest, most reliable for preprints)" | |
| - name: crossref_doi | |
| enabled: true | |
| description: "Lookup by DOI via CrossRef (authoritative for published papers)" | |
| - name: semantic_scholar | |
| enabled: true | |
| description: "Semantic Scholar API (good coverage, includes citations)" | |
| - name: dblp | |
| enabled: true | |
| description: "DBLP database (comprehensive for computer science papers)" | |
| - name: openalex | |
| enabled: true | |
| description: "OpenAlex API (broad coverage across disciplines)" | |
| - name: arxiv_title | |
| enabled: true | |
| description: "Search arXiv by title (fallback when ID unavailable)" | |
| - name: crossref_title | |
| enabled: true | |
| description: "Search CrossRef by title (fallback when DOI unavailable)" | |
| - name: google_scholar | |
| enabled: false # May be rate-limited, disabled by default | |
| description: "Google Scholar web scraping (use as last resort)" | |
| # ============================================================================== | |
| # 🤖 LLM Configuration (for Relevance Checking) | |
| # ============================================================================== | |
| llm: | |
| # Backend provider: ollama, vllm, gemini, openai, anthropic, deepseek | |
| # Each backend requires different setup (API keys, local installation, etc.) | |
| backend: "gemini" | |
| # Model name (leave empty to use backend default) | |
| # Examples: "gpt-4", "claude-3-opus", "gemini-pro", "llama3" | |
| model: "" | |
| # API endpoint (leave empty to use backend default) | |
| # Only needed for self-hosted models (vllm, ollama) or custom endpoints | |
| endpoint: "" | |
| # API key (recommended to use environment variables instead) | |
| # Set GEMINI_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY, etc. in your environment | |
| api_key: "" | |
| # ============================================================================== | |
| # Output Settings | |
| # ============================================================================== | |
| output: | |
| # Quiet mode - Suppress progress messages, only output final reports | |
| # Useful for CI/CD pipelines or batch processing | |
| quiet: false | |
| # Minimal verified entries - Hide detailed info for entries that passed all checks | |
| # Reduces report size when you only care about issues | |
| minimal_verified: false | |