Spaces:
Sleeping
Sleeping
| from dotenv import load_dotenv | |
| load_dotenv() | |
| import os | |
| import logging | |
| from pathlib import Path | |
| from typing import Dict, List, Optional | |
| from pydantic import BaseModel, Field | |
# Logging configuration: the root logger runs at INFO and every record is sent
# both to a log file (relative to the current working directory) and to the
# console stream.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[
        # Pin the encoding explicitly: without it the file is opened with the
        # platform's locale encoding, so non-ASCII log messages may be written
        # inconsistently (or fail to encode) on non-UTF-8 systems.
        logging.FileHandler("dissistant.log", encoding="utf-8"),
        logging.StreamHandler(),
    ],
)
# Absolute, symlink-resolved directory that contains this module; used as the
# anchor for paths that ship alongside the code.
BASE_DIR = Path(__file__).resolve().parents[0]
class Settings(BaseModel):
    """Central configuration model for the application.

    Every field carries a default. Defaults that come from the process
    environment are evaluated once, when this class body executes, so the
    environment must be populated before the module is imported.
    """

    # --- Application identity -------------------------------------------
    app_name: str = "Graduate Center Dissertation Compliance Assistant"
    description: str = (
        "A tool to check dissertations and theses for compliance "
        "with Graduate Center formatting and citation rules."
    )
    version: str = "0.1.0"
    # Only the (case-insensitive) value "true" enables debug; unset means False.
    debug: bool = os.environ.get("DEBUG", "False").lower() == "true"

    # --- Rule file locations --------------------------------------------
    # The three file paths are computed from the default ``rules_dir`` while
    # the class body runs; overriding ``rules_dir`` on an instance does not
    # move them.
    rules_dir: Path = BASE_DIR / "rules"
    formatting_rules_path: Path = rules_dir / "formatting_rules.md"
    citation_rules_path: Path = rules_dir / "citation_rules.md"
    metadata_rules_path: Path = rules_dir / "metadata_rules.md"

    # --- LLM backend ----------------------------------------------------
    # Provider name is lower-cased; expected values: 'local', 'openai', or 'openrouter'.
    llm_provider: str = os.environ.get("LLM_PROVIDER", "openrouter").lower()
    llm_model_name: str = os.environ.get("LLM_MODEL_NAME", "google/gemini-2.5-pro")
    llm_base_url: str = os.environ.get("LLM_API_BASE", "https://openrouter.ai/api/v1")
    # Fallback key is the placeholder used for a local LM Studio server.
    llm_api_key: str = os.environ.get("LLM_API_KEY", "lm-studio")

    # --- OpenAI-specific ------------------------------------------------
    openai_api_key: Optional[str] = os.environ.get("OPENAI_API_KEY")
    openai_model: str = os.environ.get("OPENAI_MODEL", "gpt-4")

    # --- OpenRouter-specific --------------------------------------------
    openrouter_api_key: Optional[str] = os.environ.get("OPENROUTER_API_KEY")

    # --- Document processing --------------------------------------------
    max_file_size_mb: int = 50  # upper bound for uploads, in megabytes
    supported_file_types: List[str] = ["pdf", "docx"]

    # --- Citation styles ------------------------------------------------
    citation_styles: List[str] = ["APA", "MLA", "Chicago", "Custom"]
    default_citation_style: str = "APA"

    # --- Departments ----------------------------------------------------
    departments: List[str] = [
        "General", "English", "History", "Psychology", "Computer Science", "Other",
    ]

    # --- LLM prompt templates -------------------------------------------
    # Each template contains ``{name}`` placeholders to be substituted by the
    # caller. The text lines are kept at column 0 so no leading whitespace
    # ends up inside the prompt strings.
    # NOTE(review): blank lines/indentation inside the templates could not be
    # recovered from the reviewed copy - confirm against the upstream file.
    formatting_analysis_template: str = """
You are an expert in academic document formatting. Analyze the following document excerpt for compliance with the institutional formatting rules.
FORMATTING RULES:
{formatting_rules}
DOCUMENT METADATA:
{document_metadata}
DOCUMENT EXCERPT:
{document_excerpt}
Identify any formatting issues in the document. For each issue, provide:
1. A description of the issue
2. The location in the document
3. The specific rule that is violated
4. A suggestion for how to fix the issue
5. The severity of the issue (critical, warning, or info)
Format your response as a JSON array of issues, with each issue having the following fields:
- "message": A clear description of the issue
- "location": Where in the document the issue occurs
- "rule": The specific rule that is violated
- "suggestion": How to fix the issue
- "severity": The severity level (critical, warning, or info)
If no issues are found, return an empty array.
"""
    citation_analysis_template: str = """
You are an expert in academic citation styles. Analyze the following document excerpt for compliance with the specified citation style.
CITATION STYLE: {citation_style}
CITATION STYLE GUIDELINES:
{citation_guidelines}
DOCUMENT EXCERPT:
{document_excerpt}
Identify any citation issues in the document. For each issue, provide:
1. A description of the issue
2. The problematic citation
3. The page or location where it appears
4. A suggestion for how to fix the issue
5. The severity of the issue (critical, warning, or info)
Format your response as a JSON array of issues, with each issue having the following fields:
- "message": A clear description of the issue
- "citation": The problematic citation
- "page": The page or location where it appears
- "suggestion": How to fix the issue
- "severity": The severity level (critical, warning, or info)
If no issues are found, return an empty array.
"""
    metadata_analysis_template: str = """
You are an expert in academic document structure. Analyze the following document front matter for compliance with the institutional metadata requirements.
METADATA REQUIREMENTS:
{metadata_requirements}
DOCUMENT FRONT MATTER:
{front_matter}
Identify any metadata or front matter issues in the document. For each issue, provide:
1. A description of the issue
2. The specific element that is problematic
3. A suggestion for how to fix the issue
4. The severity of the issue (critical, warning, or info)
Format your response as a JSON array of issues, with each issue having the following fields:
- "message": A clear description of the issue
- "element": The specific element that is problematic
- "suggestion": How to fix the issue
- "severity": The severity level (critical, warning, or info)
If no issues are found, return an empty array.
"""
    overall_analysis_template: str = """
You are an expert in academic document formatting and citation. Review the following analysis results and provide an overall assessment of the document's compliance with institutional requirements.
FORMATTING ISSUES:
{formatting_issues}
CITATION ISSUES:
{citation_issues}
METADATA ISSUES:
{metadata_issues}
Provide:
1. An overall assessment of the document's compliance
2. A list of key recommendations for improving the document
Format your response as a JSON object with the following fields:
- "overall_assessment": A paragraph summarizing the document's compliance status
- "recommendations": An array of specific recommendations for improving the document
Be constructive and helpful in your assessment and recommendations.
"""
# Module-level settings instance, built once with the defaults resolved above.
settings = Settings()

if __name__ == "__main__":
    # Diagnostic dump of the effective configuration when the module is run
    # directly as a script.
    print("Application Settings:")
    for field_name, value in settings.model_dump().items():
        # Skip long strings (the prompt templates) to keep the output readable.
        if isinstance(value, str) and len(value) >= 100:
            continue
        # Never echo credentials to the terminal or captured logs: for any
        # configured API key, show only that it is set.
        shown = "***" if field_name.endswith("api_key") and value else value
        print(f" {field_name}: {shown}")