Spaces:

Zwounds
/

dissistant

Sleeping

dissistant / config.py

Stephen Zweibel

Update app for Hugging Face

5d74609 8 months ago

6.67 kB

	from dotenv import load_dotenv
	load_dotenv()

	import os
	import logging
	from pathlib import Path
	from typing import Dict, List, Optional
	from pydantic import BaseModel, Field

	# Logging configuration
	# Records at INFO level and above are sent to two destinations: the
	# "dissistant.log" file and a default-constructed stream handler.
	_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
	_log_file_handler = logging.FileHandler("dissistant.log")
	_log_stream_handler = logging.StreamHandler()
	logging.basicConfig(
	    format=_LOG_FORMAT,
	    level=logging.INFO,
	    handlers=[_log_file_handler, _log_stream_handler],
	)

	# Base directory: absolute path of the folder that contains this module.
	_CONFIG_FILE = Path(__file__).resolve()
	BASE_DIR = _CONFIG_FILE.parent

	# Settings collects every tunable of the application in a single pydantic
	# model: application metadata, rule-file locations, LLM connection details,
	# document-processing limits, and the prompt templates for each analysis
	# step.  Environment-backed defaults are evaluated once, when the class body
	# runs.  Field order is significant for anything that iterates the model.
	class Settings(BaseModel):
	    """Application settings"""

	    # ---- Application ----
	    app_name: str = "Graduate Center Dissertation Compliance Assistant"
	    description: str = (
	        "A tool to check dissertations and theses for compliance with "
	        "Graduate Center formatting and citation rules."
	    )
	    version: str = "0.1.0"
	    # True only when DEBUG equals "true" (any letter case); unset means False.
	    debug: bool = os.environ.get("DEBUG", "False").lower() == "true"

	    # ---- Paths ----
	    # The three rule files live directly inside rules_dir.
	    rules_dir: Path = BASE_DIR / "rules"
	    formatting_rules_path: Path = rules_dir.joinpath("formatting_rules.md")
	    citation_rules_path: Path = rules_dir.joinpath("citation_rules.md")
	    metadata_rules_path: Path = rules_dir.joinpath("metadata_rules.md")

	    # ---- LLM ----
	    # Provider is lower-cased; expected values: 'local', 'openai', or 'openrouter'.
	    llm_provider: str = os.environ.get("LLM_PROVIDER", "openrouter").lower()
	    llm_model_name: str = os.environ.get("LLM_MODEL_NAME", "google/gemini-2.5-pro")
	    llm_base_url: str = os.environ.get("LLM_API_BASE", "https://openrouter.ai/api/v1")
	    # Fallback key is the default for local LM Studio.
	    llm_api_key: str = os.environ.get("LLM_API_KEY", "lm-studio")

	    # ---- OpenAI ----
	    openai_api_key: Optional[str] = os.environ.get("OPENAI_API_KEY")
	    openai_model: str = os.environ.get("OPENAI_MODEL", "gpt-4")

	    # ---- OpenRouter ----
	    openrouter_api_key: Optional[str] = os.environ.get("OPENROUTER_API_KEY")

	    # ---- Document processing ----
	    # Upper bound on file size, expressed in megabytes.
	    max_file_size_mb: int = 50
	    supported_file_types: List[str] = ["pdf", "docx"]

	    # ---- Citation styles ----
	    citation_styles: List[str] = ["APA", "MLA", "Chicago", "Custom"]
	    default_citation_style: str = "APA"

	    # ---- Departments ----
	    departments: List[str] = [
	        "General", "English", "History",
	        "Psychology", "Computer Science", "Other",
	    ]

	    # ---- LLM prompt templates ----
	    # The template text below is runtime data and is kept exactly as written.
	    # Placeholders: {formatting_rules}, {document_metadata}, {document_excerpt}.
	    # NOTE(review): presumably filled with str.format by the caller - confirm.
	    formatting_analysis_template: str = """
	You are an expert in academic document formatting. Analyze the following document excerpt for compliance with the institutional formatting rules.

	FORMATTING RULES:
	{formatting_rules}

	DOCUMENT METADATA:
	{document_metadata}

	DOCUMENT EXCERPT:
	{document_excerpt}

	Identify any formatting issues in the document. For each issue, provide:
	1. A description of the issue
	2. The location in the document
	3. The specific rule that is violated
	4. A suggestion for how to fix the issue
	5. The severity of the issue (critical, warning, or info)

	Format your response as a JSON array of issues, with each issue having the following fields:
	- "message": A clear description of the issue
	- "location": Where in the document the issue occurs
	- "rule": The specific rule that is violated
	- "suggestion": How to fix the issue
	- "severity": The severity level (critical, warning, or info)

	If no issues are found, return an empty array.
	"""

	    # Placeholders: {citation_style}, {citation_guidelines}, {document_excerpt}.
	    citation_analysis_template: str = """
	You are an expert in academic citation styles. Analyze the following document excerpt for compliance with the specified citation style.

	CITATION STYLE: {citation_style}

	CITATION STYLE GUIDELINES:
	{citation_guidelines}

	DOCUMENT EXCERPT:
	{document_excerpt}

	Identify any citation issues in the document. For each issue, provide:
	1. A description of the issue
	2. The problematic citation
	3. The page or location where it appears
	4. A suggestion for how to fix the issue
	5. The severity of the issue (critical, warning, or info)

	Format your response as a JSON array of issues, with each issue having the following fields:
	- "message": A clear description of the issue
	- "citation": The problematic citation
	- "page": The page or location where it appears
	- "suggestion": How to fix the issue
	- "severity": The severity level (critical, warning, or info)

	If no issues are found, return an empty array.
	"""

	    # Placeholders: {metadata_requirements}, {front_matter}.
	    metadata_analysis_template: str = """
	You are an expert in academic document structure. Analyze the following document front matter for compliance with the institutional metadata requirements.

	METADATA REQUIREMENTS:
	{metadata_requirements}

	DOCUMENT FRONT MATTER:
	{front_matter}

	Identify any metadata or front matter issues in the document. For each issue, provide:
	1. A description of the issue
	2. The specific element that is problematic
	3. A suggestion for how to fix the issue
	4. The severity of the issue (critical, warning, or info)

	Format your response as a JSON array of issues, with each issue having the following fields:
	- "message": A clear description of the issue
	- "element": The specific element that is problematic
	- "suggestion": How to fix the issue
	- "severity": The severity level (critical, warning, or info)

	If no issues are found, return an empty array.
	"""

	    # Placeholders: {formatting_issues}, {citation_issues}, {metadata_issues}.
	    overall_analysis_template: str = """
	You are an expert in academic document formatting and citation. Review the following analysis results and provide an overall assessment of the document's compliance with institutional requirements.

	FORMATTING ISSUES:
	{formatting_issues}

	CITATION ISSUES:
	{citation_issues}

	METADATA ISSUES:
	{metadata_issues}

	Provide:
	1. An overall assessment of the document's compliance
	2. A list of key recommendations for improving the document

	Format your response as a JSON object with the following fields:
	- "overall_assessment": A paragraph summarizing the document's compliance status
	- "recommendations": An array of specific recommendations for improving the document

	Be constructive and helpful in your assessment and recommendations.
	"""

	# Instantiate settings: one module-level Settings object built from the
	# class defaults (no overrides are passed).
	settings = Settings()

	if __name__ == "__main__":
	    # Print out the settings for verification if run directly.
	    print("Application Settings:")
	    for field_name, value in settings.model_dump().items():
	        # Skip printing long strings like templates.
	        if isinstance(value, str) and len(value) >= 100:
	            continue
	        # Never echo credentials: a configured API key is replaced by a
	        # fixed mask so this dump can be shared or logged safely.  Unset
	        # keys still print as None, which shows they are missing.
	        if value and field_name.endswith("api_key"):
	            value = "********"
	        print(f" {field_name}: {value}")