Spaces:

DocSA
/

LP_2-test

Running

App Files Files Community

DocUA committed on 2 days ago

Commit

0c2542b

1 Parent(s): a765e3e

feat: Add debug prompt logging functionality with environment variable support

Browse files

Files changed (6) hide show

config/__init__.py +6 -0
config/environments/default.yaml +1 -0
config/environments/development.yaml +1 -0
config/environments/production.yaml +1 -0
config/settings.py +1 -0
main.py +49 -0

config/__init__.py CHANGED Viewed

@@ -156,6 +156,11 @@ else:
         }
     }
 # Required files - from YAML
 REQUIRED_FILES = _get_settings_attr('required_files', [
     'docstore_es_filter.json',
@@ -233,6 +238,7 @@ __all__ = [
     'GENERATION_TEMPERATURE',
     'LEGAL_POSITION_SCHEMA',
     'REQUIRED_FILES',
     'ModelProvider',
     'GenerationModelName',
     'AnalysisModelName',

         }
     }
+# Debug prompt logging.
+# The DEBUG_PROMPTS env var, when set to a recognised value, overrides the
+# YAML setting in BOTH directions: "1"/"true"/"yes" force logging on and
+# "0"/"false"/"no" force it off. When the variable is unset (or holds an
+# unrecognised value) the YAML setting decides.
+# NOTE(review): confirm _get_settings_attr resolves the dotted key
+# 'app.debug_prompts'; other visible call sites pass a top-level key only.
+_debug_prompts_yaml = _get_settings_attr('app.debug_prompts', False)
+_debug_prompts_env = os.getenv("DEBUG_PROMPTS", "").strip().lower()
+if _debug_prompts_env in ("1", "true", "yes"):
+    DEBUG_PROMPTS: bool = True
+elif _debug_prompts_env in ("0", "false", "no"):
+    # Explicit opt-out wins even when the YAML environment enables logging.
+    DEBUG_PROMPTS = False
+else:
+    DEBUG_PROMPTS = bool(_debug_prompts_yaml)
 # Required files - from YAML
 REQUIRED_FILES = _get_settings_attr('required_files', [
     'docstore_es_filter.json',
     'GENERATION_TEMPERATURE',
     'LEGAL_POSITION_SCHEMA',
     'REQUIRED_FILES',
+    'DEBUG_PROMPTS',
     'ModelProvider',
     'GenerationModelName',
     'AnalysisModelName',

config/environments/default.yaml CHANGED Viewed

@@ -5,6 +5,7 @@ app:
   version: "1.0.0"
   debug: false
   environment: "production"
 # AWS S3 Configuration
 aws:

   version: "1.0.0"
   debug: false
   environment: "production"
+  debug_prompts: false
 # AWS S3 Configuration
 aws:

config/environments/development.yaml CHANGED Viewed

@@ -3,6 +3,7 @@
 app:
   debug: true
   environment: "development"
 # AWS S3 Configuration - use local files in development
 aws:

 app:
   debug: true
   environment: "development"
+  debug_prompts: true
 # AWS S3 Configuration - use local files in development
 aws:

config/environments/production.yaml CHANGED Viewed

@@ -3,6 +3,7 @@
 app:
   debug: false
   environment: "production"
 # AWS S3 Configuration - use production index
 aws:

 app:
   debug: false
   environment: "production"
+  debug_prompts: false
 # AWS S3 Configuration - use production index
 aws:

config/settings.py CHANGED Viewed

@@ -13,6 +13,7 @@ class AppConfig(BaseModel):
     version: str
     debug: bool
     environment: str
 class AWSConfig(BaseModel):

     version: str
     debug: bool
     environment: str
+    debug_prompts: bool = False
 class AWSConfig(BaseModel):

main.py CHANGED Viewed

@@ -35,6 +35,7 @@ from config import (
     GENERATION_TEMPERATURE,
     LEGAL_POSITION_SCHEMA,
     REQUIRED_FILES,
     ModelProvider,
     AnalysisModelName,
     DEEPSEEK_API_KEY,
@@ -50,6 +51,26 @@ from utils import (
 )
 from embeddings import GeminiEmbedding
 # Initialize embedding model and settings BEFORE importing components
 # Priority: OpenAI > Gemini > None
 embed_model = None
@@ -352,6 +373,9 @@ class LLMAnalyzer:
                 completion_params["verbosity"] = "medium"
                 completion_params["store"] = False
             # Retry logic for OpenAI analysis
             max_retries = 3
             last_error = None
@@ -401,6 +425,12 @@ class LLMAnalyzer:
                 completion_params["response_format"] = {'type': 'json_object'}
                 completion_params["temperature"] = self.temperature
             # Retry logic for DeepSeek analysis
             max_retries = 3
             last_error = None
@@ -429,6 +459,7 @@ class LLMAnalyzer:
     async def _analyze_with_anthropic(self, prompt: str, response_schema: dict) -> str:
         """Analyze text using Anthropic."""
         try:
             response = self.client.messages.create(
                 model=self.model_name,
                 max_tokens=self.max_tokens or MAX_TOKENS_ANALYSIS,
@@ -465,6 +496,9 @@ class LLMAnalyzer:
             formatted_prompt = f"{prompt}\n\n{json_instruction}"
             # Use new google.genai API
             contents = [
                 types.Content(
@@ -771,6 +805,9 @@ def generate_legal_position(
                         # For other reasoning models (gpt-4.1, o1, etc.)
                         completion_params["reasoning_effort"] = thinking_level.lower()
                 # Execute with retries
                 for attempt in range(max_retries):
                     try:
@@ -867,6 +904,12 @@ def generate_legal_position(
                 if not is_reasoning:
                     completion_params["temperature"] = temperature
                 # Execute with retries
                 for attempt in range(max_retries):
                     try:
@@ -947,6 +990,9 @@ def generate_legal_position(
                     }
                     message_params["temperature"] = 1.0
             # Retry logic for connection errors
             max_retries = 3
             last_error = None
@@ -1063,6 +1109,9 @@ def generate_legal_position(
                 generate_content_config = types.GenerateContentConfig(**config_params)
                 response = client.models.generate_content(
                     model=model_name,
                     contents=contents,

     GENERATION_TEMPERATURE,
     LEGAL_POSITION_SCHEMA,
     REQUIRED_FILES,
+    DEBUG_PROMPTS,
     ModelProvider,
     AnalysisModelName,
     DEEPSEEK_API_KEY,
 )
 from embeddings import GeminiEmbedding
+# ============ Debug Prompt Logging ============
+# DEBUG_PROMPTS is loaded from config (YAML env setting + DEBUG_PROMPTS env var override).
+_PROMPT_SEP = "=" * 80
+def _log_prompt(provider: str, model: str, system: str, user: str) -> None:
+    """Dump the prompts about to be sent to an LLM to stdout.
+
+    Does nothing unless the module-level ``DEBUG_PROMPTS`` flag is truthy.
+
+    Args:
+        provider: Label shown in the header line.
+        model: Model identifier shown next to the provider.
+        system: System prompt; its section is omitted when this is falsy.
+        user: User prompt; always printed.
+    """
+    if DEBUG_PROMPTS:
+        # Collect every chunk in output order, then emit one print per chunk.
+        sections = [
+            f"\n{_PROMPT_SEP}",
+            f"[PROMPT DEBUG] Provider: {provider} | Model: {model}",
+            f"{_PROMPT_SEP}",
+        ]
+        if system:
+            sections.append("[PROMPT DEBUG] ── SYSTEM PROMPT ──────────────────────────────────")
+            sections.append(system)
+        sections.append("[PROMPT DEBUG] ── USER PROMPT ────────────────────────────────────")
+        sections.append(user)
+        sections.append(f"{_PROMPT_SEP}\n")
+        for chunk in sections:
+            print(chunk)
+# ============ End Debug Prompt Logging ============
 # Initialize embedding model and settings BEFORE importing components
 # Priority: OpenAI > Gemini > None
 embed_model = None
                 completion_params["verbosity"] = "medium"
                 completion_params["store"] = False
+            # Log full prompts in debug mode
+            _log_prompt("openai-analyzer", model_val, SYSTEM_PROMPT, prompt)
             # Retry logic for OpenAI analysis
             max_retries = 3
             last_error = None
                 completion_params["response_format"] = {'type': 'json_object'}
                 completion_params["temperature"] = self.temperature
+            # Log full prompts in debug mode
+            if is_reasoning:
+                _log_prompt("deepseek-analyzer", model_val, "", f"{SYSTEM_PROMPT}\n\n{prompt}")
+            else:
+                _log_prompt("deepseek-analyzer", model_val, SYSTEM_PROMPT, prompt)
             # Retry logic for DeepSeek analysis
             max_retries = 3
             last_error = None
     async def _analyze_with_anthropic(self, prompt: str, response_schema: dict) -> str:
         """Analyze text using Anthropic."""
         try:
+            _log_prompt("anthropic-analyzer", str(self.model_name), SYSTEM_PROMPT, prompt)
             response = self.client.messages.create(
                 model=self.model_name,
                 max_tokens=self.max_tokens or MAX_TOKENS_ANALYSIS,
             formatted_prompt = f"{prompt}\n\n{json_instruction}"
+            # Log full prompts in debug mode
+            _log_prompt("gemini-analyzer", str(self.model_name), SYSTEM_PROMPT, formatted_prompt)
             # Use new google.genai API
             contents = [
                 types.Content(
                         # For other reasoning models (gpt-4.1, o1, etc.)
                         completion_params["reasoning_effort"] = thinking_level.lower()
+                # Log full prompts in debug mode
+                _log_prompt("openai", model_name, system_prompt, content)
                 # Execute with retries
                 for attempt in range(max_retries):
                     try:
                 if not is_reasoning:
                     completion_params["temperature"] = temperature
+                # Log full prompts in debug mode
+                if is_reasoning:
+                    _log_prompt("deepseek", model_name, "", combined_content)
+                else:
+                    _log_prompt("deepseek", model_name, system_prompt, content)
                 # Execute with retries
                 for attempt in range(max_retries):
                     try:
                     }
                     message_params["temperature"] = 1.0
+            # Log full prompts in debug mode
+            _log_prompt("anthropic", model_name, system_prompt, content)
             # Retry logic for connection errors
             max_retries = 3
             last_error = None
                 generate_content_config = types.GenerateContentConfig(**config_params)
+                # Log full prompts in debug mode
+                _log_prompt("gemini", model_name, system_prompt, content)
                 response = client.models.generate_content(
                     model=model_name,
                     contents=contents,