Spaces:

DocSA
/

LP_2-test

Running

App Files Files Community

DocUA committed about 4 hours ago

Commit

b434018

1 Parent(s): ca73321

feat: Optimize caching for Anthropic and OpenAI prompts, restructure prompt variables for efficiency

Browse files

Files changed (3) hide show

CHANGES.md +64 -0
main.py +32 -16
prompts.py +8 -8

CHANGES.md CHANGED Viewed

@@ -1,3 +1,67 @@
 # Changelog - Додано редагування промптів з ізоляцією сесій
 ## Дата: 2025-12-28

+# Changelog - Anthropic Prompt Caching
+## Дата: 2026-02-25
+## Зміни
+### ⚡ Оптимізація: Anthropic Prompt Caching
+#### Проблема
+Кожен запит до Anthropic API повністю перераховував токени системного промпту та інструктажу, хоча ця частина є статичною між запитами.
+#### Рішення
+**1. Увімкнення автоматичного кешування (`main.py`)**
+Додано параметр `cache_control={"type": "ephemeral"}` на верхній рівень обох Anthropic-викликів:
+- `LLMAnalyzer._analyze_with_anthropic()` — для аналізу прецедентів
+- `generate_legal_position()` — для генерації правових позицій
+API автоматично визначає найдовший відповідний префікс, переміщує точку кешу до останнього блоку, придатного до кешування, та повторно використовує її на кожному наступному кроці.
+**2. Реструктуризація промпту (`prompts.py`)**
+Змінні частини промпту (`<court_decision>`, `<comment>`) переміщено в кінець `LEGAL_POSITION_PROMPT`:
+```
+До:                          Після:
+<task>          статичний    <task>          статичний
+<court_decision> ЗМІННИЙ     <strategy>      статичний
+<comment>        ЗМІННИЙ     <rules_do>      статичний  ← кешується
+<strategy>      статичний    <rules_dont>    статичний
+<rules_do>      статичний    <output_format> статичний
+<rules_dont>    статичний    ─── точка кешу ───────────
+<output_format> статичний    <court_decision> ЗМІННИЙ
+                             <comment>        ЗМІННИЙ
+```
+Тепер весь статичний інструктаж (~1500 токенів) кешується між запитами. Повторно обчислюються лише змінні блоки наприкінці.
+### 📝 Змінені файли
+#### `main.py`
+- `LLMAnalyzer._analyze_with_anthropic()` — додано `cache_control={"type": "ephemeral"}`
+- `generate_legal_position()` (Anthropic branch) — додано `cache_control={"type": "ephemeral"}` в `message_params`
+#### `prompts.py`
+- `LEGAL_POSITION_PROMPT` — переміщено `<court_decision>` та `<comment>` в кінець промпту після `</output_format>`
+**3. OpenAI Prompt Caching (`main.py`)**
+OpenAI кешує автоматично для запитів ≥ 1024 токенів — жодних параметрів API вмикати не потрібно. Реструктуризація промпту (п. 2) вже забезпечує максимальний prefix для cache hit.
+Додано логування cache hits через `usage.prompt_tokens_details.cached_tokens`:
+- `LLMAnalyzer._analyze_with_openai()` — `[CACHE] OpenAI analysis: X/Y input tokens from cache`
+- `generate_legal_position()` (OpenAI branch) — `[CACHE] OpenAI generation: X/Y input tokens from cache`
+### 💰 Очікуваний ефект
+| Провайдер | Механізм | Зниження вартості | Зниження latency |
+|-----------|----------|-------------------|-----------------|
+| Anthropic | `cache_control` (ephemeral) + змінні блоки в кінці | до 90% | до 85% |
+| OpenAI    | автоматичне (≥1024 токенів) + змінні блоки в кінці | до 50% | до 80% |
+---
 # Changelog - Додано редагування промптів з ізоляцією сесій
 ## Дата: 2025-12-28

main.py CHANGED Viewed

@@ -166,30 +166,32 @@ def download_s3_folder(bucket_name: str, prefix: str, local_dir: Path) -> None:
 def initialize_components() -> bool:
     """Initialize all necessary components for the application."""
     try:
         # Create local directory if it doesn't exist
         LOCAL_DIR.mkdir(parents=True, exist_ok=True)
-        # Download index files from S3 only if S3 client is available and local files don't exist
         missing_files = [f for f in REQUIRED_FILES if not (LOCAL_DIR / f).exists()]
         if missing_files:
-            if s3_client:
-                print("Some required files are missing locally. Attempting to download from S3...")
-                download_s3_folder(BUCKET_NAME, PREFIX_RETRIEVER, LOCAL_DIR)
-            else:
-                print(f"Warning: Missing required files and no S3 client available: {', '.join(missing_files)}")
-                print(f"Checking if files exist in {LOCAL_DIR}...")
         else:
             print(f"All required files found locally in {LOCAL_DIR}")
-        if not LOCAL_DIR.exists():
-            raise FileNotFoundError(f"Directory not found: {LOCAL_DIR}")
-        # Check for required files again
         missing_files = [f for f in REQUIRED_FILES if not (LOCAL_DIR / f).exists()]
         if missing_files:
-            raise FileNotFoundError(f"Missing required files: {', '.join(missing_files)}")
         # Initialize search components if any embedding model is available
         if embed_model:
@@ -394,7 +396,13 @@ class LLMAnalyzer:
                         raise last_error
             response_text = response.choices[0].message.content
             # Verify it's valid JSON
             json_data = extract_json_from_text(response_text)
             return json.dumps(json_data, ensure_ascii=False) if json_data else response_text
@@ -465,7 +473,8 @@ class LLMAnalyzer:
                 max_tokens=self.max_tokens or MAX_TOKENS_ANALYSIS,
                 temperature=self.temperature,
                 system=SYSTEM_PROMPT,
-                messages=[{"role": "user", "content": prompt}]
             )
             response_text = response.content[0].text
@@ -837,6 +846,12 @@ def generate_legal_position(
                 response_text = response.choices[0].message.content
                 print(f"[DEBUG] OpenAI response length: {len(response_text) if response_text else 0}")
                 json_response = extract_json_from_text(response_text)
                 if json_response:
@@ -973,7 +988,8 @@ def generate_legal_position(
                 "max_tokens": max_tokens or MAX_TOKENS_CONFIG["anthropic"],
                 "system": system_prompt,
                 "messages": messages,
-                "temperature": temperature
             }
             # Add thinking config if enabled

 def initialize_components() -> bool:
     """Initialize all necessary components for the application."""
+    from index_loader import load_indexes_with_fallback
     try:
         # Create local directory if it doesn't exist
         LOCAL_DIR.mkdir(parents=True, exist_ok=True)
+        # Check if required files are present
         missing_files = [f for f in REQUIRED_FILES if not (LOCAL_DIR / f).exists()]
         if missing_files:
+            print(f"Missing index files: {', '.join(missing_files)}")
+            print(f"Attempting to load indexes via fallback (local → HF Dataset → S3)...")
+            indexes_ok = load_indexes_with_fallback(str(LOCAL_DIR))
+            if not indexes_ok:
+                # Last resort: try S3 directly if client is available
+                if s3_client:
+                    print("Fallback failed, trying S3 directly...")
+                    download_s3_folder(BUCKET_NAME, PREFIX_RETRIEVER, LOCAL_DIR)
+                else:
+                    print(f"Warning: No S3 client and fallback failed for: {', '.join(missing_files)}")
         else:
             print(f"All required files found locally in {LOCAL_DIR}")
+        # Final check
         missing_files = [f for f in REQUIRED_FILES if not (LOCAL_DIR / f).exists()]
         if missing_files:
+            raise FileNotFoundError(f"Missing required files after all attempts: {', '.join(missing_files)}")
         # Initialize search components if any embedding model is available
         if embed_model:
                         raise last_error
             response_text = response.choices[0].message.content
+            # Log cache hit stats (automatic caching, no config needed)
+            if hasattr(response, 'usage') and hasattr(response.usage, 'prompt_tokens_details'):
+                cached = getattr(response.usage.prompt_tokens_details, 'cached_tokens', 0)
+                total = response.usage.prompt_tokens
+                print(f"[CACHE] OpenAI analysis: {cached}/{total} input tokens from cache")
             # Verify it's valid JSON
             json_data = extract_json_from_text(response_text)
             return json.dumps(json_data, ensure_ascii=False) if json_data else response_text
                 max_tokens=self.max_tokens or MAX_TOKENS_ANALYSIS,
                 temperature=self.temperature,
                 system=SYSTEM_PROMPT,
+                messages=[{"role": "user", "content": prompt}],
+                cache_control={"type": "ephemeral"}
             )
             response_text = response.content[0].text
                 response_text = response.choices[0].message.content
                 print(f"[DEBUG] OpenAI response length: {len(response_text) if response_text else 0}")
+                # Log cache hit stats (automatic caching, no config needed)
+                if hasattr(response, 'usage') and hasattr(response.usage, 'prompt_tokens_details'):
+                    cached = getattr(response.usage.prompt_tokens_details, 'cached_tokens', 0)
+                    total = response.usage.prompt_tokens
+                    print(f"[CACHE] OpenAI generation: {cached}/{total} input tokens from cache")
                 json_response = extract_json_from_text(response_text)
                 if json_response:
                 "max_tokens": max_tokens or MAX_TOKENS_CONFIG["anthropic"],
                 "system": system_prompt,
                 "messages": messages,
+                "temperature": temperature,
+                "cache_control": {"type": "ephemeral"}
             }
             # Add thinking config if enabled

prompts.py CHANGED Viewed

@@ -18,14 +18,6 @@ LEGAL_POSITION_PROMPT = """
 правових позицій Верховного Суду (lpd.court.gov.ua).
 </task>
-<court_decision>
-{court_decision_text}
-</court_decision>
-<comment>
-{comment}
-</comment>
 <strategy>
 Постанова Верховного Суду має типову структуру. Для формулювання правової позиції
 зосередься ВИКЛЮЧНО на розділах:
@@ -150,6 +142,14 @@ LEGAL_POSITION_PROMPT = """
 }}
 </output_format>
 """
 PRECEDENT_ANALYSIS_TEMPLATE = PromptTemplate(

 правових позицій Верховного Суду (lpd.court.gov.ua).
 </task>
 <strategy>
 Постанова Верховного Суду має типову структуру. Для формулювання правової позиції
 зосередься ВИКЛЮЧНО на розділах:
 }}
 </output_format>
+<court_decision>
+{court_decision_text}
+</court_decision>
+<comment>
+{comment}
+</comment>
 """
 PRECEDENT_ANALYSIS_TEMPLATE = PromptTemplate(