Spaces:

studzinsky
/

bielik_app_service

Restarting

App Files Files Community

Patryk Studzinski committed 12 days ago

Commit

14fc89e

1 Parent(s): 6cc98f9

Improve Polish grammar in infill prompt + remove debug logs

Browse files

Files changed (3) hide show

app/domains/cars/prompts.py +5 -1
app/logic/infill_utils.py +5 -54
app/main.py +0 -22

app/domains/cars/prompts.py CHANGED Viewed

@@ -54,10 +54,14 @@ Każda luka powinna być uzupełniona słowem lub krótką frazą (1-4 słowa).
 ZASADY:
 1. NIE zmieniaj żadnego tekstu poza lukami [GAP:n]
-2. Uzupełnienia muszą pasować kontekstowo i gramatycznie
 3. Używaj słownictwa typowego dla ogłoszeń motoryzacyjnych
 4. Dla każdej luki podaj {options.top_n_per_gap} alternatywnych propozycji
 WYMAGANY FORMAT ODPOWIEDZI (tylko JSON, bez komentarzy):
 {{
     "filled_text": "Pełny tekst z uzupełnionymi lukami",

 ZASADY:
 1. NIE zmieniaj żadnego tekstu poza lukami [GAP:n]
+2. Uzupełnienia muszą pasować kontekstowo i BARDZO WAŻNE - Gramatycznie
 3. Używaj słownictwa typowego dla ogłoszeń motoryzacyjnych
 4. Dla każdej luki podaj {options.top_n_per_gap} alternatywnych propozycji
+GRAMATYKA POLSKA - BARDZO WAŻNE:
+- Uzupełnienia MUSZĄ być w odpowiednim przypadku gramatycznym (deklinacja)!
+- Dopasuj formę do kontekstu zdania!
 WYMAGANY FORMAT ODPOWIEDZI (tylko JSON, bez komentarzy):
 {{
     "filled_text": "Pełny tekst z uzupełnionymi lukami",

app/logic/infill_utils.py CHANGED Viewed

@@ -89,7 +89,7 @@ def detect_gaps(text: str, notation: str = "auto") -> List[GapInfo]:
     return gaps
-def parse_infill_json(raw_output: str, debug: bool = True) -> Optional[dict]:
     """
     Extract and parse JSON from LLM output.
@@ -99,45 +99,22 @@ def parse_infill_json(raw_output: str, debug: bool = True) -> Optional[dict]:
     - Function-call style wrapper ({"name": "...", "arguments": {...}})
     - Double-escaped JSON strings in arguments field
     - Minor formatting issues
-    Args:
-        raw_output: Raw text from LLM
-        debug: If True, print debug info to logs
     Returns:
         Parsed dict with 'filled_text' and 'gaps' keys, or None if parsing fails
-    Expected JSON format:
-    {
-        "filled_text": "Complete text with gaps filled",
-        "gaps": [
-            {"index": 1, "marker": "[GAP:1]", "choice": "word", "alternatives": ["alt1", "alt2"]}
-        ]
-    }
     """
     if not raw_output:
-        if debug:
-            print("[INFILL_PARSER] Empty raw_output received")
         return None
-    if debug:
-        print(f"[INFILL_PARSER] Raw output length: {len(raw_output)}")
-        print(f"[INFILL_PARSER] Raw output preview: {raw_output[:500]}...")
     # Try to extract JSON from markdown code blocks
     json_block_pattern = r'```(?:json)?\s*([\s\S]*?)\s*```'
     match = re.search(json_block_pattern, raw_output)
     if match:
         raw_output = match.group(1)
-        if debug:
-            print("[INFILL_PARSER] Extracted from markdown code block")
-    # Try to find JSON object boundaries
-    # Look for the outermost { }
     start_idx = raw_output.find('{')
     if start_idx == -1:
-        if debug:
-            print("[INFILL_PARSER] No JSON object found (no opening brace)")
         return None
     # Find matching closing brace
@@ -153,45 +130,26 @@ def parse_infill_json(raw_output: str, debug: bool = True) -> Optional[dict]:
                 break
     if end_idx == -1:
-        if debug:
-            print("[INFILL_PARSER] No matching closing brace found")
         return None
     json_str = raw_output[start_idx:end_idx]
-    if debug:
-        print(f"[INFILL_PARSER] Extracted JSON string: {json_str[:300]}...")
     try:
         parsed = json.loads(json_str)
-        if debug:
-            print(f"[INFILL_PARSER] Parsed keys: {list(parsed.keys())}")
         # Handle function-call style wrapper with STRING arguments (double-escaped):
         # {"name": "fill_in_text", "arguments": "{\"filled_text\": \"...\"}"}
         if 'arguments' in parsed:
             args = parsed['arguments']
             if isinstance(args, str):
-                # Arguments is a JSON string - parse it
-                if debug:
-                    print(f"[INFILL_PARSER] Arguments is a string, parsing inner JSON...")
-                    print(f"[INFILL_PARSER] Arguments string preview: {args[:200]}...")
                 try:
                     parsed = json.loads(args)
-                    if debug:
-                        print(f"[INFILL_PARSER] Successfully parsed inner JSON, keys: {list(parsed.keys())}")
-                except json.JSONDecodeError as e:
-                    if debug:
-                        print(f"[INFILL_PARSER] Failed to parse inner JSON string: {e}")
                     return None
             elif isinstance(args, dict):
-                # Arguments is already a dict
                 parsed = args
-                if debug:
-                    print("[INFILL_PARSER] Arguments is already a dict")
-        # Also handle: {"name": "...", "parameters": {...}} or string parameters
         if 'parameters' in parsed:
             params = parsed['parameters']
             if isinstance(params, str):
@@ -204,17 +162,10 @@ def parse_infill_json(raw_output: str, debug: bool = True) -> Optional[dict]:
         # Validate required fields
         if 'filled_text' not in parsed and 'gaps' not in parsed:
-            if debug:
-                print(f"[INFILL_PARSER] Missing required fields. Found: {list(parsed.keys())}")
             return None
-        if debug:
-            print(f"[INFILL_PARSER] Success! filled_text present: {'filled_text' in parsed}, gaps count: {len(parsed.get('gaps', []))}")
         return parsed
-    except json.JSONDecodeError as e:
-        if debug:
-            print(f"[INFILL_PARSER] JSON decode error: {e}")
         return None

     return gaps
+def parse_infill_json(raw_output: str) -> Optional[dict]:
     """
     Extract and parse JSON from LLM output.
     - Function-call style wrapper ({"name": "...", "arguments": {...}})
     - Double-escaped JSON strings in arguments field
     - Minor formatting issues
     Returns:
         Parsed dict with 'filled_text' and 'gaps' keys, or None if parsing fails
     """
     if not raw_output:
         return None
     # Try to extract JSON from markdown code blocks
     json_block_pattern = r'```(?:json)?\s*([\s\S]*?)\s*```'
     match = re.search(json_block_pattern, raw_output)
     if match:
         raw_output = match.group(1)
+    # Find JSON object boundaries
     start_idx = raw_output.find('{')
     if start_idx == -1:
         return None
     # Find matching closing brace
                 break
     if end_idx == -1:
         return None
     json_str = raw_output[start_idx:end_idx]
     try:
         parsed = json.loads(json_str)
         # Handle function-call style wrapper with STRING arguments (double-escaped):
         # {"name": "fill_in_text", "arguments": "{\"filled_text\": \"...\"}"}
         if 'arguments' in parsed:
             args = parsed['arguments']
             if isinstance(args, str):
                 try:
                     parsed = json.loads(args)
+                except json.JSONDecodeError:
                     return None
             elif isinstance(args, dict):
                 parsed = args
+        # Also handle: {"name": "...", "parameters": {...}}
         if 'parameters' in parsed:
             params = parsed['parameters']
             if isinstance(params, str):
         # Validate required fields
         if 'filled_text' not in parsed and 'gaps' not in parsed:
             return None
         return parsed
+    except json.JSONDecodeError:
         return None

app/main.py CHANGED Viewed

@@ -393,17 +393,11 @@ async def process_infill_item(
     Returns InfillResult with status, filled_text, and gaps.
     """
     try:
-        print(f"\n{'='*60}")
-        print(f"[INFILL] Processing item id={item.id} with model={model_name}")
-        print(f"[INFILL] Input text: {item.text_with_gaps[:200]}...")
         # Normalize gaps to [GAP:n] format
         normalized_text, gaps = normalize_gaps_to_tagged(item.text_with_gaps)
-        print(f"[INFILL] Detected {len(gaps)} gaps: {gaps}")
         if not gaps:
             # No gaps found, return original text
-            print("[INFILL] No gaps found, returning original text")
             return InfillResult(
                 id=item.id,
                 status="ok",
@@ -414,9 +408,6 @@ async def process_infill_item(
         # Build prompt
         chat_messages = create_infill_prompt(normalized_text, options)
-        print(f"[INFILL] Prompt messages: {len(chat_messages)} messages")
-        for i, msg in enumerate(chat_messages):
-            print(f"[INFILL] Message {i} ({msg.get('role', 'unknown')}): {str(msg.get('content', ''))[:300]}...")
         # Generate
         llm = await registry.get_model(model_name)
@@ -427,16 +418,11 @@ async def process_infill_item(
             top_p=0.9,
         )
-        print(f"[INFILL] Raw model output ({len(raw_output)} chars):")
-        print(f"[INFILL] {raw_output}")
         # Parse JSON from output
         parsed = parse_infill_json(raw_output)
-        print(f"[INFILL] Parsed result: {parsed}")
         if not parsed:
             # JSON parsing failed
-            print(f"[INFILL] ERROR: JSON parsing failed!")
             return InfillResult(
                 id=item.id,
                 status="error",
@@ -459,16 +445,11 @@ async def process_infill_item(
             gap_fills.append(gap_fill)
             fills_dict[gap_fill.index] = gap_fill.choice
-        print(f"[INFILL] Extracted {len(gap_fills)} gap fills")
         # Get filled text - prefer model's version, fallback to reconstruction
         filled_text = parsed.get("filled_text")
         if not filled_text and fills_dict:
             filled_text = apply_fills(normalized_text, gaps, fills_dict)
-        print(f"[INFILL] Final filled_text: {filled_text[:200] if filled_text else 'None'}...")
-        print(f"[INFILL] Success for item {item.id}")
         return InfillResult(
             id=item.id,
             status="ok",
@@ -478,9 +459,6 @@ async def process_infill_item(
         )
     except Exception as e:
-        import traceback
-        print(f"[INFILL] EXCEPTION: {str(e)}")
-        print(f"[INFILL] Traceback: {traceback.format_exc()}")
         return InfillResult(
             id=item.id,
             status="error",

     Returns InfillResult with status, filled_text, and gaps.
     """
     try:
         # Normalize gaps to [GAP:n] format
         normalized_text, gaps = normalize_gaps_to_tagged(item.text_with_gaps)
         if not gaps:
             # No gaps found, return original text
             return InfillResult(
                 id=item.id,
                 status="ok",
         # Build prompt
         chat_messages = create_infill_prompt(normalized_text, options)
         # Generate
         llm = await registry.get_model(model_name)
             top_p=0.9,
         )
         # Parse JSON from output
         parsed = parse_infill_json(raw_output)
         if not parsed:
             # JSON parsing failed
             return InfillResult(
                 id=item.id,
                 status="error",
             gap_fills.append(gap_fill)
             fills_dict[gap_fill.index] = gap_fill.choice
         # Get filled text - prefer model's version, fallback to reconstruction
         filled_text = parsed.get("filled_text")
         if not filled_text and fills_dict:
             filled_text = apply_fills(normalized_text, gaps, fills_dict)
         return InfillResult(
             id=item.id,
             status="ok",
         )
     except Exception as e:
         return InfillResult(
             id=item.id,
             status="error",