Patryk Studzinski committed on
Commit
14fc89e
·
1 Parent(s): 6cc98f9

Improve Polish grammar in infill prompt + remove debug logs

Browse files
app/domains/cars/prompts.py CHANGED
@@ -54,10 +54,14 @@ Każda luka powinna być uzupełniona słowem lub krótką frazą (1-4 słowa).
54
 
55
  ZASADY:
56
  1. NIE zmieniaj żadnego tekstu poza lukami [GAP:n]
57
- 2. Uzupełnienia muszą pasować kontekstowo i gramatycznie
58
  3. Używaj słownictwa typowego dla ogłoszeń motoryzacyjnych
59
  4. Dla każdej luki podaj {options.top_n_per_gap} alternatywnych propozycji
60
 
 
 
 
 
61
  WYMAGANY FORMAT ODPOWIEDZI (tylko JSON, bez komentarzy):
62
  {{
63
  "filled_text": "Pełny tekst z uzupełnionymi lukami",
 
54
 
55
  ZASADY:
56
  1. NIE zmieniaj żadnego tekstu poza lukami [GAP:n]
57
+ 2. Uzupełnienia muszą pasować kontekstowo i BARDZO WAŻNE - Gramatycznie
58
  3. Używaj słownictwa typowego dla ogłoszeń motoryzacyjnych
59
  4. Dla każdej luki podaj {options.top_n_per_gap} alternatywnych propozycji
60
 
61
+ GRAMATYKA POLSKA - BARDZO WAŻNE:
62
+ - Uzupełnienia MUSZĄ być w odpowiednim przypadku gramatycznym (deklinacja)!
63
+ - Dopasuj formę do kontekstu zdania!
64
+
65
  WYMAGANY FORMAT ODPOWIEDZI (tylko JSON, bez komentarzy):
66
  {{
67
  "filled_text": "Pełny tekst z uzupełnionymi lukami",
app/logic/infill_utils.py CHANGED
@@ -89,7 +89,7 @@ def detect_gaps(text: str, notation: str = "auto") -> List[GapInfo]:
89
  return gaps
90
 
91
 
92
- def parse_infill_json(raw_output: str, debug: bool = True) -> Optional[dict]:
93
  """
94
  Extract and parse JSON from LLM output.
95
 
@@ -99,45 +99,22 @@ def parse_infill_json(raw_output: str, debug: bool = True) -> Optional[dict]:
99
  - Function-call style wrapper ({"name": "...", "arguments": {...}})
100
  - Double-escaped JSON strings in arguments field
101
  - Minor formatting issues
102
-
103
- Args:
104
- raw_output: Raw text from LLM
105
- debug: If True, print debug info to logs
106
 
107
  Returns:
108
  Parsed dict with 'filled_text' and 'gaps' keys, or None if parsing fails
109
-
110
- Expected JSON format:
111
- {
112
- "filled_text": "Complete text with gaps filled",
113
- "gaps": [
114
- {"index": 1, "marker": "[GAP:1]", "choice": "word", "alternatives": ["alt1", "alt2"]}
115
- ]
116
- }
117
  """
118
  if not raw_output:
119
- if debug:
120
- print("[INFILL_PARSER] Empty raw_output received")
121
  return None
122
 
123
- if debug:
124
- print(f"[INFILL_PARSER] Raw output length: {len(raw_output)}")
125
- print(f"[INFILL_PARSER] Raw output preview: {raw_output[:500]}...")
126
-
127
  # Try to extract JSON from markdown code blocks
128
  json_block_pattern = r'```(?:json)?\s*([\s\S]*?)\s*```'
129
  match = re.search(json_block_pattern, raw_output)
130
  if match:
131
  raw_output = match.group(1)
132
- if debug:
133
- print("[INFILL_PARSER] Extracted from markdown code block")
134
 
135
- # Try to find JSON object boundaries
136
- # Look for the outermost { }
137
  start_idx = raw_output.find('{')
138
  if start_idx == -1:
139
- if debug:
140
- print("[INFILL_PARSER] No JSON object found (no opening brace)")
141
  return None
142
 
143
  # Find matching closing brace
@@ -153,45 +130,26 @@ def parse_infill_json(raw_output: str, debug: bool = True) -> Optional[dict]:
153
  break
154
 
155
  if end_idx == -1:
156
- if debug:
157
- print("[INFILL_PARSER] No matching closing brace found")
158
  return None
159
 
160
  json_str = raw_output[start_idx:end_idx]
161
 
162
- if debug:
163
- print(f"[INFILL_PARSER] Extracted JSON string: {json_str[:300]}...")
164
-
165
  try:
166
  parsed = json.loads(json_str)
167
 
168
- if debug:
169
- print(f"[INFILL_PARSER] Parsed keys: {list(parsed.keys())}")
170
-
171
  # Handle function-call style wrapper with STRING arguments (double-escaped):
172
  # {"name": "fill_in_text", "arguments": "{\"filled_text\": \"...\"}"}
173
  if 'arguments' in parsed:
174
  args = parsed['arguments']
175
  if isinstance(args, str):
176
- # Arguments is a JSON string - parse it
177
- if debug:
178
- print(f"[INFILL_PARSER] Arguments is a string, parsing inner JSON...")
179
- print(f"[INFILL_PARSER] Arguments string preview: {args[:200]}...")
180
  try:
181
  parsed = json.loads(args)
182
- if debug:
183
- print(f"[INFILL_PARSER] Successfully parsed inner JSON, keys: {list(parsed.keys())}")
184
- except json.JSONDecodeError as e:
185
- if debug:
186
- print(f"[INFILL_PARSER] Failed to parse inner JSON string: {e}")
187
  return None
188
  elif isinstance(args, dict):
189
- # Arguments is already a dict
190
  parsed = args
191
- if debug:
192
- print("[INFILL_PARSER] Arguments is already a dict")
193
 
194
- # Also handle: {"name": "...", "parameters": {...}} or string parameters
195
  if 'parameters' in parsed:
196
  params = parsed['parameters']
197
  if isinstance(params, str):
@@ -204,17 +162,10 @@ def parse_infill_json(raw_output: str, debug: bool = True) -> Optional[dict]:
204
 
205
  # Validate required fields
206
  if 'filled_text' not in parsed and 'gaps' not in parsed:
207
- if debug:
208
- print(f"[INFILL_PARSER] Missing required fields. Found: {list(parsed.keys())}")
209
  return None
210
-
211
- if debug:
212
- print(f"[INFILL_PARSER] Success! filled_text present: {'filled_text' in parsed}, gaps count: {len(parsed.get('gaps', []))}")
213
 
214
  return parsed
215
- except json.JSONDecodeError as e:
216
- if debug:
217
- print(f"[INFILL_PARSER] JSON decode error: {e}")
218
  return None
219
 
220
 
 
89
  return gaps
90
 
91
 
92
+ def parse_infill_json(raw_output: str) -> Optional[dict]:
93
  """
94
  Extract and parse JSON from LLM output.
95
 
 
99
  - Function-call style wrapper ({"name": "...", "arguments": {...}})
100
  - Double-escaped JSON strings in arguments field
101
  - Minor formatting issues
 
 
 
 
102
 
103
  Returns:
104
  Parsed dict with 'filled_text' and 'gaps' keys, or None if parsing fails
 
 
 
 
 
 
 
 
105
  """
106
  if not raw_output:
 
 
107
  return None
108
 
 
 
 
 
109
  # Try to extract JSON from markdown code blocks
110
  json_block_pattern = r'```(?:json)?\s*([\s\S]*?)\s*```'
111
  match = re.search(json_block_pattern, raw_output)
112
  if match:
113
  raw_output = match.group(1)
 
 
114
 
115
+ # Find JSON object boundaries
 
116
  start_idx = raw_output.find('{')
117
  if start_idx == -1:
 
 
118
  return None
119
 
120
  # Find matching closing brace
 
130
  break
131
 
132
  if end_idx == -1:
 
 
133
  return None
134
 
135
  json_str = raw_output[start_idx:end_idx]
136
 
 
 
 
137
  try:
138
  parsed = json.loads(json_str)
139
 
 
 
 
140
  # Handle function-call style wrapper with STRING arguments (double-escaped):
141
  # {"name": "fill_in_text", "arguments": "{\"filled_text\": \"...\"}"}
142
  if 'arguments' in parsed:
143
  args = parsed['arguments']
144
  if isinstance(args, str):
 
 
 
 
145
  try:
146
  parsed = json.loads(args)
147
+ except json.JSONDecodeError:
 
 
 
 
148
  return None
149
  elif isinstance(args, dict):
 
150
  parsed = args
 
 
151
 
152
+ # Also handle: {"name": "...", "parameters": {...}}
153
  if 'parameters' in parsed:
154
  params = parsed['parameters']
155
  if isinstance(params, str):
 
162
 
163
  # Validate required fields
164
  if 'filled_text' not in parsed and 'gaps' not in parsed:
 
 
165
  return None
 
 
 
166
 
167
  return parsed
168
+ except json.JSONDecodeError:
 
 
169
  return None
170
 
171
 
app/main.py CHANGED
@@ -393,17 +393,11 @@ async def process_infill_item(
393
  Returns InfillResult with status, filled_text, and gaps.
394
  """
395
  try:
396
- print(f"\n{'='*60}")
397
- print(f"[INFILL] Processing item id={item.id} with model={model_name}")
398
- print(f"[INFILL] Input text: {item.text_with_gaps[:200]}...")
399
-
400
  # Normalize gaps to [GAP:n] format
401
  normalized_text, gaps = normalize_gaps_to_tagged(item.text_with_gaps)
402
- print(f"[INFILL] Detected {len(gaps)} gaps: {gaps}")
403
 
404
  if not gaps:
405
  # No gaps found, return original text
406
- print("[INFILL] No gaps found, returning original text")
407
  return InfillResult(
408
  id=item.id,
409
  status="ok",
@@ -414,9 +408,6 @@ async def process_infill_item(
414
 
415
  # Build prompt
416
  chat_messages = create_infill_prompt(normalized_text, options)
417
- print(f"[INFILL] Prompt messages: {len(chat_messages)} messages")
418
- for i, msg in enumerate(chat_messages):
419
- print(f"[INFILL] Message {i} ({msg.get('role', 'unknown')}): {str(msg.get('content', ''))[:300]}...")
420
 
421
  # Generate
422
  llm = await registry.get_model(model_name)
@@ -427,16 +418,11 @@ async def process_infill_item(
427
  top_p=0.9,
428
  )
429
 
430
- print(f"[INFILL] Raw model output ({len(raw_output)} chars):")
431
- print(f"[INFILL] {raw_output}")
432
-
433
  # Parse JSON from output
434
  parsed = parse_infill_json(raw_output)
435
- print(f"[INFILL] Parsed result: {parsed}")
436
 
437
  if not parsed:
438
  # JSON parsing failed
439
- print(f"[INFILL] ERROR: JSON parsing failed!")
440
  return InfillResult(
441
  id=item.id,
442
  status="error",
@@ -459,16 +445,11 @@ async def process_infill_item(
459
  gap_fills.append(gap_fill)
460
  fills_dict[gap_fill.index] = gap_fill.choice
461
 
462
- print(f"[INFILL] Extracted {len(gap_fills)} gap fills")
463
-
464
  # Get filled text - prefer model's version, fallback to reconstruction
465
  filled_text = parsed.get("filled_text")
466
  if not filled_text and fills_dict:
467
  filled_text = apply_fills(normalized_text, gaps, fills_dict)
468
 
469
- print(f"[INFILL] Final filled_text: {filled_text[:200] if filled_text else 'None'}...")
470
- print(f"[INFILL] Success for item {item.id}")
471
-
472
  return InfillResult(
473
  id=item.id,
474
  status="ok",
@@ -478,9 +459,6 @@ async def process_infill_item(
478
  )
479
 
480
  except Exception as e:
481
- import traceback
482
- print(f"[INFILL] EXCEPTION: {str(e)}")
483
- print(f"[INFILL] Traceback: {traceback.format_exc()}")
484
  return InfillResult(
485
  id=item.id,
486
  status="error",
 
393
  Returns InfillResult with status, filled_text, and gaps.
394
  """
395
  try:
 
 
 
 
396
  # Normalize gaps to [GAP:n] format
397
  normalized_text, gaps = normalize_gaps_to_tagged(item.text_with_gaps)
 
398
 
399
  if not gaps:
400
  # No gaps found, return original text
 
401
  return InfillResult(
402
  id=item.id,
403
  status="ok",
 
408
 
409
  # Build prompt
410
  chat_messages = create_infill_prompt(normalized_text, options)
 
 
 
411
 
412
  # Generate
413
  llm = await registry.get_model(model_name)
 
418
  top_p=0.9,
419
  )
420
 
 
 
 
421
  # Parse JSON from output
422
  parsed = parse_infill_json(raw_output)
 
423
 
424
  if not parsed:
425
  # JSON parsing failed
 
426
  return InfillResult(
427
  id=item.id,
428
  status="error",
 
445
  gap_fills.append(gap_fill)
446
  fills_dict[gap_fill.index] = gap_fill.choice
447
 
 
 
448
  # Get filled text - prefer model's version, fallback to reconstruction
449
  filled_text = parsed.get("filled_text")
450
  if not filled_text and fills_dict:
451
  filled_text = apply_fills(normalized_text, gaps, fills_dict)
452
 
 
 
 
453
  return InfillResult(
454
  id=item.id,
455
  status="ok",
 
459
  )
460
 
461
  except Exception as e:
 
 
 
462
  return InfillResult(
463
  id=item.id,
464
  status="error",