Spaces:

studzinsky
/

bielik_app_service

Sleeping

Patryk Studzinski committed on Dec 29, 2025

Commit

068583f

1 Parent(s): c50ae32

refine GBNF grammar for car advertisement; ensure compact JSON output and improve gap-item structure

enhance infill utilities to repair truncated JSON responses; extract individual gap items from incomplete data

Files changed (2) hide show

app/logic/answers.gbnf CHANGED Viewed

@@ -1,16 +1,15 @@
 # GBNF Grammar for Car Advertisement Gap Filling
-# Forces model to output valid JSON with gap fills
-# Supports 1-10 gaps with Polish characters
-root ::= "{" ws "\"gaps\":" ws "[" ws gap-list ws "]" ws "}"
-gap-list ::= gap-item (ws "," ws gap-item)*
-gap-item ::= "{" ws "\"index\":" ws number ws "," ws "\"choice\":" ws "\"" phrase "\"" ws "}"
-# Allow words with Polish characters, numbers, spaces
 phrase ::= word (space word){0,4}
 word ::= [a-zA-ZżźćńółęąśŻŹĆŃÓŁĘĄŚ0-9.,%-]+
 space ::= " "
-number ::= [0-9]+
-ws ::= [ \t\n]*

 # GBNF Grammar for Car Advertisement Gap Filling
+# Forces model to output COMPACT valid JSON with gap fills
+# No whitespace/newlines to minimize token count
+root ::= "{\"gaps\":[" gap-list "]}"
+gap-list ::= gap-item ("," gap-item)*
+gap-item ::= "{\"index\":" number ",\"choice\":\"" phrase "\"}"
+# Allow words made of Polish letters, digits and the punctuation . , % - ; a phrase is 1 to 5 words separated by single spaces
 phrase ::= word (space word){0,4}
 word ::= [a-zA-ZżźćńółęąśŻŹĆŃÓŁĘĄŚ0-9.,%-]+
 space ::= " "
+number ::= [1-9][0-9]*

app/logic/infill_utils.py CHANGED Viewed

@@ -159,7 +159,26 @@ def parse_infill_response(raw_output: str) -> Optional[dict]:
                 return parsed
             except json.JSONDecodeError:
-                pass # Fall through to return None
     return None

                 return parsed
             except json.JSONDecodeError:
+                pass # Fall through to try repair
+    # Attempt 3: Repair truncated JSON (grammar output cut off by max_tokens)
+    # Extract individual gap items even if JSON is incomplete
+    gap_pattern = r'\{\s*"index"\s*:\s*(\d+)\s*,\s*"choice"\s*:\s*"([^"]+)"'
+    gap_matches = list(re.finditer(gap_pattern, raw_output))
+    if gap_matches:
+        for match in gap_matches:
+            index = int(match.group(1))
+            choice = match.group(2).strip()
+            gaps_list.append({
+                "index": index,
+                "choice": choice
+            })
+        return {
+            "filled_text": None,
+            "gaps": gaps_list
+        }
     return None