bielik_app_service / app /logic /grammar_utils.py
Patryk Studzinski
add GBNF grammar utilities for structured LLM output; integrate grammar in model generation
329abd1
"""
GBNF Grammar utilities for constrained LLM output.
Uses llama.cpp grammar feature to force valid JSON output,
dramatically speeding up generation and ensuring parseability.
"""
from typing import Optional
def create_infill_grammar(num_gaps: int) -> str:
    """
    Build a GBNF grammar that constrains output to a JSON gap-fill object.

    The grammar accepts a single object with a "gaps" array holding one
    entry per gap, in ascending index order. For three gaps the output
    looks like:
        {"gaps": [{"index": 1, "choice": "czerwony"}, {"index": 2, "choice": "diesel"}, {"index": 3, "choice": "niski"}]}

    Each "choice" is a (possibly empty) phrase of space-separated words
    made of ASCII letters, Polish letters, digits and the characters
    ``. , % -``.

    Args:
        num_gaps: Number of gaps to fill. Values below 1 are treated as 1
            and values above 10 are treated as 10.

    Returns:
        GBNF grammar string
    """
    # Clamp the request into the supported 1..10 range.
    gap_count = min(max(num_gaps, 1), 10)
    indices = range(1, gap_count + 1)

    # Sequence of gap rule references inside the array, comma-separated.
    item_sequence = ' "," ws '.join(f"gap{n}" for n in indices)

    # One rule per gap; the index is baked into the literal so the model
    # cannot renumber or reorder entries.
    rules_block = "\n".join(
        f'gap{n} ::= "{{" ws "\\"index\\": {n}, \\"choice\\": \\"" phrase "\\"" ws "}}"'
        for n in indices
    )

    root_rule = f'root ::= "{{" ws "\\"gaps\\": [" ws {item_sequence} ws "]" ws "}}"'

    # Rules that do not depend on the gap count (terminated by a newline).
    shared_rules = (
        "# Allow words, numbers, spaces, and common Polish characters\n"
        "phrase ::= (word (space word)*)?\n"
        "word ::= [a-zA-ZżźćńółęąśŻŹĆŃÓŁĘĄŚ0-9.,%-]+\n"
        'space ::= " "\n'
        "ws ::= [ \\t\\n]*\n"
    )

    return "\n".join([root_rule, rules_block, shared_rules])
def create_single_word_grammar() -> str:
    """
    Build the GBNF grammar used when each gap is filled separately.

    The grammar accepts one word optionally followed by up to four more
    space-separated words (one to five words in total) and nothing else.
    Words consist of ASCII letters, Polish letters, digits and the
    characters ``. , % -``.

    Returns:
        GBNF grammar string
    """
    rule_lines = (
        "root ::= phrase",
        "phrase ::= word (space word){0,4}",
        "word ::= [a-zA-ZżźćńółęąśŻŹĆŃÓŁĘĄŚ0-9.,%-]+",
        'space ::= " "',
    )
    # Every rule, including the last one, is terminated by a newline.
    return "\n".join(rule_lines) + "\n"
# Infill grammars for every supported gap count (1 through 10), built once
# at import time and keyed by gap count.
GRAMMAR_CACHE = {
    gap_count: create_infill_grammar(gap_count) for gap_count in range(1, 11)
}
def get_infill_grammar(num_gaps: int) -> str:
    """
    Return the infill grammar for ``num_gaps`` gaps.

    Uses the pre-built entry from GRAMMAR_CACHE when one exists for this
    gap count; otherwise builds the grammar with create_infill_grammar.

    Args:
        num_gaps: Number of gaps to fill

    Returns:
        GBNF grammar string
    """
    prebuilt = GRAMMAR_CACHE.get(num_gaps)
    if prebuilt is None:
        # No cached entry for this count: build the grammar on demand.
        return create_infill_grammar(num_gaps)
    return prebuilt