import json
import re
import unicodedata

from llama_cpp import Llama
|
|
| |
| |
| |
# Path to the GGUF model file handed to Llama(model_path=...); the leading
# "./" makes it relative to the current working directory.
GGUF_PATH = "./football-extractor-q4.gguf"
|
|
# System message sent with every chat completion. It pins the output contract
# (a bare JSON array of objects with six fixed keys) that normalize() and
# fix_keys() later repair when the model deviates from it.
SYSTEM_PROMPT = (
    "You are a football data extraction assistant. "
    "Extract structured data from the message and return ONLY a valid JSON array. "
    "Each object in the array must have exactly these keys: "
    "league, team_1, team_2, prediction, date, odds. "
    "If a field is missing, use null. No extra text, no markdown."
)
|
|
| |
| |
| |
# Load the model once at import time; extract() reuses this shared instance
# for every call.
llm = Llama(
    model_path=GGUF_PATH,
    n_ctx=2048,  # context window, in tokens
    n_gpu_layers=-1,  # -1 offloads all layers to the GPU (per llama-cpp-python docs)
    verbose=False,
)
print("✅ Model loaded")
|
|
| |
| |
| |
def clean_input(text: str) -> str:
    """Fold styled Unicode text down to plain characters.

    The text is NFKD-normalised, which maps compatibility characters (for
    example the mathematical bold letters used for "bold" text in Telegram
    messages) to their plain equivalents, and every combining mark is then
    dropped.

    Note that dropping combining marks also removes diacritics from ordinary
    letters, so ``"é"`` comes back as ``"e"``.

    Args:
        text: Raw message text.

    Returns:
        The folded text.
    """
    decomposed = unicodedata.normalize("NFKD", text)
    return "".join(ch for ch in decomposed if not unicodedata.combining(ch))
|
|
# Separators accepted between the two team names of a combined "match" value:
# a hyphen, en dash or em dash, or "vs" / "vs." in any letter case, each
# surrounded by whitespace.
_MATCH_SEPARATOR = re.compile(r"\s+(?:[-\u2013\u2014]|vs\.?)\s+", re.IGNORECASE)


def fix_keys(results: list) -> list:
    """Replace a combined ``match`` key with ``team_1`` / ``team_2``.

    Only items that have a ``match`` key and no ``team_1`` key are touched.
    The ``match`` value is removed and split once on the first separator
    (``"A - B"``, ``"A vs B"``, ...). A value without a separator becomes
    ``team_1`` with ``team_2`` set to ``None``. A non-string value (such as
    JSON ``null``) or an empty name yields ``None`` instead of raising.

    Args:
        results: Decoded model output; items that are not dicts are skipped.

    Returns:
        The same list object, with its dict items modified in place.
    """
    for item in results:
        if not isinstance(item, dict):
            continue
        if "match" not in item or "team_1" in item:
            continue
        match = item.pop("match")
        # Anything but a string cannot be split; treat it as "no teams given".
        parts = _MATCH_SEPARATOR.split(match, maxsplit=1) if isinstance(match, str) else []
        teams = [part.strip() or None for part in parts]
        teams += [None] * (2 - len(teams))
        item["team_1"], item["team_2"] = teams[0], teams[1]
    return results
|
|
def normalize(result: list) -> list:
    """Coerce decoded model output into a list of dicts.

    Accepted shapes:

    * a flat row of values, mapped positionally onto
      ``league, team_1, team_2, prediction, date, odds``;
    * a list whose items are dicts (kept as-is), lists (mapped positionally
      onto the same keys) or JSON-encoded strings (decoded first).

    Items that cannot be turned into a dict are dropped.

    Note: a list whose *first* element is neither a dict nor a list is
    treated as one flat row, so JSON-encoded strings are only decoded when
    they follow a dict or list element.

    Args:
        result: The decoded JSON array.

    Returns:
        A new list of dicts; empty when nothing usable was found.
    """
    keys = ("league", "team_1", "team_2", "prediction", "date", "odds")

    # A flat list of scalars is a single record given without its keys.
    if result and not isinstance(result[0], (dict, list)):
        return [dict(zip(keys, result))]

    normalized = []
    for item in result:
        if isinstance(item, str):
            try:
                item = json.loads(item)
            except json.JSONDecodeError:
                # Not JSON: nothing to recover from this item.
                continue
        if isinstance(item, list):
            item = dict(zip(keys, item))
        if isinstance(item, dict):
            normalized.append(item)
    return normalized
|
|
| |
| |
| |
def _load_json_lenient(raw: str):
    """Decode *raw* as JSON, tolerating text or code fences around the payload.

    Strict decoding is tried first. If it fails, the span from the first
    ``[`` to the last ``]`` and the span from the first ``{`` to the last
    ``}`` are tried, earliest opener first. This is a best-effort recovery.

    Raises:
        json.JSONDecodeError: If no JSON value could be recovered.
    """
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        spans = []
        for opener, closer in (("[", "]"), ("{", "}")):
            start, end = raw.find(opener), raw.rfind(closer)
            if 0 <= start < end:
                spans.append((start, end + 1))
        for start, end in sorted(spans):
            try:
                return json.loads(raw[start:end])
            except json.JSONDecodeError:
                continue
        # Nothing recoverable: surface the original strict-parse error.
        raise


def extract(text: str, debug: bool = False) -> list:
    """Run the model on one message and return the extracted predictions.

    The message is cleaned with ``clean_input``, sent to ``llm`` together
    with ``SYSTEM_PROMPT``, and the reply is decoded as JSON, passed through
    ``normalize`` and then ``fix_keys``.

    Args:
        text: Raw message text.
        debug: When true, print the raw model reply before parsing it.

    Returns:
        A list of dicts, or an empty list when the reply contains no
        decodable JSON (a diagnostic is printed in that case).
    """
    text = clean_input(text)

    response = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": text},
        ],
        temperature=0.0,
        max_tokens=512,
        stop=["<|im_end|>", "<|endoftext|>"],
    )

    # Guard against a missing (None) content so .strip() cannot raise.
    raw = (response["choices"][0]["message"]["content"] or "").strip()

    if debug:
        print(f"[raw] {repr(raw)}")

    try:
        parsed = _load_json_lenient(raw)
    except json.JSONDecodeError:
        print(f"[!] Could not parse JSON:\n{raw}")
        return []

    # A single object is treated as a one-element array.
    rows = normalize(parsed if isinstance(parsed, list) else [parsed])
    return fix_keys(rows)
|
|
| |
| |
| |
if __name__ == "__main__":
    # Sample messages for a manual smoke test of extract().
    tests = [
        # Plain-text tip with a single prediction.
        """⚽️ Prediction of the Day ⚽️
Date: 24/03/2026
League: Eerste divisie Netherlands
Match: FC Emmen - SC Cambuur
Kick off: 20:00 WAT
✅ Over 1.5
✅ Odds @1.13 on BETANO""",
        # Two tips in one message, with labels in Unicode "bold" letters;
        # this is the input shape clean_input() exists to fold back to ASCII.
        """⚽️ 𝐏𝐫𝐞𝐝𝐢𝐜𝐭𝐢𝐨𝐧 𝐨𝐟 𝐭𝐡𝐞 𝐃𝐚𝐲 ⚽️
𝐃𝐚𝐭𝐞: 24/03/2026
𝐋𝐞𝐚𝐠𝐮𝐞: League 1 England
𝐌𝐚𝐭𝐜𝐡: Doncaster Rovers - Port Vale
𝐊𝐢𝐜𝐤 𝐨𝐟𝐟: 20:45 WAT
✅ Under 3.5
✅ Odds @1.36 on BETANO
⚽️ 𝗙𝗼𝗼𝘁𝗯𝗮𝗹𝗹 𝗧𝗶𝗽 𝟮 ⚽️
𝐃𝐚𝐭𝐞: 24/03/2026
𝐋𝐞𝐚𝐠𝐮𝐞: La Liga
𝐌𝐚𝐭𝐜𝐡: Real Madrid - Barcelona
𝐊𝐢𝐜𝐤 𝐨𝐟𝐟: 21:00 WAT
✅ 1X
✅ Odds @1.42 on BETANO""",
        # Free-form message: no date, "VS" as the team separator.
        """wow predictions
MATCH: Juventus VS Napoli
League: Serie A
we forecast Over 2.5
Odds 1.75""",
    ]

    for i, test in enumerate(tests, 1):
        print(f"\n{'='*50}")
        print(f"TEST {i}: {test[:80]}...")
        result = extract(test)
        print(json.dumps(result, indent=2, ensure_ascii=False))
|
|