Upload inference_local.py with huggingface_hub

6f03249 verified 11 days ago

5.21 kB

	import json
	import unicodedata
	from llama_cpp import Llama

	# ─────────────────────────────────────────────
	# CONFIG
	# ─────────────────────────────────────────────
	# Relative path of the GGUF model file passed to Llama as model_path.
	GGUF_PATH = "./football-extractor-q4.gguf"

	# System message sent ahead of every user message. It pins the output to a
	# bare JSON array whose objects carry exactly six keys, with null standing
	# in for anything the message does not mention.
	SYSTEM_PROMPT = " ".join([
	    "You are a football data extraction assistant.",
	    "Extract structured data from the message and return ONLY a valid JSON array.",
	    "Each object in the array must have exactly these keys:",
	    "league, team_1, team_2, prediction, date, odds.",
	    "If a field is missing, use null. No extra text, no markdown.",
	])

	# ─────────────────────────────────────────────
	# LOAD MODEL (runs on Mac Metal / CPU)
	# ─────────────────────────────────────────────
	# Instantiate the model once, at import time; extract() reuses this object
	# for every request.
	llm = Llama(
	    verbose=False,
	    n_gpu_layers=-1,  # -1 asks for every layer to be offloaded to the GPU (Metal on a Mac)
	    n_ctx=2048,  # context window size
	    model_path=GGUF_PATH,
	)
	print("✅ Model loaded")

	# ─────────────────────────────────────────────
	# HELPERS
	# ─────────────────────────────────────────────
	def clean_input(text: str) -> str:
	    """Return *text* NFKD-normalized with all combining marks removed.

	    Compatibility decomposition maps styled letters and digits (for
	    example the mathematical-bold alphabet used for "bold" text in
	    Telegram messages) to their plain equivalents. Because combining
	    marks are dropped afterwards, accents are stripped too
	    ("é" becomes "e").
	    """
	    decomposed = unicodedata.normalize('NFKD', text)
	    kept = [ch for ch in decomposed if unicodedata.combining(ch) == 0]
	    return ''.join(kept)

	def fix_keys(results: list) -> list:
	    """Replace a combined ``match`` key with ``team_1`` / ``team_2``.

	    For every dict that has ``match`` but no ``team_1``, the ``match``
	    value is removed and split once on ``" - "``; the two sides become
	    ``team_1`` and ``team_2`` (whitespace-stripped). When the separator
	    is absent the whole value goes to ``team_1`` and ``team_2`` is None.

	    Args:
	        results: Records to repair. The dicts are modified in place.

	    Returns:
	        The same ``results`` list that was passed in.
	    """
	    for item in results:
	        # Only dicts can carry a "match" key; leave anything else untouched
	        # instead of failing on a substring test or a missing .pop().
	        if not isinstance(item, dict):
	            continue
	        if "match" not in item or "team_1" in item:
	            continue

	        match = item.pop("match")
	        if not isinstance(match, str):
	            # The prompt tells the model to use null for missing fields, so
	            # "match": null is a legitimate reply; there is nothing to split.
	            item["team_1"] = None
	            item["team_2"] = None
	            continue

	        home, separator, away = match.partition(" - ")
	        item["team_1"] = home.strip()
	        item["team_2"] = away.strip() if separator else None
	    return results

	def normalize(result: list) -> list:
	    """Coerce decoded model output into a list of dicts.

	    Accepted shapes:

	    * a flat list whose first element is neither a dict nor a list: the
	      whole list is treated as the values of a single record and zipped
	      with the expected keys in order;
	    * a list of items, where each item may be a dict (kept as is), a list
	      of values (zipped with the expected keys) or a string holding JSON
	      (decoded first, then handled like the other two).

	    ``zip`` stops at the shorter side, so surplus values are dropped and
	    keys without a value are simply absent. Items that cannot be turned
	    into a dict are skipped.

	    Args:
	        result: The list decoded from the model's JSON reply.

	    Returns:
	        A new list containing only dicts.
	    """
	    keys = ["league", "team_1", "team_2", "prediction", "date", "odds"]
	    # A flat list of values is one record, not a list of records.
	    if result and not isinstance(result[0], (dict, list)):
	        return [dict(zip(keys, result))]
	    normalized = []
	    for item in result:
	        if isinstance(item, str):
	            try:
	                item = json.loads(item)
	            except (ValueError, RecursionError):
	                # Not decodable JSON: skip it. Catching only decode
	                # failures lets KeyboardInterrupt / SystemExit propagate,
	                # which a bare except would have swallowed.
	                continue
	        if isinstance(item, list):
	            item = dict(zip(keys, item))
	        if isinstance(item, dict):
	            normalized.append(item)
	    return normalized

	# ─────────────────────────────────────────────
	# INFERENCE
	# ─────────────────────────────────────────────
	def extract(text: str, debug: bool = False) -> list:
	    """Run the model on one message and return the extracted records.

	    The message is passed through ``clean_input``, sent to ``llm`` as the
	    user turn after ``SYSTEM_PROMPT``, and the reply is decoded as JSON.
	    A single JSON object is wrapped in a list; the list is then passed
	    through ``normalize`` and ``fix_keys``.

	    Args:
	        text: Raw message text.
	        debug: When true, print the repr of the raw model reply.

	    Returns:
	        A list of dicts. The list is empty (and a diagnostic is printed)
	        when the reply is not valid JSON or is valid JSON that is neither
	        an object nor an array.
	    """
	    text = clean_input(text)

	    response = llm.create_chat_completion(
	        messages=[
	            {"role": "system", "content": SYSTEM_PROMPT},
	            {"role": "user", "content": text},
	        ],
	        temperature=0.0,
	        max_tokens=512,
	        # End-of-turn markers, written without backslashes: "\|" is not a
	        # Python escape, so the backslash would stay in the string and the
	        # stop sequence would never match the marker the model emits.
	        stop=["<|im_end|>", "<|endoftext|>"],
	    )

	    # Guard against a null content field so .strip() cannot fail; an empty
	    # reply then falls through to the JSON error path below.
	    raw = (response["choices"][0]["message"]["content"] or "").strip()

	    if debug:
	        print(f"[raw] {repr(raw)}")

	    try:
	        parsed = json.loads(raw)
	    except json.JSONDecodeError:
	        print(f"[!] Could not parse JSON:\n{raw}")
	        return []

	    if isinstance(parsed, dict):
	        # A lone object is treated as a one-record array.
	        parsed = [parsed]
	    elif not isinstance(parsed, list):
	        # A bare scalar (null, number, string) is valid JSON but carries no
	        # record; do not let it be zipped into a bogus {"league": ...} row.
	        print(f"[!] Expected a JSON object or array:\n{raw}")
	        return []

	    return fix_keys(normalize(parsed))

	# ─────────────────────────────────────────────
	# TEST
	# ─────────────────────────────────────────────
	if __name__ == "__main__":
	    # Sample messages used as a manual smoke test when the file is run as a
	    # script; each one is fed to extract() and the result is pretty-printed.
	    samples = [
	        # one tip, plain text
	        """⚽️ Prediction of the Day ⚽️
	Date: 24/03/2026
	League: Eerste divisie Netherlands
	Match: FC Emmen - SC Cambuur
	Kick off: 20:00 WAT
	✅Over 1.5
	✅Odds @1.13 on BETANO""",

	        # two tips in one message, styled (bold) unicode letters
	        """⚽️ 𝐏𝐫𝐞𝐝𝐢𝐜𝐭𝐢𝐨𝐧 𝐨𝐟 𝐭𝐡𝐞 𝐃𝐚𝐲 ⚽️
	𝐃𝐚𝐭𝐞: 24/03/2026
	𝐋𝐞𝐚𝐠𝐮𝐞: League 1 England
	𝐌𝐚𝐭𝐜𝐡: Doncaster Rovers - Port Vale
	𝐊𝐢𝐜𝐤 𝐨𝐟𝐟: 20:45 WAT
	✅Under 3.5
	✅Odds @1.36 on BETANO
	⚽️ 𝗙𝗼𝗼𝘁𝗯𝗮𝗹𝗹 𝗧𝗶𝗽 𝟮 ⚽️
	𝐃𝐚𝐭𝐞: 24/03/2026
	𝐋𝐞𝐚𝐠𝐮𝐞: La Liga
	𝐌𝐚𝐭𝐜𝐡: Real Madrid - Barcelona
	𝐊𝐢𝐜𝐤 𝐨𝐟𝐟: 21:00 WAT
	✅1X
	✅Odds @1.42 on BETANO""",

	        # noisy message with no date
	        """wow predictions
	MATCH: Juventus VS Napoli
	League: Serie A
	we forecast Over 2.5
	Odds 1.75""",
	    ]

	    banner = "=" * 50
	    for number, message in enumerate(samples, 1):
	        print("\n" + banner)
	        # Only the first 80 characters are echoed, to keep the header short.
	        print(f"TEST {number}: {message[:80]}...")
	        extracted = extract(message)
	        print(json.dumps(extracted, indent=2, ensure_ascii=False))