# Source: HybriKo-117M-LinuxFC-SFT-v2 / demo_local_cpu.py
# Uploaded by gyung with huggingface_hub (commit 0825961, verified)
#!/usr/bin/env python3
"""
HybriKo-117M Linux Function Calling Demo (Local/Colab CPU)
Downloads model from HuggingFace and runs on CPU.
Usage:
pip install torch huggingface_hub sentencepiece
python scripts/demo_local_cpu.py --query "현재 폴더 보여줘"
"""
import torch
import sentencepiece as spm
import sys
import json
import re
import argparse
import os
# Download the required model files from the HuggingFace Hub.
def download_files():
    """Fetch model code, weights, and tokenizer from the HF Hub into the CWD.

    Installs ``huggingface_hub`` on the fly if it is missing. Files that
    already exist locally are skipped so repeated runs are cheap.
    """
    try:
        from huggingface_hub import hf_hub_download
    except ImportError:
        # Install into *this* interpreter's environment; a bare
        # os.system("pip ...") may hit a different Python on PATH.
        import subprocess
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "-q", "huggingface_hub"]
        )
        from huggingface_hub import hf_hub_download

    repo_id = "Yaongi/HybriKo-117M-LinuxFC-SFT-v2"
    files_to_download = [
        "configuration_hybridko.py",
        "modeling_hybridko.py",
        "pytorch_model.pt",
        "HybriKo_tok.model",
    ]
    for filename in files_to_download:
        if not os.path.exists(filename):
            # Bug fix: the original printed the literal "(unknown)" instead
            # of interpolating the file being downloaded.
            print(f"Downloading {filename}...")
            hf_hub_download(repo_id, filename, local_dir=".")
# Fetch the model files before importing the model code below.
download_files()
# Make the just-downloaded modules importable from the current directory.
sys.path.insert(0, ".")
# Project-local modules fetched by download_files() above.
from configuration_hybridko import HybriKoConfig
from modeling_hybridko import HybriKoModel
# Exact system prompt used during training — the SFT data was built with this
# precise text (ReAct-style Thought / Action / Action Input loop plus a fixed
# JSON tool schema). Do not edit: any deviation from the training prompt will
# degrade function-calling accuracy.
SYSTEM_PROMPT = """You are a Linux command assistant. You can use many tools (functions) to help users with their Linux tasks.
At each step, you need to give your thought to analyze the status now and what to do next, with a function call to actually execute your step. Your output should follow this format:
Thought:
Action
Action Input:
After the call, you will get the call result, and you are now in a new state.
Then you will analyze your status now, then decide what to do next...
After many (Thought-call) pairs, you finally perform the task, then you can give your final answer.
Remember:
1. The state change is irreversible, you can't go back to one of the former state.
2. All the thought is short, at most in 5 sentences.
3. ALWAYS call "Finish" function at the end of the task.
4. If you cannot handle the task with the available tools, say you don't know and call Finish with give_answer.
You have access of the following tools:
[
{"name": "ls_command", "description": "List directory contents.", "parameters": {"type": "object", "properties": {"path": {"type": "string"}, "options": {"type": "string"}}, "required": ["path"]}},
{"name": "cd_command", "description": "Change the current working directory.", "parameters": {"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]}},
{"name": "mkdir_command", "description": "Create a new directory.", "parameters": {"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]}},
{"name": "rm_command", "description": "Remove files or directories.", "parameters": {"type": "object", "properties": {"path": {"type": "string"}, "recursive": {"type": "boolean"}}, "required": ["path"]}},
{"name": "cp_command", "description": "Copy files or directories.", "parameters": {"type": "object", "properties": {"source": {"type": "string"}, "destination": {"type": "string"}}, "required": ["source", "destination"]}},
{"name": "mv_command", "description": "Move or rename files.", "parameters": {"type": "object", "properties": {"source": {"type": "string"}, "destination": {"type": "string"}}, "required": ["source", "destination"]}},
{"name": "find_command", "description": "Find files by name pattern.", "parameters": {"type": "object", "properties": {"path": {"type": "string"}, "name": {"type": "string"}}, "required": ["path", "name"]}},
{"name": "cat_command", "description": "Display file contents.", "parameters": {"type": "object", "properties": {"file": {"type": "string"}}, "required": ["file"]}},
{"name": "grep_command", "description": "Search for patterns in files.", "parameters": {"type": "object", "properties": {"pattern": {"type": "string"}, "file": {"type": "string"}}, "required": ["pattern", "file"]}},
{"name": "head_command", "description": "Display first lines of a file.", "parameters": {"type": "object", "properties": {"file": {"type": "string"}, "lines": {"type": "integer"}}, "required": ["file"]}},
{"name": "tail_command", "description": "Display last lines of a file.", "parameters": {"type": "object", "properties": {"file": {"type": "string"}, "lines": {"type": "integer"}}, "required": ["file"]}},
{"name": "wc_command", "description": "Count lines, words, and bytes.", "parameters": {"type": "object", "properties": {"file": {"type": "string"}}, "required": ["file"]}},
{"name": "ps_command", "description": "Display running processes.", "parameters": {"type": "object", "properties": {"options": {"type": "string"}}, "required": []}},
{"name": "df_command", "description": "Display disk space usage.", "parameters": {"type": "object", "properties": {"options": {"type": "string"}}, "required": []}},
{"name": "du_command", "description": "Display directory space usage.", "parameters": {"type": "object", "properties": {"path": {"type": "string"}, "options": {"type": "string"}}, "required": ["path"]}},
{"name": "top_command", "description": "Display system processes in real-time.", "parameters": {"type": "object", "properties": {}, "required": []}},
{"name": "ping_command", "description": "Test network connectivity.", "parameters": {"type": "object", "properties": {"host": {"type": "string"}, "count": {"type": "integer"}}, "required": ["host"]}},
{"name": "curl_command", "description": "Transfer data from URL.", "parameters": {"type": "object", "properties": {"url": {"type": "string"}, "options": {"type": "string"}}, "required": ["url"]}},
{"name": "chmod_command", "description": "Change file permissions.", "parameters": {"type": "object", "properties": {"mode": {"type": "string"}, "file": {"type": "string"}}, "required": ["mode", "file"]}},
{"name": "tar_command", "description": "Archive or extract files.", "parameters": {"type": "object", "properties": {"options": {"type": "string"}, "archive": {"type": "string"}, "files": {"type": "string"}}, "required": ["options", "archive"]}},
{"name": "Finish", "description": "Complete the task.", "parameters": {"type": "object", "properties": {"give_answer": {"type": "string"}}, "required": ["give_answer"]}}
]"""
def load_model(threads=4):
    """Prepare the HybriKo model and SentencePiece tokenizer for CPU use.

    Args:
        threads: number of CPU threads torch is allowed to use.

    Returns:
        ``(model, tokenizer)`` with the model in eval mode on CPU.
    """
    torch.set_num_threads(threads)
    print(f"Using {threads} CPU threads")

    print("Loading tokenizer...")
    tokenizer = spm.SentencePieceProcessor()
    tokenizer.Load("HybriKo_tok.model")

    print("Loading model (CPU mode)...")
    cfg = HybriKoConfig(
        d_model=768, n_layers=12, vocab_size=32000,
        n_heads=12, n_kv_heads=3, ff_mult=3, max_seq_len=6144
    )
    net = HybriKoModel(cfg)
    # NOTE(review): weights_only=False unpickles arbitrary objects; this is
    # only acceptable because the checkpoint comes from the repo we just
    # downloaded ourselves — confirm the repo stays trusted.
    checkpoint = torch.load("pytorch_model.pt", map_location="cpu", weights_only=False)
    net.load_state_dict(checkpoint["model_state_dict"])
    net.eval()

    n_params = sum(p.numel() for p in net.parameters()) / 1e6
    print(f"✅ Model loaded on CPU ({n_params:.1f}M params)\n")
    return net, tokenizer
# Marker that precedes the JSON tool-call arguments in the model's output.
_ACTION_INPUT_MARKER = "Action Input:"


def _action_input_complete(text):
    """Return True once *text* holds a balanced JSON object after "Action Input:".

    A balanced object means the model has finished emitting its tool call,
    so generation can stop early.
    """
    idx = text.find(_ACTION_INPUT_MARKER)
    if idx == -1:
        return False
    after = text[idx + len(_ACTION_INPUT_MARKER):].strip()
    if not after.startswith("{"):
        return False
    depth = 0
    for ch in after:
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return True
    return False


@torch.inference_mode()
def generate(model, tokenizer, prompt, max_new_tokens=150):
    """Greedy-decode a completion on CPU.

    Args:
        model: callable returning logits of shape (batch, seq, vocab), either
            as a dict with a "logits" key or an object with a ``.logits`` attr.
        tokenizer: SentencePiece-style tokenizer (EncodeAsIds / DecodeIds).
        prompt: full ChatML prompt string.
        max_new_tokens: hard cap on generated tokens.

    Returns:
        The decoded completion (text after the prompt). Stops early when
        "<|im_end|>" appears or a complete "Action Input" JSON object has
        been emitted.
    """
    input_ids = tokenizer.EncodeAsIds(prompt)
    generated = torch.tensor([input_ids], dtype=torch.long)
    prompt_len = len(input_ids)

    for step in range(max_new_tokens):
        outputs = model(generated)
        logits = outputs["logits"] if isinstance(outputs, dict) else outputs.logits
        # Greedy decoding: always take the most likely next token.
        next_token = torch.argmax(logits[:, -1, :], dim=-1, keepdim=True)
        generated = torch.cat([generated, next_token], dim=1)

        # Progress indicator every 10 tokens.
        if (step + 1) % 10 == 0:
            print(".", end="", flush=True)

        new_text = tokenizer.DecodeIds(generated[0, prompt_len:].tolist())
        if "<|im_end|>" in new_text:
            break
        if _action_input_complete(new_text):
            print()  # newline after progress dots
            return new_text

    print()  # newline after progress dots
    return tokenizer.DecodeIds(generated[0, prompt_len:].tolist())
def create_prompt(user_input):
    """Wrap *user_input* in the ChatML template used at training time."""
    parts = (
        "<|im_start|>system\n",
        SYSTEM_PROMPT,
        "<|im_end|>\n<|im_start|>user\n",
        user_input,
        "<|im_end|>\n<|im_start|>assistant\n",
    )
    return "".join(parts)
def parse_response(response):
    """Parse a model completion into Thought / Action / Action Input parts.

    Args:
        response: raw generated text (possibly containing ChatML markers).

    Returns:
        Dict with keys "thought", "action", "action_input" (parsed JSON dict
        when valid, the raw matched string when JSON parsing fails, or None
        when absent) and "raw" (the trimmed response text).
    """
    # Trim anything past the end-of-turn / next-turn markers.
    if "<|im_end|>" in response:
        response = response.split("<|im_end|>")[0]
    if "<|im_start|>" in response:
        response = response.split("<|im_start|>")[0]

    result = {"thought": None, "action": None, "action_input": None, "raw": response}

    thought_match = re.search(r"Thought:\s*(.+?)(?=\s*Action:|\s*$)", response, re.DOTALL)
    if thought_match:
        result["thought"] = thought_match.group(1).strip()

    action_match = re.search(r"Action:\s*(\w+)", response)
    if action_match:
        result["action"] = action_match.group(1)

    raw_input = _extract_action_input(response)
    if raw_input is not None:
        try:
            result["action_input"] = json.loads(raw_input)
        except json.JSONDecodeError:
            # Keep the raw text so callers can still display/debug it
            # (previously a bare except: that swallowed everything).
            result["action_input"] = raw_input
    return result


def _extract_action_input(response):
    """Return the balanced {...} object after "Action Input:", or None.

    Unlike the simple regex r"\\{[^}]+\\}", this handles nested braces
    (JSON values that themselves contain objects) without truncating.
    """
    marker = "Action Input:"
    idx = response.find(marker)
    if idx == -1:
        return None
    after = response[idx + len(marker):].lstrip()
    if not after.startswith("{"):
        return None
    depth = 0
    for i, ch in enumerate(after):
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return after[: i + 1]
    return None  # unbalanced — treat as absent
def run_single(model, tokenizer, user_input):
    """Execute one inference round-trip: prompt -> generate -> parse."""
    chatml_prompt = create_prompt(user_input)
    print("Generating", end="", flush=True)
    raw_completion = generate(model, tokenizer, chatml_prompt)
    return parse_response(raw_completion)
def _print_result(result):
    """Pretty-print a parsed model result (shared by CLI and REPL modes)."""
    if result["thought"]:
        print(f"Thought: {result['thought']}")
    if result["action"]:
        print(f"Action: {result['action']}")
    if result["action_input"]:
        print(f"Input: {json.dumps(result['action_input'], ensure_ascii=False)}")
    if not result["thought"] and not result["action"]:
        # Nothing structured parsed — show a truncated raw dump instead.
        print(f"[Raw]: {result['raw'][:300]}")


def main():
    """CLI entry point: single-query mode (--query) or interactive REPL."""
    parser = argparse.ArgumentParser(description="HybriKo Linux FC Demo (CPU)")
    parser.add_argument("--query", type=str, help="Single query mode")
    parser.add_argument("--threads", type=int, default=4, help="Number of CPU threads")
    args = parser.parse_args()

    print("=" * 60)
    print(" HybriKo-117M Linux Function Calling Demo (CPU)")
    print("=" * 60)
    model, tokenizer = load_model(args.threads)

    if args.query:
        print(f"Input: {args.query}")
        print("-" * 40)
        result = run_single(model, tokenizer, args.query)
        _print_result(result)
        print("-" * 40)
    else:
        # Interactive mode
        print("Supported: ls, cd, mkdir, rm, cp, mv, find, cat, grep, head, tail, wc, ps, df, du, top, ping, curl, chmod, tar")
        print("Type 'quit' to exit\n")
        while True:
            try:
                user_input = input("[User] ").strip()
                if not user_input:
                    continue
                if user_input.lower() in ["quit", "exit", "q"]:
                    break
                result = run_single(model, tokenizer, user_input)
                print("\n[HybriKo]")
                print("-" * 40)
                _print_result(result)
                print("-" * 40 + "\n")
            except (KeyboardInterrupt, EOFError):
                # Ctrl-C / Ctrl-D both end the REPL cleanly.
                break
    print("Goodbye!")


if __name__ == "__main__":
    main()