"""Download a GGUF model from the Hugging Face Hub and serve it with llama-cpp."""

import os
import subprocess
import sys

# Fast downloads. This must be set BEFORE huggingface_hub is imported: the
# library reads the flag once at import time, so assigning it after the
# import leaves accelerated transfers disabled. For the same reason
# hf_hub_download is imported lazily inside run().
# NOTE: with the flag active, the `hf_transfer` package has to be installed.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

MODEL_REPO_ID = "bartowski/Qwen2.5-Coder-7B-Instruct-GGUF"
MODEL_FILENAME = "Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf"


def build_server_command(model_path, host="0.0.0.0", port=7860, n_ctx=4096):
    """Return the argv list that launches ``llama_cpp.server``.

    Args:
        model_path: Path of the GGUF model file to serve.
        host: Interface the server binds to.
        port: TCP port the server listens on.
        n_ctx: Context window size passed as ``--n_ctx``.

    Returns:
        A list of strings suitable for ``subprocess.run`` (no shell involved).
    """
    return [
        # Use the interpreter running this script so the server sees the same
        # environment; fall back to "python3" if it cannot be determined.
        sys.executable or "python3",
        "-m", "llama_cpp.server",
        "--model", str(model_path),
        "--host", str(host),
        "--port", str(port),
        "--n_ctx", str(n_ctx),
    ]


def run():
    """Download the model into the current directory and start the server.

    Blocks until the server process exits.

    Returns:
        The exit code of the server process.
    """
    # Imported here so the environment flag above is already set when
    # huggingface_hub is loaded.
    from huggingface_hub import hf_hub_download

    # Download the model (llama-cpp is expected to be installed already).
    model_path = hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=MODEL_FILENAME,
        local_dir=".",
    )

    # Start the server on the file that was actually downloaded, rather than
    # on a second hard-coded copy of its name.
    completed = subprocess.run(build_server_command(model_path))
    return completed.returncode


if __name__ == "__main__":
    # Propagate the server's exit status to whoever launched this script.
    sys.exit(run())