"""Local GGUF chatbot: a Gradio chat UI backed by llama.cpp (llama-cpp-python)."""

import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Download the quantized GGUF weights (cached locally by huggingface_hub,
# so repeated launches do not re-download).
model_path = hf_hub_download(
    repo_id="HaGPT/news-intelligence-chatbot",
    filename="meta-llama-3.1-8b.Q4_K_M.gguf",
)

# CPU-only inference (n_gpu_layers=0) with a 4096-token context window.
llm = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_threads=6,
    n_gpu_layers=0,
    verbose=False,
)


def chat_fn(message, history):
    """Generate the assistant's reply to *message* given the chat *history*.

    Accepts both Gradio history formats: the legacy list of
    ``(user, bot)`` tuples and the newer "messages" format — a list of
    ``{"role": ..., "content": ...}`` dicts (the original only handled
    tuples and would crash on dict-format history).

    Args:
        message: The user's latest message (plain string).
        history: Prior turns in either Gradio history format.

    Returns:
        The model's reply as a string.
    """
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            # Messages format: already shaped for the chat-completion API.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Legacy tuple format: one (user, assistant) pair per turn.
            user, bot = turn
            messages.append({"role": "user", "content": user})
            messages.append({"role": "assistant", "content": bot})
    messages.append({"role": "user", "content": message})

    response = llm.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.7,
    )
    return response["choices"][0]["message"]["content"]


interface = gr.ChatInterface(
    fn=chat_fn,
    title="📰 News Intelligence Bot",
    description="Your local GGUF chatbot running on llama.cpp",
)

if __name__ == "__main__":
    interface.launch()