# Hugging Face Spaces app: local GGUF chat with Gradio.
# (The "Build error" lines previously here were Spaces build-log residue,
# not part of the source file.)
import gradio as gr
from llama_cpp import Llama

# Load the model at import time - this blocks app startup and can take
# several minutes on first run while the GGUF file downloads from the Hub.
llm = Llama.from_pretrained(
    repo_id="bartowski/Llama-3-8B-Lexi-Uncensored-GGUF",
    filename="*Q4_K_M.gguf",  # glob for the 4-bit (Q4_K_M) quantization - CPU-friendly
    n_ctx=4096,       # context window in tokens
    n_threads=4,      # CPU threads for inference
    n_gpu_layers=0,   # 0 = pure CPU; no layers offloaded to GPU
    verbose=False
)
def chat(message, history):
    """Generate one assistant reply for a Gradio ChatInterface.

    Args:
        message: The user's new message (str).
        history: Prior turns. Supports both Gradio history formats:
            - "messages" format: list of {"role": ..., "content": ...} dicts
              (default in Gradio 5);
            - legacy "tuples" format: list of (user_text, assistant_text) pairs.

    Returns:
        The model's reply text, or an "Error: ..." string if inference fails.
    """
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            # "messages" format: pass the turn through as-is.
            # (The original tuple-unpacking loop would silently unpack a dict
            # into its KEYS, sending "role"/"content" as conversation text.)
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Legacy format: one (user, assistant) pair per turn.
            human, assistant = turn
            messages.append({"role": "user", "content": human})
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})
    try:
        response = llm.create_chat_completion(
            messages=messages,
            max_tokens=512,
            temperature=0.7,
        )
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        # UI boundary: surface the failure as chat text rather than
        # crashing the Gradio event handler.
        return f"Error: {str(e)}"
# Wire the chat function into Gradio's prebuilt chat UI.
# NOTE(review): no `type=` argument is passed, so the history format handed to
# `chat` depends on the installed Gradio version's default — confirm it matches
# what `chat` expects.
demo = gr.ChatInterface(
    chat,
    title="AI Chat",
    description="Context maintained during session, resets on refresh"
)
# Start the web server (blocks until shutdown).
demo.launch()