# Hugging Face Spaces demo: Phi-3-mini chat via llama-cpp-python + Gradio.
# NOTE(review): the original paste carried two "Build error" markers from the
# Spaces build log — the likely cause is Llama(model_path=...) being given a
# Hugging Face repo path instead of a local .gguf file (fixed below).
import gradio as gr
from llama_cpp import Llama

# Load the quantized Phi-3 model (downloads on first run, then cached).
# BUG FIX: Llama(model_path=...) requires a LOCAL .gguf file path; passing a
# Hugging Face repo path raises at startup. Llama.from_pretrained() fetches
# the named file from the Hub and caches it locally before loading.
llm = Llama.from_pretrained(
    repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
    filename="Phi-3-mini-4k-instruct-q4.gguf",
    n_ctx=2048,      # context window, in tokens
    n_threads=2,     # match free-tier CPU count
    verbose=False,
)
def chat(message, history):
    """Gradio ChatInterface callback: generate a Phi-3 reply to *message*.

    Args:
        message: The user's latest message.
        history: Prior turns as ``[(user, assistant), ...]`` pairs
            (gradio's "tuples" history format — assumed from the original
            code; verify against the installed gradio version, newer
            releases default to message-dict format).

    Returns:
        The assistant's reply as a plain string. ChatInterface appends the
        (message, reply) pair to the history itself, so the callback must
        NOT mutate or return history — returning ``(history, "")`` as the
        original did breaks ChatInterface's display contract.
    """
    prompt = _build_prompt(message, history)
    output = llm(
        prompt,
        max_tokens=256,
        temperature=0.7,
        stop=["<|end|>"],  # stop at the Phi-3 turn delimiter
    )
    return output['choices'][0]['text'].strip()


def _build_prompt(message, history):
    """Assemble a Phi-3 chat prompt, including prior turns for context.

    The original ignored *history*, so the model had no multi-turn memory.
    """
    parts = []
    for user_turn, assistant_turn in history or []:
        parts.append(f"<|user|>\n{user_turn}<|end|>\n")
        parts.append(f"<|assistant|>\n{assistant_turn}<|end|>\n")
    parts.append(f"<|user|>\n{message}<|end|>\n<|assistant|>\n")
    return "".join(parts)
# --- Gradio UI --------------------------------------------------------------
# ChatInterface wires the chat() callback into a ready-made chat widget:
# it renders the textbox, the message list, and manages history for us.
demo = gr.ChatInterface(
    chat,
    title="Fast Phi-3 Chat",
    description="Quick responses on free HF Spaces!",
)

if __name__ == "__main__":
    # Start the web server only when executed as a script, not on import.
    demo.launch()