socio-ai-phi / app.py
Invescoz's picture
Update app.py
15580c0 verified
raw
history blame contribute delete
851 Bytes
import gradio as gr
from llama_cpp import Llama
# Load the quantized Phi-3 model.
# BUG FIX: the original passed the HF repo/file string directly as
# model_path, but Llama(model_path=...) only opens a local file and never
# downloads — so startup fails unless that exact file already exists.
# Llama.from_pretrained() downloads the GGUF from the Hub on first run and
# caches it, which matches the original comment's intent.
llm = Llama.from_pretrained(
    repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
    filename="Phi-3-mini-4k-instruct-q4.gguf",
    n_ctx=2048,     # context window
    n_threads=2,    # match free-tier CPU count
    verbose=False,
)
def chat(message, history):
    """Generate one assistant reply for gr.ChatInterface.

    Parameters
    ----------
    message : str
        Latest user message.
    history : list
        Prior conversation turns. Managed entirely by ChatInterface;
        not used to build the prompt here.

    Returns
    -------
    str
        The model's reply text.
    """
    # Phi-3 chat template: user turn, end marker, then assistant cue.
    prompt = f"<|user|>\n{message}<|end|>\n<|assistant|>\n"
    output = llm(
        prompt,
        max_tokens=256,
        temperature=0.7,
        stop=["<|end|>"],
    )
    # llama_cpp returns a completion dict; take the first choice's text.
    response = output['choices'][0]['text'].strip()
    # BUG FIX: gr.ChatInterface expects the reply string from fn, not a
    # (history, "") tuple — the original return value (a Blocks/Chatbot
    # idiom) breaks ChatInterface's contract and also mutated the
    # framework-managed history in place.
    return response
# ---- Gradio UI --------------------------------------------------------
# ChatInterface wires the chat() callback to a ready-made chat widget.
demo = gr.ChatInterface(
    chat,
    title="Fast Phi-3 Chat",
    description="Quick responses on free HF Spaces!",
)

# Launch the app only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()