Leo9Bot / app.py
ehi20011's picture
Update app.py
2208bb7 verified
raw
history blame contribute delete
771 Bytes
import os
import gradio as gr
from llama_cpp import Llama
# Load GGUF model.
# Path is overridable via the MODEL_PATH env var (e.g. for Spaces secrets /
# alternative model files); falls back to the original hard-coded location.
MODEL_PATH = os.environ.get("MODEL_PATH", "./models/mistral.gguf")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,        # context window in tokens
    n_threads=9,       # Increase for more speed if CPU allows
    n_batch=128,       # prompt-eval batch size
    use_mlock=True,    # pin model pages in RAM (avoids swapping)
    use_mmap=True,     # memory-map the GGUF file instead of fully loading it
    verbose=False
)
# Streaming generator
def generate_response(prompt, history=None):
    """Stream a model reply for *prompt*, yielding the growing partial text.

    gr.ChatInterface calls its fn as fn(message, history), so the function
    must accept two positional arguments — the original one-argument
    signature raised TypeError on every chat turn. `history` is accepted
    (default None for direct calls) but intentionally unused: each turn is
    answered statelessly with a fresh [INST] ... [/INST] prompt.
    """
    stream = llm(
        prompt=f"[INST] {prompt.strip()} [/INST]",
        max_tokens=512,
        stop=["</s>"],   # Mistral end-of-sequence marker
        stream=True
    )
    partial = ""
    for chunk in stream:
        # Accumulate and re-yield the full text so far; Gradio re-renders
        # the whole message on each yield.
        partial += chunk["choices"][0]["text"]
        yield partial
# Gradio UI: wire the streaming generator into a chat widget and serve it.
gr.ChatInterface(
    fn=generate_response,
    title="Leo9 AI Tutor",
    # Fixed grammar of the user-facing description string.
    description="An AI chatbot that answers any question.",
).launch()