AXONAI_MX4 / app.py
Daffaadityp's picture
Rename App.py to app.py
86d8319 verified
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# Step 1: fetch the GGUF file from the Hub repository onto the Space machine.
# Point both values below at your own repository and the exact GGUF file name.
repo_id = "Daffaadityp/AxonAI-MX4-2.0-GGUF"  # change this if the repo differs
nama_file_gguf = "AxonAI-MX4-2.0-Q4_K_M.gguf"  # must match your file's name

print("Downloading model...")
model_path = hf_hub_download(
    filename=nama_file_gguf,
    repo_id=repo_id,
)

# Step 2: load the GGUF model into CPU memory.
print("Loading model to CPU...")
llm = Llama(
    n_threads=2,  # matched to the "CPU Basic" tier, which gets 2 vCPUs
    n_ctx=2048,  # context capacity; can be lowered (e.g. 1024) if too heavy
    model_path=model_path,
)
# 3. Generate an answer for a single user prompt.
def prediksi(prompt):
    """Return the model's completion for *prompt* as stripped text.

    Args:
        prompt: The user's message. ``None`` or a whitespace-only string is
            treated as "nothing to answer".

    Returns:
        The text of the first completion choice with surrounding whitespace
        removed, or ``""`` when *prompt* is blank.
    """
    # Skip inference entirely for blank input: otherwise the model would be
    # asked to continue an empty "User:" turn (or the literal text "None").
    if prompt is None or not prompt.strip():
        return ""

    # The prompt format can be adapted to the model's chat template.
    hasil = llm(
        f"User: {prompt}\nAxonAI:",  # example of simple prompt formatting
        max_tokens=150,
        stop=["User:", "\n\n"],
        echo=False,
    )
    return hasil["choices"][0]["text"].strip()
# 4. Build the Gradio UI so that its API is exposed.
iface = gr.Interface(
    fn=prediksi,
    inputs="text",
    outputs="text",
)
iface.launch()