Spaces:
Build error
Build error
| import gradio as gr | |
| from huggingface_hub import hf_hub_download | |
| from llama_cpp import Llama | |
# Step 1: download the GGUF file from the Hub repository onto the Space's
# machine. Replace `repo_id` with your own repository if it differs, and make
# sure `nama_file_gguf` matches the GGUF file name in that repository.
repo_id = "Daffaadityp/AxonAI-MX4-2.0-GGUF"
nama_file_gguf = "AxonAI-MX4-2.0-Q4_K_M.gguf"

print("Downloading model...")
model_path = hf_hub_download(
    repo_id=repo_id,
    filename=nama_file_gguf,
)

# Step 2: load the downloaded GGUF model into memory for CPU inference.
print("Loading model to CPU...")
llm = Llama(
    model_path=model_path,
    # Context capacity in tokens; can be lowered (e.g. to 1024) if the Space
    # struggles with this size.
    n_ctx=2048,
    # Sized for the "CPU Basic" tier, which provides 2 vCPUs.
    n_threads=2,
)
# 3. Generate an answer for a single user prompt.
def prediksi(prompt: str) -> str:
    """Return the model's reply to ``prompt``.

    The prompt is wrapped in a simple ``User: ... / AxonAI:`` template; adjust
    the template if the model expects a different chat format.

    Args:
        prompt: Text typed by the user. ``None`` and empty or whitespace-only
            values are treated as "no question".

    Returns:
        The generated text with surrounding whitespace stripped, or an empty
        string when ``prompt`` is blank.
    """
    # A blank prompt has nothing to answer; skip the inference call instead of
    # letting the model complete an empty "User:" turn.
    if prompt is None or not str(prompt).strip():
        return ""

    hasil = llm(
        f"User: {prompt}\nAxonAI:",  # simple example prompt formatting
        max_tokens=150,
        # Stop when the model starts a new "User:" turn or emits a blank line.
        stop=["User:", "\n\n"],
        echo=False,
    )
    return hasil['choices'][0]['text'].strip()
# 4. Build the Gradio UI around `prediksi` (text in, text out) so that its
# API is exposed, then start it.
iface = gr.Interface(
    fn=prediksi,
    inputs="text",
    outputs="text",
)
iface.launch()