# Qwen2.5-Coder chat app: downloads a quantized GGUF model from the
# Hugging Face Hub and serves a streaming coding assistant UI via Gradio,
# running entirely on CPU.
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# 1. Download the model weights (Qwen2.5-Coder 7B).
# The GGUF Q4_K_M quantization is used so the model fits in limited free-tier
# RAM and loads quickly for CPU-only inference.
print("Sedang mendownload model... Mohon tunggu.")
model_path = hf_hub_download(
    repo_id="Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
    filename="qwen2.5-coder-7b-instruct-q4_k_m.gguf",
)
# 2. Load the model into memory (CPU inference).
llm = Llama(
    model_path=model_path,
    n_ctx=8192,    # large context window so long code fits in the prompt
    n_threads=2,   # use 2 CPU cores
    verbose=False,
)
# 3. Chat handler.
def chat_response(message, history):
    """Stream a model reply for *message*, given the prior chat *history*.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list[tuple[str, str]]
        Prior (user, assistant) turns as supplied by ``gr.ChatInterface``.
        NOTE(review): assumes the legacy tuple history format — confirm
        against the installed Gradio version (newer releases default to
        the "messages" dict format, which would break this unpacking).

    Yields
    ------
    str
        The accumulated assistant reply so far (Gradio re-renders the
        growing string on each yield, giving a token-by-token effect).
    """
    # Build the prompt in the Qwen ChatML format (<|im_start|>/<|im_end|>).
    system_prompt = "You are an expert coding assistant. Write clean, professional code."
    prompt_str = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
    for user_input, ai_output in history:
        prompt_str += f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n{ai_output}<|im_end|>\n"
    prompt_str += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
    # Stream tokens so the UI updates incrementally instead of blocking
    # until the full completion is ready.
    output_stream = llm(
        prompt_str,
        max_tokens=2048,
        stop=["<|im_end|>"],  # ChatML end-of-turn marker halts generation
        stream=True,
    )
    partial_message = ""
    for chunk in output_stream:
        partial_message += chunk['choices'][0]['text']
        yield partial_message
# 4. Web UI (mobile friendly).
# CSS injected into the page: hides the default Gradio footer.
custom_css = """
footer {visibility: hidden}
"""
# Assemble the chat UI; `fn` is a generator, so replies stream to the page.
demo = gr.ChatInterface(
    fn=chat_response,
    title="🤖 Qwen Android Coder",
    description="Asisten Koding Profesional untuk Android. Copy kode & jalankan di Termux/Acode.",
    theme=gr.themes.Soft(),
    css=custom_css,
)
if __name__ == "__main__":
    # Start the Gradio server only when run as a script (not on import).
    demo.launch()