|
|
import gradio as gr |
|
|
import os |
|
|
import time |
|
|
from typing import Iterator |
|
|
import threading |
|
|
|
|
|
|
|
|
# Global model state, shared between the background loader thread and the
# Gradio event handlers (which only read these flags).
llm = None            # llama_cpp.Llama instance once loading succeeds
model_loading = True  # True while the loader thread is still running
model_error = None    # human-readable message if loading failed, else None
|
|
|
|
|
def load_model():
    """Load the GGUF model.

    Runs on a background daemon thread (started at module bottom).  On
    success the module-global ``llm`` holds the llama_cpp.Llama instance;
    on failure ``model_error`` holds a message.  ``model_loading`` is
    cleared in both cases so the UI status poller can report progress.
    """
    global llm, model_loading, model_error

    try:
        print("🔄 Loading model...")
        # Imported lazily so the UI can still come up (and show the error)
        # if llama_cpp is not installed.
        from llama_cpp import Llama

        llm = Llama.from_pretrained(
            repo_id="Tohirju/Ameena_Qwen3-8B_e3_Quantised_gguf",
            filename="Ameena_Qwen3-8B_e3.gguf",
            n_ctx=2048,          # context window (tokens)
            n_threads=None,      # let llama.cpp pick the CPU thread count
            n_gpu_layers=0,      # CPU-only inference
            use_mmap=True,       # memory-map weights instead of copying
            use_mlock=False,     # don't pin pages (small containers)
            n_batch=512,
            verbose=False,
            offload_kqv=False,   # keep KV cache on CPU
            f16_kv=True,         # half-precision KV cache to save RAM
        )

        # Clear the flag before printing so the status line never says
        # "loading" after the model is actually usable.
        model_loading = False
        print("✅ Model loaded successfully!")

    except Exception as e:
        model_error = f"Model loading failed: {str(e)}"
        model_loading = False
        print(f"❌ {model_error}")
|
|
|
|
|
def chat_with_model(
    message: str,
    history: list,
    system_message: str = "Шумо ёвари хуб ҳастед ва ба забони тоҷикӣ ҷавоб медиҳед.",
    max_tokens: int = 150,
    temperature: float = 0.7,
    top_p: float = 0.9,
) -> Iterator[str]:
    """Stream the model's reply to *message*.

    Yields the accumulated response text after every received token so
    the caller can simply replace the displayed message each time.
    """
    # Guard clauses: surface the loader's state to the user instead of
    # crashing when the model is not (yet) available.
    if model_loading:
        yield "⏳ Model is still loading, please wait..."
        return
    if model_error:
        yield f"❌ Model error: {model_error}"
        return
    if llm is None:
        yield "❌ Model not loaded. Please refresh the page."
        return

    try:
        # Rebuild the conversation in OpenAI-style chat format:
        # optional system prompt, then alternating past turns, then the
        # new user message.
        convo = []
        if system_message.strip():
            convo.append({"role": "system", "content": system_message})
        for past_user, past_bot in history:
            if past_user:
                convo.append({"role": "user", "content": past_user})
            if past_bot:
                convo.append({"role": "assistant", "content": past_bot})
        convo.append({"role": "user", "content": message})

        stream = llm.create_chat_completion(
            messages=convo,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True,
            stop=["</s>", "User:", "Human:", "Assistant:"],
            repeat_penalty=1.1,
        )

        # Fold streamed deltas into a growing string and re-yield it.
        text_so_far = ""
        for piece in stream:
            delta = piece["choices"][0]["delta"]
            if delta.get("content"):
                text_so_far += delta["content"]
                yield text_so_far

    except Exception as e:
        yield f"❌ Generation error: {str(e)}"
|
|
|
|
|
def get_model_status():
    """Return a one-line, human-readable status string for the loader."""
    # Checked in priority order: still loading, failed, ready, unknown.
    if model_loading:
        return "🔄 Loading model... Please wait."
    if model_error:
        return f"❌ Error: {model_error}"
    if llm is not None:
        return "✅ Model ready!"
    return "❓ Unknown status"
|
|
|
|
|
|
|
|
# Start loading the model on a daemon thread so the UI comes up
# immediately; the handlers above read the global flags to know when
# the model becomes usable.
model_thread = threading.Thread(target=load_model, daemon=True)
model_thread.start()
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Gradio UI.  This builds the Blocks layout; the event handlers are wired
# up further down inside the same ``with`` block.
# ---------------------------------------------------------------------------
with gr.Blocks(
    title="🇹🇯 Ameena Qwen3-8B Tajik Language Model",
    theme=gr.themes.Soft(),
    # Constrain the app width and center it on the page.
    css="""
    .gradio-container {
        max-width: 800px !important;
        margin: auto !important;
    }
    """
) as demo:

    gr.Markdown("""
    # 🇹🇯 Ameena Qwen3-8B - Tajik Language Model

    **Model**: Quantized GGUF (4GB) | **Backend**: CPU Only | **Language**: Tajik

    Base model: Qwen3-8B fine-tuned for Tajik language
    """)

    # One-line loader status; refreshed by the poller/button wired up below.
    status_display = gr.Markdown(get_model_status())

    chatbot = gr.Chatbot(
        height=400,
        show_label=False,
        show_copy_button=True,
    )

    # Input row: textbox plus send button.
    with gr.Row():
        msg = gr.Textbox(
            placeholder="Салом! Саволи худро дар ин ҷо бинависед... (Hello! Write your question here...)",
            show_label=False,
            scale=4
        )
        submit_btn = gr.Button("Send", scale=1, variant="primary")

    # Generation settings, collapsed by default.
    with gr.Accordion("⚙️ Settings", open=False):
        system_msg = gr.Textbox(
            value="Шумо ёвари хуб ҳастед ва ба забони тоҷикӣ ҷавоб медиҳед.",
            label="System Message (Tajik)",
            info="Instructions for the model in Tajik language"
        )

        with gr.Row():
            max_tokens = gr.Slider(
                minimum=50,
                maximum=300,
                value=150,
                step=10,
                label="Max Tokens",
                info="Maximum response length"
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.5,
                value=0.7,
                step=0.1,
                label="Temperature",
                info="Response creativity (higher = more creative)"
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.05,
                label="Top-p",
                info="Nucleus sampling parameter"
            )

    # Clickable sample prompts (Tajik) that fill the textbox.
    gr.Examples(
        examples=[
            ["Салом! Чӣ хел ҳастед?"],
            ["Тоҷикистон дар куҷо ҷойгир аст?"],
            ["Барномасозӣ чист ва чӣ гуна кор мекунад?"],
            ["Оиди забони тоҷикӣ маълумот диҳед"],
            ["Шеър дар бораи табиат нависед"],
        ],
        inputs=msg,
        label="💡 Example Questions"
    )
|
|
|
|
|
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Handle a user message and stream the assistant's reply.

    Yields ``(history, "")`` pairs so the Chatbot updates incrementally
    while the input textbox is cleared.
    """
    # Bug fix: this handler is a generator, so the old
    # ``return history, ""`` on blank input emitted nothing — the UI got
    # no update and the textbox was never cleared.  Yield the unchanged
    # history instead, then stop.
    if not message.strip():
        yield history, ""
        return

    # Placeholder row for the streaming assistant reply.
    history.append([message, None])

    # Stream partial responses; ``history[:-1]`` excludes the pending row
    # so the model does not see its own empty placeholder.
    for partial_response in chat_with_model(
        message, history[:-1], system_message, max_tokens, temperature, top_p
    ):
        history[-1][1] = partial_response
        yield history, ""
|
|
|
|
|
def clear_chat():
    """Reset the conversation: empty Chatbot history and empty textbox."""
    emptied_history, emptied_box = [], ""
    return emptied_history, emptied_box
|
|
|
|
|
def update_status():
    """Re-query the loader state for the status Markdown widget.

    Thin wrapper so the refresh button and the periodic poller have a
    handler to bind to.
    """
    return get_model_status()
|
|
|
|
|
|
|
|
# Wire both the Send button and the textbox Enter key to the same
# streaming handler.
submit_btn.click(
    respond,
    inputs=[msg, chatbot, system_msg, max_tokens, temperature, top_p],
    outputs=[chatbot, msg]
)

msg.submit(
    respond,
    inputs=[msg, chatbot, system_msg, max_tokens, temperature, top_p],
    outputs=[chatbot, msg]
)

# Utility buttons.
clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")
clear_btn.click(clear_chat, outputs=[chatbot, msg])

refresh_btn = gr.Button("🔄 Refresh Status", variant="secondary")
refresh_btn.click(update_status, outputs=status_display)

# Poll the loader every 5 seconds so the status line updates on its own
# once the background thread finishes.
demo.load(update_status, outputs=status_display, every=5)
|
|
|
|
|
if __name__ == "__main__":
    # Generator-based handlers stream through Gradio's queue.  Enable it
    # explicitly: required for streaming on Gradio 3.x, and a harmless
    # no-op on 4.x where the queue is on by default.
    demo.queue()
    demo.launch(
        server_name="0.0.0.0",  # listen on all interfaces (container/Space friendly)
        server_port=7860,
        show_error=True,        # surface handler exceptions in the UI
        share=False,
        quiet=False,
    )