"""Gradio chat app serving the Pars-Medical-o1 GGUF model via llama.cpp.

Downloads a quantized GGUF checkpoint from the Hugging Face Hub, loads it
with llama-cpp-python on CPU, and exposes a streaming bilingual
(Persian/English) medical chat UI built with Gradio Blocks.

⚠️ Educational purposes only — not a substitute for professional medical advice.
"""

import gradio as gr
from huggingface_hub import hf_hub_download

# Download the quantized weights once at startup (hf_hub_download caches locally).
print("📥 Downloading GGUF model...")
model_path = hf_hub_download(
    repo_id="HexQuant/Pars-Medical-o1-Llama-FFT-GGUF",
    filename="model-Q4_K_M.gguf",  # ✅ Fixed filename
)
print(f"✅ Model downloaded: {model_path}")

from llama_cpp import Llama  # imported after download so failures surface in order

print("🔄 Loading model...")
llm = Llama(
    model_path=model_path,
    n_ctx=2048,      # context window (tokens)
    n_threads=2,     # small CPU-only deployment (e.g. free HF Space)
    n_gpu_layers=0,  # no GPU offload
    verbose=False,
)
print("✅ Model ready!")

SYSTEM = """You are Pars-Medical, a bilingual (Persian/English) medical AI assistant with chain-of-thought reasoning. When answering: analyze carefully, think step-by-step, provide accurate medical information. Respond in the user's language. ⚠️ Educational purposes only - always consult a real doctor."""


def chat(message, history):
    """Stream an assistant reply for *message* given tuple-style *history*.

    Args:
        message: The user's latest input string.
        history: List of ``(user, assistant)`` pairs from the Chatbot component.

    Yields:
        str: The assistant reply so far, growing as tokens stream in.
    """
    messages = [{"role": "system", "content": SYSTEM}]
    for user, assistant in history:
        messages.append({"role": "user", "content": user})
        if assistant:  # skip the empty placeholder of an in-flight turn
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})

    response = llm.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.7,
        stream=True,
    )
    text = ""
    for chunk in response:
        # The first streamed chunk often carries only the role with
        # content=None; coerce None to "" so concatenation never fails.
        delta = chunk["choices"][0].get("delta", {}).get("content", "") or ""
        text += delta
        yield text


def respond(message, history):
    """Adapter for the Blocks wiring: yield the full updated history.

    The Chatbot output component expects the entire ``(user, assistant)``
    list on each update, while :func:`chat` yields only the growing reply.
    """
    history = history or []
    for partial in chat(message, history):
        yield history + [(message, partial)]


with gr.Blocks(title="🩺 Pars-Medical-o1", theme=gr.themes.Soft()) as demo:
    # NOTE(review): the original source was truncated here — these component
    # definitions are reconstructed from the event wiring below (which
    # referenced msg/chatbot/send/clear without defining them); verify layout
    # against the deployed app.
    chatbot = gr.Chatbot(label="Pars-Medical-o1", height=450)
    with gr.Row():
        msg = gr.Textbox(show_label=False, placeholder="Ask a medical question…", scale=4)
        send = gr.Button("Send", variant="primary", scale=1)
    clear = gr.Button("🗑️ Clear")
    gr.HTML("""
Made with ❤️ by @xPOURY4 in Iran 🇮🇷
""")

    # Submit via Enter or the Send button; clear the textbox after dispatch.
    msg.submit(respond, [msg, chatbot], chatbot).then(lambda: "", None, msg)
    send.click(respond, [msg, chatbot], chatbot).then(lambda: "", None, msg)
    clear.click(lambda: None, None, chatbot)

demo.queue().launch()