Spaces:

saiful-ai-dev
/

motionmindx

Runtime error

App Files Files Community

saiful-ai-dev committed 17 days ago

Commit

ac69158

verified ·

1 Parent(s): 375f784

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -21

app.py CHANGED Viewed

@@ -1,35 +1,46 @@
 import gradio as gr
-from transformers import pipeline
-import torch
-# The safest way to load the 3B model
-model_id = "Qwen/Qwen2.5-3B-Instruct"
-print("⏳ এআই টিউটর লোড হচ্ছে... এটি ২-৩ মিনিট সময় নেবে।")
-# Special settings to save memory
-pipe = pipeline(
-    "text-generation",
-    model=model_id,
-    model_kwargs={"torch_dtype": torch.float32, "low_cpu_mem_usage": True},
-    device_map="auto"
 )
 def respond(message, history):
-    messages = [
-        {"role": "system", "content": "You are Motion Mind X, a friendly SSC/HSC tutor from Bangladesh. Respond clearly in Bengali/English."},
-        {"role": "user", "content": message},
-    ]
-    # Start generation
-    out = pipe(messages, max_new_tokens=512)
-    return out[0]['generated_text'][-1]['content']
-# Chat interface
 demo = gr.ChatInterface(
     respond,
     title="Motion Mind X 🚀",
-    examples=["SSC গণিত প্রস্তুতি কীভাবে নেব?", "HSC English 2nd paper grammar help"]
 )
 if __name__ == "__main__":

 import gradio as gr
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+# 1. Download the model
+model_path = hf_hub_download(
+    repo_id="saiful-ai-dev/MotionMindX",
+    filename="Qwen2.5-3B-Instruct-Q4_K_M.gguf"
+)
+# 2. Model settings (context set to 2048, as you requested)
+llm = Llama(
+    model_path=model_path,
+    n_ctx=2048,      # Context window
+    n_batch=512,     # a somewhat smaller batch size is better for saving RAM; it will still handle a 2048 prompt
+    n_threads=2      # author's note: this is the best choice for the free CPU
 )
 def respond(message, history):
+    # Prompt formatting
+    prompt = f"<|im_start|>system\nYou are Motion Mind X, a helpful tutor for SSC/HSC students in Bangladesh. Respond clearly.<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
+    # 3. Start streaming
+    response_stream = llm(
+        prompt,
+        max_tokens=512,
+        stop=["<|im_end|>"],
+        stream=True # this sends the text out piece by piece
+    )
+    partial_message = ""
+    for chunk in response_stream:
+        if "text" in chunk["choices"][0]:
+            token = chunk["choices"][0]["text"]
+            partial_message += token
+            yield partial_message # yielding the accumulated text is what produces the streaming effect
+# 4. Interface design
 demo = gr.ChatInterface(
     respond,
     title="Motion Mind X 🚀",
+    description="SSC/HSC শিক্ষার্থীদের জন্য লাইভ এআই টিউটর (Streaming Enabled)",
+    theme="soft"
 )
 if __name__ == "__main__":