Spaces:

saiful-ai-dev
/

motionmindx

Runtime error

App Files Files Community

saiful-ai-dev committed on 17 days ago

Commit

3cfed9e

verified ·

1 Parent(s): 3720298

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -51

app.py CHANGED Viewed

@@ -1,62 +1,42 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-import torch
-from threading import Thread
-# 1. Model and tokenizer setup (Qwen 2.5 3B)
-model_id = "Qwen/Qwen2.5-3B-Instruct"
-print("⏳ এআই টিউটর লোড হচ্ছে...")
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-# low_cpu_mem_usage is passed to save memory when loading on CPU
-model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    torch_dtype=torch.float32,
-    device_map="auto",
-    low_cpu_mem_usage=True
 )
 def respond(message, history):
-    # 2. Build the prompt from the system message and the current user message only (`history` is not used; the original note mentions a 2048-token context)
-    messages = [
-        {"role": "system", "content": "You are Motion Mind X, a helpful SSC/HSC tutor from Bangladesh. Respond clearly in Bengali or English."},
-        {"role": "user", "content": message},
-    ]
-    input_ids = tokenizer.apply_chat_template(
-        messages,
-        add_generation_prompt=True,
-        return_tensors="pt"
-    ).to(model.device)
-    # 3. Streaming setup
-    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
-    generate_kwargs = dict(
-        input_ids=input_ids,
-        streamer=streamer,
-        max_new_tokens=1024, # the reply is capped at 1024 new tokens
-        do_sample=True,
-        top_p=0.95,
-        temperature=0.7,
     )
-    # Run model.generate on a separate thread so the streamer can be consumed below (original note: keeps the interface from freezing)
-    t = Thread(target=model.generate, kwargs=generate_kwargs)
-    t.start()
     partial_message = ""
-    for new_token in streamer:
-        partial_message += new_token
-        yield partial_message
-# 4. Interface (Gradio ChatInterface)
-demo = gr.ChatInterface(
-    respond,
-    title="Motion Mind X 🚀",
-    description="Streaming Enabled | 2048 Context | SSC/HSC AI Tutor",
-    theme="soft"
-)
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+# 1. Download your GGUF model file with hf_hub_download
+print("⏳ মডেল ডাউনলোড হচ্ছে...")
+model_path = hf_hub_download(
+    repo_id="saiful-ai-dev/MotionMindX",
+    filename="Qwen2.5-3B-Instruct-Q4_K_M.gguf"
+)
+# 2. Model setup (n_ctx=2048 context, 2 threads)
+llm = Llama(
+    model_path=model_path,
+    n_ctx=2048,
+    n_threads=2
 )
 _SYSTEM_PROMPT = (
     "You are Motion Mind X, a helpful tutor for SSC/HSC students in Bangladesh."
 )
 _CHATML_MARKERS = ("<|im_start|>", "<|im_end|>")
 _MAX_ANSWER_TOKENS = 1024


 def _chatml_turn(role, content):
     """Render one ChatML turn, removing ChatML control markers from *content*."""
     text = str(content)
     # Chat text is untrusted: a literal marker inside it could close the
     # current turn and forge a new system/assistant turn.
     for marker in _CHATML_MARKERS:
         text = text.replace(marker, "")
     return f"<|im_start|>{role}\n{text}<|im_end|>\n"


 def _history_turns(history):
     """Flatten Gradio chat history into a list of ``(role, text)`` pairs.

     Accepts both shapes Gradio may pass: ``[user, assistant]`` pairs and
     ``{"role": ..., "content": ...}`` dicts. Entries that are empty, are not
     plain text, or have an unexpected shape are skipped.
     """
     turns = []
     for item in history or []:
         if isinstance(item, dict):
             candidates = [(item.get("role"), item.get("content"))]
         elif isinstance(item, (list, tuple)) and len(item) == 2:
             candidates = [("user", item[0]), ("assistant", item[1])]
         else:
             continue
         turns.extend(
             (role, text)
             for role, text in candidates
             if role in ("user", "assistant") and isinstance(text, str) and text
         )
     return turns


 def _build_prompt(message, turns):
     """Assemble the ChatML prompt, ending with an open assistant turn."""
     parts = [_chatml_turn("system", _SYSTEM_PROMPT)]
     parts.extend(_chatml_turn(role, text) for role, text in turns)
     parts.append(_chatml_turn("user", message))
     parts.append("<|im_start|>assistant\n")
     return "".join(parts)


 def respond(message, history):
     """Stream the tutor's reply to *message* for Gradio's ChatInterface.

     Args:
         message: The user's latest chat message.
         history: Earlier turns supplied by Gradio (pairs or role/content
             dicts). They are included in the prompt so follow-up questions
             keep their context; the oldest turns are dropped first when the
             prompt would not leave room for the answer.

     Yields:
         The reply accumulated so far, growing with each generated chunk.
     """
     turns = _history_turns(history)
     prompt = _build_prompt(message, turns)
     # Keep _MAX_ANSWER_TOKENS of the context window free for the answer.
     prompt_budget = llm.n_ctx() - _MAX_ANSWER_TOKENS
     while turns and len(llm.tokenize(prompt.encode("utf-8"))) > prompt_budget:
         turns = turns[1:]
         prompt = _build_prompt(message, turns)
     response_stream = llm(
         prompt,
         max_tokens=_MAX_ANSWER_TOKENS,
         stop=["<|im_end|>"],
         stream=True,
     )
     partial_message = ""
     for chunk in response_stream:
         piece = chunk["choices"][0].get("text")
         if piece is None:
             continue
         partial_message += piece
         yield partial_message  # lets the UI render the reply incrementally
+# 4. Interface
+demo = gr.ChatInterface(respond, title="Motion Mind X 🚀 (GGUF Streaming)")
 if __name__ == "__main__":
     demo.launch()