Derr11 committed on
Commit 3b87138 · verified · 1 Parent(s): d421e34

Update app.py

Files changed (1):
  1. app.py +238 -139
app.py CHANGED
@@ -3,7 +3,7 @@ import torch
 import gradio as gr
 import spaces
 from PIL import Image
-from transformers import AutoModel, AutoTokenizer
+from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM
 import warnings
 warnings.filterwarnings("ignore")
 
@@ -27,25 +27,25 @@ def load_model():
 
     print(f"Loading {MODEL_ID}...")
 
-    # Use float16 instead of bfloat16 for ZeroGPU compatibility
+    # Use float16 for ZeroGPU compatibility
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 
    try:
-        # Load the tokenizer
+        # Load the tokenizer first
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_ID,
            trust_remote_code=True,
            use_fast=False
        )
 
-        # Load the model with ZeroGPU-safe settings
+        # Load the model with trust_remote_code=True
        model = AutoModel.from_pretrained(
            MODEL_ID,
            trust_remote_code=True,
            torch_dtype=dtype,
            low_cpu_mem_usage=True,
-            attn_implementation="eager",  # use eager instead of flash_attention
+            attn_implementation="eager",
        ).eval()
 
        if torch.cuda.is_available():
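
Taken together, the load path in this hunk reduces to the following standalone snippet (a minimal sketch; the checkpoint id is an assumption, since MODEL_ID is defined in a part of app.py outside this diff):

    import torch
    from transformers import AutoModel, AutoTokenizer

    MODEL_ID = "openbmb/MiniCPM-o-2_6"  # assumption: the Space's actual checkpoint id

    # float16 on GPU for ZeroGPU compatibility, float32 on CPU
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    # The checkpoint ships custom modeling code, hence trust_remote_code=True
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True, use_fast=False)
    model = AutoModel.from_pretrained(
        MODEL_ID,
        trust_remote_code=True,
        torch_dtype=dtype,
        low_cpu_mem_usage=True,
        attn_implementation="eager",  # avoids a flash-attention dependency
    ).eval()

    if torch.cuda.is_available():
        model = model.cuda()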
@@ -54,23 +54,41 @@ def load_model():
        print("Model loaded successfully!")
 
    except Exception as e:
-        print(f"Error loading model: {e}")
-        # Fallback load attempt without trust_remote_code
+        print(f"Error with AutoModel, trying AutoModelForCausalLM: {e}")
+
+        # Fallback attempt with AutoModelForCausalLM
        try:
-            from transformers import AutoModelForCausalLM
            model = AutoModelForCausalLM.from_pretrained(
                MODEL_ID,
+                trust_remote_code=True,  # very important!
                torch_dtype=dtype,
                low_cpu_mem_usage=True,
+                attn_implementation="eager"
            ).eval()
 
-            tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-
            if torch.cuda.is_available():
                model = model.cuda()
+
+            print("Model loaded successfully with AutoModelForCausalLM!")
 
        except Exception as e2:
-            raise RuntimeError(f"Failed to load model: {e2}")
+            print(f"Failed to load model: {e2}")
+            raise RuntimeError(f"Could not load model: {e2}")
+
+
+# =========================================================
+# Image processing function
+# =========================================================
+
+def process_image(image_input):
+    """Prepare the image for the model"""
+    if image_input is None:
+        return None
+
+    if isinstance(image_input, str):
+        return Image.open(image_input).convert('RGB')
+    else:
+        return image_input.convert('RGB')
 
 
 # =========================================================
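
The new process_image helper normalizes both input shapes the UI can hand over: a file path (str) or an in-memory PIL image from gr.Image(type="pil"). A quick usage sketch (the file name is hypothetical):

    from PIL import Image

    img_from_path = process_image("photo.jpg")                # hypothetical path -> RGB PIL image
    img_from_pil = process_image(Image.new("RGB", (64, 64)))  # already-loaded image is converted to RGB
    assert process_image(None) is None                        # "no upload" passes through unchanged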
@@ -96,75 +114,78 @@ def generate_response(
        load_model()
        global model, tokenizer
 
-        # Prepare the messages
+        # Prepare the inputs
        if image_input is not None:
            # Handle image + text
+            image = process_image(image_input)
+
            if not text_input:
                text_input = "What is shown in this image? Please describe in detail."
 
-            # Prepare the model input
-            msgs = [{"role": "user", "content": [image_input, text_input]}]
-
-            # Use the model's own chat method
-            with torch.no_grad():
-                if hasattr(model, 'chat'):
-                    response = model.chat(
-                        image=image_input,
-                        msgs=msgs,
-                        tokenizer=tokenizer,
-                        sampling=True,
-                        temperature=temperature,
-                        top_p=top_p,
-                        max_new_tokens=max_new_tokens
-                    )
-                else:
-                    # Fallback for models that do not support chat
-                    inputs = tokenizer(text_input, return_tensors="pt")
-                    if torch.cuda.is_available():
-                        inputs = inputs.to("cuda")
-
-                    outputs = model.generate(
-                        **inputs,
-                        max_new_tokens=max_new_tokens,
-                        temperature=temperature,
-                        top_p=top_p,
-                        do_sample=True
-                    )
-
-                    response = tokenizer.decode(
-                        outputs[0][inputs['input_ids'].shape[1]:],
-                        skip_special_tokens=True
-                    )
-        else:
-            # Text only
-            inputs = tokenizer(
-                text_input,
-                return_tensors="pt",
-                padding=True,
-                truncation=True,
-                max_length=2048
-            )
-
-            if torch.cuda.is_available():
-                inputs = inputs.to("cuda")
-
-            with torch.no_grad():
-                outputs = model.generate(
-                    **inputs,
-                    max_new_tokens=max_new_tokens,
-                    temperature=temperature,
-                    top_p=top_p,
-                    do_sample=True,
-                    pad_token_id=tokenizer.pad_token_id,
-                    eos_token_id=tokenizer.eos_token_id
-                )
-
-            response = tokenizer.decode(
-                outputs[0][inputs['input_ids'].shape[1]:],
-                skip_special_tokens=True
-            )
-
-        return response
+            # Check whether the model exposes a chat method
+            if hasattr(model, 'chat'):
+                try:
+                    # Use the model's custom chat method
+                    msgs = [{"role": "user", "content": [image, text_input]}]
+
+                    with torch.no_grad():
+                        response = model.chat(
+                            image=image,
+                            msgs=msgs,
+                            tokenizer=tokenizer,
+                            sampling=True,
+                            temperature=temperature,
+                            top_p=top_p,
+                            max_new_tokens=max_new_tokens
+                        )
+
+                    return response
+
+                except Exception as e:
+                    print(f"Chat method failed: {e}")
+                    # Fall back to the standard path
+
+            # Fallback path for images:
+            # merge the text with the image description
+            prompt = f"Image: [Image will be processed]\n\nQuestion: {text_input}\n\nAnswer:"
+
+        else:
+            # Text only
+            prompt = text_input
+
+        # Standard text processing
+        inputs = tokenizer(
+            prompt,
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            max_length=2048
+        )
+
+        if torch.cuda.is_available():
+            inputs = {k: v.cuda() for k, v in inputs.items() if v is not None}
+
+        # Generation settings
+        gen_kwargs = {
+            "max_new_tokens": max_new_tokens,
+            "temperature": temperature if temperature > 0 else 1e-7,
+            "top_p": top_p,
+            "do_sample": temperature > 0,
+            "pad_token_id": tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id,
+            "eos_token_id": tokenizer.eos_token_id,
+        }
+
+        # Generate
+        with torch.no_grad():
+            outputs = model.generate(**inputs, **gen_kwargs)
+
+        # Decode
+        response = tokenizer.decode(
+            outputs[0][inputs['input_ids'].shape[1]:],
+            skip_special_tokens=True
+        )
+
+        return response.strip()
 
    except Exception as e:
        import traceback
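
Two details of the rewritten generation path deserve a note: do_sample flips to greedy decoding when the temperature slider sits at 0 (the 1e-7 temperature only keeps generate from rejecting a zero value), and the decode step slices outputs at input_ids.shape[1] so the prompt is not echoed back. The same pattern in isolation, using the small public gpt2 checkpoint purely for illustration:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tok = AutoTokenizer.from_pretrained("gpt2")
    lm = AutoModelForCausalLM.from_pretrained("gpt2").eval()

    inputs = tok("The capital of France is", return_tensors="pt")

    temperature = 0.0  # slider at 0 -> deterministic, greedy decoding
    gen_kwargs = {
        "max_new_tokens": 8,
        "temperature": temperature if temperature > 0 else 1e-7,
        "top_p": 0.9,
        "do_sample": temperature > 0,
        "pad_token_id": tok.pad_token_id if tok.pad_token_id is not None else tok.eos_token_id,
    }

    with torch.no_grad():
        out = lm.generate(**inputs, **gen_kwargs)

    # Slice off the prompt: only tokens generated after input_ids are decoded
    new_tokens = out[0][inputs["input_ids"].shape[1]:]
    print(tok.decode(new_tokens, skip_special_tokens=True).strip())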
@@ -172,6 +193,20 @@ def generate_response(
        return f"Error: {str(e)}"
 
 
+# =========================================================
+# UI helper functions
+# =========================================================
+
+def clear_all():
+    """Clear all inputs and outputs"""
+    return "", None, ""
+
+
+def update_examples_visibility(show_examples):
+    """Toggle the examples' visibility"""
+    return gr.update(visible=show_examples)
+
+
 # =========================================================
 # Gradio interface
 # =========================================================
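
clear_all works because Gradio maps a returned tuple positionally onto the outputs list of the event that calls it; update_examples_visibility uses gr.update to change one property of a component without touching its value. A minimal sketch of the same mechanism (component names are hypothetical):

    import gradio as gr

    def toggle(show):
        # Only the 'visible' property changes; the textbox keeps its contents
        return gr.update(visible=show)

    with gr.Blocks() as demo:
        details = gr.Textbox(label="Details", visible=False)
        show_chk = gr.Checkbox(label="Show details")
        show_chk.change(fn=toggle, inputs=show_chk, outputs=details)

    demo.launch()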
@@ -179,83 +214,142 @@ def generate_response(
 def create_demo():
     """Create the simple Gradio interface"""
 
-    with gr.Blocks(title="MiniCPM-o-2.6") as demo:
+    with gr.Blocks(title="MiniCPM-o-2.6", css="""
+        .gradio-container {
+            max-width: 1200px;
+            margin: auto;
+        }
+        h1 {
+            text-align: center;
+        }
+        .contain {
+            background: white;
+            border-radius: 10px;
+            padding: 20px;
+        }
+    """) as demo:
+
        gr.Markdown(
            """
-            # 🤖 MiniCPM-o-2.6 - Multimodal AI
+            # 🤖 MiniCPM-o-2.6 - Multimodal AI Assistant
 
-            **Capabilities:**
-            - 🖼️ Image Understanding (OCR, description, analysis)
-            - 💬 Text Generation
-            - 🧠 8B parameters with GPT-4 level performance
-
-            Enter your text or upload an image to start!
+            <div style="text-align: center;">
+                <p>
+                    <b>8B parameters model</b> with GPT-4 level performance<br>
+                    Supports: Text Generation, Image Understanding, OCR, and Multi-lingual conversations
+                </p>
+            </div>
            """
        )
 
        with gr.Row():
+            # Main column
            with gr.Column(scale=2):
-                text_input = gr.Textbox(
-                    label="Text Input",
-                    placeholder="Enter your question or prompt...",
-                    lines=3
-                )
-
-                image_input = gr.Image(
-                    label="Image Input (Optional)",
-                    type="pil"
-                )
+                with gr.Group():
+                    text_input = gr.Textbox(
+                        label="💭 Text Input",
+                        placeholder="Enter your question or prompt here...\nYou can ask about images, request text generation, or have a conversation.",
+                        lines=4,
+                        elem_id="text_input"
+                    )
+
+                    image_input = gr.Image(
+                        label="📷 Image Input (Optional)",
+                        type="pil",
+                        elem_id="image_input"
+                    )
 
                with gr.Row():
-                    submit_btn = gr.Button("🚀 Generate", variant="primary")
-                    clear_btn = gr.Button("🗑️ Clear")
+                    submit_btn = gr.Button(
+                        "🚀 Generate Response",
+                        variant="primary",
+                        scale=2
+                    )
+                    clear_btn = gr.Button(
+                        "🗑️ Clear All",
+                        variant="secondary",
+                        scale=1
+                    )
 
                output = gr.Textbox(
-                    label="Response",
-                    lines=8,
-                    interactive=False
+                    label="🤖 AI Response",
+                    lines=10,
+                    interactive=False,
+                    elem_id="output"
                )
 
+            # Settings column
            with gr.Column(scale=1):
-                gr.Markdown("### ⚙️ Settings")
-
-                temperature = gr.Slider(
-                    label="Temperature",
-                    minimum=0.1,
-                    maximum=1.0,
-                    value=0.7,
-                    step=0.1,
-                    info="Higher = more creative"
-                )
-
-                top_p = gr.Slider(
-                    label="Top-p",
-                    minimum=0.1,
-                    maximum=1.0,
-                    value=0.9,
-                    step=0.05,
-                    info="Nucleus sampling"
-                )
-
-                max_new_tokens = gr.Slider(
-                    label="Max Tokens",
-                    minimum=50,
-                    maximum=1024,
-                    value=512,
-                    step=50,
-                    info="Maximum response length"
-                )
+                with gr.Group():
+                    gr.Markdown("### ⚙️ Generation Settings")
+
+                    temperature = gr.Slider(
+                        label="Temperature",
+                        minimum=0.0,
+                        maximum=1.5,
+                        value=0.7,
+                        step=0.1,
+                        info="Controls randomness (0=deterministic, 1.5=very creative)"
+                    )
+
+                    top_p = gr.Slider(
+                        label="Top-p (Nucleus Sampling)",
+                        minimum=0.1,
+                        maximum=1.0,
+                        value=0.9,
+                        step=0.05,
+                        info="Controls diversity of output"
+                    )
+
+                    max_new_tokens = gr.Slider(
+                        label="Max New Tokens",
+                        minimum=50,
+                        maximum=2048,
+                        value=512,
+                        step=50,
+                        info="Maximum length of generated response"
+                    )
 
                gr.Markdown(
                    """
-                    ### 📝 Tips:
-                    - For images: Upload and ask questions
-                    - Supports OCR and image analysis
-                    - Can handle multiple languages
+                    ### 📚 Quick Tips:
+
+                    **Text Generation:**
+                    - Ask questions
+                    - Request explanations
+                    - Generate creative content
+
+                    **Image Understanding:**
+                    - Upload an image
+                    - Ask about contents
+                    - Request OCR/text extraction
+                    - Get detailed descriptions
+
+                    **Languages:**
+                    - English, Chinese, Arabic
+                    - And many more!
                    """
                )
 
-        # Event handlers
+        # Examples
+        with gr.Group():
+            gr.Markdown("### 💡 Example Prompts")
+            gr.Examples(
+                examples=[
+                    ["Explain quantum computing in simple terms for a beginner.", None],
+                    ["Write a short story about a robot learning to paint.", None],
+                    ["What are the main differences between Python and JavaScript?", None],
+                    ["Create a healthy meal plan for one week.", None],
+                    ["Translate 'Hello, how are you?' to French, Spanish, and Arabic.", None],
+                ],
+                inputs=[text_input, image_input],
+                outputs=output,
+                fn=lambda t, i: generate_response(t, i, 0.7, 0.9, 512),
+                cache_examples=False,
+                label="Click any example to try it"
+            )
+
+        # Wire up the events
        submit_btn.click(
            fn=generate_response,
            inputs=[text_input, image_input, temperature, top_p, max_new_tokens],
@@ -263,31 +357,36 @@ def create_demo():
            api_name="generate"
        )
 
+        text_input.submit(
+            fn=generate_response,
+            inputs=[text_input, image_input, temperature, top_p, max_new_tokens],
+            outputs=output
+        )
+
        clear_btn.click(
-            fn=lambda: (None, None, ""),
+            fn=clear_all,
            inputs=[],
            outputs=[text_input, image_input, output]
        )
 
-        # Examples
-        gr.Examples(
-            examples=[
-                ["What is artificial intelligence?", None],
-                ["Explain quantum computing in simple terms", None],
-                ["Write a poem about nature", None],
-            ],
-            inputs=[text_input, image_input],
-            outputs=output,
-            fn=lambda t, i: generate_response(t, i, 0.7, 0.9, 512),
-            cache_examples=False
+        # Welcome message on load
+        demo.load(
+            lambda: gr.Info("Model is loading... This may take a moment on first use."),
+            inputs=None,
+            outputs=None
        )
 
    return demo
 
 
+# =========================================================
+# Run the app
+# =========================================================
+
 if __name__ == "__main__":
    demo = create_demo()
    demo.launch(
        ssr_mode=False,
-        show_error=True
+        show_error=True,
+        share=False
    )
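
Because submit_btn.click registers the handler with api_name="generate", the deployed Space is also callable programmatically. A sketch with gradio_client (the Space id is hypothetical; the argument order mirrors the inputs list above):

    from gradio_client import Client

    client = Client("Derr11/minicpm-demo")  # hypothetical Space id

    result = client.predict(
        "What is artificial intelligence?",  # text_input
        None,                                # image_input: text-only request
        0.7,                                 # temperature
        0.9,                                 # top_p
        512,                                 # max_new_tokens
        api_name="/generate",
    )
    print(result)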
 