anaspro committed on
Commit eef5702 · verified · 1 Parent(s): 8070fa9

Update app2.py

Files changed (1)
app2.py +208 -159

app2.py CHANGED
@@ -1,179 +1,228 @@
- # -*- coding: utf-8 -*-
-
  import os
- import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM
  import gradio as gr
  import spaces

- # Load system prompt from file
- def load_system_prompt():
-     try:
-         with open('system_prompt.txt', 'r', encoding='utf-8') as f:
-             return f.read().strip()
-     except FileNotFoundError:
-         return "أنت مساعد ذكي مفيد."
-
- DEFAULT_SYSTEM_PROMPT = load_system_prompt()
-
- model_path = "inceptionai/jais-adapted-7b-chat"
-
- # Jais chat prompts from the documentation
- prompt_eng = """### Instruction:Your name is 'Jais', and you are named after Jebel Jais, the highest mountain in UAE. You were made by 'Inception' in the UAE. You are a helpful, respectful, and honest assistant. Always answer as helpfully as possible, while being safe. Complete the conversation between [|Human|] and [|AI|]:
- ### Input: [|Human|] {Question}
- [|AI|]
- ### Response :"""
-
- prompt_ar = """### Instruction:اسمك "جيس" وسميت على اسم جبل جيس اعلى جبل في الامارات. تم بنائك بواسطة Inception في الإمارات. أنت مساعد مفيد ومحترم وصادق. أجب دائمًا بأكبر قدر ممكن من المساعدة، مع الحفاظ على البقاء أمناً. أكمل المحادثة بين [|Human|] و[|AI|] :
- ### Input:[|Human|] {Question}
- [|AI|]
- ### Response :"""
-
- # Use HF_TOKEN from the environment if it is set
- hf_token = os.getenv("HF_TOKEN")
-
- device = "cuda" if torch.cuda.is_available() else "cpu"
-
- tokenizer = AutoTokenizer.from_pretrained(model_path, token=hf_token)
- model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", trust_remote_code=True, token=hf_token)
-
- if tokenizer.pad_token is None:
-     tokenizer.pad_token = tokenizer.eos_token
-
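- # Generation helper from the model documentation: tokenize the full prompt,
- # sample a completion, and return only the text after the last "### Response :".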
- def get_response(text, tokenizer=tokenizer, model=model):
-     """Same function as in the documentation, adapted for the chat model"""
-     tokenized = tokenizer(text, return_tensors="pt")
-     input_ids, attention_mask = tokenized['input_ids'].to(device), tokenized['attention_mask'].to(device)
-     input_len = input_ids.shape[-1]
-     generate_ids = model.generate(
-         input_ids,
-         attention_mask=attention_mask,
-         top_p=0.9,
-         temperature=0.3,
-         max_length=2048,
-         min_length=input_len + 4,
-         repetition_penalty=1.2,
-         do_sample=True,
-         pad_token_id=tokenizer.pad_token_id
-     )
-     response = tokenizer.batch_decode(
-         generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
-     )[0]
-     response = response.split("### Response :")[-1].lstrip()
-     return response
-
- def format_conversation_history(chat_history):
      messages = []
-     for item in chat_history:
-         role = item["role"]
-         content = item["content"]
-         if isinstance(content, list):
-             content = content[0]["text"] if content and "text" in content[0] else str(content)
-         messages.append({"role": role, "content": content})
      return messages

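- # Heuristic: count characters in the Arabic Unicode block (U+0600 to U+06FF);
- # if more than 30% of the non-space characters are Arabic, reply in Arabic.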
- def detect_language(text):
-     """Simple language detection - Arabic vs English"""
-     arabic_chars = sum(1 for char in text if '\u0600' <= char <= '\u06FF')
-     total_chars = len(text.replace(' ', ''))
-
-     if total_chars == 0:
-         return 'ar'  # default to Arabic
-
-     arabic_ratio = arabic_chars / total_chars
-     return 'ar' if arabic_ratio > 0.3 else 'en'

  @spaces.GPU()
- def generate_response(input_data, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty):
-     # Detect the language of the current question
-     lang = detect_language(input_data)
-     prompt_template = prompt_ar if lang == 'ar' else prompt_eng
-
-     # Build the conversation in the Jais format
-     conversation_parts = []
-
-     # Add the system prompt as part of the instruction (keep it short for Jais)
-     system_instruction = "اسمك \"أليكس\" وأنت مساعد خدمة العملاء في شركة TechSolutions. مهمتك مساعدة العملاء في حل مشاكلهم مع المنتجات والإجابة عن أسئلتهم حول الخدمات. كن ودوداً وصبوراً ومحترماً. أجب بالعربية أو الإنجليزية حسب تفضيل العميل. ابدأ بالتحية وكن مباشراً في الحلول."
-
-     # Add the chat history
-     if chat_history:
-         for item in chat_history:
-             role = item["role"]
-             content = item["content"]
-             if isinstance(content, list):
-                 content = content[0]["text"] if content and "text" in content[0] else str(content)

-             if role == "user":
-                 conversation_parts.append(f"[|Human|] {content}")
-             elif role == "assistant":
-                 conversation_parts.append(f"[|AI|] {content}")
-
-     # Add the current user message
-     conversation_parts.append(f"[|Human|] {input_data}")
-     conversation_parts.append("[|AI|]")
-
-     # Join the conversation turns
-     conversation = "\n".join(conversation_parts)
-
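-     # Jais takes a single flat prompt: the instruction, the [|Human|]/[|AI|]
-     # turns as the input, and a trailing "### Response :" marker to complete.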
-     # Create the full prompt in the Jais format with our system prompt
-     full_prompt = f"### Instruction:{system_instruction}\n### Input:{conversation}\n### Response :"
-
-     try:
-         # Use the get_response helper from the documentation
-         response = get_response(full_prompt)
-
-         # Extract only the new reply (after "### Response :")
-         if "### Response :" in response:
-             response = response.split("### Response :")[-1].strip()

-         if not response:
-             response = "أهلاً! أنا أليكس مساعد خدمة العملاء. كيف أقدر أساعدك اليوم؟"

-         yield response

-     except Exception as e:
-         print(f"Error in generate_response: {e}")
-         import traceback
-         print(traceback.format_exc())
-         yield "أهلاً! أنا أليكس مساعد خدمة العملاء. كيف أقدر أساعدك اليوم؟"

  demo = gr.ChatInterface(
-     fn=generate_response,
-     additional_inputs=[
-         gr.Slider(label="الحد الأقصى للكلمات الجديدة", minimum=64, maximum=4096, step=1, value=2048),
-         gr.Slider(label="درجة الحرارة", minimum=0.1, maximum=2.0, step=0.1, value=0.7),
-         gr.Slider(label="Top-p", minimum=0.05, maximum=1.0, step=0.05, value=0.9),
-         gr.Slider(label="Top-k", minimum=1, maximum=100, step=1, value=50),
-         gr.Slider(label="عقوبة التكرار", minimum=1.0, maximum=2.0, step=0.05, value=1.0)
-     ],
-     examples=[
-         [{"text": "النت عندي معطل من الصبح، تقدر تساعدني؟"}],
-         [{"text": "عندي مشكلة بالاتصال بالواي فاي"}],
-         [{"text": "شنو الباقات المتوفرة عندكم؟"}],
-         [{"text": "كيف أعيد ضبط الجهاز؟"}],
-         [{"text": "My device is not working properly"}],
-     ],
-     cache_examples=False,
      type="messages",
-     title="دعم عملاء TechSolutions - مساعد أليكس (العراقي)",
-     description="""🤖 مساعد خدمة عملاء ذكي لـ TechSolutions
-
- ✨ المميزات:
- - 🌐 دعم ثنائي اللغة (عربي وإنجليزي)
- - 💬 لهجة محادثة طبيعية
- - 🔧 دعم فني واستكشاف الأخطاء
- - 📋 معلومات الخدمات والإرشاد
- - 🎯 مدعوم بـ موديل Unsloth Meta-Llama-3.1-8B-Instruct (مع تحسينات الأداء)
-
- احجي مع أليكس لحل مشاكلك التقنية، استفسر عن الخدمات، أو احصل على معلومات المنتجات.""",
-     fill_height=True,
-     textbox=gr.Textbox(
-         label="اكتب رسالتك هنا",
-         placeholder="مثال: عندي مشكلة بالجهاز..."
      ),
-     stop_btn="إيقاف التوليد",
-     multimodal=False,
-     theme=gr.themes.Soft()
  )

  if __name__ == "__main__":
  import os
+ import pathlib
+ import tempfile
+ from collections.abc import Iterator
+ from threading import Thread
+
+ import av
  import gradio as gr
  import spaces
+ import torch
+ from transformers import AutoModelForImageTextToText, AutoProcessor
+ from transformers.generation.streamers import TextIteratorStreamer
+
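+ # The processor and model load once at startup; bfloat16 halves memory use
+ # compared to float32, and device_map="auto" handles device placement.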
+ # Model configuration
+ model_id = "anaspro/Shako-4B-it-v2"
+ processor = AutoProcessor.from_pretrained(model_id)
+ model = AutoModelForImageTextToText.from_pretrained(
+     model_id,
+     device_map="auto",
+     torch_dtype=torch.bfloat16
+ )

+ # Supported file types
+ IMAGE_FILE_TYPES = (".jpg", ".jpeg", ".png", ".webp")
+ VIDEO_FILE_TYPES = (".mp4", ".mov", ".webm")
+ AUDIO_FILE_TYPES = (".mp3", ".wav")
+
+ # Video processing settings
+ TARGET_FPS = int(os.getenv("TARGET_FPS", "3"))
+ MAX_FRAMES = int(os.getenv("MAX_FRAMES", "30"))
+ MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "10_000"))
+
+
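+ # Classify an upload by extension; str.endswith accepts a tuple, so each
+ # group of extensions is checked in a single call.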
+ def get_file_type(path: str) -> str:
+     if path.endswith(IMAGE_FILE_TYPES):
+         return "image"
+     if path.endswith(VIDEO_FILE_TYPES):
+         return "video"
+     if path.endswith(AUDIO_FILE_TYPES):
+         return "audio"
+     error_message = f"Unsupported file type: {path}"
+     raise ValueError(error_message)
+
+
+ def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
+     video_count = 0
+     non_video_count = 0
+     for path in paths:
+         if path.endswith(VIDEO_FILE_TYPES):
+             video_count += 1
+         else:
+             non_video_count += 1
+     return video_count, non_video_count
+
+
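+ # This Space accepts at most one video per message, and a video cannot be
+ # combined with images or audio in the same message.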
+ def validate_media_constraints(message: dict) -> bool:
+     video_count, non_video_count = count_files_in_new_message(message["files"])
+     if video_count > 1:
+         gr.Warning("Only one video is supported.")
+         return False
+     if video_count == 1 and non_video_count > 0:
+         gr.Warning("Mixing images and videos is not allowed.")
+         return False
+     return True
+
+
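+ # Frame sampling: decode the video once, sequentially, keeping each frame whose
+ # timestamp falls within half a sampling interval of the next target time, so
+ # the output approximates target_fps without any seeking.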
+ def extract_frames_to_tempdir(
+     video_path: str,
+     target_fps: float,
+     max_frames: int | None = None,
+     parent_dir: str | None = None,
+     prefix: str = "frames_",
+ ) -> str:
+     temp_dir = tempfile.mkdtemp(prefix=prefix, dir=parent_dir)
+
+     container = av.open(video_path)
+     video_stream = container.streams.video[0]
+
+     if video_stream.duration is None or video_stream.time_base is None:
+         raise ValueError("video_stream is missing duration or time_base")
+
+     time_base = video_stream.time_base
+     duration = float(video_stream.duration * time_base)
+     interval = 1.0 / target_fps
+
+     total_frames = int(duration * target_fps)
+     if max_frames is not None:
+         total_frames = min(total_frames, max_frames)
+
+     target_times = [i * interval for i in range(total_frames)]
+     target_index = 0
+
+     for frame in container.decode(video=0):
+         if frame.pts is None:
+             continue
+
+         timestamp = float(frame.pts * time_base)
+
+         if target_index < len(target_times) and abs(timestamp - target_times[target_index]) < (interval / 2):
+             frame_path = pathlib.Path(temp_dir) / f"frame_{target_index:04d}.jpg"
+             frame.to_image().save(frame_path)
+             target_index += 1
+
+         if max_frames is not None and target_index >= max_frames:
+             break
+
+     container.close()
+     return temp_dir
+
+
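+ # Convert the new user turn into chat-template content parts; a lone video is
+ # expanded into its sampled frames and passed to the model as images.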
+ def process_new_user_message(message: dict) -> list[dict]:
+     if not message["files"]:
+         return [{"type": "text", "text": message["text"]}]
+
+     file_types = [get_file_type(path) for path in message["files"]]
+
+     if len(file_types) == 1 and file_types[0] == "video":
+         gr.Info(f"Video will be processed at {TARGET_FPS} FPS, max {MAX_FRAMES} frames in this Space.")
+
+         temp_dir = extract_frames_to_tempdir(
+             message["files"][0],
+             target_fps=TARGET_FPS,
+             max_frames=MAX_FRAMES,
+         )
+         paths = sorted(pathlib.Path(temp_dir).glob("*.jpg"))
+         return [
+             {"type": "text", "text": message["text"]},
+             *[{"type": "image", "image": path.as_posix()} for path in paths],
+         ]
+
+     return [
+         {"type": "text", "text": message["text"]},
+         *[{"type": file_type, file_type: path} for path, file_type in zip(message["files"], file_types, strict=True)],
+     ]
+
+
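+ # Rebuild the chat-template message list from the Gradio history; consecutive
+ # user items (text and file paths) are merged into a single user turn.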
+ def process_history(history: list[dict]) -> list[dict]:
      messages = []
+     current_user_content: list[dict] = []
+     for item in history:
+         if item["role"] == "assistant":
+             if current_user_content:
+                 messages.append({"role": "user", "content": current_user_content})
+                 current_user_content = []
+             messages.append({"role": "assistant", "content": [{"type": "text", "text": item["content"]}]})
+         else:
+             content = item["content"]
+             if isinstance(content, str):
+                 current_user_content.append({"type": "text", "text": content})
+             else:
+                 filepath = content[0]
+                 file_type = get_file_type(filepath)
+                 current_user_content.append({"type": file_type, file_type: filepath})
      return messages


  @spaces.GPU()
+ @torch.inference_mode()
+ def generate(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
+     if not validate_media_constraints(message):
+         yield ""
+         return

+     messages = []
+     if system_prompt:
+         messages.append({"role": "system", "content": [{"type": "text", "text": system_prompt}]})
+     messages.extend(process_history(history))
+     messages.append({"role": "user", "content": process_new_user_message(message)})
+
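+     # Tokenize the conversation with the model's chat template and enforce a
+     # hard input-token cap before moving anything to the GPU.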
+     inputs = processor.apply_chat_template(
+         messages,
+         add_generation_prompt=True,
+         tokenize=True,
+         return_dict=True,
+         return_tensors="pt",
+     )
+     n_tokens = inputs["input_ids"].shape[1]
+     if n_tokens > MAX_INPUT_TOKENS:
+         gr.Warning(
+             f"Input too long. Max {MAX_INPUT_TOKENS} tokens. Got {n_tokens} tokens. This limit is set to avoid CUDA out-of-memory errors in this Space."
+         )
+         yield ""
+         return
+
+     inputs = inputs.to(device=model.device, dtype=torch.bfloat16)
+
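+     # Stream the reply: model.generate runs on a background thread while the
+     # streamer yields decoded text, so partial output reaches the UI immediately.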
+     streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
+     generate_kwargs = dict(
+         inputs,
+         streamer=streamer,
+         max_new_tokens=max_new_tokens,
+         do_sample=False,
+         disable_compile=True,
+     )
+     t = Thread(target=model.generate, kwargs=generate_kwargs)
+     t.start()

+     output = ""
+     for delta in streamer:
+         output += delta
+         yield output


+ # Examples for the chat interface (with additional inputs: system_prompt, max_new_tokens)
+ examples = [
+     ["انت موديل عراقي تحكي هعراقي فقط وتكون ترفيهي", 700]
+ ]

+ # Create the chat interface
  demo = gr.ChatInterface(
+     fn=generate,
      type="messages",
+     textbox=gr.MultimodalTextbox(
+         file_types=list(IMAGE_FILE_TYPES + VIDEO_FILE_TYPES + AUDIO_FILE_TYPES),
+         file_count="multiple",
+         autofocus=True,
      ),
+     multimodal=True,
+     additional_inputs=[
+         gr.Textbox(label="System Prompt", value="انت ذكاء صناعي يتحدث باللهجة العراقية بس ما تستخدم فصحى ابدا"),
+         gr.Slider(label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700),
+     ],
+     title="Shako IRAQI AI",
+     examples=examples,
+     stop_btn=False,
  )

  if __name__ == "__main__":