boo4blue committed
Commit e051aaf · verified · 1 Parent(s): a3465da

Update app.py

Files changed (1)
  1. app.py +239 -257
app.py CHANGED
@@ -1,304 +1,286 @@
- # app.py
- # Universal AI for Hugging Face Spaces — text + optional image, memory, system prompt, and generation controls.
- # Works with both the Gradio UI and the Hugging Face Inference API.
- #
- # Inference API payloads:
- # - Simple (string):
- #   { "inputs": "Explain transformers in simple terms." }
- #
- # - Universal (JSON):
- #   {
- #     "mode": "chat",
- #     "inputs": "Describe this image and write a tweet about it.",
- #     "image": "<base64-encoded-image-optional>",
- #     "options": { "temperature": 0.7, "max_new_tokens": 256 },
- #     "system": "You are a concise, tactical assistant.",
- #     "reset": false
- #   }
-
  import os
- import io
- import json
- import base64
- from collections import deque
-
- from PIL import Image
  import gradio as gr
 
- from transformers import pipeline
-
- # -----------------------------
- # Model choices (tune as needed)
- # -----------------------------
- TEXT_MODEL = os.getenv("TEXT_MODEL", "mistralai/Mistral-7B-Instruct-v0.2")
- # Pick a lightweight image caption model so it runs on free tiers
- IMAGE_CAPTION_MODEL = os.getenv("IMAGE_MODEL", "nlpconnect/vit-gpt2-image-captioning")
 
  # -----------------------------
- # Load pipelines
  # -----------------------------
- text_gen = pipeline(
-     "text-generation",
-     model=TEXT_MODEL,
-     trust_remote_code=True,
-     device_map="auto"
  )
 
- # Lazily load image captioning when first used (saves cold start)
- _image_captioner = None
- def get_image_captioner():
-     global _image_captioner
-     if _image_captioner is None:
-         _image_captioner = pipeline("image-to-text", model=IMAGE_CAPTION_MODEL, device_map="auto")
-     return _image_captioner
 
  # -----------------------------
- # Memory and prompting
  # -----------------------------
- # Keep a short rolling memory of turns: [(user, assistant), ...]
- MEMORY_MAX_TURNS = int(os.getenv("MEMORY_TURNS", "6"))
- memory = deque(maxlen=MEMORY_MAX_TURNS)
-
- DEFAULT_SYSTEM_PROMPT = (
-     "You are UniversalAI: a concise, capable, and adaptive assistant. "
-     "Always be clear, practical, and accurate. If tools are unavailable, say so briefly then proceed with your best reasoning. "
-     "Use step-by-step explanations only when they add value."
  )
 
- def build_prompt(user_msg: str, system_prompt: str) -> str:
-     # Construct a clean, instruction-tuned style prompt.
-     # Mistral Instruct can respond well to plain text, but [INST] tags often help.
-     lines = []
-     sys = system_prompt.strip() if system_prompt else DEFAULT_SYSTEM_PROMPT
-     lines.append(f"<<SYS>>\n{sys}\n<</SYS>>")
-     for u, a in list(memory):
-         lines.append(f"[INST] {u.strip()} [/INST]\n{a.strip()}")
      lines.append(f"[INST] {user_msg.strip()} [/INST]\n")
      return "\n".join(lines)
 
  # -----------------------------
- # Utilities
  # -----------------------------
- def ensure_pil_image(img_input):
-     # Handles either Gradio image (PIL) or base64 string
-     if img_input is None:
-         return None
-     if isinstance(img_input, Image.Image):
-         return img_input
-     if isinstance(img_input, str):
-         try:
-             # If it's a data URL, strip the prefix
-             if img_input.startswith("data:"):
-                 img_input = img_input.split(",", 1)[1]
-             data = base64.b64decode(img_input)
-             return Image.open(io.BytesIO(data)).convert("RGB")
-         except Exception:
-             return None
-     return None
-
- def caption_image(pil_img):
-     cap = get_image_captioner()
-     try:
-         result = cap(pil_img)
-         if isinstance(result, list) and len(result) and "generated_text" in result[0]:
-             return result[0]["generated_text"]
-         # Some image-to-text pipelines return a string directly
-         if isinstance(result, str):
-             return result
-     except Exception as e:
-         return f"(Image captioning failed: {e})"
-     return "(No caption generated)"
-
- def generate_text(prompt, temperature=0.7, max_new_tokens=256, top_p=0.9, do_sample=True):
-     out = text_gen(
-         prompt,
-         temperature=float(temperature),
          max_new_tokens=int(max_new_tokens),
-         top_p=float(top_p),
-         do_sample=bool(do_sample),
-         pad_token_id=50256  # safe default for many GPT-like models
      )
-     # Pipeline returns a list of dicts with 'generated_text'
-     return out[0]["generated_text"]
 
- def extract_assistant_reply(full_generated_text: str, user_prompt: str) -> str:
-     # Heuristic: get only the text after the final user [INST] block.
-     # If tags not found, return the full generated text.
-     try:
-         marker = f"[INST] {user_prompt.strip()} [/INST]"
-         if marker in full_generated_text:
-             return full_generated_text.split(marker, 1)[-1].strip()
-         return full_generated_text.strip()
-     except Exception:
-         return full_generated_text.strip()
 
  # -----------------------------
- # Core handler (works for both UI and API)
  # -----------------------------
- def handle_request(
-     user_input: str = "",
-     image_input=None,
-     temperature: float = 0.7,
-     max_new_tokens: int = 256,
-     system_prompt: str = DEFAULT_SYSTEM_PROMPT,
-     reset_memory: bool = False
- ):
-     # Reset memory if requested
-     if reset_memory:
-         memory.clear()
-
-     # If image exists, caption it and augment the user input
-     pil_img = ensure_pil_image(image_input)
-     vision_context = ""
-     if pil_img is not None:
-         caption = caption_image(pil_img)
-         vision_context = f"\n[Image context]: {caption}"
-
-     final_user = (user_input or "").strip()
-     if vision_context:
-         final_user = f"{final_user}\n{vision_context}".strip()
-
-     # Build final prompt with system + memory
-     full_prompt = build_prompt(final_user, system_prompt)
-
-     # Generate
-     gen_text = generate_text(
-         full_prompt,
-         temperature=temperature,
-         max_new_tokens=max_new_tokens,
-         top_p=0.9,
-         do_sample=True
-     )
-     assistant = extract_assistant_reply(gen_text, final_user)
 
-     # Update memory
-     if final_user:
-         memory.append((final_user, assistant))
 
-     return assistant
 
  # -----------------------------
- # Inference API adapter
  # -----------------------------
- # This lets you send either a simple string or a JSON object in "inputs".
- # If "inputs" is dict-like JSON, we extract 'mode', 'image', 'options', etc.
- def hf_api_predict(inputs):
      try:
-         # Case 1: inputs is already a dict-like object (Gradio may pass parsed JSON)
-         payload = inputs if isinstance(inputs, dict) else None
-
-         # Case 2: inputs is a string that might be JSON
-         if payload is None and isinstance(inputs, str) and inputs.strip().startswith("{"):
-             payload = json.loads(inputs)
-
-         if payload is None:
-             # Treat as plain prompt
-             return handle_request(user_input=str(inputs))
-
-         # Extract universal fields
-         mode = payload.get("mode", "chat")
-         system = payload.get("system", DEFAULT_SYSTEM_PROMPT)
-         reset = bool(payload.get("reset", False))
-         options = payload.get("options", {}) or {}
-
-         # Inputs can be a string or object
-         user_msg = payload.get("inputs", "")
-         image_b64 = payload.get("image", None)
-
-         temperature = float(options.get("temperature", 0.7))
-         max_new_tokens = int(options.get("max_new_tokens", 256))
-
-         # Run
-         reply = handle_request(
-             user_input=user_msg,
-             image_input=image_b64,
-             temperature=temperature,
-             max_new_tokens=max_new_tokens,
-             system_prompt=system,
-             reset_memory=reset
-         )
-         return reply
      except Exception as e:
-         return f"(Error parsing/processing request: {e})"
 
  # -----------------------------
- # Gradio UI
  # -----------------------------
- with gr.Blocks(title="UniversalAI — Text + Image, Memory, Controls") as demo:
-     gr.Markdown("## UniversalAI Text + Image, Memory, Controls")
 
      with gr.Row():
-         with gr.Column():
-             sys_box = gr.Textbox(
-                 label="System prompt",
-                 value=DEFAULT_SYSTEM_PROMPT,
-                 lines=3
-             )
-             prompt_box = gr.Textbox(
-                 label="Your message",
-                 placeholder="Ask anything… (You can also attach an image)",
-                 lines=4
-             )
-             image_box = gr.Image(
-                 label="Optional image",
-                 type="pil"
              )
          with gr.Row():
-             temp_slider = gr.Slider(
-                 minimum=0.1, maximum=1.2, value=0.7, step=0.05,
-                 label="Creativity (temperature)"
-             )
-             max_tokens_slider = gr.Slider(
-                 minimum=32, maximum=1024, value=256, step=16,
-                 label="Max new tokens"
              )
-             reset_chk = gr.Checkbox(
-                 label="Reset memory before this message",
-                 value=False
-             )
-             submit_btn = gr.Button("Send", variant="primary")
-             clear_btn = gr.Button("Clear memory", variant="secondary")
 
-         with gr.Column():
-             output_box = gr.Textbox(
-                 label="Assistant",
-                 lines=20
              )
-
-     def ui_send(system, prompt, image, temp, max_new, reset):
-         reply = handle_request(
-             user_input=prompt or "",
-             image_input=image,
-             temperature=temp,
-             max_new_tokens=int(max_new),
-             system_prompt=system or DEFAULT_SYSTEM_PROMPT,
-             reset_memory=bool(reset)
-         )
-         return reply
-
-     def ui_clear():
-         memory.clear()
-         return "Memory cleared."
-
-     submit_btn.click(
-         fn=ui_send,
-         inputs=[sys_box, prompt_box, image_box, temp_slider, max_tokens_slider, reset_chk],
-         outputs=[output_box]
      )
 
-     clear_btn.click(
-         fn=ui_clear,
-         inputs=[],
-         outputs=[output_box]
      )
 
-     # Expose a simple API endpoint for HF Inference API callers:
-     # Map a Textbox "inputs" to our universal parser.
-     # This keeps the official /models/<user>/<space> endpoint working with JSON too.
-     api_in = gr.Textbox(label="API (inputs)", visible=False)
-     api_out = gr.Textbox(label="API (outputs)", visible=False)
-     demo.load(fn=lambda: "", inputs=None, outputs=None)  # no-op to ensure Blocks initializes
-     demo.add_api_route("/predict", hf_api_predict, inputs=api_in, outputs=api_out)  # Gradio 4.x
 
  if __name__ == "__main__":
-     demo.launch()
 
  import os
+ import time
  import gradio as gr
 
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 
  # -----------------------------
+ # Config
  # -----------------------------
+ DEFAULT_MODEL = os.getenv("TEXT_MODEL", "mistralai/Mistral-7B-Instruct-v0.2")
+ DEFAULT_SYSTEM = (
+     "You are UniversalAI — a concise, capable, adaptive assistant. "
+     "Answer clearly, use Markdown for structure, show code in fenced blocks. "
+     "Ask clarifying questions when needed. Keep answers tight but complete."
  )
+ DEFAULT_TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7"))
+ DEFAULT_MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "512"))
 
+ # Safety pad token for many GPT-like models
+ DEFAULT_PAD_TOKEN_ID = 50256
 
  # -----------------------------
+ # Load model
  # -----------------------------
+ torch.set_grad_enabled(False)
+
+ tokenizer = AutoTokenizer.from_pretrained(DEFAULT_MODEL, use_fast=True, trust_remote_code=True)
+ if tokenizer.pad_token_id is None:
+     tokenizer.pad_token_id = DEFAULT_PAD_TOKEN_ID
+
+ model = AutoModelForCausalLM.from_pretrained(
+     DEFAULT_MODEL,
+     torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
+     device_map="auto",
+     trust_remote_code=True
  )
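+ # Note: bfloat16 is selected whenever CUDA is available; pre-Ampere GPUs lack
+ # native bfloat16 support, so float16 may be the safer choice on older cards.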
 
+ # -----------------------------
+ # Prompt building (ChatML/INST hybrid)
+ # -----------------------------
+ def build_prompt(system_prompt: str, history: list[tuple[str, str]], user_msg: str) -> str:
+     # history is a list of (user, assistant) tuples
+     sys = system_prompt.strip() if system_prompt else DEFAULT_SYSTEM
+     lines = [f"<<SYS>>\n{sys}\n<</SYS>>"]
+     for u, a in history:
+         u = (u or "").strip()
+         a = (a or "").strip()
+         if not u and not a:
+             continue
+         lines.append(f"[INST] {u} [/INST]\n{a}")
      lines.append(f"[INST] {user_msg.strip()} [/INST]\n")
      return "\n".join(lines)
 
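+ # For example, build_prompt("Be terse.", [("Hi", "Hello!")], "What is Mistral?") returns:
+ #   <<SYS>>
+ #   Be terse.
+ #   <</SYS>>
+ #   [INST] Hi [/INST]
+ #   Hello!
+ #   [INST] What is Mistral? [/INST]
+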
  # -----------------------------
+ # Generation (streaming)
  # -----------------------------
+ def stream_generate(
+     prompt: str,
+     temperature: float,
+     max_new_tokens: int,
+ ):
+     inputs = tokenizer(prompt, return_tensors="pt")
+     for k in inputs:
+         inputs[k] = inputs[k].to(model.device)
+
+     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+     gen_kwargs = dict(
+         **inputs,
+         streamer=streamer,
+         do_sample=True,
+         temperature=float(max(0.01, temperature)),
+         top_p=0.9,
          max_new_tokens=int(max_new_tokens),
+         repetition_penalty=1.05,
+         pad_token_id=tokenizer.pad_token_id,
      )
 
+     # Run generation in a background thread so we can yield tokens
+     import threading
+     thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
+     thread.start()
+
+     partial = ""
+     for new_text in streamer:
+         partial += new_text
+         yield partial
 
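+ # Each yielded value is the full accumulated text so far, not a delta:
+ #   for text in stream_generate(build_prompt(DEFAULT_SYSTEM, [], "Hi"), 0.7, 64):
+ #       print(text)
+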
  # -----------------------------
+ # Slash commands
  # -----------------------------
+ def apply_slash_commands(user_msg: str, system_prompt: str, history: list[tuple[str, str]]):
+     msg = (user_msg or "").strip()
+     sys = system_prompt
+
+     if msg.lower().startswith("/reset"):
+         return "", sys, [], "Memory cleared."
 
+     if msg.lower().startswith("/system:"):
+         new_sys = msg.split(":", 1)[1].strip()
+         if new_sys:
+             return "", new_sys, history, "System prompt updated."
+         # Empty command: surface the note instead of sending "/system:" to the model
+         return "", sys, history, "No system text provided."
 
+     return msg, sys, history, None
 
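+ # e.g. apply_slash_commands("/system: Talk like a pirate.", sys, hist)
+ #   -> ("", "Talk like a pirate.", hist, "System prompt updated.")
+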
  # -----------------------------
+ # Chat handlers
  # -----------------------------
+ def chat_submit(
+     user_msg, chat_history, system_prompt, temperature, max_new_tokens, last_user
+ ):
+     # Initialize states
+     chat_history = chat_history or []
+     last_user = ""
+
+     # Slash commands
+     processed_msg, new_system, new_history, note = apply_slash_commands(user_msg, system_prompt, chat_history)
+     if processed_msg == "" and note is not None:
+         # Command-only case: show the system note (append to new_history so /reset really clears the view)
+         new_history.append((user_msg, note))
+         yield "", new_history, new_system, last_user
+         return
+
+     # Build prompt
+     prompt = build_prompt(new_system, new_history, processed_msg)
+
+     # Add placeholder for streaming
+     new_history.append((processed_msg, ""))
+
+     # Stream tokens into the last assistant message
+     for partial in stream_generate(prompt, temperature, max_new_tokens):
+         new_history[-1] = (processed_msg, partial)
+         yield "", new_history, new_system, processed_msg  # keep last_user for regenerate
+
+ def regenerate(chat_history, system_prompt, temperature, max_new_tokens, last_user):
+     chat_history = chat_history or []
+     if not chat_history:
+         yield chat_history
+         return
+     # Re-answer the last user message, taken from state (fall back to the last chat turn)
+     user_msg = last_user or chat_history[-1][0]
+     if not user_msg:
+         yield chat_history
+         return
+
+     # Remove the last assistant turn if it matches last_user
+     if chat_history and chat_history[-1][0] == user_msg:
+         chat_history.pop()
+
+     # Build prompt from the remaining history
+     prompt = build_prompt(system_prompt, chat_history, user_msg)
+     chat_history.append((user_msg, ""))
+
+     for partial in stream_generate(prompt, temperature, max_new_tokens):
+         chat_history[-1] = (user_msg, partial)
+         yield chat_history
+
+ def clear_memory():
+     return [], ""
+
+ # -----------------------------
+ # Inference API adapter (so /models/<user>/<space> works)
+ # Accepts either a plain string or JSON:
+ #   { "inputs": "...", "system": "...", "options": { "temperature": 0.7, "max_new_tokens": 256 }, "history": [...] }
+ # -----------------------------
+ def hf_inference_api(inputs):
      try:
+         # If inputs is dict-like, use it; else treat it as a plain prompt
+         if isinstance(inputs, dict):
+             prompt_text = inputs.get("inputs", "")
+             system = inputs.get("system", DEFAULT_SYSTEM)
+             options = inputs.get("options", {}) or {}
+             temp = float(options.get("temperature", DEFAULT_TEMPERATURE))
+             max_new = int(options.get("max_new_tokens", DEFAULT_MAX_NEW_TOKENS))
+             history = inputs.get("history", [])
+         else:
+             prompt_text = str(inputs)
+             system = DEFAULT_SYSTEM
+             temp = DEFAULT_TEMPERATURE
+             max_new = DEFAULT_MAX_NEW_TOKENS
+             history = []
+
+         prompt = build_prompt(system, history, prompt_text)
+         out = ""
+         for chunk in stream_generate(prompt, temp, max_new):
+             out = chunk
+         # Return the final text
+         return out
      except Exception as e:
+         return f"(Error: {e})"
 
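+ # Client sketch (the Space id is a placeholder; assumes the "/predict" route wired below):
+ #   from gradio_client import Client
+ #   client = Client("<user>/<space>")
+ #   print(client.predict("Explain transformers in one paragraph.", api_name="/predict"))
+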
  # -----------------------------
+ # UI (ChatGPT-like)
  # -----------------------------
+ CSS = """
+ :root { --radius: 14px; }
+ #chatbot { height: 70vh !important; }
+ .gradio-container { max-width: 1200px !important; margin: auto; }
+ """
+
+ with gr.Blocks(title="UniversalAI — ChatGPT-style", css=CSS, theme=gr.themes.Soft()) as demo:
+     gr.Markdown("### UniversalAI — ChatGPT-style")
 
      with gr.Row():
+         with gr.Column(scale=3):
+             chatbot = gr.Chatbot(
+                 label="Chat",
+                 bubble_full_width=False,
+                 render_markdown=True,
+                 likeable=True,
+                 layout="bubble",
+                 height=520,
+                 elem_id="chatbot"
              )
              with gr.Row():
+                 user_box = gr.Textbox(
+                     placeholder="Message UniversalAI… (commands: /reset, /system: <prompt>)",
+                     show_label=False,
+                     lines=3
                  )
+             with gr.Row():
+                 send_btn = gr.Button("Send", variant="primary")
+                 regen_btn = gr.Button("Regenerate", variant="secondary")
+                 clear_btn = gr.Button("Clear", variant="secondary")
 
+         with gr.Column(scale=2):
+             sys_box = gr.Textbox(
+                 value=DEFAULT_SYSTEM,
+                 label="System prompt",
+                 lines=6
              )
+             temp_slider = gr.Slider(
+                 minimum=0.1, maximum=1.2, value=DEFAULT_TEMPERATURE, step=0.05,
+                 label="Creativity (temperature)"
+             )
+             max_tokens = gr.Slider(
+                 minimum=64, maximum=2048, value=DEFAULT_MAX_NEW_TOKENS, step=32,
+                 label="Max new tokens"
+             )
+             gr.Markdown("> Tip: Use /reset to clear memory. Use /system: to change the assistant persona on the fly.")
+
+     # Session state
+     state_history = gr.State([])   # list[(user, assistant)]
+     state_last_user = gr.State("") # last user message for regenerate
+
+     # Wiring
+     send_evt = send_btn.click(
+         fn=chat_submit,
+         inputs=[user_box, state_history, sys_box, temp_slider, max_tokens, state_last_user],
+         outputs=[user_box, chatbot, sys_box, state_last_user],
+         queue=True
      )
+     send_evt.then(lambda h: h, inputs=chatbot, outputs=state_history)
+
+     # Allow Enter to send
+     enter_evt = user_box.submit(
+         fn=chat_submit,
+         inputs=[user_box, state_history, sys_box, temp_slider, max_tokens, state_last_user],
+         outputs=[user_box, chatbot, sys_box, state_last_user],
+         queue=True
+     )
+     enter_evt.then(lambda h: h, inputs=chatbot, outputs=state_history)
 
+     regen_stream = regen_btn.click(
+         fn=regenerate,
+         inputs=[state_history, sys_box, temp_slider, max_tokens, state_last_user],
+         outputs=[chatbot],
+         queue=True
      )
+     regen_stream.then(lambda h: h, inputs=chatbot, outputs=state_history)
+
+     clear_btn.click(fn=clear_memory, inputs=None, outputs=[chatbot, state_last_user])
 
+     # Expose a simple route for Inference API callers. gr.Blocks has no add_api_route
+     # method, so register a hidden event with an api_name instead.
+     api_in = gr.Textbox(visible=False)
+     api_out = gr.Textbox(visible=False)
+     api_btn = gr.Button(visible=False)
+     api_btn.click(fn=hf_inference_api, inputs=api_in, outputs=api_out, api_name="predict")
 
  if __name__ == "__main__":
+     demo.queue().launch()