import os
import re
import json
import gradio as gr
from huggingface_hub import InferenceClient
from daggr import GradioNode, FnNode, Graph
# Startup marker so the Space logs show which pipeline variant booted.
print("BOOT: image → kosmos → zephyr (daggr, clean, commented)")
| # ============================================================================= | |
| # DEFAULT SYSTEM PROMPT | |
| # ============================================================================= | |
| # EN: | |
| # This is the default system prompt used to instruct the LLM. | |
| # It describes the task (generate a chatbot persona) and enforces | |
| # a strict output format (Title / System prompt / Example input). | |
| # | |
| # FR: | |
| # Prompt système par défaut envoyé au LLM. | |
| # Il décrit précisément la tâche et impose un format de sortie strict. | |
| # ============================================================================= | |
# Default system prompt for the LLM node.
# NOTE: the "Title:" / "System prompt:" / "Example input:" labels in the
# required format below are a hard contract — parse_zephyr_output extracts
# the three fields by matching these exact labels, so do not rename them.
AGENT_MAKER_SYS = """
You are an AI whose job is to help users design a chatbot.
The chatbot’s personality must be inspired by the character and atmosphere
described by the user (based on an image description provided separately).
You must:
- Respond succinctly in a friendly tone
- Generate exactly three things:
1) A catchy chatbot title
2) A system prompt defining the chatbot’s personality and behavior
3) A very short example of what a *future user* might say to this chatbot
Important rules:
- The system prompt must NOT mention any image or visual input
- The example input must represent a realistic first message
that an end-user would send when chatting with this bot
- STOP immediately after the example input
- Follow EXACTLY this format (including labels and line breaks):
"Sure, I'd be happy to help you build a bot! I'm generating a title, a system prompt,
and an example user message. How do they sound?
Title: ...
System prompt: ...
Example input: ..."
""".strip()
| # ============================================================================= | |
| # NODE 0 — IMAGE INPUT (PASSTHROUGH) | |
| # ============================================================================= | |
| # EN: | |
| # Daggr requires explicit nodes. | |
| # This FnNode exists only to expose the image upload as a visible node | |
| # and to make data flow explicit in the graph. | |
| # | |
| # FR: | |
| # Node pure "pass-through" pour exposer l’upload d’image dans le graphe. | |
| # Aucun traitement ici, juste de la lisibilité et du debugging. | |
| # ============================================================================= | |
def passthrough_image(image):
    """Return *image* untouched.

    A do-nothing hop: daggr graphs are built from explicit nodes, so this
    function exists only to surface the upload as a visible node and make
    the data flow easy to inspect/debug. No processing happens here.
    """
    print("IMAGE INPUT:", image)
    return image
# Pass-through node that exposes the image upload as an explicit graph step.
image_input_node = FnNode(
    fn=passthrough_image,
    inputs={
        # Upload widget; type="filepath" means downstream nodes receive a
        # local file path string rather than decoded pixel data.
        "image": gr.Image(
            label="Upload image",
            type="filepath",
        )
    },
    outputs={
        # Echo of the uploaded image — purely for visual debugging in the UI.
        "image": gr.Image(label="Input image"),
    },
)
| # ============================================================================= | |
| # KOSMOS POSTPROCESS — CLEAN CAPTION | |
| # ============================================================================= | |
| # EN: | |
| # Kosmos-2 returns: | |
| # 0) image (FileData dict) | |
| # 1) highlighted_text (list of token dicts) | |
| # 2) entities (string) | |
| # | |
| # Daggr passes all outputs positionally to postprocess. | |
| # We collapse everything into a single clean caption string. | |
| # | |
| # FR: | |
| # Le postprocess reconstruit une phrase lisible à partir des tokens | |
| # et supprime le préfixe "Describe this image in detail:" si présent. | |
| # ============================================================================= | |
def kosmos_postprocess(image_out, highlighted_text, *rest):
    """Collapse Kosmos-2 output into a single clean caption string.

    Daggr passes all Space outputs positionally: ``image_out`` (FileData
    dict) and any trailing outputs in ``*rest`` are ignored; only the
    ``highlighted_text`` token list is used. Tokens are concatenated and
    the "Describe this image in detail(s):" prompt prefix, if present,
    is stripped (case-insensitively).

    Returns "" when ``highlighted_text`` is not a list.
    """
    if not isinstance(highlighted_text, list):
        return ""

    # Concatenate the "token" field of every dict entry; skip anything else.
    pieces = []
    for entry in highlighted_text:
        if isinstance(entry, dict):
            pieces.append(entry.get("token", ""))
    caption = "".join(pieces).strip()

    # Drop the Kosmos "Detailed" instruction prefix when the model echoes it.
    prefix = re.compile(
        r"^\s*Describe\s+this\s+image\s+in\s+details?\s*:\s*",
        re.IGNORECASE,
    )
    caption = prefix.sub("", caption).strip()

    print("CAPTION:", repr(caption))
    return caption
| # ============================================================================= | |
| # KOSMOS NODE | |
| # ============================================================================= | |
| # EN: | |
| # Calls the Kosmos-2 Gradio Space via API. | |
| # Output is reduced to ONE value: a clean caption string. | |
| # | |
| # FR: | |
| # Appel du Space Kosmos-2 via GradioNode. | |
| # La sortie est volontairement simplifiée à une seule string. | |
| # ============================================================================= | |
# Caption-style selector sent to the Kosmos-2 Space.
# NOTE(review): the radio value ("Brief"/"Detailed") is wired into the
# Space's "text_input" parameter — confirm this matches the
# /generate_predictions API signature of fffiloni/Kosmos-2-API.
desc_type = gr.Radio(
    ["Brief", "Detailed"],
    label="Description Type",
    value="Detailed",
)
# Remote call to the Kosmos-2 Space; kosmos_postprocess collapses its
# multi-part output (image, highlighted tokens, entities) into the single
# "caption" string exposed below.
kosmos_caption = GradioNode(
    space_or_url="fffiloni/Kosmos-2-API",
    api_name="/generate_predictions",
    inputs={
        "image_input": image_input_node.image,
        "text_input": desc_type,
    },
    postprocess=kosmos_postprocess,
    outputs={
        "caption": gr.Textbox(label="Image caption", lines=3),
    },
)
| # ============================================================================= | |
| # BUILD CONVERSATION PAYLOAD (STRING) | |
| # ============================================================================= | |
| # EN: | |
| # Zephyr (via InferenceClient) expects a list[dict] messages. | |
| # Daggr UI does NOT handle gr.JSON reliably yet. | |
| # We therefore pass a JSON STRING and parse it inside the LLM node. | |
| # | |
| # FR: | |
| # On transporte la conversation sous forme de string JSON, | |
| # pour éviter les problèmes d’UI liés à gr.JSON dans daggr. | |
| # ============================================================================= | |
def build_messages_json(caption: str, system_prompt: str) -> str:
    """Serialize a two-message (system, user) conversation as a JSON string.

    The conversation travels through the graph as a plain string (rather
    than gr.JSON) and is parsed back into list[dict] inside the LLM node.
    ``None`` inputs are treated as empty strings; both fields are stripped.

    Returns the JSON text (non-ASCII characters preserved).
    """
    system_text = (system_prompt or "").strip()
    user_text = (caption or "").strip()
    payload = json.dumps(
        [
            {"role": "system", "content": system_text},
            {"role": "user", "content": user_text},
        ],
        ensure_ascii=False,
    )
    print("MESSAGES_JSON head:", repr(payload[:200]))
    return payload
# Editable system prompt, pre-filled with AGENT_MAKER_SYS.
system_prompt = gr.Textbox(
    label="System prompt",
    value=AGENT_MAKER_SYS,
    lines=14,
)
# Combines the Kosmos caption with the system prompt into the JSON-string
# payload consumed by the Zephyr node (see build_messages_json).
messages_node = FnNode(
    fn=build_messages_json,
    inputs={
        "caption": kosmos_caption.caption,
        "system_prompt": system_prompt,
    },
    outputs={
        # Shown as a textbox because the payload is deliberately a string,
        # not gr.JSON (see comment banner above).
        "messages_json": gr.Textbox(
            label="Conversation payload (string)",
            lines=6,
        )
    },
)
| # ============================================================================= | |
| # ZEPHYR CHAT NODE (FnNode, NOT InferenceNode) | |
| # ============================================================================= | |
| # EN: | |
| # Zephyr-7B on Hugging Face ONLY supports "conversational" task. | |
| # daggr.InferenceNode currently targets text-generation style APIs. | |
| # | |
| # Therefore: | |
| # - We must use huggingface_hub.InferenceClient | |
| # - Wrapped in a FnNode (custom Python call) | |
| # | |
| # FR: | |
| # InferenceNode n’est PAS adapté ici. | |
| # FnNode est le bon choix pour appeler client.chat.completions.create(). | |
| # ============================================================================= | |
def zephyr_chat(messages_json: str) -> str:
    """Run one chat completion against Zephyr-7B via huggingface_hub.

    Args:
        messages_json: JSON-encoded list of {"role", "content"} dicts
            (produced by build_messages_json).

    Returns:
        The assistant's reply text ("" when the API returns no content).

    Raises:
        RuntimeError: when the HF_TOKEN environment variable is unset.
    """
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        raise RuntimeError("HF_TOKEN missing (Space secret).")

    conversation = json.loads(messages_json)
    # NOTE(review): the ":featherless-ai" suffix appears to select an
    # inference provider — confirm against huggingface_hub routing docs.
    completion = InferenceClient(api_key=hf_token).chat.completions.create(
        model="HuggingFaceH4/zephyr-7b-beta:featherless-ai",
        messages=conversation,
    )
    reply = completion.choices[0].message.content or ""
    print("LLM OUT head:", repr(reply[:200]))
    return reply
# Wraps zephyr_chat in a FnNode (InferenceNode is unsuitable here — see
# banner above); input is the JSON string produced by messages_node.
zephyr_node = FnNode(
    fn=zephyr_chat,
    inputs={
        "messages_json": messages_node.messages_json,
    },
    outputs={
        # Unparsed LLM reply, kept visible for debugging; parsed downstream.
        "raw": gr.Textbox(
            label="Zephyr raw output",
            lines=14,
        )
    },
)
| # ============================================================================= | |
| # PARSE ZEPHYR OUTPUT INTO FIELDS | |
| # ============================================================================= | |
| # EN: | |
| # Extract Title / System prompt / Example input from the LLM output. | |
| # Keeps raw text as fallback for debugging. | |
| # | |
| # FR: | |
| # Parsing volontairement simple et robuste, | |
| # avec fallback si le format n’est pas respecté. | |
| # ============================================================================= | |
def parse_zephyr_output(text: str):
    """Extract (title, system prompt, example input) from the LLM reply.

    Each field is located by its "<Label>:" marker (case-insensitive) and
    runs until the next marker on a new line, or end of text. If the model
    inlines "Example input:" inside the system-prompt field, the system
    prompt is truncated there. When no marker is found at all, the raw
    reply is returned in the ``system`` slot as a debugging fallback.

    Returns:
        A (title, system, example) tuple of stripped strings.
    """
    raw = (text or "").strip()

    def grab(label):
        # Capture lazily up to the next label on its own line, or to EOF.
        pattern = rf"(?is)\b{label}\s*:\s*(.*?)(?=\n\s*(Title|System prompt|Example input)\s*:|\Z)"
        m = re.search(pattern, raw)
        return m.group(1).strip() if m else ""

    title, system, example = (
        grab(lbl) for lbl in ("Title", "System prompt", "Example input")
    )

    # If "Example input:" leaked into the system-prompt capture, cut it off.
    if system:
        system = re.split(r"(?is)\bExample input\s*:\s*", system, maxsplit=1)[0].strip()

    # Nothing matched the expected format: surface the raw text for debugging.
    if not any((title, system, example)):
        system = raw

    return title, system, example
# Splits the raw Zephyr reply into title / system prompt / example-input
# fields via parse_zephyr_output (raw text lands in "system" on parse failure).
parsed = FnNode(
    fn=parse_zephyr_output,
    inputs={"text": zephyr_node.raw},
    outputs={
        "title": gr.Textbox(label="Parsed title", lines=2),
        "system": gr.Textbox(label="Parsed system prompt", lines=10),
        "example": gr.Textbox(label="Parsed example input", lines=3),
    },
)
| # ============================================================================= | |
| # GRAPH | |
| # ============================================================================= | |
| # EN: | |
| # Explicit, linear, debuggable DAG. | |
| # Every intermediate value is visible in the UI. | |
| # | |
| # FR: | |
| # Graphe volontairement verbeux pour démonstration, | |
| # debug et pédagogie autour de daggr. | |
| # ============================================================================= | |
# Linear DAG: image upload → Kosmos-2 caption → payload → Zephyr → parser.
# Node order matches data flow; every intermediate value is visible in the UI.
graph = Graph(
    name="Image → System prompt idea (daggr)",
    nodes=[
        image_input_node,
        kosmos_caption,
        messages_node,
        zephyr_node,
        parsed,
    ],
    # persist_key=False: presumably disables run-state persistence between
    # sessions — TODO confirm against the daggr Graph documentation.
    persist_key=False,
)
if __name__ == "__main__":
    graph.launch()