# Source: Hugging Face Space by fffiloni — commit 400e335 ("cosmetic param order change", verified)
import os
import re
import json
import gradio as gr
from huggingface_hub import InferenceClient
from daggr import GradioNode, FnNode, Graph
# Startup banner: confirms in the logs which pipeline variant this process is running.
print("BOOT: image → kosmos → zephyr (daggr, clean, commented)")
# =============================================================================
# DEFAULT SYSTEM PROMPT
# =============================================================================
# Default system prompt sent to the LLM. It describes the task (generate a
# chatbot persona from an image description supplied separately) and enforces
# a strict output format — "Title: / System prompt: / Example input:" — which
# parse_zephyr_output later relies on when splitting the reply into fields.
# =============================================================================
AGENT_MAKER_SYS = """
You are an AI whose job is to help users design a chatbot.
The chatbot’s personality must be inspired by the character and atmosphere
described by the user (based on an image description provided separately).
You must:
- Respond succinctly in a friendly tone
- Generate exactly three things:
1) A catchy chatbot title
2) A system prompt defining the chatbot’s personality and behavior
3) A very short example of what a *future user* might say to this chatbot
Important rules:
- The system prompt must NOT mention any image or visual input
- The example input must represent a realistic first message
that an end-user would send when chatting with this bot
- STOP immediately after the example input
- Follow EXACTLY this format (including labels and line breaks):
"Sure, I'd be happy to help you build a bot! I'm generating a title, a system prompt,
and an example user message. How do they sound?
Title: ...
System prompt: ...
Example input: ..."
""".strip()
# =============================================================================
# NODE 0 — IMAGE INPUT (PASSTHROUGH)
# =============================================================================
# Daggr requires explicit nodes, so this identity function exists solely to
# surface the image upload as its own visible node and make the data flow
# explicit in the graph (handy for debugging). No processing happens here.
# =============================================================================
def passthrough_image(image):
    """Identity node: log the uploaded image reference and forward it unchanged."""
    forwarded = image
    print("IMAGE INPUT:", forwarded)
    return forwarded
# Passthrough node: exposes the raw upload as an explicit node in the graph.
image_input_node = FnNode(
    fn=passthrough_image,
    inputs={
        # type="filepath": the image is handed downstream as a file path,
        # which is what the remote Kosmos-2 Space call expects.
        "image": gr.Image(
            label="Upload image",
            type="filepath",
        )
    },
    outputs={
        "image": gr.Image(label="Input image"),
    },
)
# =============================================================================
# KOSMOS POSTPROCESS — CLEAN CAPTION
# =============================================================================
# Kosmos-2 returns, positionally:
#   0) image (FileData dict)
#   1) highlighted_text (list of token dicts)
#   2) entities (string)
# Daggr passes all outputs positionally to postprocess; everything is
# collapsed into a single clean caption string, stripping the echoed
# "Describe this image in detail:" instruction prefix when present.
# =============================================================================
def kosmos_postprocess(image_out, highlighted_text, *rest):
    """Collapse Kosmos-2's highlighted-token list into one clean caption string.

    Parameters:
        image_out: first positional Space output (annotated image); unused.
        highlighted_text: list of token dicts; any other type yields "".
        *rest: remaining positional outputs (e.g. entities string); ignored.

    Returns:
        The reconstructed caption string, with the instruction prefix removed.
    """
    if not isinstance(highlighted_text, list):
        return ""
    # Fix: item.get("token", "") still returns None when the key exists with
    # a None value, which would crash "".join — coerce each token explicitly.
    caption = "".join(
        str(item.get("token") or "")
        for item in highlighted_text
        if isinstance(item, dict)
    ).strip()
    # Remove Kosmos "Detailed" prefix if present (case-insensitive).
    caption = re.sub(
        r"^\s*Describe\s+this\s+image\s+in\s+details?\s*:\s*",
        "",
        caption,
        flags=re.IGNORECASE,
    ).strip()
    print("CAPTION:", repr(caption))
    return caption
# =============================================================================
# KOSMOS NODE
# =============================================================================
# Calls the Kosmos-2 Gradio Space via its API. The node's raw outputs are
# reduced by kosmos_postprocess to ONE value: a clean caption string.
# =============================================================================
# Description mode selector; its value is sent as the Space's "text_input".
desc_type = gr.Radio(
    ["Brief", "Detailed"],
    label="Description Type",
    value="Detailed",
)
# Remote Space call: image path + description mode in, single caption out.
kosmos_caption = GradioNode(
    space_or_url="fffiloni/Kosmos-2-API",
    api_name="/generate_predictions",
    inputs={
        "image_input": image_input_node.image,
        "text_input": desc_type,
    },
    # Collapses the Space's positional outputs into one caption string.
    postprocess=kosmos_postprocess,
    outputs={
        "caption": gr.Textbox(label="Image caption", lines=3),
    },
)
# =============================================================================
# BUILD CONVERSATION PAYLOAD (STRING)
# =============================================================================
# Zephyr (via InferenceClient) expects a list[dict] of chat messages, but the
# daggr UI does not yet handle gr.JSON reliably. The conversation is therefore
# shipped as a JSON *string* and parsed back inside the LLM node.
# =============================================================================
def build_messages_json(caption: str, system_prompt: str) -> str:
    """Serialize the (system, user) chat turns as a single JSON string."""
    system_text = (system_prompt or "").strip()
    user_text = (caption or "").strip()
    payload = json.dumps(
        [
            {"role": "system", "content": system_text},
            {"role": "user", "content": user_text},
        ],
        ensure_ascii=False,
    )
    print("MESSAGES_JSON head:", repr(payload[:200]))
    return payload
# Editable system prompt shown in the UI; defaults to AGENT_MAKER_SYS.
system_prompt = gr.Textbox(
    label="System prompt",
    value=AGENT_MAKER_SYS,
    lines=14,
)
# Builds the JSON-string conversation payload from caption + system prompt.
messages_node = FnNode(
    fn=build_messages_json,
    inputs={
        "caption": kosmos_caption.caption,
        "system_prompt": system_prompt,
    },
    outputs={
        "messages_json": gr.Textbox(
            label="Conversation payload (string)",
            lines=6,
        )
    },
)
# =============================================================================
# ZEPHYR CHAT NODE (FnNode, NOT InferenceNode)
# =============================================================================
# Zephyr-7B on Hugging Face only supports the "conversational" task, while
# daggr.InferenceNode currently targets text-generation style APIs. We must
# therefore call huggingface_hub.InferenceClient ourselves, wrapped in a
# FnNode (a custom Python call).
# =============================================================================
def zephyr_chat(messages_json: str) -> str:
    """Send the serialized conversation to Zephyr-7B and return its reply text.

    Raises RuntimeError when the HF_TOKEN Space secret is not configured.
    """
    token = os.environ.get("HF_TOKEN")
    if not token:
        raise RuntimeError("HF_TOKEN missing (Space secret).")
    conversation = json.loads(messages_json)
    completion = InferenceClient(api_key=token).chat.completions.create(
        model="HuggingFaceH4/zephyr-7b-beta:featherless-ai",
        messages=conversation,
    )
    reply = completion.choices[0].message.content or ""
    print("LLM OUT head:", repr(reply[:200]))
    return reply
# LLM node: feeds the JSON payload string into zephyr_chat, exposes raw text.
zephyr_node = FnNode(
    fn=zephyr_chat,
    inputs={
        "messages_json": messages_node.messages_json,
    },
    outputs={
        "raw": gr.Textbox(
            label="Zephyr raw output",
            lines=14,
        )
    },
)
# =============================================================================
# PARSE ZEPHYR OUTPUT INTO FIELDS
# =============================================================================
# Extracts Title / System prompt / Example input from the LLM output.
# Parsing is deliberately simple and robust: when the expected format is not
# respected at all, the raw text is surfaced in the system field as a fallback
# for debugging.
# =============================================================================
def parse_zephyr_output(text: str):
    """Split the LLM reply into (title, system_prompt, example_input) strings."""
    cleaned = (text or "").strip()

    def extract(label):
        # Capture everything after "<label>:" up to the next known label or EOF.
        pattern = rf"(?is)\b{label}\s*:\s*(.*?)(?=\n\s*(Title|System prompt|Example input)\s*:|\Z)"
        found = re.search(pattern, cleaned)
        if found is None:
            return ""
        return found.group(1).strip()

    title = extract("Title")
    system = extract("System prompt")
    example = extract("Example input")
    # If the model inlined "Example input:" inside the system-prompt field, cut it off.
    if system:
        system = re.split(r"(?is)\bExample input\s*:\s*", system, maxsplit=1)[0].strip()
    # Nothing matched at all — keep the raw text visible for debugging.
    if not (title or system or example):
        system = cleaned
    return title, system, example
# Parser node: splits Zephyr's raw reply into title / system prompt / example.
parsed = FnNode(
    fn=parse_zephyr_output,
    inputs={"text": zephyr_node.raw},
    outputs={
        "title": gr.Textbox(label="Parsed title", lines=2),
        "system": gr.Textbox(label="Parsed system prompt", lines=10),
        "example": gr.Textbox(label="Parsed example input", lines=3),
    },
)
# =============================================================================
# GRAPH
# =============================================================================
# Explicit, linear, debuggable DAG. Every intermediate value is visible in the
# UI; the graph is deliberately verbose for demonstration, debugging and
# teaching purposes around daggr.
# =============================================================================
graph = Graph(
    name="Image → System prompt idea (daggr)",
    nodes=[
        image_input_node,
        kosmos_caption,
        messages_node,
        zephyr_node,
        parsed,
    ],
    # NOTE(review): persist_key=False — presumably disables persisting node
    # state between sessions; confirm against the daggr Graph documentation.
    persist_key=False,
)
# Script entry point: serve the daggr graph UI when run directly.
if __name__ == "__main__":
    graph.launch()