Spaces:
Sleeping
Sleeping
File size: 10,774 Bytes
9e93462 7e711f2 820a28c 9e93462 7e711f2 3b42a60 820a28c 7e711f2 820a28c 7e711f2 820a28c 3b42a60 820a28c 3b42a60 820a28c 3b42a60 820a28c 3b42a60 7e711f2 9e93462 a8500b3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 |
import gradio as gr
from gradio import ChatMessage
from langchain_core.messages import BaseMessage, HumanMessage
import time
from ml_pipeline_workflow import ml_app as workflow
def format_namespace(namespace):
    """Return the graph name for a namespace tuple, or "root graph" when empty.

    A LangGraph namespace element looks like "node_name:task_id"; only the
    part before the colon of the innermost element is returned.
    """
    if not namespace:
        return "root graph"
    innermost = namespace[-1]
    graph_name, _, _ = innermost.partition(":")
    return graph_name
def handle_like(data: gr.LikeData):
    """Log a thumbs-up / thumbs-down event from the chatbot to stdout."""
    verdict = "Upvoted" if data.liked else "Downvoted"
    print(f"π {verdict}: {data.value}")
def generate_response(message, history):
    """Yield a growing list of ChatMessages while streaming the ML pipeline workflow.

    Streams `workflow` (a LangGraph app) with subgraphs enabled and renders each
    node update as a nested "thought" message: nodes that run inside an agent
    subgraph are grouped under a per-agent header message, and pending messages
    are marked done (with a duration) as later updates arrive.

    Args:
        message: The user's chat input text.
        history: Prior chat history supplied by gr.ChatInterface (unused here).

    Yields:
        The shared `response` list of gradio ChatMessage objects after each
        incremental update, so the UI re-renders progressively.
    """
    inputs = {
        "messages": [HumanMessage(content=message)],
    }
    response = []
    message_id_counter = [0]  # Use a one-element list so the counter mutates in place
    agent_header_ids = {}  # Maps agent (namespace) names to their header message IDs
    current_namespace = None  # Tracks the current namespace to detect changes
    for namespace, chunk in workflow.stream(
        inputs,
        stream_mode="updates",
        subgraphs=True
    ):
        # NOTE(review): assigned per-chunk but never read; per-message timing
        # uses the "start_time" metadata entries set below instead.
        start_time = time.time()
        for node_name, node_chunk in chunk.items():
            formatted_namespace = format_namespace(namespace)
            # Complete the previous non-header message FIRST (before the
            # namespace-change check below)
            if len(response) > 0 and response[-1].metadata.get("status") == "pending":
                # Only complete it if it's not an agent header
                if response[-1].metadata.get("id") not in agent_header_ids.values():
                    prev_msg = response[-1]
                    prev_msg.metadata["status"] = "done"
                    if "start_time" in prev_msg.metadata:
                        prev_msg.metadata["duration"] = time.time() - prev_msg.metadata["start_time"]
                    # Update the parent agent header to show completion
                    prev_parent_id = prev_msg.metadata.get("parent_id")
                    if prev_parent_id:
                        prev_title = prev_msg.metadata.get("title", "unknown")
                        for msg in response:
                            if msg.metadata.get("id") == prev_parent_id and msg.metadata.get("is_agent_header"):
                                # Add a completion log line; replace the body when
                                # it only holds an in-progress activity summary
                                if not msg.content or "π" in msg.content:
                                    msg.content = f"β {prev_title}"
                                else:
                                    msg.content += f"\nβ {prev_title}"
                                break
                    yield response  # Yield to show completion
            # If the namespace changed, complete the previous agent header
            if current_namespace and current_namespace != formatted_namespace:
                if current_namespace in agent_header_ids:
                    header_id = agent_header_ids[current_namespace]
                    # Find and complete the agent header
                    for msg in response:
                        if msg.metadata.get("id") == header_id and msg.metadata.get("status") == "pending":
                            msg.metadata["status"] = "done"
                            if "start_time" in msg.metadata:
                                msg.metadata["duration"] = time.time() - msg.metadata["start_time"]
                            yield response
                            break
            current_namespace = formatted_namespace
            # If this is a subgraph node, ensure its parent header exists
            parent_id = None
            if formatted_namespace != "root graph":
                # This node runs inside an agent subgraph
                if formatted_namespace not in agent_header_ids:
                    # Create the agent header message first
                    message_id_counter[0] += 1
                    header_id = message_id_counter[0]
                    agent_header_ids[formatted_namespace] = header_id
                    agent_emojis = {
                        "supervisor": "π",
                        "data_extraction_expert": "π",
                        "pretraining_expert": "π¬",
                        "finetuning_expert": "π―",
                        "evaluation_expert": "π"
                    }
                    emoji = agent_emojis.get(formatted_namespace, "π§ ")
                    header_message = ChatMessage(
                        content="",
                        metadata={
                            "title": f"{emoji} {formatted_namespace}",
                            "id": header_id,
                            "status": "pending",
                            "start_time": time.time(),
                            "is_agent_header": True  # Mark as agent header
                        }
                    )
                    response.append(header_message)
                    yield response
                parent_id = agent_header_ids[formatted_namespace]
            # Create the current node message
            message_id_counter[0] += 1
            current_id = message_id_counter[0]
            # Choose a title for the node
            agent_names = ["data_extraction_expert", "pretraining_expert",
                           "finetuning_expert", "evaluation_expert"]
            if formatted_namespace == "root graph":
                if node_name == "supervisor":
                    title = f"π {node_name}"
                elif node_name in agent_names:
                    # Skip - these are handled when their subgraph streams
                    continue
                else:
                    title = f"π§ {node_name}"
            else:
                # This node is inside an agent
                title = f"βοΈ {node_name}"
            # Create the node message
            node_message = ChatMessage(
                content="",
                metadata={
                    "title": title,
                    "id": current_id,
                    "status": "pending",
                    "start_time": time.time()
                }
            )
            if parent_id:
                node_message.metadata["parent_id"] = parent_id
            response.append(node_message)
            yield response
            # Render the node's update payload as plain text
            out_str = []
            if isinstance(node_chunk, dict):
                for k, v in node_chunk.items():
                    if isinstance(v, BaseMessage):
                        out_str.append(v.pretty_repr())
                    elif isinstance(v, list):
                        for list_item in v:
                            if isinstance(list_item, BaseMessage):
                                out_str.append(list_item.pretty_repr())
                            else:
                                out_str.append(str(list_item))
                    else:
                        out_str.append(f"{k}:\n{v}")
            response[-1].content = "\n".join(out_str)
            # Update the parent agent header with the current activity
            if parent_id:
                for msg in response:
                    if msg.metadata.get("id") == parent_id and msg.metadata.get("is_agent_header"):
                        # Summarize the current activity in the header body
                        activity_summary = f"π Working on: {node_name}"
                        if len(out_str) > 0:
                            # Add a brief preview of the content
                            preview = out_str[0][:100].replace('\n', ' ')
                            activity_summary += f"\nπ {preview}..."
                        msg.content = activity_summary
                        break
                yield response
            # Keep the node pending - it is completed when the next node starts.
            # Just yield to show the current state
            yield response
    # Complete any remaining pending messages (both agent headers and sub nodes)
    for msg in response:
        if msg.metadata.get("status") == "pending":
            msg.metadata["status"] = "done"
            if "start_time" in msg.metadata:
                msg.metadata["duration"] = time.time() - msg.metadata["start_time"]
    # Add the final response (without metadata so it displays as a regular message).
    # NOTE(review): node_chunk is the loop variable left over from the last
    # stream chunk; if the stream yields nothing this raises NameError - confirm.
    if node_chunk and isinstance(node_chunk, dict) and 'messages' in node_chunk:
        final_message = node_chunk['messages'][-1].content
        response.append(ChatMessage(content=final_message))
    yield response
# Create the interface with Blocks so the chatbot can expose a like/dislike
# handler in addition to the standard ChatInterface wiring.
# NOTE(review): the Korean UI strings below are mojibake in this copy of the
# file (encoding damage); they are preserved as-is rather than guessed at, with
# scrape-induced line breaks inside string literals rejoined.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        type="messages",
        show_copy_button=True,
        show_copy_all_button=True,
        show_share_button=True
    )
    # Route thumb up/down events to the logger; no inputs or outputs needed
    chatbot.like(handle_like, None, None)
    gr.ChatInterface(
        generate_response,
        type="messages",
        chatbot=chatbot,
        title="π¬ ML Pipeline Automation",
        description="λ°μ΄ν° μΆμΆ, μ¬μ νμ΅, νμΈνλ, νκ° λ¨κ³λ₯Ό κ±°μΉλ μμ ν ML νμ΄νλΌμΈμλλ€. κ° μ λ¬Έκ° μμ΄μ νΈμ μμκ³Όμ μ λ¨κ³λ³λ‘ νμΈν μ μμ΅λλ€.",
        examples=[
            # Full-pipeline examples
            "user_events νμ΄λΈμμ 2024-01-01λΆν° 2024-12-31κΉμ§ μ΄λ²€νΈ λ°μ΄ν°λ₯Ό μΆμΆνκ³ , λͺ¨λΈμ μ¬μ νμ΅ν ν 5κ° ν΄λμ€ λΆλ₯ λͺ¨λΈμ νμ΅νκ³ νκ°ν΄μ€",
            "transaction_data νμ΄λΈμμ μ΅κ·Ό 6κ°μ λ°μ΄ν°λ‘ μ΄μκ±°λ νμ§ λͺ¨λΈμ μ²μλΆν° λκΉμ§ λ§λ€μ΄μ€",
            # Combined examples (2-3 stages)
            "customer_logs νμ΄λΈμμ 2024λλ°μ΄ν°λ₯Ό μΆμΆνκ³ GPT-2 λͺ¨λΈ μ¬μ νμ΅κΉμ§λ§ ν΄μ€",
            "μ€λΉλ λ°μ΄ν°(/data/corpus.txt)λ‘ λͺ¨λΈμ μ¬μ νμ΅νκ³ , κ·Έ λͺ¨λΈλ‘ κ°μ±λΆμ 3ν΄λμ€ λΆλ₯ νμΈνλκΉμ§ μ§νν΄μ€",
            "μ¬μ νμ΅λ λͺ¨λΈ(/models/pretrained/gpt2)λ‘ spam detection 2ν΄λμ€ λΆλ₯ λͺ¨λΈ νμ΅νκ³ μ±λ₯ νκ°ν΄μ€",
            "product_reviews νμ΄λΈμμ 2024λ리뷰 λ°μ΄ν° μΆμΆνκ³ λ°λ‘ λ³μ μμΈ‘ 5ν΄λμ€ λΆλ₯ λͺ¨λΈ νμΈνλν΄μ€",
            # Single-stage examples
            "μ€λΉλ λ°μ΄ν°(/data/pretraining/corpus.txt)λ‘ GPT-2 λͺ¨λΈμ 3 μν¬ν¬ μ¬μ νμ΅λ§ ν΄μ€",
            "μ¬μ νμ΅λ λͺ¨λΈ(/models/pretrained/checkpoint)μΌλ‘ κ°μ±λΆμ 3ν΄λμ€ λΆλ₯ νμΈνλλ§ μ§νν΄μ€",
            "νμ΅λ λͺ¨λΈ(/models/finetuned/model)μ νμ€νΈ λ°μ΄ν°(/data/test.jsonl)λ‘ νκ°λ§ ν΄μ€"
        ],
        cache_examples=False
    )
# Script entry point: start the Gradio server when run directly.
# NOTE(review): ssr_mode=False disables server-side rendering; the reason is
# not documented here - confirm whether it is required for streaming updates.
if __name__ == "__main__":
    demo.launch(ssr_mode=False)
|