File size: 10,774 Bytes
9e93462
7e711f2
 
 
820a28c
9e93462
7e711f2
 
 
 
 
 
3b42a60
 
 
 
 
 
 
 
820a28c
7e711f2
 
 
 
 
 
 
 
 
 
820a28c
7e711f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
820a28c
 
 
3b42a60
 
 
 
 
820a28c
3b42a60
 
820a28c
3b42a60
820a28c
3b42a60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7e711f2
9e93462
a8500b3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
import gradio as gr
from gradio import ChatMessage
from langchain_core.messages import BaseMessage, HumanMessage
import time
from ml_pipeline_workflow import ml_app as workflow


def format_namespace(namespace):
    """Extract the graph (agent) name from a LangGraph namespace tuple.

    Each namespace entry looks like "agent_name:run_id"; the last entry's
    prefix before ":" identifies the innermost subgraph. An empty namespace
    means we are at the top-level graph.
    """
    if not namespace:
        return "root graph"
    innermost = namespace[-1]
    return innermost.split(":")[0]


def handle_like(data: gr.LikeData):
    """Log a like/dislike event from the chatbot to stdout."""
    label = "πŸ‘ Upvoted" if data.liked else "πŸ‘Ž Downvoted"
    print(f"{label}: {data.value}")


def generate_response(message, history):
    """Stream nested-"thought" responses for the ML pipeline workflow.

    Generator consumed by gr.ChatInterface. As the LangGraph workflow
    streams node updates (with subgraphs enabled), this builds a growing
    list of ChatMessage objects: each agent subgraph gets a collapsible
    header message, and the nodes running inside it are attached as child
    messages via a ``parent_id`` in their metadata.

    Args:
        message: The user's chat input string.
        history: Prior chat history (unused; required by ChatInterface).

    Yields:
        list[ChatMessage]: The full response list after every state change,
        so the UI re-renders progress (pending spinners, durations, etc.).
    """
    inputs = {
        "messages": [HumanMessage(content=message)],
    }

    response = []
    message_id_counter = 0  # Monotonic source of unique message IDs
    agent_header_ids = {}  # Map agent (namespace) names to their header message IDs
    current_namespace = None  # Track current namespace to detect agent transitions
    # BUGFIX: bind node_chunk before the loop so the final-response block
    # below cannot raise NameError when the stream yields no chunks.
    node_chunk = None

    for namespace, chunk in workflow.stream(
        inputs,
        stream_mode="updates",
        subgraphs=True
    ):
        for node_name, node_chunk in chunk.items():
            formatted_namespace = format_namespace(namespace)

            # Complete the previous non-header message FIRST (before the
            # namespace-change check) so its spinner stops and its duration
            # is recorded as soon as the next node starts.
            if len(response) > 0 and response[-1].metadata.get("status") == "pending":
                # Agent headers are completed separately on namespace change
                if response[-1].metadata.get("id") not in agent_header_ids.values():
                    prev_msg = response[-1]
                    prev_msg.metadata["status"] = "done"
                    if "start_time" in prev_msg.metadata:
                        prev_msg.metadata["duration"] = time.time() - prev_msg.metadata["start_time"]

                    # Reflect the completion in the parent agent header's log
                    prev_parent_id = prev_msg.metadata.get("parent_id")
                    if prev_parent_id:
                        prev_title = prev_msg.metadata.get("title", "unknown")
                        for msg in response:
                            if msg.metadata.get("id") == prev_parent_id and msg.metadata.get("is_agent_header"):
                                # Replace a transient "working on" line, or append a checkmark entry
                                if not msg.content or "πŸ”„" in msg.content:
                                    msg.content = f"βœ“ {prev_title}"
                                else:
                                    msg.content += f"\nβœ“ {prev_title}"
                                break

                    yield response  # Yield to show completion

            # If the namespace changed, the previous agent finished: close its header
            if current_namespace and current_namespace != formatted_namespace:
                if current_namespace in agent_header_ids:
                    header_id = agent_header_ids[current_namespace]
                    # Find the agent header and mark it done
                    for msg in response:
                        if msg.metadata.get("id") == header_id and msg.metadata.get("status") == "pending":
                            msg.metadata["status"] = "done"
                            if "start_time" in msg.metadata:
                                msg.metadata["duration"] = time.time() - msg.metadata["start_time"]
                            yield response
                            break

            current_namespace = formatted_namespace

            # If this node runs inside an agent subgraph, make sure the
            # agent's header message exists before attaching children to it.
            parent_id = None
            if formatted_namespace != "root graph":
                if formatted_namespace not in agent_header_ids:
                    # Create the agent header message first
                    message_id_counter += 1
                    header_id = message_id_counter
                    agent_header_ids[formatted_namespace] = header_id

                    agent_emojis = {
                        "supervisor": "πŸ‘”",
                        "data_extraction_expert": "πŸ“Š",
                        "pretraining_expert": "πŸ”¬",
                        "finetuning_expert": "🎯",
                        "evaluation_expert": "πŸ“ˆ"
                    }
                    emoji = agent_emojis.get(formatted_namespace, "🧠")

                    header_message = ChatMessage(
                        content="",
                        metadata={
                            "title": f"{emoji} {formatted_namespace}",
                            "id": header_id,
                            "status": "pending",
                            "start_time": time.time(),
                            "is_agent_header": True  # Mark as agent header
                        }
                    )
                    response.append(header_message)
                    yield response

                parent_id = agent_header_ids[formatted_namespace]

            # Create the message for the current node
            message_id_counter += 1
            current_id = message_id_counter

            # Choose a title based on where the node lives
            agent_names = ["data_extraction_expert", "pretraining_expert",
                          "finetuning_expert", "evaluation_expert"]
            if formatted_namespace == "root graph":
                if node_name == "supervisor":
                    title = f"πŸ‘” {node_name}"
                elif node_name in agent_names:
                    # Skip the root-level agent node - its subgraph stream
                    # will produce the detailed messages instead
                    continue
                else:
                    title = f"🧠 {node_name}"
            else:
                # Node running inside an agent subgraph
                title = f"βš™οΈ {node_name}"

            node_message = ChatMessage(
                content="",
                metadata={
                    "title": title,
                    "id": current_id,
                    "status": "pending",
                    "start_time": time.time()
                }
            )

            if parent_id:
                node_message.metadata["parent_id"] = parent_id

            response.append(node_message)
            yield response

            # Render the node's output: pretty-print any messages, fall
            # back to "key:\nvalue" for other state entries.
            out_str = []
            if isinstance(node_chunk, dict):
                for k, v in node_chunk.items():
                    if isinstance(v, BaseMessage):
                        out_str.append(v.pretty_repr())
                    elif isinstance(v, list):
                        for list_item in v:
                            if isinstance(list_item, BaseMessage):
                                out_str.append(list_item.pretty_repr())
                            else:
                                out_str.append(str(list_item))
                    else:
                        out_str.append(f"{k}:\n{v}")

                response[-1].content = "\n".join(out_str)

                # Surface the current activity in the parent agent header
                if parent_id:
                    for msg in response:
                        if msg.metadata.get("id") == parent_id and msg.metadata.get("is_agent_header"):
                            activity_summary = f"πŸ”„ Working on: {node_name}"
                            if len(out_str) > 0:
                                # Add a brief single-line preview of the content
                                preview = out_str[0][:100].replace('\n', ' ')
                                activity_summary += f"\nπŸ“ {preview}..."
                            msg.content = activity_summary
                            break

                yield response

            # Keep the node pending - it is completed when the next node
            # starts (or in the final sweep below); yield the current state.
            yield response

    # Complete any remaining pending messages (agent headers and sub nodes)
    for msg in response:
        if msg.metadata.get("status") == "pending":
            msg.metadata["status"] = "done"
            if "start_time" in msg.metadata:
                msg.metadata["duration"] = time.time() - msg.metadata["start_time"]

    # Append the final answer without metadata so it renders as a regular
    # chat message rather than a collapsible thought panel.
    if node_chunk and isinstance(node_chunk, dict) and 'messages' in node_chunk:
        final_message = node_chunk['messages'][-1].content
        response.append(ChatMessage(content=final_message))
        yield response


# Create interface with Blocks for like functionality
with gr.Blocks() as demo:
    # "messages" format is required so ChatMessage metadata (title,
    # status, duration, parent_id) renders as collapsible thought panels.
    chatbot = gr.Chatbot(
        type="messages",
        show_copy_button=True,
        show_copy_all_button=True,
        show_share_button=True
    )
    # Wire like/dislike clicks to the logging handler (no inputs/outputs).
    chatbot.like(handle_like, None, None)

    gr.ChatInterface(
        generate_response,
        type="messages",
        chatbot=chatbot,
        title="πŸ”¬ ML Pipeline Automation",
        description="데이터 μΆ”μΆœ, μ‚¬μ „ν•™μŠ΅, νŒŒμΈνŠœλ‹, 평가 단계λ₯Ό κ±°μΉ˜λŠ” μ™„μ „ν•œ ML νŒŒμ΄ν”„λΌμΈμž…λ‹ˆλ‹€. 각 μ „λ¬Έκ°€ μ—μ΄μ „νŠΈμ˜ μž‘μ—… 과정을 λ‹¨κ³„λ³„λ‘œ 확인할 수 μžˆμŠ΅λ‹ˆλ‹€.",
        examples=[
            # Full end-to-end pipeline examples
            "user_events ν…Œμ΄λΈ”μ—μ„œ 2024-01-01λΆ€ν„° 2024-12-31κΉŒμ§€ 이벀트 데이터λ₯Ό μΆ”μΆœν•˜κ³ , λͺ¨λΈμ„ μ‚¬μ „ν•™μŠ΅ν•œ ν›„ 5개 클래슀 λΆ„λ₯˜ λͺ¨λΈμ„ ν•™μŠ΅ν•˜κ³  ν‰κ°€ν•΄μ€˜",
            "transaction_data ν…Œμ΄λΈ”μ—μ„œ 졜근 6κ°œμ›” λ°μ΄ν„°λ‘œ μ΄μƒκ±°λž˜ 탐지 λͺ¨λΈμ„ μ²˜μŒλΆ€ν„° λκΉŒμ§€ λ§Œλ“€μ–΄μ€˜",

            # Combined examples (2-3 stages)
            "customer_logs ν…Œμ΄λΈ”μ—μ„œ 2024λ…„ 데이터λ₯Ό μΆ”μΆœν•˜κ³  GPT-2 λͺ¨λΈ μ‚¬μ „ν•™μŠ΅κΉŒμ§€λ§Œ ν•΄μ€˜",
            "μ€€λΉ„λœ 데이터(/data/corpus.txt)둜 λͺ¨λΈμ„ μ‚¬μ „ν•™μŠ΅ν•˜κ³ , κ·Έ λͺ¨λΈλ‘œ 감성뢄석 3클래슀 λΆ„λ₯˜ νŒŒμΈνŠœλ‹κΉŒμ§€ μ§„ν–‰ν•΄μ€˜",
            "μ‚¬μ „ν•™μŠ΅λœ λͺ¨λΈ(/models/pretrained/gpt2)둜 spam detection 2클래슀 λΆ„λ₯˜ λͺ¨λΈ ν•™μŠ΅ν•˜κ³  μ„±λŠ₯ ν‰κ°€ν•΄μ€˜",
            "product_reviews ν…Œμ΄λΈ”μ—μ„œ 2024λ…„ 리뷰 데이터 μΆ”μΆœν•˜κ³  λ°”λ‘œ 별점 예츑 5클래슀 λΆ„λ₯˜ λͺ¨λΈ νŒŒμΈνŠœλ‹ν•΄μ€˜",

            # Single-stage examples
            "μ€€λΉ„λœ 데이터(/data/pretraining/corpus.txt)둜 GPT-2 λͺ¨λΈμ„ 3 에포크 μ‚¬μ „ν•™μŠ΅λ§Œ ν•΄μ€˜",
            "μ‚¬μ „ν•™μŠ΅λœ λͺ¨λΈ(/models/pretrained/checkpoint)으둜 감성뢄석 3클래슀 λΆ„λ₯˜ νŒŒμΈνŠœλ‹λ§Œ μ§„ν–‰ν•΄μ€˜",
            "ν•™μŠ΅λœ λͺ¨λΈ(/models/finetuned/model)을 ν…ŒμŠ€νŠΈ 데이터(/data/test.jsonl)둜 ν‰κ°€λ§Œ ν•΄μ€˜"
        ],
        cache_examples=False
    )

if __name__ == "__main__":
    # NOTE(review): ssr_mode=False presumably to avoid server-side-rendering
    # issues with the streaming generator UI - confirm against Gradio docs.
    demo.launch(ssr_mode=False)