mathysgrapotte committed on
Commit
a25664c
·
1 Parent(s): a40bde5

adding smolagents logs to the output

Browse files
Files changed (1) hide show
  1. main.py +224 -121
main.py CHANGED
@@ -5,6 +5,81 @@ from agents.query_ontology_db import agent
5
  import yaml
6
  import time
7
  import re
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  def extract_format_terms_from_result(result):
10
  """Extract EDAM format terms from agent result string"""
@@ -83,114 +158,108 @@ def format_ontology_results_html(results, meta_yml):
83
 
84
  return html_content
85
 
86
- def run_multi_agent(module_name, progress=gr.Progress()):
87
- """Enhanced function with progress tracking"""
88
-
89
- progress(0, desc="🦙 Llama is waking up...")
90
- time.sleep(0.5)
91
 
92
- ### RETRIEVE INFORMATION FROM META.YML ###
93
- progress(0.1, desc="🔍 Fetching meta.yml file...")
94
- meta_yml = get_meta_yml_file(module_name=module_name)
95
- time.sleep(0.5)
 
 
96
 
97
- progress(0.2, desc="🦙 Llama is analyzing the module structure...")
98
- # module_info = extract_module_name_description(meta_file=meta_yml)
99
- # module_tools = extract_tools_from_meta_json(meta_file=meta_yml)
100
- time.sleep(0.5)
101
-
102
- # ### FIND THE MODULE TOOL ###
103
- progress(0.3, desc="🧠 Llama is thinking about the best tool...")
104
- # if len(module_info) == 1:
105
- # module_yaml_name = module_info[0]
106
- # module_description = module_info[1]
107
- # else:
108
- # # TODO: agent to choose the right tool
109
- # first_prompt = f"""
110
- # The module {module_info[0]} with desciption '{module_info[1]}' contains a series of tools.
111
- # Find the tool that best describes the module. Return only one tool. Return the name.
112
- # This is the list of tools:
113
- # {"\n\t".join(f"{tool[0]}: {tool[1]}" for tool in module_tools)}
114
- # """
115
- # module_yaml_name = "fastqc" # TODO: this would be the answer of the first agent
116
- # module_description = "my description" # TODO: this would be the answer of the first agent
117
-
118
- # ### EXTRACT INFO FROM META.YML ###
119
- progress(0.4, desc="📊 Extracting metadata information...")
120
- # meta_info = extract_information_from_meta_json(meta_file=meta_yml, tool_name=module_yaml_name)
121
- time.sleep(0.5)
122
-
123
- # ### FETCH ONOTOLOGIES FROM BIO.TOOLS ###
124
- progress(0.5, desc="🔬 Searching bio.tools database...")
125
- # if meta_info["bio_tools_id"] == "":
126
- # bio_tools_list = get_biotools_response(module_yaml_name)
127
-
128
- # # TODO: agent to select the best match from all possible bio.tools entries
129
- # # The answer should be the entry ID
130
- # second_prompt = "" # TODO: update
131
- # bio_tools_tool = "FastQC" # TODO: this should be the answer form the second agent
132
-
133
- # ontology = get_biotools_ontology(module_yaml_name, bio_tools_tool)
134
-
135
- # ### CLASSIFY ALL INPUT AND OUTPUT ONTOLOGIES INTO THE APPROPRIATE CHANNELS ###
136
-
137
- # # TODO !!!
138
- # # Create an agent which classifies the ontologeis into the right i/o
139
- # # From biotols we get a list of ontologies for inputs and a list of ontologies for outputs
140
- # # but in most nf-core modules we will have finles separated into different channels
141
- # # For example bam, bai, sam...
142
- # # The agent should recieve the i/o from the module, the ontologies found in bio.tools, and assigne the correct ones to each channel.
143
-
144
- # ### FETCH ONTOLOGY TERMS FROM EDAM DATABASE ###
145
- progress(0.6, desc="🦙 Llama is consulting the EDAM database...")
146
  results = {"input": {}, "output": {}}
147
-
148
- total_inputs = len(meta_yml.get("input", []))
149
- current_input = 0
150
 
151
- for input_channel in meta_yml["input"]:
152
- current_input += 1
153
- progress(0.6 + (current_input / total_inputs) * 0.3,
154
- desc=f"🔍 Processing input channel {current_input}/{total_inputs}...")
155
-
156
- for ch_element in input_channel:
157
- for key, value in ch_element.items():
158
- if value["type"] == "file":
159
- progress(0.6 + (current_input / total_inputs) * 0.3,
160
- desc=f"🦙 Llama is analyzing {key}...")
161
- result = agent.run(f"You are presentend with a file format for the input {key}, which is a file and is described by the following description: '{value['description']}', search for the best matches out of possible matches in the edam ontology (formated as format_XXXX), and return the answer (a list of ontology classes) in a final_answer call such as final_answer([format_XXXX, format_XXXX, ...])")
162
- results["input"][key] = result
163
 
164
- # for output_channel in meta_info["outputs"]:
165
- # for ch_element in output_channel:
166
- # for key, value in ch_element.items():
167
- # if value["type"] == "file":
168
- # result = agent.run(f"You are presentend with a file format for the output {key}, which is a file and is described by the following description: '{value['description']}', search for the best matches out of possible matches in the edam ontology (formated as format_XXXX), and return the answer (a list of ontology classes) in a final_answer call such as final_answer([format_XXXX, format_XXXX, ...])")
169
- # results["outputs"][key] = result
170
-
171
- ### FINAL AGENT TO BENCHMARK AND FIND THE COMMONALITIES BETWEEN BIO.TOOLS AND EDAM ###
172
- progress(0.9, desc="🔄 Finalizing ontology mappings...")
173
- # TODO !!!
174
- # Get results from bio.tools and EDAM
175
- # The agent should doublecheck if results are correct (?)
176
- # and return the ones that make more sense
177
- # and remove duplicates (this can be done through a python function?)
 
178
 
179
- ### UPDATE META.YML FILE ADDING ONTOLOGIES AND RETURN THE ANSWER ###
180
- progress(0.95, desc="💾 Generating updated meta.yml...")
181
- # TODO: placeholder
182
- # This is returning the original meta.yml, but it should return the modified one with the ontologies added
183
- with open("tmp_meta.yml", "w") as fh:
184
- yaml.dump(meta_yml, fh)
185
-
186
- progress(1.0, desc="✅ Llama has finished! Meta.yml updated successfully!")
187
- time.sleep(0.5)
188
 
189
  # Format the results into a nice HTML display
190
  formatted_results = format_ontology_results_html(results, meta_yml)
191
 
192
  return formatted_results, "tmp_meta.yml"
193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  def run_interface():
195
  """ Function to run the agent with a Gradio interface.
196
  This function sets up the Gradio interface and launches it.
@@ -250,6 +319,39 @@ def run_interface():
250
  min-height: 100vh;
251
  }
252
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  .main-header {
254
  text-align: center;
255
  padding: 2rem 0;
@@ -612,6 +714,7 @@ def run_interface():
612
  gr.HTML("""
613
  <div class="section-header">
614
  nf-core module
 
615
  """)
616
 
617
  # create the input textbox for the nf-core module name
@@ -629,9 +732,6 @@ def run_interface():
629
  elem_classes="btn-primary",
630
  size="lg"
631
  )
632
-
633
- # Llama status indicator
634
- status_display = gr.HTML(visible=False)
635
 
636
  with gr.Column(scale=1, elem_classes="output-container"):
637
  gr.HTML("""
@@ -650,32 +750,35 @@ def run_interface():
650
  label="download original meta.yml with ontologies",
651
  elem_classes="result-container"
652
  )
653
-
654
- # Progress indicator function
655
- def show_llama_status():
656
- return gr.HTML("""
657
- <div class="llama-loader">
658
- <div class="llama-emoji">🦙</div>
659
- <div class="llama-text">nf-core Llama is working hard!</div>
660
- <div class="llama-subtext">Analyzing ontologies and enhancing your meta.yml...</div>
661
- </div>
662
- """, visible=True)
663
 
664
- def hide_llama_status():
665
- return gr.HTML("", visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
666
 
667
- # set the function to run when the button is clicked
 
 
 
 
 
668
  fetch_btn.click(
669
- fn=show_llama_status,
670
- outputs=status_display
671
  ).then(
672
- fn=run_multi_agent,
673
  inputs=module_input,
674
- outputs=[ontology_output, download_button],
675
- show_progress="full"
676
- ).then(
677
- fn=hide_llama_status,
678
- outputs=status_display
679
  )
680
 
681
  # Footer with nf-core branding
@@ -689,7 +792,7 @@ def run_interface():
689
  </div>
690
  """)
691
 
692
- demo.launch()
693
 
694
  if __name__ == "__main__":
695
  run_interface()
 
5
  import yaml
6
  import time
7
  import re
8
+ import io
9
+ import logging
10
+ import threading
11
+ from contextlib import redirect_stdout, redirect_stderr
12
+ import queue
13
+ import sys
14
+
15
# Global log queue for streaming logs to Gradio; queue.Queue is thread-safe,
# so the agent worker thread can put() while the UI generator get()s.
log_queue = queue.Queue()
17
+
18
class GradioLogHandler(logging.Handler):
    """Logging handler that mirrors each record to the terminal and to a queue.

    The queue side feeds the Gradio live-log display; the terminal side keeps
    normal console output by writing to the real stdout (``sys.__stdout__``),
    which remains valid even while ``redirect_stdout`` is active elsewhere.
    """

    def __init__(self, log_queue):
        super().__init__()
        self.log_queue = log_queue
        # Write to the *original* stdout so terminal output survives redirection.
        self.terminal_handler = logging.StreamHandler(sys.__stdout__)
        self.terminal_handler.setFormatter(
            logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        )

    def emit(self, record):
        """Format *record* and deliver it to both sinks."""
        # Send to terminal; the nested handler does its own error handling.
        self.terminal_handler.emit(record)

        # Send to the Gradio queue. Use the standard handleError() hook
        # instead of silently swallowing failures, so formatting bugs are
        # reported (per logging's raiseExceptions policy) rather than hidden.
        try:
            log_msg = self.format(record)
            self.log_queue.put(log_msg)
        except Exception:
            self.handleError(record)
39
+
40
class QueueWriter:
    """A stream-like object that tees writes to the real stdout and a queue.

    Raw text (including ANSI color codes) goes straight to the terminal; a
    cleaned, right-stripped copy is pushed onto *queue* for the Gradio
    live-log display. Whitespace-only writes are not queued.
    """

    # Compiled once for the class instead of once per instance in __init__.
    ANSI_ESCAPE = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')

    def __init__(self, queue):
        self.queue = queue
        # Kept as an instance attribute for backward compatibility with any
        # code reading ``writer.ansi_escape``.
        self.ansi_escape = self.ANSI_ESCAPE

    def write(self, text):
        # Print raw output to terminal to preserve colors.
        sys.__stdout__.write(text)
        sys.__stdout__.flush()

        # Clean ANSI codes for Gradio display; skip pure-whitespace chunks.
        clean_text = self.ansi_escape.sub('', text)
        if clean_text.strip():
            self.queue.put(clean_text.rstrip())

    def flush(self):
        # File-like protocol: also flush the real stdout.
        sys.__stdout__.flush()
59
+
60
def setup_logging():
    """Attach a GradioLogHandler to every logger whose output we want to stream.

    Returns:
        GradioLogHandler: the shared handler, so callers can detach or
        reconfigure it later.
    """
    # Create the shared handler feeding the global log queue.
    gradio_handler = GradioLogHandler(log_queue)
    gradio_handler.setFormatter(
        logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    )

    # One loop covers smolagents plus the other libraries that might log;
    # previously "smolagents" was configured separately with identical code.
    for logger_name in ("smolagents", "transformers", "huggingface_hub", "agents"):
        logger = logging.getLogger(logger_name)
        logger.setLevel(logging.INFO)
        # Guard against duplicate handlers if setup_logging() runs twice
        # (e.g. module re-import in a notebook session).
        if gradio_handler not in logger.handlers:
            logger.addHandler(gradio_handler)

    return gradio_handler
80
+
81
# Initialize logging setup at import time so agent logs are captured
# from the very first run.
log_handler = setup_logging()
83
 
84
  def extract_format_terms_from_result(result):
85
  """Extract EDAM format terms from agent result string"""
 
158
 
159
  return html_content
160
 
161
def run_multi_agent_with_logs(module_name):
    """Run the EDAM ontology agent over every file-typed input of a module.

    Fetches the module's meta.yml, queries the agent for format terms for
    each file-typed element of each input channel, writes the (currently
    unmodified) meta.yml to ``tmp_meta.yml`` and returns the HTML-formatted
    results. Agent/stdout logs are captured by the caller
    (see stream_logs_and_run_agent).

    Returns:
        tuple[str, str]: (HTML results, path "tmp_meta.yml").
    """
    # Clear the log queue before starting so the UI shows only this run.
    while not log_queue.empty():
        try:
            log_queue.get_nowait()
        except queue.Empty:
            break

    results = {"input": {}, "output": {}}

    ### RETRIEVE INFORMATION FROM META.YML ###
    # No try/except wrapper: the previous `except Exception as e: raise e`
    # was a no-op that only obscured the traceback. Errors now propagate
    # unchanged to the caller, which reports them to the UI.
    meta_yml = get_meta_yml_file(module_name=module_name)
    time.sleep(0.5)

    ### FETCH ONTOLOGY TERMS FROM EDAM DATABASE ###
    # Use .get() so a module without an "input" section simply yields no
    # work instead of raising KeyError (the old code mixed .get and [...]).
    for input_channel in meta_yml.get("input", []):
        for ch_element in input_channel:
            for key, value in ch_element.items():
                if value["type"] == "file":
                    # This is where the agent runs - logs are captured by the caller
                    result = agent.run(f"You are presentend with a file format for the input {key}, which is a file and is described by the following description: '{value['description']}', search for the best matches out of possible matches in the edam ontology (formated as format_XXXX), and return the answer (a list of ontology classes) in a final_answer call such as final_answer([format_XXXX, format_XXXX, ...])")
                    results["input"][key] = result

    ### UPDATE META.YML FILE ADDING ONTOLOGIES AND RETURN THE ANSWER ###
    # NOTE(review): this still writes the *original* meta.yml; injecting the
    # found ontologies is a known TODO carried over from the prior revision.
    with open("tmp_meta.yml", "w") as fh:
        yaml.dump(meta_yml, fh)

    # Format the results into a nice HTML display
    formatted_results = format_ontology_results_html(results, meta_yml)

    return formatted_results, "tmp_meta.yml"
206
 
207
def stream_logs_and_run_agent(module_name):
    """Generator that runs the agent in a worker thread and streams its logs.

    Yields ``(markdown_logs, ontology_html, file_path)`` tuples for Gradio;
    the last two stay ``None`` until the run finishes. On failure, the error
    message is appended to the log pane instead of being silently dropped.
    """
    result_container = {"ontology_output": None, "file_output": None, "error": None}

    def run_agent_thread():
        # Worker: capture stdout/stderr into the log queue while running.
        # NOTE(review): redirect_stdout/redirect_stderr swap the *process-wide*
        # sys.stdout/sys.stderr, not just this thread's — concurrent requests
        # would interleave logs. Confirm single-user operation is acceptable.
        try:
            queue_writer = QueueWriter(log_queue)
            with redirect_stdout(queue_writer), redirect_stderr(queue_writer):
                ontology_output, file_output = run_multi_agent_with_logs(module_name)
                result_container["ontology_output"] = ontology_output
                result_container["file_output"] = file_output
        except Exception as e:
            result_container["error"] = str(e)

    # daemon=True so a hung agent thread cannot block interpreter shutdown.
    agent_thread = threading.Thread(target=run_agent_thread, daemon=True)
    agent_thread.start()

    accumulated_logs = ""

    # Stream logs while the agent is running.
    while agent_thread.is_alive() or not log_queue.empty():
        try:
            # Get a log message with a short timeout so we stay responsive.
            log_msg = log_queue.get(timeout=0.1)
            accumulated_logs += log_msg + "\n"
            yield f"```text\n{accumulated_logs}\n```", None, None
        except queue.Empty:
            # No new logs yet; re-yield the current state to keep the UI live.
            if agent_thread.is_alive():
                yield f"```text\n{accumulated_logs}\n```", None, None
            continue

    # Wait for the worker to finish completely.
    agent_thread.join()

    # Drain any messages that arrived after the streaming loop exited.
    while not log_queue.empty():
        try:
            log_msg = log_queue.get_nowait()
            accumulated_logs += log_msg + "\n"
        except queue.Empty:
            break

    # Final yield: surface the captured error in the log pane (previously it
    # was swallowed and the user only saw empty outputs).
    if result_container["error"]:
        accumulated_logs += f"ERROR: {result_container['error']}\n"
        yield f"```text\n{accumulated_logs}\n```", None, None
    else:
        yield f"```text\n{accumulated_logs}\n```", result_container["ontology_output"], result_container["file_output"]
262
+
263
  def run_interface():
264
  """ Function to run the agent with a Gradio interface.
265
  This function sets up the Gradio interface and launches it.
 
319
  min-height: 100vh;
320
  }
321
 
322
+ /* Live logs styling */
323
+ .live-logs {
324
+ background: rgba(33, 37, 41, 0.95) !important;
325
+ border: 2px solid rgba(36, 176, 100, 0.4) !important;
326
+ border-radius: 15px !important;
327
+ color: #e9ecef !important;
328
+ font-family: 'Fira Code', 'Monaco', 'Consolas', monospace !important;
329
+ font-size: 0.9rem !important;
330
+ line-height: 1.4 !important;
331
+ max-height: 400px !important;
332
+ overflow-y: auto !important;
333
+ padding: 1rem !important;
334
+ white-space: pre-wrap !important;
335
+ }
336
+
337
+ .live-logs::-webkit-scrollbar {
338
+ width: 8px;
339
+ }
340
+
341
+ .live-logs::-webkit-scrollbar-track {
342
+ background: rgba(52, 58, 64, 0.5);
343
+ border-radius: 4px;
344
+ }
345
+
346
+ .live-logs::-webkit-scrollbar-thumb {
347
+ background: rgba(36, 176, 100, 0.6);
348
+ border-radius: 4px;
349
+ }
350
+
351
+ .live-logs::-webkit-scrollbar-thumb:hover {
352
+ background: rgba(36, 176, 100, 0.8);
353
+ }
354
+
355
  .main-header {
356
  text-align: center;
357
  padding: 2rem 0;
 
714
  gr.HTML("""
715
  <div class="section-header">
716
  nf-core module
717
+ </div>
718
  """)
719
 
720
  # create the input textbox for the nf-core module name
 
732
  elem_classes="btn-primary",
733
  size="lg"
734
  )
 
 
 
735
 
736
  with gr.Column(scale=1, elem_classes="output-container"):
737
  gr.HTML("""
 
750
  label="download original meta.yml with ontologies",
751
  elem_classes="result-container"
752
  )
 
 
 
 
 
 
 
 
 
 
753
 
754
+ # Live logs section
755
+ with gr.Row():
756
+ with gr.Column(elem_classes="input-container"):
757
+ gr.HTML("""
758
+ <div class="section-header">
759
+ 🦙 live agent logs
760
+ </div>
761
+ """)
762
+
763
+ # Live log display
764
+ live_logs = gr.Markdown(
765
+ "**Logs will appear here when the agent starts working...**",
766
+ elem_classes="live-logs",
767
+ )
768
 
769
+ # Event handling for the streaming logs
770
+ def clear_outputs():
771
+ """Clear all outputs when starting a new analysis"""
772
+ return "", "", None
773
+
774
+ # Set the function to run when the button is clicked
775
  fetch_btn.click(
776
+ fn=clear_outputs,
777
+ outputs=[live_logs, ontology_output, download_button]
778
  ).then(
779
+ fn=stream_logs_and_run_agent,
780
  inputs=module_input,
781
+ outputs=[live_logs, ontology_output, download_button]
 
 
 
 
782
  )
783
 
784
  # Footer with nf-core branding
 
792
  </div>
793
  """)
794
 
795
+ demo.launch(debug=True)
796
 
797
  if __name__ == "__main__":
798
  run_interface()