| |
|
| |
|
| |
|
| | import tkinter as tk
|
| | from tkinter import ttk, filedialog, messagebox, scrolledtext
|
| | import pandas as pd
|
| | import requests
|
| | import json
|
| | import os
|
| | import time
|
| | from typing import Optional, Dict, List
|
| | from datetime import datetime
|
| | import threading
|
| | import sys
|
| |
|
| |
|
| |
|
class DifyBasicChat:
    """Minimal client for the Dify ``/chat-messages`` endpoint.

    Stores the API key, the base URL (without trailing slashes) and the
    HTTP headers sent with every request.
    """

    def __init__(self, api_key: str, base_url: str = "http://localhost/v1"):
        """Remember the credentials and prepare the request headers.

        Args:
            api_key: Dify application key, sent as a Bearer token.
            base_url: API root; trailing slashes are removed.
        """
        self.api_key = api_key
        # Drop trailing slashes so endpoint paths can be appended with "/".
        self.base_url = base_url.rstrip("/")
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

    @staticmethod
    def _decode_stream_line(line: bytes) -> Optional[dict]:
        """Decode one streamed line into an event dict.

        Returns ``None`` for lines that are empty, are not valid JSON, or
        whose JSON payload is not an object, so the caller can skip them.
        """
        text = line.decode("utf-8").strip()
        # Remove the literal "data:" prefix. str.lstrip("data: ") would strip
        # a *set of characters* and can eat into the payload itself.
        if text.startswith("data:"):
            text = text[len("data:"):].strip()
        if not text:
            return None
        try:
            event = json.loads(text)
        except json.JSONDecodeError:
            return None
        return event if isinstance(event, dict) else None

    def send_message(
            self,
            query: str,
            user: str,
            conversation_id: Optional[str] = None,
            response_mode: str = "blocking",
            inputs: Optional[dict] = None
    ) -> tuple:
        """Send a chat message and collect the answer.

        Args:
            query: Text sent as the user's message.
            user: User identifier forwarded to Dify.
            conversation_id: Existing conversation to continue; omitted from
                the payload when falsy.
            response_mode: ``"blocking"`` for a single JSON reply; any other
                value makes the request with ``stream=True`` and reads the
                reply line by line.
            inputs: Extra inputs forwarded in the payload (``{}`` if omitted).

        Returns:
            A tuple ``(answer, conversation_id, full_response)``. In
            streaming mode ``full_response`` is the last decoded event. When
            the request fails with a ``requests`` exception the tuple is
            ``(error_message, None, {"error": error_message})``.

        Raises:
            RuntimeError: If the stream delivers an ``error`` event.
        """
        url = f"{self.base_url}/chat-messages"
        payload = {
            "query": query,
            "user": user,
            "response_mode": response_mode,
            "inputs": inputs or {}
        }
        if conversation_id:
            payload["conversation_id"] = conversation_id

        try:
            if response_mode == "blocking":
                res = requests.post(url, headers=self.headers, json=payload, timeout=120)
                res.raise_for_status()
                full_response = res.json()
                answer = full_response.get("answer", "")
                conv_id = full_response.get("conversation_id")
                return answer, conv_id, full_response

            full_answer = ""
            conv_id = None
            full_response = None
            # The context manager closes the streamed response even when we
            # stop reading early (message_end) or an error event is raised;
            # otherwise the connection is never released.
            with requests.post(url, headers=self.headers, json=payload,
                               stream=True, timeout=120) as res:
                res.raise_for_status()
                for line in res.iter_lines():
                    if not line:
                        continue
                    data = self._decode_stream_line(line)
                    if data is None:
                        continue
                    full_response = data
                    event = data.get("event")
                    if event == "message":
                        full_answer += data.get("answer") or ""
                    elif event == "message_end":
                        conv_id = data.get("conversation_id")
                        break
                    elif event == "error":
                        raise RuntimeError(f"Streaming Error: {data.get('message')}")
            return full_answer, conv_id, full_response

        except requests.exceptions.RequestException as e:
            # Transport/HTTP failures are reported through the return value
            # rather than raised, matching the original contract.
            error_msg = f"Request Failed: {str(e)}"
            return error_msg, None, {"error": error_msg}
|
| |
|
| |
|
def parse_dify_response(answer_text: str) -> dict:
    """Parse the JSON answer returned by Dify into a flat result dict.

    Two payload layouts are accepted:

    * nested -- ``{"<compound>": {"Main Category": ..., ...}}``: the value
      of the first key is used when it is a dict;
    * flat -- ``{"Main Category": ..., ...}``: the top-level object is used.

    Both layouts go through the same field extraction, so a flat answer
    keeps its endpoint/property fields instead of only the three category
    fields.

    Args:
        answer_text: Raw answer text, optionally wrapped in a Markdown code
            fence (three backticks, with or without a ``json`` tag).

    Returns:
        A dict with the keys ``Main Category``, ``Additional Category 1``,
        ``Additional Category 2``, ``EndpointName`` (always a list),
        ``XLogP``, ``BioPathway``, ``ToxicityInfo``, ``KnownUse`` and
        ``DisorderDisease``. The function never raises: on failure
        ``Main Category`` carries a ``JSON Parsing Error: ...`` or
        ``Parsing Failed: ...`` message and the other fields stay empty.
    """
    scalar_fields = (
        "Main Category",
        "Additional Category 1",
        "Additional Category 2",
        "XLogP",
        "BioPathway",
        "ToxicityInfo",
        "KnownUse",
        "DisorderDisease",
    )
    result = {
        "Main Category": "",
        "Additional Category 1": "",
        "Additional Category 2": "",
        "EndpointName": [],
        "XLogP": "",
        "BioPathway": "",
        "ToxicityInfo": "",
        "KnownUse": "",
        "DisorderDisease": ""
    }

    try:
        clean_text = answer_text.strip()

        # Strip Markdown code fences the model may wrap around the JSON.
        if clean_text.startswith("```json"):
            clean_text = clean_text.replace("```json", "").replace("```", "").strip()
        elif clean_text.startswith("```"):
            clean_text = clean_text.replace("```", "").strip()

        response_json = json.loads(clean_text)

        if isinstance(response_json, dict):
            first_key = next(iter(response_json), "")
            if first_key and isinstance(response_json[first_key], dict):
                # Nested layout: the details live under the first key.
                source = response_json[first_key]
            else:
                # Flat layout: the details are the top-level object itself.
                source = response_json

            for field in scalar_fields:
                result[field] = source.get(field, "")

            # Normalize EndpointName to a list; other types leave it empty.
            endpoint_value = source.get("EndpointName", [])
            if isinstance(endpoint_value, list):
                result["EndpointName"] = endpoint_value
            elif isinstance(endpoint_value, str):
                result["EndpointName"] = [endpoint_value] if endpoint_value else []

    except json.JSONDecodeError as e:
        result["Main Category"] = f"JSON Parsing Error: {str(e)}"
    except Exception as e:
        # Deliberate catch-all: callers rely on always getting a dict back.
        result["Main Category"] = f"Parsing Failed: {str(e)}"

    return result
|
| |
|
| |
|
def normalize_compound_name(name: str) -> str:
    """Return a cleaned-up compound name.

    Non-string input yields ``""``. Otherwise surrounding whitespace is
    trimmed, one pair of matching outer quotes (double quotes checked
    first, then single quotes) is removed, and every run of whitespace is
    collapsed into a single space.
    """
    if not isinstance(name, str):
        return ""

    trimmed = name.strip()

    # Remove at most one layer of matching outer quotes.
    for quote in ('"', "'"):
        if trimmed.startswith(quote) and trimmed.endswith(quote):
            trimmed = trimmed[1:-1]
            break

    # split() without arguments drops leading/trailing and repeated whitespace.
    return " ".join(trimmed.split())
|
| |
|
| |
|
def expand_endpoint_rows(parsed_result: dict, compound_name: str) -> list:
    """Turn one parsed result into one output row per endpoint.

    Every row repeats the compound-level fields and carries a single
    ``EndpointName`` value. When the parsed result has no endpoints, one
    row with an empty ``EndpointName`` is produced. No AC50 matching is
    done here.

    Args:
        parsed_result: Dict using the keys written by the response parser
            (``Main Category``, ``EndpointName``, ``XLogP``, ...).
        compound_name: Name as read from the input; stored unchanged in
            ``OriginalCompoundName`` and normalized for ``CompoundName``.

    Returns:
        A list of row dicts with identical keys in identical order.
    """
    cleaned_name = normalize_compound_name(compound_name)

    def build_row(endpoint):
        # Key order is kept stable: it determines the column order of any
        # table built from these rows.
        return {
            "CompoundName": cleaned_name,
            "OriginalCompoundName": compound_name,
            "MainCategory": parsed_result.get("Main Category", ""),
            "AdditionalCategory1": parsed_result.get("Additional Category 1", ""),
            "AdditionalCategory2": parsed_result.get("Additional Category 2", ""),
            "EndpointName": endpoint,
            "XLogP": parsed_result.get("XLogP", ""),
            "BioPathway": parsed_result.get("BioPathway", ""),
            "ToxicityInfo": parsed_result.get("ToxicityInfo", ""),
            "KnownUse": parsed_result.get("KnownUse", ""),
            "DisorderDisease": parsed_result.get("DisorderDisease", "")
        }

    # A missing or empty endpoint list still yields one row, with a blank endpoint.
    endpoints = parsed_result.get("EndpointName", []) or [""]
    return [build_row(endpoint) for endpoint in endpoints]
|
| |
|
| |
|
def batch_process_compounds_gui(
        csv_path: str,
        save_root: str,
        api_key: str,
        base_url: str,
        log_text: tk.Text,
        progress_var: tk.DoubleVar,
        user_id: str = "batch_compound_user",
        compound_col: str = "IUPAC_name",
        batch_num: int = 1,
        csv_encoding: str = "utf-8",
        csv_sep: str = ","
):
    """Query Dify for every unique compound in a CSV and save the results.

    Steps: create a timestamped result folder under ``save_root``; read the
    CSV; send each non-empty, unique value of ``compound_col`` to Dify in
    blocking mode; parse the answer and expand it to one row per endpoint;
    write one ``Original_Record_<n>.json`` per successful compound; write
    the combined CSV and, when there are failures, a separate failed-list
    CSV. Finally the user is asked whether to open the result folder.

    Args:
        csv_path: Input CSV file.
        save_root: Directory under which the result folder is created.
        api_key: Dify API key.
        base_url: Dify API base URL.
        log_text: Text widget that receives timestamped log lines.
        progress_var: Variable set to the progress percentage (0-100);
            reset to 0 when the function finishes.
        user_id: Prefix of the user identifier sent to Dify.
        compound_col: CSV column holding the compound names.
        batch_num: Batch number used in the user id and in file names.
        csv_encoding: Encoding passed to ``pandas.read_csv``.
        csv_sep: Separator passed to ``pandas.read_csv``.

    Errors are not raised to the caller: per-compound errors are logged and
    collected in the failed list; anything else is logged and shown in an
    error dialog.

    NOTE(review): this function updates Tk widgets/variables and opens
    message boxes directly; the visible caller runs it on a worker thread.
    Confirm that this is acceptable for the Tk build in use.
    """

    def log(message, color="black"):
        """Append a timestamped line to the GUI log in the given color."""
        stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        log_text.config(state=tk.NORMAL)
        log_text.tag_config(color, foreground=color)
        # Tag the text while inserting it so that multi-line messages are
        # colored as a whole (line-offset tagging only covered the last line).
        log_text.insert(tk.END, f"[{stamp}] {message}\n", color)
        log_text.see(tk.END)
        log_text.config(state=tk.DISABLED)
        log_text.update()

    try:
        log("Initializing Dify connection...", "blue")
        chat = DifyBasicChat(api_key=api_key, base_url=base_url)

        # One folder per run; the timestamp keeps reruns from colliding.
        result_folder = os.path.join(
            save_root,
            f"Compound_Classification_Results_Batch{batch_num}_{datetime.now().strftime('%Y%m%d%H%M%S')}")
        os.makedirs(result_folder, exist_ok=True)
        log(f"Result save folder: {result_folder}", "blue")

        log("Reading CSV file...", "blue")
        df = pd.read_csv(
            csv_path,
            encoding=csv_encoding,
            sep=csv_sep,
            na_filter=True
        )
        df = df.reset_index(drop=True)

        if compound_col not in df.columns:
            raise ValueError(
                f"Column not found in CSV: [{compound_col}]\n"
                f"Current CSV columns: {list(df.columns)}"
            )

        compounds = df[compound_col].dropna().unique()
        total = len(compounds)
        log(f"Successfully read {total} non-empty and unique compound names", "green")

        all_rows = []
        failed_list = []

        for idx, compound in enumerate(compounds, 1):
            compound = str(compound).strip()
            if not compound:
                continue

            progress = (idx / total) * 100
            progress_var.set(progress)
            log(f"Processing {idx}/{total}:{compound}", "black")

            try:
                answer, _, full_response = chat.send_message(
                    query=compound,
                    user=f"{user_id}_batch{batch_num}",
                    response_mode="blocking"
                )

                # send_message reports transport/HTTP failures by returning
                # {"error": ...} with the error text as the answer instead of
                # raising. Without this check such a compound would be
                # logged as completed and never reach the failed list.
                if (isinstance(full_response, dict)
                        and "error" in full_response
                        and "answer" not in full_response):
                    raise RuntimeError(answer)

                parsed_categories = parse_dify_response(answer)

                expanded_rows = expand_endpoint_rows(parsed_categories, compound)
                all_rows.extend(expanded_rows)

                # Keep the raw exchange next to the results for later auditing.
                record_file = os.path.join(result_folder, f"Original_Record_{idx}.json")
                with open(record_file, "w", encoding="utf-8") as f:
                    json.dump({
                        "Input Compound": compound,
                        "Dify Original Response": answer,
                        "Complete Response": full_response,
                        "Parsed Classification": parsed_categories,
                        "Expanded Rows Count": len(expanded_rows),
                        "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                    }, f, ensure_ascii=False, indent=4)

                log(f"✅ Processing completed: {compound} | Main Category: {parsed_categories['Main Category']} | Generated {len(expanded_rows)} rows",
                    "green")
                # Short pause between successful requests.
                time.sleep(0.5)

            except Exception as e:
                # One bad compound must not abort the whole batch.
                error_msg = str(e)
                log(f"❌ Processing failed: {compound} | Error: {error_msg}", "red")
                failed_list.append({
                    "CompoundName": normalize_compound_name(compound),
                    "OriginalCompoundName": compound,
                    "MainCategory": f"Processing Failed: {error_msg}",
                    "AdditionalCategory1": "",
                    "AdditionalCategory2": "",
                    "EndpointName": "",
                    "XLogP": "",
                    "BioPathway": "",
                    "ToxicityInfo": "",
                    "KnownUse": "",
                    "DisorderDisease": ""
                })

        result_df = pd.DataFrame(all_rows)

        # Failed compounds are appended so the main CSV lists every input.
        if failed_list:
            failed_df = pd.DataFrame(failed_list)
            result_df = pd.concat([result_df, failed_df], ignore_index=True)

        column_order = [
            "CompoundName",
            "OriginalCompoundName",
            "MainCategory",
            "AdditionalCategory1",
            "AdditionalCategory2",
            "EndpointName",
            "XLogP",
            "BioPathway",
            "ToxicityInfo",
            "KnownUse",
            "DisorderDisease"
        ]

        # An empty result has no columns yet; create them so the CSV header
        # is always complete.
        for col in column_order:
            if col not in result_df.columns:
                result_df[col] = ""

        result_df = result_df.reindex(columns=column_order)

        csv_filename = f"Compound_Query_Results_Batch{batch_num}.csv"
        csv_path_out = os.path.join(result_folder, csv_filename)
        # utf-8-sig writes a BOM at the start of the file.
        result_df.to_csv(csv_path_out, index=False, encoding="utf-8-sig")
        log(f"📄 Result file saved to: {csv_path_out}", "blue")
        log(f"📊 Total Rows: {len(result_df)} rows", "blue")

        if failed_list:
            fail_file = os.path.join(result_folder, f"Failed_List_Batch{batch_num}.csv")
            pd.DataFrame(failed_list).to_csv(fail_file, index=False, encoding="utf-8-sig")
            log(f"❌ {len(failed_list)} compounds failed to process, details: {fail_file}", "red")

        progress_var.set(100)
        log(f"\n{'=' * 40}", "blue")
        log(f"🏁 Processing Complete!", "green")
        log(f"{'=' * 40}", "blue")
        log(f"📊 Statistics: Total Compounds={total} | Successful Rows={len(all_rows)} | Failed Compounds={len(failed_list)}", "blue")
        log(f"📁 All results saved to: {result_folder}", "blue")

        if messagebox.askyesno("Processing Complete", f"Batch processing completed!\nTotal {len(result_df)} rows of data generated\nOpen result folder?"):
            # Opening the folder is best effort: a missing opener or shell
            # error is logged instead of being swallowed silently or being
            # reported as a failure of the (already finished) batch.
            try:
                if os.name == 'nt':
                    os.startfile(result_folder)
                elif os.name == 'posix':
                    import subprocess
                    opener = 'open' if sys.platform == 'darwin' else 'xdg-open'
                    subprocess.run([opener, result_folder])
            except OSError as e:
                log(f"Could not open result folder: {e}", "orange")

    except Exception as e:
        log(f"❌ Overall processing failed: {str(e)}", "red")
        messagebox.showerror("Error", f"Processing failed: {str(e)}")
    finally:
        # Reset the progress bar whether the run succeeded or failed.
        progress_var.set(0)
|
| |
|
| |
|
| |
|
class CompoundBatchToolGUI:
    """Main window of the batch annotation tool.

    Lays out four stacked sections in the given root window (CSV file
    selection, parameter configuration, start button with progress bar,
    processing log) and starts the batch job on a background thread when
    the user clicks the start button.
    """

    def __init__(self, root):
        """Build all widgets inside ``root``.

        Args:
            root: The Tk root (or toplevel) window that hosts the GUI.
        """
        self.root = root
        self.root.title("CECs BatchAnnotator v1.0")
        self.root.geometry("850x700")
        self.root.resizable(True, True)

        # Defaults used to pre-fill the parameter fields.
        # NOTE(review): the API key and server address are hard-coded here;
        # consider loading them from the environment or a config file.
        self.default_api_key = "app-QRGuoLVqSksMsG4t9O53cITj"
        self.default_base_url = "http://192.168.0.179:8080/v1"
        self.default_save_root = "./Compound_Query_Results"
        self.default_compound_col = "IUPAC_name"
        self.default_csv_encoding = "utf-8"
        self.default_csv_sep = ","

        main_frame = ttk.Frame(root, padding="20")
        main_frame.pack(fill=tk.BOTH, expand=True)

        # --- Section 1: CSV file selection ---------------------------------
        file_frame = ttk.LabelFrame(main_frame, text="1. Select CSV File", padding="10")
        file_frame.pack(fill=tk.X, pady=5)

        self.csv_path_var = tk.StringVar()
        # Read-only entry: the path is only set through the file dialog.
        ttk.Entry(file_frame, textvariable=self.csv_path_var, state="readonly", width=65).grid(
            row=0, column=1, padx=5, pady=5)
        ttk.Button(file_frame, text="Select File", command=self.select_csv_file).grid(
            row=0, column=0, padx=5, pady=5)

        # --- Section 2: parameters -----------------------------------------
        param_frame = ttk.LabelFrame(main_frame, text="2. Parameter Configuration", padding="10")
        param_frame.pack(fill=tk.X, pady=5)

        ttk.Label(param_frame, text="Dify API Key:").grid(row=0, column=0, sticky=tk.W, padx=5, pady=3)
        self.api_key_var = tk.StringVar(value=self.default_api_key)
        ttk.Entry(param_frame, textvariable=self.api_key_var, width=60).grid(
            row=0, column=1, columnspan=3, padx=5, pady=3)

        ttk.Label(param_frame, text="Dify URL:").grid(row=1, column=0, sticky=tk.W, padx=5, pady=3)
        self.base_url_var = tk.StringVar(value=self.default_base_url)
        ttk.Entry(param_frame, textvariable=self.base_url_var, width=60).grid(
            row=1, column=1, columnspan=3, padx=5, pady=3)

        ttk.Label(param_frame, text="Compound Column Name:").grid(row=2, column=0, sticky=tk.W, padx=5, pady=3)
        self.compound_col_var = tk.StringVar(value=self.default_compound_col)
        ttk.Entry(param_frame, textvariable=self.compound_col_var, width=20).grid(
            row=2, column=1, padx=5, pady=3)

        ttk.Label(param_frame, text="CSV Encoding:").grid(row=2, column=2, sticky=tk.W, padx=5, pady=3)
        self.csv_encoding_var = tk.StringVar(value=self.default_csv_encoding)
        ttk.Entry(param_frame, textvariable=self.csv_encoding_var, width=15).grid(
            row=2, column=3, padx=5, pady=3)

        ttk.Label(param_frame, text="CSV Separator:").grid(row=3, column=0, sticky=tk.W, padx=5, pady=3)
        self.csv_sep_var = tk.StringVar(value=self.default_csv_sep)
        ttk.Entry(param_frame, textvariable=self.csv_sep_var, width=20).grid(
            row=3, column=1, padx=5, pady=3)

        ttk.Label(param_frame, text="Result Save Path:").grid(row=4, column=0, sticky=tk.W, padx=5, pady=3)
        self.save_root_var = tk.StringVar(value=self.default_save_root)
        ttk.Entry(param_frame, textvariable=self.save_root_var, width=50).grid(
            row=4, column=1, columnspan=2, padx=5, pady=3)
        ttk.Button(param_frame, text="Select Path", command=self.select_save_root).grid(
            row=4, column=3, padx=5, pady=3)

        # --- Section 3: progress bar and start button ----------------------
        op_frame = ttk.LabelFrame(main_frame, text="3. Start Processing", padding="10")
        op_frame.pack(fill=tk.X, pady=5)

        self.progress_var = tk.DoubleVar()
        progress_bar = ttk.Progressbar(op_frame, variable=self.progress_var, maximum=100)
        progress_bar.pack(fill=tk.X, padx=5, pady=5)

        self.start_btn = ttk.Button(op_frame, text="Start Batch Processing", command=self.start_processing)
        self.start_btn.pack(pady=5)

        # --- Section 4: log -------------------------------------------------
        log_frame = ttk.LabelFrame(main_frame, text="4. Processing Log", padding="10")
        log_frame.pack(fill=tk.BOTH, expand=True, pady=5)

        # Kept disabled so the user cannot type into the log.
        self.log_text = scrolledtext.ScrolledText(log_frame, wrap=tk.WORD, state=tk.DISABLED)
        self.log_text.pack(fill=tk.BOTH, expand=True)

        # Color tags available to log writers.
        self.log_text.tag_config("red", foreground="red")
        self.log_text.tag_config("green", foreground="green")
        self.log_text.tag_config("blue", foreground="blue")
        self.log_text.tag_config("orange", foreground="orange")
        self.log_text.tag_config("gray", foreground="gray")

        tip_label = ttk.Label(main_frame,
                              text="Tip: Each endpoint returned by Dify generates a separate row in the result",
                              foreground="gray")
        tip_label.pack(side=tk.BOTTOM, pady=10)

    def select_csv_file(self):
        """Ask for the input CSV and store the chosen path (if any)."""
        file_path = filedialog.askopenfilename(
            title="Select Compound CSV File",
            filetypes=[("CSV Files", "*.csv"), ("All Files", "*.*")]
        )
        if file_path:
            self.csv_path_var.set(file_path)

    def select_save_root(self):
        """Ask for the result directory and store the chosen path (if any)."""
        folder_path = filedialog.askdirectory(title="Select Result Save Folder")
        if folder_path:
            self.save_root_var.set(folder_path)

    def start_processing(self):
        """Validate the form and run the batch job on a background thread.

        Shows a warning and returns when the CSV path, API key or URL is
        missing. Otherwise the start button is disabled, the log is
        cleared, and a daemon thread runs the batch job; the button is
        re-enabled when that thread finishes, whatever the outcome.
        """
        csv_path = self.csv_path_var.get()
        if not csv_path:
            messagebox.showwarning("Warning", "Please select a CSV file first!")
            return

        api_key = self.api_key_var.get().strip()
        if not api_key:
            messagebox.showwarning("Warning", "Please fill in the Dify API Key!")
            return

        base_url = self.base_url_var.get().strip()
        if not base_url:
            messagebox.showwarning("Warning", "Please fill in the Dify URL!")
            return

        # Snapshot the remaining settings here, on the GUI thread, like the
        # three values above, so the worker never reads Tk variables and a
        # later edit of the form cannot affect a running job. Blank optional
        # fields fall back to their defaults instead of failing in pandas.
        save_root = self.save_root_var.get()
        compound_col = self.compound_col_var.get()
        if not compound_col.strip():
            compound_col = self.default_compound_col
        csv_encoding = self.csv_encoding_var.get().strip() or self.default_csv_encoding
        # Not stripped: a whitespace character can be a legitimate separator.
        csv_sep = self.csv_sep_var.get() or self.default_csv_sep

        # Prevent a second run from being started while one is in progress.
        self.start_btn.config(state=tk.DISABLED)

        # Clear the log; the widget must be writable while it is modified.
        self.log_text.config(state=tk.NORMAL)
        self.log_text.delete("1.0", tk.END)
        self.log_text.config(state=tk.DISABLED)

        def process_thread():
            try:
                batch_process_compounds_gui(
                    csv_path=csv_path,
                    save_root=save_root,
                    api_key=api_key,
                    base_url=base_url,
                    log_text=self.log_text,
                    progress_var=self.progress_var,
                    compound_col=compound_col,
                    csv_encoding=csv_encoding,
                    csv_sep=csv_sep
                )
            finally:
                # Always give the button back, even if the job raised.
                self.start_btn.config(state=tk.NORMAL)

        # Daemon thread: it does not keep the process alive after the
        # window is closed.
        threading.Thread(target=process_thread, daemon=True).start()
|
| |
|
| |
|
| |
|
if __name__ == "__main__":
    # Script entry point: create the Tk root window, attach the GUI to it
    # and block in the Tk event loop until the window is closed.
    main_window = tk.Tk()
    gui = CompoundBatchToolGUI(main_window)
    main_window.mainloop()