Qianhui19 commited on
Commit
91a6d73
·
verified ·
1 Parent(s): 6b3fc20

Upload 2 files

Browse files
step1_pubchemlite_invitrodb_to_dify_en.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Import dependency libraries
from flask import Flask, request, jsonify
import pandas as pd
import sqlalchemy
from sqlalchemy.exc import SQLAlchemyError

# Create Flask application
app = Flask(__name__)

# Configure connection information for two databases (please modify username/password/address according to actual environment)
# NOTE(review): credentials are hard-coded in source control here — prefer
# loading them from environment variables or a config file in production.
DB_CONFIGS = {
    "pubchemlite": {
        "uri": "mysql+pymysql://sql_user:SQLUSER@localhost:3306/pubchemlite"
    },
    "invitrodb_v4_3": {
        "uri": "mysql+pymysql://sql_user:SQLUSER@localhost:3306/invitrodb_v4_3"  # Please confirm database name and password
    }
}

# List of allowed database identifiers
ALLOWED_DB_IDENTIFIERS = list(DB_CONFIGS.keys())
# Only allow SELECT operations (security restriction)
ALLOWED_SQL_OPERATIONS = ["select"]
26
# Core interface: /execute_sql (supports POST method, GET only returns description)

# Engines are cached per database so each request reuses the existing
# connection pool instead of creating (and leaking) a new engine on every call.
_ENGINE_CACHE = {}


def _get_engine(db_identifier):
    """Return the cached SQLAlchemy engine for *db_identifier*, creating it lazily."""
    engine = _ENGINE_CACHE.get(db_identifier)
    if engine is None:
        engine = sqlalchemy.create_engine(
            DB_CONFIGS[db_identifier]["uri"],
            pool_pre_ping=True,  # Check connection validity
            pool_recycle=3600    # Recycle connections every 1 hour to prevent timeouts
        )
        _ENGINE_CACHE[db_identifier] = engine
    return engine


@app.route('/execute_sql', methods=['GET', 'POST'])
def execute_sql():
    """Execute a read-only SQL query against one of the configured databases.

    POST JSON body: {"db_identifier": "pubchemlite"|"invitrodb_v4_3", "sql": "SELECT ..."}.
    GET returns a usage description. All responses are JSON with
    status / data / message fields; errors use HTTP 400 (client) or 500 (database).
    """
    # GET request returns interface description
    if request.method == 'GET':
        return jsonify({
            "status": "info",
            "message": "Please use POST method to call, parameters include db_identifier (pubchemlite/invitrodb_v4_3) and sql (SELECT only)"
        })

    db_identifier = None  # defined before try so the SQLAlchemyError handler can read it
    try:
        # 1. Get and validate request parameters.
        # silent=True returns None on a missing/invalid JSON body instead of
        # letting Flask raise its own non-JSON 400/415 error page.
        data = request.get_json(silent=True)
        if not data:
            return jsonify({
                "status": "error",
                "message": "Request body cannot be empty, must contain db_identifier and sql parameters"
            }), 400

        # Get database identifier and validate
        db_identifier = data.get("db_identifier")
        if not db_identifier:
            return jsonify({
                "status": "error",
                "message": "Missing required parameter: db_identifier (optional values: pubchemlite/invitrodb_v4_3)"
            }), 400

        if db_identifier not in ALLOWED_DB_IDENTIFIERS:
            return jsonify({
                "status": "error",
                "message": f"Invalid db_identifier: {db_identifier}, only supports: {','.join(ALLOWED_DB_IDENTIFIERS)}"
            }), 400

        # Get SQL and validate
        sql = data.get("sql")
        if not sql:
            return jsonify({
                "status": "error",
                "message": "Missing required parameter: sql (SELECT query statements only)"
            }), 400

        # Security check: only allow SELECT operations
        sql_lower = sql.strip().lower()
        if not sql_lower.startswith(tuple(ALLOWED_SQL_OPERATIONS)):
            return jsonify({
                "status": "error",
                "message": "Only SELECT query operations are supported, dangerous operations like INSERT/UPDATE/DELETE/DROP are prohibited"
            }), 400

        # Reject stacked statements ("SELECT ...; DROP ...") — a bare prefix
        # check alone does not prevent them. A single trailing semicolon is allowed.
        if ";" in sql_lower.rstrip().rstrip(";"):
            return jsonify({
                "status": "error",
                "message": "Only SELECT query operations are supported, dangerous operations like INSERT/UPDATE/DELETE/DROP are prohibited"
            }), 400

        # 2. Reuse (or lazily create) the connection engine for the target database
        engine = _get_engine(db_identifier)

        # 3. Execute SQL query
        with engine.connect() as conn:
            df = pd.read_sql(sql, conn)

        # 4. Return successful result
        return jsonify({
            "status": "success",
            "data": df.to_dict(orient="records"),
            "message": ""
        })

    # Database execution error (table/field does not exist, etc.)
    except SQLAlchemyError as e:
        error_msg = str(e)
        # Refine error messages to fit the two database scenarios
        if "pubchemlite_ccs" in error_msg and db_identifier == "invitrodb_v4_3":
            error_msg = f"pubchemlite_ccs table does not exist in invitrodb_v4_3 database (this table is only supported in pubchemlite): {error_msg}"
        elif "CompoundName" in error_msg and db_identifier == "invitrodb_v4_3":
            error_msg = f"CompoundName field does not exist in invitrodb_v4_3 database (this field is only supported in pubchemlite_ccs table of pubchemlite): {error_msg}"
        elif "assay" in error_msg and db_identifier == "pubchemlite":
            error_msg = f"assay table does not exist in pubchemlite database (this table is only supported in invitrodb_v4_3): {error_msg}"
        return jsonify({
            "status": "error",
            "message": f"Database execution error: {error_msg}"
        }), 500

    # Client issues such as parameter/format errors
    except Exception as e:
        return jsonify({
            "status": "error",
            "message": f"Request processing failed: {str(e)}"
        }), 400
115
+
116
+
117
# Root directory route (provides interface usage instructions)
@app.route('/')
def home():
    """Serve a static HTML usage page describing the /execute_sql endpoint."""
    # NOTE(review): indentation inside this HTML literal was lost in the diff
    # rendering; whitespace is insignificant to the browser, but confirm the
    # original layout if byte-exact output matters.
    return """
    <h1>SQL Execution API (Supports Dual Databases)</h1>
    <p>Please send POST requests to <code>/execute_sql</code> to execute SQL queries.</p>
    <p>Request parameters:</p>
    <ul>
    <li>db_identifier: Target database (required, optional values: pubchemlite/invitrodb_v4_3)</li>
    <li>sql: SELECT query statement (required, SELECT operations only)</li>
    </ul>
    <p>Example 1 (query pubchemlite):</p>
    <pre>
    curl -X POST http://127.0.0.1:5000/execute_sql \\
    -H "Content-Type: application/json" \\
    -d '{
    "db_identifier": "pubchemlite",
    "sql": "SELECT Identifier, CompoundName, MolecularFormula FROM pubchemlite_ccs WHERE PubMed_Count > 5 LIMIT 10"
    }'
    </pre>
    <p>Example 2 (query invitrodb_v4_3):</p>
    <pre>
    curl -X POST http://127.0.0.1:5000/execute_sql \\
    -H "Content-Type: application/json" \\
    -d '{
    "db_identifier": "invitrodb_v4_3",
    "sql": "SELECT aid, assay_name, organism FROM assay WHERE ncbi_taxon_id = 9606 LIMIT 10"
    }'
    </pre>
    """
147
+
148
+
149
# Start the application (listen on all network interfaces, port 5000, debug mode can be turned off)
if __name__ == '__main__':
    # NOTE(review): host='0.0.0.0' exposes this API on every interface with no
    # authentication — confirm it is only reachable on a trusted network.
    app.run(host='0.0.0.0', port=5000, debug=False)
step2_CECs_annotating_agent_v1.0.py ADDED
@@ -0,0 +1,578 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ==================== Compound Batch Query Tool (Desktop Version) ====================
2
+ # Supports batch query (AC50 matching function removed)
3
+
4
+ import tkinter as tk
5
+ from tkinter import ttk, filedialog, messagebox, scrolledtext
6
+ import pandas as pd
7
+ import requests
8
+ import json
9
+ import os
10
+ import time
11
+ from typing import Optional, Dict, List
12
+ from datetime import datetime
13
+ import threading
14
+ import sys
15
+
16
+
17
# ==================== Core Function Module ====================
class DifyBasicChat:
    """Dify Basic Chat Function Encapsulation.

    Thin client around the Dify ``/chat-messages`` HTTP endpoint. Supports
    blocking responses and server-sent-event (SSE) streaming responses.
    """

    def __init__(self, api_key: str, base_url: str = "http://localhost/v1"):
        # The API key is sent as a Bearer token on every request.
        self.api_key = api_key
        self.base_url = base_url.rstrip("/")
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

    def send_message(
        self,
        query: str,
        user: str,
        conversation_id: Optional[str] = None,
        response_mode: str = "blocking",
        inputs: Optional[dict] = None
    ) -> tuple:
        """Send chat message.

        Returns a 3-tuple ``(answer_text, conversation_id, full_response)``.
        On a transport failure the answer text carries the error message,
        conversation_id is None and full_response is ``{"error": ...}``.
        """
        url = f"{self.base_url}/chat-messages"
        payload = {
            "query": query,
            "user": user,
            "response_mode": response_mode,
            "inputs": inputs or {}
        }
        if conversation_id:
            payload["conversation_id"] = conversation_id

        full_response = None
        try:
            if response_mode == "blocking":
                res = requests.post(url, headers=self.headers, json=payload, timeout=120)
                res.raise_for_status()
                full_response = res.json()
                answer = full_response.get("answer", "")
                conv_id = full_response.get("conversation_id")
                return answer, conv_id, full_response

            else:
                # Streaming mode: accumulate SSE "message" events until "message_end".
                full_answer = ""
                conv_id = None
                res = requests.post(url, headers=self.headers, json=payload, stream=True, timeout=120)
                res.raise_for_status()
                for line in res.iter_lines():
                    if line:
                        # BUGFIX: the previous `.lstrip("data: ")` stripped any
                        # leading characters from the SET {d,a,t,' ',':'} — it
                        # could mangle non-"data:" SSE lines (e.g. "event: x"
                        # became "vent: x"). Strip the exact "data: " prefix.
                        line_data = line.decode("utf-8")
                        if line_data.startswith("data: "):
                            line_data = line_data[len("data: "):]
                        if line_data:
                            try:
                                data = json.loads(line_data)
                                full_response = data
                                if data.get("event") == "message":
                                    full_answer += data.get("answer", "")
                                elif data.get("event") == "message_end":
                                    conv_id = data.get("conversation_id")
                                    break
                                elif data.get("event") == "error":
                                    raise Exception(f"Streaming Error: {data.get('message')}")
                            except json.JSONDecodeError:
                                # Non-JSON keep-alive / comment lines are skipped.
                                continue
                return full_answer, conv_id, full_response

        except requests.exceptions.RequestException as e:
            error_msg = f"Request Failed: {str(e)}"
            return error_msg, None, {"error": error_msg}
84
+
85
+
86
def parse_dify_response(answer_text: str) -> dict:
    """Parse classification and complete information returned by Dify.

    Accepts either the nested format ``{"CompoundName": {...fields...}}`` or
    the flat format ``{...fields...}``; markdown code fences around the JSON
    are stripped first. On a parse failure the error text is stored in
    "Main Category" so the caller still receives a well-formed record.

    BUGFIX: the flat-format branch previously extracted only the three
    category fields, silently dropping EndpointName/XLogP/BioPathway/
    ToxicityInfo/KnownUse/DisorderDisease; both formats now share one
    extraction helper.
    """
    result = {
        # "CASRN": "",
        "Main Category": "",
        "Additional Category 1": "",
        "Additional Category 2": "",
        "EndpointName": [],  # Keep for compatibility, no longer used for matching
        "XLogP": "",
        "BioPathway": "",
        "ToxicityInfo": "",
        "KnownUse": "",
        "DisorderDisease": ""
    }

    def _fill(category_info: dict):
        """Copy every known field from *category_info* into result."""
        for key in ("Main Category", "Additional Category 1", "Additional Category 2",
                    "XLogP", "BioPathway", "ToxicityInfo", "KnownUse", "DisorderDisease"):
            result[key] = category_info.get(key, "")
        # Process EndpointName - may be list or string
        endpoint_value = category_info.get("EndpointName", [])
        if isinstance(endpoint_value, list):
            result["EndpointName"] = endpoint_value
        elif isinstance(endpoint_value, str):
            result["EndpointName"] = [endpoint_value] if endpoint_value else []

    try:
        clean_text = answer_text.strip()
        # Clean code block markers
        if clean_text.startswith("```json"):
            clean_text = clean_text.replace("```json", "").replace("```", "").strip()
        elif clean_text.startswith("```"):
            clean_text = clean_text.replace("```", "").strip()

        # Parse JSON
        response_json = json.loads(clean_text)

        if isinstance(response_json, dict):
            # Get compound name (first key)
            compound_name = next(iter(response_json.keys())) if response_json else ""

            if compound_name and isinstance(response_json.get(compound_name), dict):
                # Nested format: {"CompoundName": {...}}
                _fill(response_json[compound_name])
            else:
                # Flat format (compatible with old format)
                _fill(response_json)

    except json.JSONDecodeError as e:
        result["Main Category"] = f"JSON Parsing Error: {str(e)}"
    except Exception as e:
        result["Main Category"] = f"Parsing Failed: {str(e)}"

    return result
151
+
152
+
153
def normalize_compound_name(name: str) -> str:
    """Normalize a compound name: strip one pair of surrounding quotes and
    collapse runs of whitespace.

    Non-string input (e.g. NaN from pandas) yields "".
    """
    if not isinstance(name, str):
        return ""

    # Remove quotes (only a matching surrounding pair)
    name = name.strip()
    if name.startswith('"') and name.endswith('"'):
        name = name[1:-1]
    elif name.startswith("'") and name.endswith("'"):
        name = name[1:-1]

    # Remove extra spaces
    return ' '.join(name.split())


def expand_endpoint_rows(parsed_result: dict, compound_name: str) -> list:
    """Expand EndpointName into one output row per endpoint (no AC50 matching).

    If the parsed result carries no endpoints, a single row with an empty
    EndpointName is emitted so the compound still appears in the output.
    The 12-key row template, previously duplicated verbatim in two branches,
    is built by a single helper.
    """
    # Normalize compound name
    compound_clean = normalize_compound_name(compound_name)

    def _row(endpoint: str) -> dict:
        """Build one result row; all rows share the same classification fields."""
        return {
            "CompoundName": compound_clean,
            "OriginalCompoundName": compound_name,
            # "CASRN": parsed_result.get("CASRN", ""),
            "MainCategory": parsed_result.get("Main Category", ""),
            "AdditionalCategory1": parsed_result.get("Additional Category 1", ""),
            "AdditionalCategory2": parsed_result.get("Additional Category 2", ""),
            "EndpointName": endpoint,
            "XLogP": parsed_result.get("XLogP", ""),
            "BioPathway": parsed_result.get("BioPathway", ""),
            "ToxicityInfo": parsed_result.get("ToxicityInfo", ""),
            "KnownUse": parsed_result.get("KnownUse", ""),
            "DisorderDisease": parsed_result.get("DisorderDisease", "")
        }

    endpoint_names = parsed_result.get("EndpointName", [])
    if not endpoint_names:
        # Create one placeholder row if no EndpointName
        return [_row("")]
    # Create one row per endpoint (without AC50 matching)
    return [_row(endpoint) for endpoint in endpoint_names]
219
+
220
+
221
def batch_process_compounds_gui(
    csv_path: str,
    save_root: str,
    api_key: str,
    base_url: str,
    log_text: tk.Text,
    progress_var: tk.DoubleVar,
    user_id: str = "batch_compound_user",
    compound_col: str = "IUPAC_name",
    batch_num: int = 1,
    csv_encoding: str = "utf-8",
    csv_sep: str = ","
):
    """Batch process compounds (adapted for GUI, AC50 matching removed).

    Reads compound names from the *compound_col* column of the CSV at
    *csv_path*, sends each one to Dify, expands the parsed endpoints into
    rows, and writes per-compound JSON records plus a merged CSV under a
    timestamped folder inside *save_root*. Progress and log messages are
    pushed to the supplied Tk widgets.

    NOTE(review): this function runs on a worker thread (see
    CompoundBatchToolGUI.start_processing) yet updates Tk widgets directly;
    Tkinter is not guaranteed thread-safe — confirm this is acceptable.
    """

    def log(message, color="black"):
        """Output log to GUI text box (timestamped line, colored via a tag)."""
        log_text.config(state=tk.NORMAL)
        log_text.insert(tk.END, f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] {message}\n")
        log_text.tag_add(color, f"end-2l", f"end-1l")
        log_text.tag_config(color, foreground=color)
        log_text.see(tk.END)
        log_text.config(state=tk.DISABLED)
        log_text.update()

    try:
        # Initialize Dify client
        log("Initializing Dify connection...", "blue")
        chat = DifyBasicChat(api_key=api_key, base_url=base_url)

        # Create save folder (timestamped so repeated runs never collide)
        result_folder = os.path.join(save_root,
                                     f"Compound_Classification_Results_Batch{batch_num}_{datetime.now().strftime('%Y%m%d%H%M%S')}")
        os.makedirs(result_folder, exist_ok=True)
        log(f"Result save folder: {result_folder}", "blue")

        # Read CSV
        log("Reading CSV file...", "blue")
        df = pd.read_csv(
            csv_path,
            encoding=csv_encoding,
            sep=csv_sep,
            na_filter=True
        )
        df = df.reset_index(drop=True)

        # Check if column exists
        if compound_col not in df.columns:
            raise ValueError(
                f"Column not found in CSV: [{compound_col}]\n"
                f"Current CSV columns: {list(df.columns)}"
            )

        # Remove duplicates and empty values
        compounds = df[compound_col].dropna().unique()
        total = len(compounds)
        log(f"Successfully read {total} non-empty and unique compound names", "green")

        all_rows = []  # Store all row data
        failed_list = []

        # Batch processing
        for idx, compound in enumerate(compounds, 1):
            compound = str(compound).strip()
            if not compound:
                continue

            # Update progress
            progress = (idx / total) * 100
            progress_var.set(progress)
            log(f"Processing {idx}/{total}:{compound}", "black")

            try:
                # Call Dify API (blocking mode: one request per compound)
                answer, _, full_response = chat.send_message(
                    query=compound,
                    user=f"{user_id}_batch{batch_num}",
                    response_mode="blocking"
                )

                # Parse results
                parsed_categories = parse_dify_response(answer)

                # Expand EndpointName into multiple rows (without AC50 matching)
                expanded_rows = expand_endpoint_rows(parsed_categories, compound)
                all_rows.extend(expanded_rows)

                # Save original record (for debugging)
                record_file = os.path.join(result_folder, f"Original_Record_{idx}.json")
                with open(record_file, "w", encoding="utf-8") as f:
                    json.dump({
                        "Input Compound": compound,
                        "Dify Original Response": answer,
                        "Complete Response": full_response,
                        "Parsed Classification": parsed_categories,
                        "Expanded Rows Count": len(expanded_rows),
                        "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                    }, f, ensure_ascii=False, indent=4)

                log(f"✅ Processing completed: {compound} | Main Category: {parsed_categories['Main Category']} | Generated {len(expanded_rows)} rows",
                    "green")
                time.sleep(0.5)  # Avoid too fast requests

            except Exception as e:
                # One failed compound must not abort the batch: record it and continue.
                error_msg = str(e)
                log(f"❌ Processing failed: {compound} | Error: {error_msg}", "red")
                failed_list.append({
                    "CompoundName": normalize_compound_name(compound),
                    "OriginalCompoundName": compound,
                    # "CASRN": "",
                    "MainCategory": f"Processing Failed: {error_msg}",
                    "AdditionalCategory1": "",
                    "AdditionalCategory2": "",
                    "EndpointName": "",
                    "XLogP": "",
                    "BioPathway": "",
                    "ToxicityInfo": "",
                    "KnownUse": "",
                    "DisorderDisease": ""
                })

        # Merge results and save
        result_df = pd.DataFrame(all_rows)

        # Add failed records
        if failed_list:
            failed_df = pd.DataFrame(failed_list)
            result_df = pd.concat([result_df, failed_df], ignore_index=True)

        # Define column order
        column_order = [
            "CompoundName",
            "OriginalCompoundName",
            # "CASRN",
            "MainCategory",
            "AdditionalCategory1",
            "AdditionalCategory2",
            "EndpointName",
            "XLogP",
            "BioPathway",
            "ToxicityInfo",
            "KnownUse",
            "DisorderDisease"
        ]

        # Ensure all columns exist
        for col in column_order:
            if col not in result_df.columns:
                result_df[col] = ""

        # Reorder columns
        result_df = result_df.reindex(columns=column_order)

        # Save final CSV (utf-8-sig so Excel detects the encoding)
        csv_filename = f"Compound_Query_Results_Batch{batch_num}.csv"
        csv_path_out = os.path.join(result_folder, csv_filename)
        result_df.to_csv(csv_path_out, index=False, encoding="utf-8-sig")
        log(f"📄 Result file saved to: {csv_path_out}", "blue")
        log(f"📊 Total Rows: {len(result_df)} rows", "blue")

        # Save failed list (separate file)
        if failed_list:
            fail_file = os.path.join(result_folder, f"Failed_List_Batch{batch_num}.csv")
            pd.DataFrame(failed_list).to_csv(fail_file, index=False, encoding="utf-8-sig")
            log(f"❌ {len(failed_list)} compounds failed to process, details: {fail_file}", "red")

        # Update progress and log after completion
        progress_var.set(100)
        log(f"\n{'=' * 40}", "blue")
        log(f"🏁 Processing Complete!", "green")
        log(f"{'=' * 40}", "blue")
        log(f"📊 Statistics: Total Compounds={total} | Successful Rows={len(all_rows)} | Failed Compounds={len(failed_list)}", "blue")
        log(f"📁 All results saved to: {result_folder}", "blue")

        # Ask if open result folder
        if messagebox.askyesno("Processing Complete", f"Batch processing completed!\nTotal {len(result_df)} rows of data generated\nOpen result folder?"):
            if os.name == 'nt':  # Windows
                os.startfile(result_folder)
            elif os.name == 'posix':  # macOS, Linux
                import subprocess
                try:
                    if sys.platform == 'darwin':
                        subprocess.run(['open', result_folder])
                    else:
                        subprocess.run(['xdg-open', result_folder])
                except:
                    # NOTE(review): bare except — best-effort open of the file
                    # browser; consider narrowing to Exception.
                    pass

    except Exception as e:
        log(f"❌ Overall processing failed: {str(e)}", "red")
        messagebox.showerror("Error", f"Processing failed: {str(e)}")
    finally:
        # Reset progress
        progress_var.set(0)
415
+
416
+
417
# ==================== Graphical User Interface Module ====================
class CompoundBatchToolGUI:
    """Tk window that configures and launches the batch compound query."""

    def __init__(self, root):
        # root: the Tk() top-level window supplied by the caller.
        self.root = root
        self.root.title("Compound Batch Query Tool v1.0")
        self.root.geometry("850x700")
        self.root.resizable(True, True)

        # Default configuration
        # NOTE(review): the API key below is committed in source — rotate it
        # and load it from configuration/environment instead.
        self.default_api_key = "app-QRGuoLVqSksMsG4t9O53cITj"
        self.default_base_url = "http://192.168.0.179:8080/v1"
        self.default_save_root = "./Compound_Query_Results"
        self.default_compound_col = "IUPAC_name"
        self.default_csv_encoding = "utf-8"
        self.default_csv_sep = ","

        # Create main frame
        main_frame = ttk.Frame(root, padding="20")
        main_frame.pack(fill=tk.BOTH, expand=True)

        # 1. File selection area
        file_frame = ttk.LabelFrame(main_frame, text="1. Select CSV File", padding="10")
        file_frame.pack(fill=tk.X, pady=5)

        self.csv_path_var = tk.StringVar()
        ttk.Entry(file_frame, textvariable=self.csv_path_var, state="readonly", width=65).grid(row=0, column=1, padx=5,
                                                                                              pady=5)
        ttk.Button(file_frame, text="Select File", command=self.select_csv_file).grid(row=0, column=0, padx=5, pady=5)

        # 2. Parameter configuration area
        param_frame = ttk.LabelFrame(main_frame, text="2. Parameter Configuration", padding="10")
        param_frame.pack(fill=tk.X, pady=5)

        # 2.1 Dify configuration
        ttk.Label(param_frame, text="Dify API Key:").grid(row=0, column=0, sticky=tk.W, padx=5, pady=3)
        self.api_key_var = tk.StringVar(value=self.default_api_key)
        ttk.Entry(param_frame, textvariable=self.api_key_var, width=60).grid(row=0, column=1, columnspan=3, padx=5,
                                                                            pady=3)

        ttk.Label(param_frame, text="Dify URL:").grid(row=1, column=0, sticky=tk.W, padx=5, pady=3)
        self.base_url_var = tk.StringVar(value=self.default_base_url)
        ttk.Entry(param_frame, textvariable=self.base_url_var, width=60).grid(row=1, column=1, columnspan=3, padx=5,
                                                                             pady=3)

        # 2.2 CSV configuration
        ttk.Label(param_frame, text="Compound Column Name:").grid(row=2, column=0, sticky=tk.W, padx=5, pady=3)
        self.compound_col_var = tk.StringVar(value=self.default_compound_col)
        ttk.Entry(param_frame, textvariable=self.compound_col_var, width=20).grid(row=2, column=1, padx=5, pady=3)

        ttk.Label(param_frame, text="CSV Encoding:").grid(row=2, column=2, sticky=tk.W, padx=5, pady=3)
        self.csv_encoding_var = tk.StringVar(value=self.default_csv_encoding)
        ttk.Entry(param_frame, textvariable=self.csv_encoding_var, width=15).grid(row=2, column=3, padx=5, pady=3)

        ttk.Label(param_frame, text="CSV Separator:").grid(row=3, column=0, sticky=tk.W, padx=5, pady=3)
        self.csv_sep_var = tk.StringVar(value=self.default_csv_sep)
        ttk.Entry(param_frame, textvariable=self.csv_sep_var, width=20).grid(row=3, column=1, padx=5, pady=3)

        # 2.3 Save configuration (AC50 folder removed)
        ttk.Label(param_frame, text="Result Save Path:").grid(row=4, column=0, sticky=tk.W, padx=5, pady=3)
        self.save_root_var = tk.StringVar(value=self.default_save_root)
        ttk.Entry(param_frame, textvariable=self.save_root_var, width=50).grid(row=4, column=1, columnspan=2, padx=5,
                                                                              pady=3)
        ttk.Button(param_frame, text="Select Path", command=self.select_save_root).grid(row=4, column=3, padx=5, pady=3)

        # 3. Operation area
        op_frame = ttk.LabelFrame(main_frame, text="3. Start Processing", padding="10")
        op_frame.pack(fill=tk.X, pady=5)

        self.progress_var = tk.DoubleVar()
        progress_bar = ttk.Progressbar(op_frame, variable=self.progress_var, maximum=100)
        progress_bar.pack(fill=tk.X, padx=5, pady=5)

        self.start_btn = ttk.Button(op_frame, text="Start Batch Processing", command=self.start_processing)
        self.start_btn.pack(pady=5)

        # 4. Log output area
        log_frame = ttk.LabelFrame(main_frame, text="4. Processing Log", padding="10")
        log_frame.pack(fill=tk.BOTH, expand=True, pady=5)

        self.log_text = scrolledtext.ScrolledText(log_frame, wrap=tk.WORD, state=tk.DISABLED)
        self.log_text.pack(fill=tk.BOTH, expand=True)
        # Set log color tags
        self.log_text.tag_config("red", foreground="red")
        self.log_text.tag_config("green", foreground="green")
        self.log_text.tag_config("blue", foreground="blue")
        self.log_text.tag_config("orange", foreground="orange")
        self.log_text.tag_config("gray", foreground="gray")

        # 5. Bottom tip (AC50 related tip removed)
        tip_label = ttk.Label(main_frame,
                              text="Tip: Each endpoint returned by Dify generates a separate row in the result",
                              foreground="gray")
        tip_label.pack(side=tk.BOTTOM, pady=10)

    def select_csv_file(self):
        """Select CSV file via the native file-open dialog."""
        file_path = filedialog.askopenfilename(
            title="Select Compound CSV File",
            filetypes=[("CSV Files", "*.csv"), ("All Files", "*.*")]
        )
        if file_path:
            self.csv_path_var.set(file_path)

    def select_save_root(self):
        """Select save path via the native directory-chooser dialog."""
        folder_path = filedialog.askdirectory(title="Select Result Save Folder")
        if folder_path:
            self.save_root_var.set(folder_path)

    def start_processing(self):
        """Start batch processing (new thread to avoid UI freezing)."""
        # Verify required parameters
        csv_path = self.csv_path_var.get()
        if not csv_path:
            messagebox.showwarning("Warning", "Please select a CSV file first!")
            return

        api_key = self.api_key_var.get().strip()
        if not api_key:
            messagebox.showwarning("Warning", "Please fill in the Dify API Key!")
            return

        base_url = self.base_url_var.get().strip()
        if not base_url:
            messagebox.showwarning("Warning", "Please fill in the Dify URL!")
            return

        # Disable start button to prevent duplicate clicks
        self.start_btn.config(state=tk.DISABLED)

        # Clear log
        self.log_text.config(state=tk.NORMAL)
        self.log_text.delete(1.0, tk.END)
        self.log_text.config(state=tk.DISABLED)

        # New thread for processing (avoid UI freezing)
        def process_thread():
            try:
                batch_process_compounds_gui(
                    csv_path=csv_path,
                    save_root=self.save_root_var.get(),
                    api_key=api_key,
                    base_url=base_url,
                    log_text=self.log_text,
                    progress_var=self.progress_var,
                    compound_col=self.compound_col_var.get(),
                    csv_encoding=self.csv_encoding_var.get(),
                    csv_sep=self.csv_sep_var.get()
                )
            finally:
                # Restore button state
                self.start_btn.config(state=tk.NORMAL)

        threading.Thread(target=process_thread, daemon=True).start()
571
+
572
+
573
# ==================== Start Program ====================
if __name__ == "__main__":
    # Normal GUI startup (AC50 debug mode removed)
    root = tk.Tk()
    app = CompoundBatchToolGUI(root)
    root.mainloop()  # Blocks until the window is closed