File size: 6,243 Bytes
978fed5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
"""Full workflow form and runner."""

from pathlib import Path

import streamlit as st
from loguru import logger
from utils import _rm_upload_root, save_and_extract_upload

from scider.core import constant
from scider.workflows.full_workflow_with_ideation import FullWorkflowWithIdeation


def run_full(cfg, workspace_path):
    """Run the full workflow described by ``cfg``.

    Called from a background thread.

    Args:
        cfg: Workflow configuration mapping. The keys ``"query"``,
            ``"run_data"`` and ``"run_exp"`` are required. Optional keys are
            ``"data_path"``, ``"run_ideation"`` (defaults to ``True``) and
            ``"run_paper_writing"`` (defaults to ``False``).
        workspace_path: Forwarded unchanged to ``FullWorkflowWithIdeation``
            as its ``workspace_path``.

    Returns:
        A 2-tuple whose first item is the workflow's ``final_summary`` (or
        ``"Workflow finished"`` when that is falsy) and whose second item is
        an empty list.

    Raises:
        KeyError: If one of the required keys is missing from ``cfg``.
    """
    # A falsy/missing data path means "no data". Anything else is wrapped in
    # a Path without touching the filesystem: the value is only forwarded, so
    # probing for existence here would add nothing but a possible OSError.
    # NOTE(review): the form may put a HuggingFace repo id here rather than a
    # filesystem path -- confirm the workflow accepts a Path for that case.
    raw_data_path = cfg.get("data_path")
    data_path = Path(raw_data_path) if raw_data_path else None

    if data_path is not None:
        logger.info(f"Running full workflow with data path: {data_path}")

    run_ideation = cfg.get("run_ideation", True)
    run_paper_writing = cfg.get("run_paper_writing", False)

    workflow = FullWorkflowWithIdeation(
        user_query=cfg["query"],
        workspace_path=workspace_path,
        data_path=data_path,
        run_data_workflow=cfg["run_data"],
        run_experiment_workflow=cfg["run_exp"],
        max_revisions=5,
        # The workflow takes the inverse flag of what the form collects.
        skip_ideation=not run_ideation,
        run_paper_writing=run_paper_writing,
    )
    workflow.run()
    return workflow.final_summary or "Workflow finished", []


def _resolve_local_upload(uploaded_zip):
    """Return the local dataset path to use (as a string), or ``None``.

    A freshly uploaded zip takes precedence: any previously recorded upload
    is removed, the new archive is extracted, and the extracted location is
    recorded in ``st.session_state``. Without a new upload, the previously
    recorded path is reused if it still exists. User-facing success, error
    and warning messages are emitted along the way.

    Args:
        uploaded_zip: Value returned by the form's ``st.file_uploader``
            (falsy when nothing was uploaded).

    Returns:
        The resolved dataset path as a string, or ``None`` when no usable
        local dataset is available.
    """
    if uploaded_zip:
        previous = st.session_state.get("uploaded_full_data_path")
        if previous:
            _rm_upload_root(Path(previous))
            # Re-check membership: the cleanup helper is opaque from here and
            # may already have dropped the key.
            if "uploaded_full_data_path" in st.session_state:
                del st.session_state.uploaded_full_data_path

        extracted = save_and_extract_upload(uploaded_zip)
        if extracted and extracted.exists():
            extracted = extracted.resolve()
            st.session_state.uploaded_full_data_path = str(extracted)
            st.session_state.workspace_path = extracted.parent
            st.success(f"File uploaded and extracted to: {extracted}")
            return str(extracted)

        st.error(f"Failed to process uploaded zip file. Extracted path: {extracted}")
        return None

    remembered = st.session_state.get("uploaded_full_data_path")
    if not remembered:
        return None

    remembered_path = Path(remembered).resolve()
    if remembered_path.exists():
        st.session_state.workspace_path = remembered_path.parent
        return str(remembered_path)

    # The recorded upload has disappeared; forget it so it is not offered
    # again on the next rerun.
    st.warning(f"Previously uploaded path no longer exists: {remembered_path}")
    if "uploaded_full_data_path" in st.session_state:
        del st.session_state.uploaded_full_data_path
    return None


def render_form():
    """Render the full workflow form.

    Returns:
        ``None`` unless the form was submitted with valid input. On a valid
        submission, a workflow config dict with the keys ``"type"`` (always
        ``"full"``), ``"query"`` (the topic with surrounding whitespace
        stripped), ``"data_path"`` (a local path string, a HuggingFace repo
        name, or ``None`` when data analysis is disabled), ``"run_ideation"``,
        ``"run_data"``, ``"run_exp"`` and ``"run_paper_writing"``.
    """
    hf_enabled = constant.HF_DATASET_DOWNLOAD_ENABLED

    # The data-source selector is created outside the form, before the form's
    # widgets, which are chosen based on its value.
    if hf_enabled:
        data_source = st.radio(
            "Data Source",
            ["Upload local file", "HuggingFace dataset"],
            horizontal=True,
            key="full_data_source_radio",
        )
    else:
        data_source = "Upload local file"

    use_hf = data_source == "HuggingFace dataset"

    with st.form("full_form", clear_on_submit=True):
        st.markdown("### Full Workflow")
        topic = st.text_input("Research Topic", placeholder="Enter your research topic...")

        uploaded_full_zip = None
        if use_hf:
            hf_repo = st.text_input(
                "HuggingFace Dataset Repo",
                placeholder="e.g. scikit-learn/iris",
                help="Enter a HuggingFace dataset repository name.",
            )
        else:
            hf_repo = None
            st.caption("Data (for Data Analysis): upload zip or enter path")
            uploaded_full_zip = st.file_uploader(
                "Upload ZIP dataset (optional)",
                type=["zip"],
                key="full_upload",
                help="Zip dataset for Data Analysis. Extracted temporarily, deleted on reset.",
            )
            if st.session_state.get("uploaded_full_data_path"):
                st.info(f"Using: `{st.session_state.uploaded_full_data_path}`")

        run_ideation = st.checkbox("Run Ideation (literature search & idea generation)", value=True)
        run_data = st.checkbox("Run Data Analysis", value=True)
        run_exp = st.checkbox("Run Experiment", value=True)
        run_paper_writing = st.checkbox(
            "Run Paper Writing (LaTeX + PDF via PaperOrchestra)",
            value=False,
            help=(
                "After data + experiment finish, bootstrap a paper workspace under "
                "`<workspace>/paper/` and run the writing agent through the six "
                "PaperOrchestra steps to produce `final/paper.pdf`. Requires "
                "`latexmk` on the host."
            ),
        )
        submitted = st.form_submit_button(
            "Run Full Workflow",
        )

        if not submitted:
            return None

        # The form clears itself on submit, so a missing topic must be
        # reported explicitly or the click would appear to do nothing.
        query = (topic or "").strip()
        if not query:
            st.error("Please enter a research topic.")
            return None

        data_path_to_use = None
        if run_data:
            if use_hf:
                repo_name = (hf_repo or "").strip()
                if not repo_name:
                    st.error("Please enter a HuggingFace dataset repository name.")
                    return None
                data_path_to_use = repo_name
            else:
                data_path_to_use = _resolve_local_upload(uploaded_full_zip)
                if not data_path_to_use:
                    st.error(
                        "Run Data Analysis requires uploading a zip or entering a data path."
                    )
                    return None

        return {
            "type": "full",
            "query": query,
            "data_path": data_path_to_use,
            "run_ideation": run_ideation,
            "run_data": run_data,
            "run_exp": run_exp,
            "run_paper_writing": run_paper_writing,
        }