# NOTE: removed non-Python scrape residue from the hosting page header
# ("leonardklin's picture / Upload 328 files / 978fed5 verified") — it was
# not part of the module and would have raised a SyntaxError.
"""Data analysis workflow form and runner."""
from pathlib import Path
import streamlit as st
from loguru import logger
from utils import cleanup_uploaded_data, save_and_extract_upload
from scider.core import constant
from scider.workflows.data_workflow import DataWorkflow
from scider.workflows.hypo_data_workflow import HypoDataWorkflow
def run_data(path, q, workspace_path):
    """Run the data-analysis workflow. Called from a background thread.

    Args:
        path: Local filesystem path to the dataset, or a HuggingFace dataset
            repo name (e.g. ``"scikit-learn/iris"``) when no such local path
            exists.
        q: Analysis query string. NOTE(review): currently unused here — the
            workflow appears to carry its own query state; kept for
            signature compatibility with callers.
        workspace_path: Directory the workflow should use as its workspace.

    Returns:
        Tuple of (markdown result string, intermediate state list).
    """
    # Preserve a non-existent path as the raw string so HF repo names like
    # "org/dataset" pass through unmodified; only wrap real local paths.
    data_path = Path(path) if Path(path).exists() else path  # may be HF repo name
    logger.info(f"Running data analysis on path: {data_path}")
    w = DataWorkflow(
        # Fix: use the value computed above. Previously this recomputed the
        # path and unconditionally coerced it to Path, which turned HF repo
        # names into (non-existent) filesystem paths.
        data_path=data_path,
        workspace_path=workspace_path,
        recursion_limit=100,
    )
    w.run()
    # Intermediate state is optional on the workflow object; default to [].
    intermediate_state = getattr(w, "data_agent_intermediate_state", [])
    if w.final_status != "success":
        error_msg = w.error_message or "Data workflow failed"
        return f"Data workflow failed: {error_msg}", intermediate_state
    out = ["## Data Analysis Complete"]
    if w.data_summary:
        out.append(w.data_summary)
    return "\n\n".join(out), intermediate_state
def run_hypo_data(feature_desc, num_rows, query, workspace_path):
    """Run hypothetical data workflow. Called from background thread.

    Args:
        feature_desc: Free-text description of the synthetic dataset to generate.
        num_rows: Number of rows to generate.
        query: User's analysis query for the generated data.
        workspace_path: Directory the workflow should use as its workspace.

    Returns:
        Tuple of (markdown result string, empty intermediate-state list).
    """
    logger.info(f"Running hypothetical data generation: {feature_desc[:100]}...")
    workflow = HypoDataWorkflow(
        feature_desc=feature_desc,
        workspace_path=workspace_path,
        num_rows=num_rows,
        user_query=query,
        recursion_limit=100,
    )
    workflow.run()
    # Success path first; failure falls through to the error message below.
    if workflow.final_status == "success":
        sections = ["## Hypothetical Data Analysis Complete"]
        if workflow.data_summary:
            sections.append(workflow.data_summary)
        return "\n\n".join(sections), []
    reason = workflow.error_message or "Hypothetical data workflow failed"
    return f"Workflow failed: {reason}", []
def render_form():
    """Render the data analysis form.

    Returns:
        A workflow-config dict describing the submitted job
        (``type`` is ``"data"`` or ``"data_hypo"``), or ``None`` when the
        form was not submitted or validation failed.
    """
    hf_enabled = constant.HF_DATASET_DOWNLOAD_ENABLED
    # Build the radio options; the HF option is feature-flagged.
    source_options = ["Upload local file"]
    if hf_enabled:
        source_options.append("HuggingFace dataset")
    source_options.append("Generate hypothetical data")
    # The radio sits OUTSIDE the form so switching source reruns the script
    # immediately and swaps the visible form fields.
    data_source = st.radio(
        "Data Source",
        source_options,
        horizontal=True,
        key="data_source_radio",
    )
    with st.form("data_form", clear_on_submit=True):
        st.markdown("### Data Analysis Workflow")
        if data_source == "Generate hypothetical data":
            # --- Synthetic-data branch ---
            feature_desc = st.text_area(
                "Describe the data you want to generate",
                placeholder=(
                    "e.g. A dataset about house prices with features: "
                    "square footage (1000-5000 sq ft), number of bedrooms (1-6), "
                    "age of house (0-100 years), price ($100k-$1M)"
                ),
                height=150,
                help="Describe the features, their ranges, and the domain of the dataset.",
            )
            num_rows = st.number_input(
                "Number of rows",
                min_value=10,
                max_value=100000,
                value=1000,
                step=100,
            )
            query = st.text_input(
                "Analysis query",
                placeholder="What would you like to analyze about this data?",
            )
            submitted = st.form_submit_button("Generate & Analyze")
            if submitted:
                # A feature description is mandatory for generation.
                if not feature_desc or not feature_desc.strip():
                    st.error("Please describe the data you want to generate.")
                    return None
                return {
                    "type": "data_hypo",
                    "feature_desc": feature_desc.strip(),
                    "num_rows": num_rows,
                    # Fall back to a query derived from the description.
                    "query": query
                    or f"Analyze this synthetic dataset: {feature_desc.strip()[:200]}",
                }
        elif data_source == "HuggingFace dataset":
            # --- HF dataset branch (only reachable when hf_enabled) ---
            hf_repo = st.text_input(
                "HuggingFace Dataset Repo",
                placeholder="e.g. scikit-learn/iris",
                help="Enter a HuggingFace dataset repository name. It will be downloaded automatically.",
            )
            query = st.text_input("Query", placeholder="What would you like to analyze?")
            submitted = st.form_submit_button("Run Data Analysis")
            if submitted:
                if not hf_repo or not hf_repo.strip():
                    st.error("Please enter a HuggingFace dataset repository name.")
                    return None
                # Empty query gets a generic default.
                if not query or not query.strip():
                    query = "Analyze this dataset — explore its structure, key patterns, and notable findings."
                # The repo name is passed as "path"; the workflow resolves it.
                return {"type": "data", "path": hf_repo.strip(), "query": query}
        else:
            # --- Local upload branch (default) ---
            st.caption("Upload a zip dataset or enter a path to existing data")
            uploaded_zip = st.file_uploader(
                "Upload ZIP dataset (optional)",
                type=["zip"],
                help="Upload a zip file containing your dataset. Extracted temporarily, deleted on reset.",
            )
            # Show any dataset already uploaded in a previous rerun.
            if st.session_state.get("uploaded_data_path"):
                st.info(f"Using uploaded data: `{st.session_state.uploaded_data_path}`")
            query = st.text_input("Query", placeholder="What would you like to analyze?")
            submitted = st.form_submit_button("Run Data Analysis")
            if submitted:
                if not query or not query.strip():
                    query = "Analyze this dataset — explore its structure, key patterns, and notable findings."
                path_to_use = None
                if uploaded_zip:
                    # A fresh upload replaces any previously extracted data.
                    cleanup_uploaded_data()
                    extracted = save_and_extract_upload(uploaded_zip)
                    if extracted and extracted.exists():
                        extracted = extracted.resolve()
                        # Persist across Streamlit reruns via session state.
                        st.session_state.uploaded_data_path = str(extracted)
                        st.session_state.workspace_path = extracted.parent
                        path_to_use = str(extracted)
                        st.success(f"File uploaded and extracted to: {path_to_use}")
                    else:
                        st.error(
                            f"Failed to process uploaded zip file. Extracted path: {extracted}"
                        )
                elif st.session_state.get("uploaded_data_path"):
                    # No new upload: reuse the previously extracted path if
                    # it still exists on disk.
                    path = Path(st.session_state.uploaded_data_path).resolve()
                    if path.exists():
                        path_to_use = str(path)
                        st.session_state.workspace_path = path.parent
                    else:
                        st.warning(f"Previously uploaded path no longer exists: {path}")
                        cleanup_uploaded_data()
                if path_to_use:
                    # Re-verify right before submitting the job.
                    verify_path = Path(path_to_use).resolve()
                    if not verify_path.exists():
                        st.error(f"Path does not exist: {path_to_use}")
                    else:
                        return {"type": "data", "path": str(verify_path), "query": query}
                else:
                    st.error("Please upload a zip file or enter a data path.")
    # Nothing submitted (or validation surfaced an error above).
    return None