Spaces:

Martico2432
/

DatasetCreator

Sleeping

App Files Files Community

DatasetCreator / src /streamlit_app.py

Martico2432

Update src/streamlit_app.py

c170dd7 verified about 1 month ago

raw

history blame contribute delete

3.59 kB

	import streamlit as st
	import json
	import io

	def _build_jsonl(conversations):
	    """Serialize conversations to JSON Lines text.

	    Args:
	        conversations: Iterable of conversations, each a list of
	            ``{"role": ..., "content": ...}`` message dicts.

	    Returns:
	        One ``{"messages": [...]}`` JSON object per conversation, each
	        terminated by a newline. Empty input yields an empty string.
	        Non-ASCII characters are emitted as-is (``ensure_ascii=False``).
	    """
	    # A single join avoids repeatedly re-copying a growing string.
	    return "".join(
	        json.dumps({"messages": conv}, ensure_ascii=False) + "\n"
	        for conv in conversations
	    )


	def _render_sidebar():
	    """Draw the saved-conversation counter, the download button and the wipe button."""
	    st.header("Your Session Dataset")
	    count = len(st.session_state.full_dataset)
	    st.metric("Conversations Saved", count)

	    if count > 0:
	        # The export is rebuilt in memory on every run; nothing is written to disk here.
	        st.download_button(
	            label="📥 Download My Dataset (.jsonl)",
	            data=_build_jsonl(st.session_state.full_dataset),
	            file_name="my_private_dataset.jsonl",
	            mime="application/jsonl",
	            type="primary",
	        )

	    # NOTE(review): kept outside the `count > 0` branch so an unsaved draft
	    # can also be wiped -- confirm this matches the intended layout.
	    if st.button("🗑️ Wipe All Data"):
	        st.session_state.full_dataset = []
	        st.session_state.current_conversation = []
	        st.rerun()


	def _render_message_form():
	    """Draw the role selector and text area, and append new messages to the draft."""
	    st.subheader("Add Message")
	    # Display label -> role string stored in the exported message.
	    role_map = {
	        "User": "user",
	        "Assistant": "assistant",
	        "Tool Response": "tool",
	    }
	    selected_label = st.selectbox("Role", list(role_map.keys()))
	    actual_role = role_map[selected_label]

	    # The explicit key exposes the text under st.session_state["input_text"].
	    content = st.text_area(
	        "Content",
	        placeholder="Text, <think> tags, or code blocks...",
	        height=300,
	        key="input_text",
	    )

	    if st.button("Add Message to Draft"):
	        # Whitespace-only input is ignored; accepted content is stored unstripped.
	        if content.strip():
	            st.session_state.current_conversation.append({
	                "role": actual_role,
	                "content": content,
	            })
	            st.rerun()


	def _render_draft_preview():
	    """Draw the draft messages with per-message delete buttons and the save button."""
	    st.subheader("Current Draft Preview")
	    draft = st.session_state.current_conversation
	    if not draft:
	        st.write("Draft is empty.")

	    for idx, msg in enumerate(draft):
	        with st.chat_message(msg["role"]):
	            st.markdown(f"{msg['role'].upper()}")
	            st.code(msg["content"], language="markdown")
	            # Index-based keys keep each delete button distinct within one run.
	            if st.button(f"Delete msg {idx}", key=f"del_{idx}"):
	                draft.pop(idx)
	                # Rerun immediately so the loop never continues over the mutated list.
	                st.rerun()

	    if draft:
	        st.divider()
	        if st.button("✅ SAVE CONVERSATION TO SESSION", use_container_width=True):
	            # Store a copy of the draft, then start a fresh one.
	            st.session_state.full_dataset.append(list(draft))
	            st.session_state.current_conversation = []
	            st.toast("Saved to session memory!")
	            st.rerun()


	def main():
	    """Render the dataset builder page.

	    Sets up the page, seeds ``st.session_state.full_dataset`` (saved
	    conversations) and ``st.session_state.current_conversation`` (the draft
	    being edited) on first run, then draws the sidebar, the message input
	    column and the draft preview column.
	    """
	    st.set_page_config(page_title="Private LLM Dataset Builder", layout="wide")
	    st.title("🧠 Private Dataset Creator")
	    # NOTE(review): the data below lives in st.session_state; the "browser RAM"
	    # wording may not match where Streamlit actually keeps it -- confirm.
	    st.info("Everything is stored in your browser RAM. Refreshing the page will clear your progress.")

	    # Seed session storage only when the keys are missing, so reruns keep progress.
	    if "full_dataset" not in st.session_state:
	        st.session_state.full_dataset = []
	    if "current_conversation" not in st.session_state:
	        st.session_state.current_conversation = []

	    with st.sidebar:
	        _render_sidebar()

	    # Input on the left, slightly wider preview on the right.
	    col1, col2 = st.columns([1, 1.2])
	    with col1:
	        _render_message_form()
	    with col2:
	        _render_draft_preview()

	# Launch the UI only when this file is executed as a script, not when imported.
	if __name__ == "__main__":
	    main()