# DatasetCreator / src/streamlit_app.py
# Martico2432's picture
# Update src/streamlit_app.py
# c170dd7 verified
import streamlit as st
import json
import io
# Maps the role label shown in the "Role" select box to the role string
# stored in each saved message.
_ROLE_BY_LABEL = {
    "User": "user",
    "Assistant": "assistant",
    "Tool Response": "tool",
}


def _conversations_to_jsonl(conversations) -> str:
    """Serialize conversations to JSONL text.

    Args:
        conversations: Iterable of conversations; each one is a
            JSON-serializable list of message dicts.

    Returns:
        One ``{"messages": [...]}`` JSON object per newline-terminated line,
        or ``""`` when there are no conversations. Non-ASCII characters are
        written as-is (``ensure_ascii=False``).
    """
    # Single join instead of repeated ``+=`` so building stays linear.
    return "".join(
        json.dumps({"messages": conv}, ensure_ascii=False) + "\n"
        for conv in conversations
    )


def _init_session_state() -> None:
    """Create the two session-state lists if this session does not have them."""
    if "full_dataset" not in st.session_state:
        st.session_state.full_dataset = []  # all completed conversations
    if "current_conversation" not in st.session_state:
        st.session_state.current_conversation = []  # the active draft


def _render_sidebar() -> None:
    """Render the saved-conversation count plus the download and wipe controls."""
    with st.sidebar:
        st.header("Your Session Dataset")
        count = len(st.session_state.full_dataset)
        st.metric("Conversations Saved", count)

        if count > 0:
            # The JSONL text is built in memory and handed to the button
            # directly as a string; nothing is written to disk here.
            st.download_button(
                label="📥 Download My Dataset (.jsonl)",
                data=_conversations_to_jsonl(st.session_state.full_dataset),
                file_name="my_private_dataset.jsonl",
                mime="application/jsonl",
                type="primary",
            )

            if st.button("🗑️ Wipe All Data"):
                st.session_state.full_dataset = []
                st.session_state.current_conversation = []
                # Rerun so the metric and the draft preview reflect the wipe.
                st.rerun()


def _render_message_form() -> None:
    """Render the role/content inputs and append a message to the draft on click."""
    st.subheader("Add Message")

    selected_label = st.selectbox("Role", list(_ROLE_BY_LABEL))
    actual_role = _ROLE_BY_LABEL[selected_label]

    # The explicit key exposes the text as st.session_state.input_text.
    content = st.text_area(
        "Content",
        placeholder="Text, <think> tags, or code blocks...",
        height=300,
        key="input_text",
    )

    if st.button("Add Message to Draft"):
        if content.strip():
            # Store the content exactly as typed; only the emptiness check
            # ignores surrounding whitespace.
            st.session_state.current_conversation.append(
                {"role": actual_role, "content": content}
            )
            st.rerun()
        else:
            # Previously a blank submission was ignored without any feedback.
            st.warning("Message content is empty - nothing was added.")


def _render_draft_preview() -> None:
    """Render the draft messages with per-message delete and the save button."""
    st.subheader("Current Draft Preview")
    draft = st.session_state.current_conversation

    if not draft:
        st.write("Draft is empty.")
        return

    for idx, msg in enumerate(draft):
        with st.chat_message(msg["role"]):
            st.markdown(f"**{msg['role'].upper()}**")
            st.code(msg["content"], language="markdown")
            # The index-based key keeps each delete button's widget id unique.
            if st.button(f"Delete msg {idx}", key=f"del_{idx}"):
                draft.pop(idx)
                # Rerun right after the pop, so the loop never continues
                # over the mutated list.
                st.rerun()

    st.divider()
    if st.button("✅ SAVE CONVERSATION TO SESSION", use_container_width=True):
        # Store a copy of the list, then start a fresh draft.
        st.session_state.full_dataset.append(list(draft))
        st.session_state.current_conversation = []
        st.toast("Saved to session memory!")
        st.rerun()


def main():
    """Run the dataset-builder page.

    Messages are collected into a draft conversation, drafts are saved into a
    per-session dataset, and the dataset can be downloaded as JSONL from the
    sidebar. All state lives in ``st.session_state``.
    """
    st.set_page_config(page_title="Private LLM Dataset Builder", layout="wide")
    st.title("🧠 Private Dataset Creator")
    # NOTE(review): st.session_state is presumably held by the Streamlit
    # server process for the session rather than in the browser - confirm and
    # consider rewording this notice.
    st.info("Everything is stored in your browser RAM. Refreshing the page will clear your progress.")

    _init_session_state()
    _render_sidebar()

    # Layout: input form (left) and draft preview (right, slightly wider).
    col1, col2 = st.columns([1, 1.2])
    with col1:
        _render_message_form()
    with col2:
        _render_draft_preview()
# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()