import streamlit as st import json import io def main(): st.set_page_config(page_title="Private LLM Dataset Builder", layout="wide") st.title("🧠 Private Dataset Creator") st.info("Everything is stored in your browser RAM. Refreshing the page will clear your progress.") # 1. Initialize session states if "full_dataset" not in st.session_state: st.session_state.full_dataset = [] # This holds all completed conversations if "current_conversation" not in st.session_state: st.session_state.current_conversation = [] # This holds the active draft # Sidebar: Stats & Download with st.sidebar: st.header("Your Session Dataset") count = len(st.session_state.full_dataset) st.metric("Conversations Saved", count) if count > 0: # Create the JSONL content in memory jsonl_str = "" for conv in st.session_state.full_dataset: jsonl_str += json.dumps({"messages": conv}, ensure_ascii=False) + "\n" # Download button using an in-memory buffer st.download_button( label="📥 Download My Dataset (.jsonl)", data=jsonl_str, file_name="my_private_dataset.jsonl", mime="application/jsonl", type="primary" ) if st.button("🗑️ Wipe All Data"): st.session_state.full_dataset = [] st.session_state.current_conversation = [] st.rerun() # Layout: Input (Left) and Preview (Right) col1, col2 = st.columns([1, 1.2]) with col1: st.subheader("Add Message") role_map = { "User": "user", "Assistant": "assistant", "Tool Response": "tool" } selected_label = st.selectbox("Role", list(role_map.keys())) actual_role = role_map[selected_label] # Use a key for the text area to allow manual clearing if needed content = st.text_area( "Content", placeholder="Text, tags, or code blocks...", height=300, key="input_text" ) if st.button("Add Message to Draft"): if content.strip(): st.session_state.current_conversation.append({ "role": actual_role, "content": content }) st.rerun() with col2: st.subheader("Current Draft Preview") if not st.session_state.current_conversation: st.write("Draft is empty.") for idx, msg in enumerate(st.session_state.current_conversation): with st.chat_message(msg["role"]): st.markdown(f"**{msg['role'].upper()}**") st.code(msg["content"], language="markdown") if st.button(f"Delete msg {idx}", key=f"del_{idx}"): st.session_state.current_conversation.pop(idx) st.rerun() if len(st.session_state.current_conversation) > 0: st.divider() if st.button("✅ SAVE CONVERSATION TO SESSION", use_container_width=True): # Move current draft to the full dataset list st.session_state.full_dataset.append(list(st.session_state.current_conversation)) # Clear draft st.session_state.current_conversation = [] st.toast("Saved to session memory!") st.rerun() if __name__ == "__main__": main()