# Streamlit app source, copied from a hosted file viewer (Space status: Sleeping; file size: 3,591 bytes).
# The viewer's commit-hash and line-number gutters (lines 1-96) were page residue, not part of the program.
import streamlit as st
import json
import io
def _init_session_state() -> None:
    """Create the two session-state lists the app relies on, if missing."""
    if "full_dataset" not in st.session_state:
        # Every conversation the user has saved during this session.
        st.session_state.full_dataset = []
    if "current_conversation" not in st.session_state:
        # Messages of the draft conversation currently being assembled.
        st.session_state.current_conversation = []


def _dataset_to_jsonl(conversations) -> str:
    """Serialize conversations as JSONL.

    Each conversation (a list of ``{"role", "content"}`` dicts) becomes one
    line of the form ``{"messages": [...]}``. ``ensure_ascii=False`` keeps
    non-ASCII text readable instead of ``\\uXXXX``-escaping it.
    """
    # Single join instead of repeated ``+=`` so building stays linear.
    return "".join(
        json.dumps({"messages": conversation}, ensure_ascii=False) + "\n"
        for conversation in conversations
    )


def _render_sidebar() -> None:
    """Render the sidebar: saved-conversation count, download, and wipe."""
    with st.sidebar:
        st.header("Your Session Dataset")
        saved = st.session_state.full_dataset
        st.metric("Conversations Saved", len(saved))

        if saved:
            # The JSONL text is passed to the button directly as a string.
            st.download_button(
                label="📥 Download My Dataset (.jsonl)",
                data=_dataset_to_jsonl(saved),
                file_name="my_private_dataset.jsonl",
                mime="application/jsonl",
                type="primary",
            )

        # NOTE(review): rendered unconditionally so an unsaved draft can be
        # discarded too — confirm this matches the intended placement.
        if st.button("🗑️ Wipe All Data"):
            st.session_state.full_dataset = []
            st.session_state.current_conversation = []
            st.rerun()


def _render_message_form() -> None:
    """Render the role selector and text box that append to the draft."""
    st.subheader("Add Message")

    # Display label -> role value written into the dataset.
    role_map = {
        "User": "user",
        "Assistant": "assistant",
        "Tool Response": "tool",
    }
    selected_label = st.selectbox("Role", list(role_map))
    actual_role = role_map[selected_label]

    # Keyed so the widget's value is addressable via st.session_state.
    content = st.text_area(
        "Content",
        placeholder="Text, <think> tags, or code blocks...",
        height=300,
        key="input_text",
    )

    # Whitespace-only input is ignored; the stored content is kept verbatim.
    if st.button("Add Message to Draft") and content.strip():
        st.session_state.current_conversation.append(
            {"role": actual_role, "content": content}
        )
        st.rerun()


def _render_draft_preview() -> None:
    """Render the draft's messages with delete buttons and the save action."""
    st.subheader("Current Draft Preview")
    draft = st.session_state.current_conversation

    if not draft:
        st.write("Draft is empty.")
        return

    for idx, msg in enumerate(draft):
        with st.chat_message(msg["role"]):
            st.markdown(f"**{msg['role'].upper()}**")
            # Shown as a code block so the raw text appears unrendered.
            st.code(msg["content"], language="markdown")
            if st.button(f"Delete msg {idx}", key=f"del_{idx}"):
                # Rerun right after the pop so the loop never continues
                # over the mutated list.
                draft.pop(idx)
                st.rerun()

    st.divider()
    if st.button("✅ SAVE CONVERSATION TO SESSION", use_container_width=True):
        # Store a copy of the draft, then start a fresh draft list.
        st.session_state.full_dataset.append(list(draft))
        st.session_state.current_conversation = []
        st.toast("Saved to session memory!")
        st.rerun()


def main() -> None:
    """Run the dataset-builder page.

    Lets the user assemble chat conversations message by message, save them
    to the session, and download all saved conversations as a JSONL file.
    Nothing is returned; all state lives in ``st.session_state``.
    """
    st.set_page_config(page_title="Private LLM Dataset Builder", layout="wide")
    st.title("🧠 Private Dataset Creator")
    # Session state is held by the running app, not by the browser, so the
    # notice says so rather than claiming "browser RAM".
    st.info(
        "Everything is stored in this session's memory on the machine running "
        "the app. Refreshing the page will clear your progress."
    )

    _init_session_state()
    _render_sidebar()

    # Layout: input form (left) and draft preview (right, slightly wider).
    col1, col2 = st.columns([1, 1.2])
    with col1:
        _render_message_form()
    with col2:
        _render_draft_preview()
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()