Spaces:

Martico2432
/

DatasetCreator

Sleeping

File size: 3,591 Bytes

c03482d
1d0662e
c170dd7
1d0662e
 
c170dd7
 
 
1d0662e
c170dd7
 
 
 
 
 
1d0662e
c170dd7
1d0662e
c170dd7
 
 
1d0662e
c170dd7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d0662e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c170dd7
1d0662e
 
c170dd7
 
 
1d0662e
 
c170dd7
1d0662e
c170dd7
 
 
 
1d0662e
 
 
c170dd7
 
 
1d0662e
c170dd7
1d0662e
c170dd7
1d0662e
 
c170dd7
1d0662e
 
c170dd7
1d0662e
c170dd7
 
 
 
 
 
1d0662e

import streamlit as st
import json
import io

def _dataset_to_jsonl(dataset):
    """Serialize saved conversations to JSON Lines text.

    Args:
        dataset: Iterable of conversations, each a list of
            ``{"role": ..., "content": ...}`` message dicts.

    Returns:
        One ``{"messages": [...]}`` JSON object per line, every line
        (including the last) terminated by a newline. An empty dataset
        yields an empty string.
    """
    # A single join avoids the quadratic cost of repeated ``+=`` on a string.
    return "".join(
        json.dumps({"messages": conversation}, ensure_ascii=False) + "\n"
        for conversation in dataset
    )


def _render_sidebar():
    """Draw the sidebar: saved-conversation count, download and wipe buttons."""
    with st.sidebar:
        st.header("Your Session Dataset")
        dataset = st.session_state.full_dataset
        st.metric("Conversations Saved", len(dataset))

        # Only offer a download once there is something to export.
        if dataset:
            st.download_button(
                label="📥 Download My Dataset (.jsonl)",
                data=_dataset_to_jsonl(dataset),
                file_name="my_private_dataset.jsonl",
                mime="application/jsonl",
                type="primary",
            )

        if st.button("🗑️ Wipe All Data"):
            st.session_state.full_dataset = []
            st.session_state.current_conversation = []
            # Rerun so the metric and the preview reflect the wipe immediately.
            st.rerun()


def _render_message_form():
    """Draw the role/content inputs and append a new message to the draft."""
    st.subheader("Add Message")
    # Display label -> role string stored in the exported messages.
    role_map = {
        "User": "user",
        "Assistant": "assistant",
        "Tool Response": "tool",
    }
    selected_label = st.selectbox("Role", list(role_map.keys()))
    actual_role = role_map[selected_label]

    # The widget is keyed so its value is addressable in session state.
    content = st.text_area(
        "Content",
        placeholder="Text, <think> tags, or code blocks...",
        height=300,
        key="input_text",
    )

    if st.button("Add Message to Draft"):
        if content.strip():
            # Store the content exactly as typed; strip() is only the
            # emptiness check.
            st.session_state.current_conversation.append({
                "role": actual_role,
                "content": content,
            })
            st.rerun()
        else:
            # Give feedback instead of silently ignoring the click.
            st.warning("Message content is empty - nothing was added.")


def _render_draft_preview():
    """Draw the draft messages with per-message delete and the save button."""
    st.subheader("Current Draft Preview")
    draft = st.session_state.current_conversation
    if not draft:
        st.write("Draft is empty.")
        return

    for idx, msg in enumerate(draft):
        with st.chat_message(msg["role"]):
            st.markdown(f"**{msg['role'].upper()}**")
            st.code(msg["content"], language="markdown")
            if st.button(f"Delete msg {idx}", key=f"del_{idx}"):
                draft.pop(idx)
                # The rerun ends this script run right away, so the loop
                # never continues over the list it just mutated.
                st.rerun()

    st.divider()
    if st.button("✅ SAVE CONVERSATION TO SESSION", use_container_width=True):
        # Store a copy so the saved conversation is independent of the draft.
        st.session_state.full_dataset.append(list(draft))
        st.session_state.current_conversation = []
        st.toast("Saved to session memory!")
        st.rerun()


def main():
    """Run the Streamlit dataset-builder page.

    Keeps two lists in ``st.session_state``: ``full_dataset`` (completed
    conversations) and ``current_conversation`` (the draft being edited).
    The sidebar exports the dataset as JSONL; the two columns hold the
    message input form and the draft preview.
    """
    st.set_page_config(page_title="Private LLM Dataset Builder", layout="wide")
    st.title("🧠 Private Dataset Creator")
    # Session state is held by the Streamlit server process for this session,
    # not by the browser, so the notice says so.
    st.info(
        "Everything is kept in temporary per-session memory on the app server; "
        "nothing is written to disk. Refreshing the page will clear your progress."
    )

    # Initialize session state on the first run of a session.
    if "full_dataset" not in st.session_state:
        st.session_state.full_dataset = []  # all completed conversations
    if "current_conversation" not in st.session_state:
        st.session_state.current_conversation = []  # the active draft

    _render_sidebar()

    # Layout: input (left) and preview (right).
    col1, col2 = st.columns([1, 1.2])
    with col1:
        _render_message_form()
    with col2:
        _render_draft_preview()

# Invoke main() only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()