Spaces:

MusoraProductDepartment
/

CustomTopics

Sleeping

File size: 10,998 Bytes

b7d7186

import streamlit as st
import os
import json
import clickhouse_connect

# -------------------------------
# Config
# -------------------------------
CH_PASS = os.getenv("CH_PASS")
CH_DATABASE = os.getenv("CH_DATABASE", "topic_tables")
APP_PASSWORD = os.getenv("APP_PASSWORD")

# Check if credentials are available
if not CH_PASS:
    st.error("CH_PASS environment variable not found. Please set it before running the app.")
    st.stop()

if not APP_PASSWORD:
    st.error("APP_PASSWORD environment variable not found. Please set it before running the app.")
    st.stop()

# -------------------------------
# ClickHouse connection
# -------------------------------
def get_clickhouse_client():
    """Create a new ClickHouse client."""
    return clickhouse_connect.get_client(
        host='td6vvza14q.us-east-2.aws.clickhouse.cloud',
        user='internal_tool_builder',
        password=CH_PASS,
        secure=True,
        connect_timeout=30,
        send_receive_timeout=60
    )

# Brand and content type options
BRANDS = ["drumeo", "pianote", "guitareo", "singeo"]
CONTENT_TYPES = ["song", "lesson"]

# -------------------------------
# Streamlit App
# -------------------------------
st.set_page_config(page_title="Custom Topics Manager", layout="wide")

# -------------------------------
# Password Protection
# -------------------------------
# Initialize session state for authentication
if "authenticated" not in st.session_state:
    st.session_state.authenticated = False

# Show login form if not authenticated
if not st.session_state.authenticated:
    st.title("Custom Topics Manager")
    st.subheader("Please enter the password to continue")

    # Create a form for password input
    with st.form("login_form"):
        password_input = st.text_input("Password", type="password")
        submit_button = st.form_submit_button("Login")

        if submit_button:
            if password_input == APP_PASSWORD:
                st.session_state.authenticated = True
                st.success("Authentication successful!")
                st.rerun()
            else:
                st.error("Incorrect password. Please try again.")

    st.stop()  # Stop execution here if not authenticated

# If we reach here, user is authenticated
st.title("Custom Topics Manager")

# Sidebar for brand and content type selection
st.sidebar.header("Settings")
brand = st.sidebar.selectbox("Select Brand", BRANDS, index=0)
content_type = st.sidebar.selectbox("Select Content Type", CONTENT_TYPES, index=0)

# Logout button
st.sidebar.divider()
if st.sidebar.button("🔓 Logout"):
    st.session_state.authenticated = False
    st.rerun()

# Debug info
st.sidebar.divider()
st.sidebar.caption(f"ClickHouse password: {'✓' if CH_PASS else '✗'}")
st.sidebar.caption(f"Database: {CH_DATABASE}")

# -------------------------------
# Load data from ClickHouse
# -------------------------------
# Table naming pattern: TOPIC_GROUPS_{BRAND}_{CONTENT_TYPE}S (e.g., TOPIC_GROUPS_DRUMEO_SONGS)
content_type_suffix = f"{content_type}s"
table_name = f"{CH_DATABASE}.TOPIC_GROUPS_{brand.upper()}_{content_type_suffix.upper()}"

try:
    with st.spinner(f"Loading topics for {brand} ({content_type}s)..."):
        # Get ClickHouse client
        ch_client = get_clickhouse_client()

        # Query all rows from the table including feedback columns
        query = f"""
            SELECT group_id, brand, content_type, title, item_ids, created_at,
                   accepted, suggested_title, reviewer_comments
            FROM {table_name}
            ORDER BY created_at DESC
        """
        result = ch_client.query(query)

        # Convert to list of dictionaries for easier processing
        topics = []
        for row in result.result_rows:
            topics.append({
                "group_id": row[0],
                "brand": row[1],
                "content_type": row[2],
                "title": row[3],
                "item_ids": row[4],  # Already a JSON string
                "created_at": str(row[5]),
                "accepted": row[6],
                "suggested_title": row[7],
                "reviewer_comments": row[8]
            })

    st.success(f"Loaded {len(topics)} topics from {table_name}")

    # -------------------------------
    # Fetch content titles from DIM_CONTENT
    # -------------------------------
    with st.spinner("Loading content titles..."):
        # Collect all unique content IDs from all topics
        all_content_ids = set()
        for topic in topics:
            try:
                item_ids = json.loads(topic['item_ids'])
                all_content_ids.update(item_ids)
            except:
                pass

        # Query DIM_CONTENT for titles
        content_titles = {}
        if all_content_ids:
            # Convert to list and create IN clause
            ids_list = list(all_content_ids)
            ids_str = ",".join([f"'{id}'" for id in ids_list])

            dim_content_query = f"""
                SELECT CONTENT_ID, CONTENT_TITLE
                FROM snowflake_synced_tables.DIM_CONTENT
                WHERE CONTENT_ID IN ({ids_str})
            """

            try:
                content_result = ch_client.query(dim_content_query)
                for row in content_result.result_rows:
                    content_titles[str(row[0])] = row[1]

                st.success(f"Loaded titles for {len(content_titles)} content items")
            except Exception as e:
                st.warning(f"Could not load content titles: {e}")
                st.info("Continuing without titles...")

    # -------------------------------
    # Display topics
    # -------------------------------
    st.header(f"Topics for {brand.capitalize()} - {content_type.capitalize()}s")

    # Add search/filter
    search_query = st.text_input("Search topics by title", "")

    # Filter topics based on search
    filtered_topics = []
    for topic in topics:
        if search_query.lower() in topic["title"].lower():
            filtered_topics.append(topic)

    if not filtered_topics:
        st.warning("No topics match your search query.")
    else:
        st.write(f"Showing {len(filtered_topics)} of {len(topics)} topics")

        # Display each topic
        for topic in filtered_topics:

            with st.expander(f"**{topic['title']}** ({topic['group_id']})", expanded=False):
                col1, col2 = st.columns([1, 3])

                with col1:
                    st.write("**Metadata:**")
                    st.write(f"- Brand: `{topic['brand']}`")
                    st.write(f"- Content Type: `{topic['content_type']}`")
                    st.write(f"- Created: `{topic['created_at']}`")

                    # Parse item_ids from JSON string
                    try:
                        item_ids = json.loads(topic['item_ids'])
                        st.write(f"- **Items:** {len(item_ids)}")
                    except:
                        item_ids = []
                        st.error("Error parsing item IDs")

                with col2:
                    st.write("**Content Items:**")
                    if item_ids:
                        # Display as a numbered list with ID and title
                        for i, item_id in enumerate(item_ids, 1):
                            title = content_titles.get(str(item_id), "Title not found")
                            st.write(f"{i}. **{title}**")
                            st.write(f"   `ID: {item_id}`")
                    else:
                        st.write("No items found")

                # Feedback section
                st.divider()
                st.write("**Reviewer Feedback:**")

                feedback_col1, feedback_col2 = st.columns([1, 2])

                with feedback_col1:
                    # Accept/Reject checkbox
                    accepted = st.checkbox(
                        "✓ Accept this topic",
                        value=topic['accepted'] if topic['accepted'] is not None else False,
                        key=f"accept_{topic['group_id']}"
                    )

                with feedback_col2:
                    # Suggested alternate title
                    suggested_title = st.text_input(
                        "Suggested alternate title (optional)",
                        value=topic['suggested_title'] if topic['suggested_title'] else "",
                        key=f"title_{topic['group_id']}"
                    )

                # Reviewer comments
                reviewer_comments = st.text_area(
                    "Comments (optional)",
                    value=topic['reviewer_comments'] if topic['reviewer_comments'] else "",
                    height=100,
                    key=f"comments_{topic['group_id']}"
                )

                # Save button
                if st.button("💾 Save Feedback", key=f"save_{topic['group_id']}"):
                    try:
                        # Create a fresh client for the update operation
                        update_client = get_clickhouse_client()

                        # Update the database
                        update_query = f"""
                            ALTER TABLE {table_name}
                            UPDATE
                                accepted = %(accepted)s,
                                suggested_title = %(suggested_title)s,
                                reviewer_comments = %(reviewer_comments)s
                            WHERE group_id = %(group_id)s
                        """

                        update_client.command(update_query, parameters={
                            "accepted": accepted,
                            "suggested_title": suggested_title if suggested_title else None,
                            "reviewer_comments": reviewer_comments if reviewer_comments else None,
                            "group_id": topic['group_id']
                        })

                        st.success("✓ Feedback saved!")
                        st.rerun()  # Refresh to show updated data
                    except Exception as e:
                        st.error(f"Error saving feedback: {e}")

except Exception as e:
    st.error(f"Error loading data from ClickHouse: {e}")
    st.info(f"Troubleshooting steps:")
    st.markdown(f"""
    1. **Verify the table exists**: Check that `{table_name}` exists in ClickHouse
    2. **Check authentication**: Make sure your CH_PASS environment variable is set correctly
    3. **Verify database**: Confirm the database `{CH_DATABASE}` exists
    4. **Test connection**: Try connecting to ClickHouse directly to verify credentials

    If the table doesn't exist or is empty, you can:
    - Run `python transfer.py` to copy data from HuggingFace to ClickHouse
    - Or run `python gemini_topics_clickhouse.py` to generate new topics directly to ClickHouse

    (Make sure to set `brand = "{brand}"` and `content_type = "{content_type}"` in the script first)
    """)