File size: 10,998 Bytes
b7d7186
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
import streamlit as st
import os
import json
import clickhouse_connect

# -------------------------------
# Config
# -------------------------------
CH_PASS = os.getenv("CH_PASS")
CH_DATABASE = os.getenv("CH_DATABASE", "topic_tables")
APP_PASSWORD = os.getenv("APP_PASSWORD")

# Check if credentials are available
if not CH_PASS:
    st.error("CH_PASS environment variable not found. Please set it before running the app.")
    st.stop()

if not APP_PASSWORD:
    st.error("APP_PASSWORD environment variable not found. Please set it before running the app.")
    st.stop()

# -------------------------------
# ClickHouse connection
# -------------------------------
def get_clickhouse_client():
    """Create a new ClickHouse client."""
    return clickhouse_connect.get_client(
        host='td6vvza14q.us-east-2.aws.clickhouse.cloud',
        user='internal_tool_builder',
        password=CH_PASS,
        secure=True,
        connect_timeout=30,
        send_receive_timeout=60
    )

# Brand and content type options
BRANDS = ["drumeo", "pianote", "guitareo", "singeo"]
CONTENT_TYPES = ["song", "lesson"]

# -------------------------------
# Streamlit App
# -------------------------------
st.set_page_config(page_title="Custom Topics Manager", layout="wide")

# -------------------------------
# Password Protection
# -------------------------------
# Initialize session state for authentication
if "authenticated" not in st.session_state:
    st.session_state.authenticated = False

# Show login form if not authenticated
if not st.session_state.authenticated:
    st.title("Custom Topics Manager")
    st.subheader("Please enter the password to continue")

    # Create a form for password input
    with st.form("login_form"):
        password_input = st.text_input("Password", type="password")
        submit_button = st.form_submit_button("Login")

        if submit_button:
            if password_input == APP_PASSWORD:
                st.session_state.authenticated = True
                st.success("Authentication successful!")
                st.rerun()
            else:
                st.error("Incorrect password. Please try again.")

    st.stop()  # Stop execution here if not authenticated

# If we reach here, user is authenticated
st.title("Custom Topics Manager")

# Sidebar for brand and content type selection
st.sidebar.header("Settings")
brand = st.sidebar.selectbox("Select Brand", BRANDS, index=0)
content_type = st.sidebar.selectbox("Select Content Type", CONTENT_TYPES, index=0)

# Logout button
st.sidebar.divider()
if st.sidebar.button("πŸ”“ Logout"):
    st.session_state.authenticated = False
    st.rerun()

# Debug info
st.sidebar.divider()
st.sidebar.caption(f"ClickHouse password: {'βœ“' if CH_PASS else 'βœ—'}")
st.sidebar.caption(f"Database: {CH_DATABASE}")

# -------------------------------
# Load data from ClickHouse
# -------------------------------
# Table naming pattern: TOPIC_GROUPS_{BRAND}_{CONTENT_TYPE}S (e.g., TOPIC_GROUPS_DRUMEO_SONGS)
content_type_suffix = f"{content_type}s"
table_name = f"{CH_DATABASE}.TOPIC_GROUPS_{brand.upper()}_{content_type_suffix.upper()}"

try:
    with st.spinner(f"Loading topics for {brand} ({content_type}s)..."):
        # Get ClickHouse client
        ch_client = get_clickhouse_client()

        # Query all rows from the table including feedback columns
        query = f"""
            SELECT group_id, brand, content_type, title, item_ids, created_at,
                   accepted, suggested_title, reviewer_comments
            FROM {table_name}
            ORDER BY created_at DESC
        """
        result = ch_client.query(query)

        # Convert to list of dictionaries for easier processing
        topics = []
        for row in result.result_rows:
            topics.append({
                "group_id": row[0],
                "brand": row[1],
                "content_type": row[2],
                "title": row[3],
                "item_ids": row[4],  # Already a JSON string
                "created_at": str(row[5]),
                "accepted": row[6],
                "suggested_title": row[7],
                "reviewer_comments": row[8]
            })

    st.success(f"Loaded {len(topics)} topics from {table_name}")

    # -------------------------------
    # Fetch content titles from DIM_CONTENT
    # -------------------------------
    with st.spinner("Loading content titles..."):
        # Collect all unique content IDs from all topics
        all_content_ids = set()
        for topic in topics:
            try:
                item_ids = json.loads(topic['item_ids'])
                all_content_ids.update(item_ids)
            except:
                pass

        # Query DIM_CONTENT for titles
        content_titles = {}
        if all_content_ids:
            # Convert to list and create IN clause
            ids_list = list(all_content_ids)
            ids_str = ",".join([f"'{id}'" for id in ids_list])

            dim_content_query = f"""
                SELECT CONTENT_ID, CONTENT_TITLE
                FROM snowflake_synced_tables.DIM_CONTENT
                WHERE CONTENT_ID IN ({ids_str})
            """

            try:
                content_result = ch_client.query(dim_content_query)
                for row in content_result.result_rows:
                    content_titles[str(row[0])] = row[1]

                st.success(f"Loaded titles for {len(content_titles)} content items")
            except Exception as e:
                st.warning(f"Could not load content titles: {e}")
                st.info("Continuing without titles...")

    # -------------------------------
    # Display topics
    # -------------------------------
    st.header(f"Topics for {brand.capitalize()} - {content_type.capitalize()}s")

    # Add search/filter
    search_query = st.text_input("Search topics by title", "")

    # Filter topics based on search
    filtered_topics = []
    for topic in topics:
        if search_query.lower() in topic["title"].lower():
            filtered_topics.append(topic)

    if not filtered_topics:
        st.warning("No topics match your search query.")
    else:
        st.write(f"Showing {len(filtered_topics)} of {len(topics)} topics")

        # Display each topic
        for topic in filtered_topics:

            with st.expander(f"**{topic['title']}** ({topic['group_id']})", expanded=False):
                col1, col2 = st.columns([1, 3])

                with col1:
                    st.write("**Metadata:**")
                    st.write(f"- Brand: `{topic['brand']}`")
                    st.write(f"- Content Type: `{topic['content_type']}`")
                    st.write(f"- Created: `{topic['created_at']}`")

                    # Parse item_ids from JSON string
                    try:
                        item_ids = json.loads(topic['item_ids'])
                        st.write(f"- **Items:** {len(item_ids)}")
                    except:
                        item_ids = []
                        st.error("Error parsing item IDs")

                with col2:
                    st.write("**Content Items:**")
                    if item_ids:
                        # Display as a numbered list with ID and title
                        for i, item_id in enumerate(item_ids, 1):
                            title = content_titles.get(str(item_id), "Title not found")
                            st.write(f"{i}. **{title}**")
                            st.write(f"   `ID: {item_id}`")
                    else:
                        st.write("No items found")

                # Feedback section
                st.divider()
                st.write("**Reviewer Feedback:**")

                feedback_col1, feedback_col2 = st.columns([1, 2])

                with feedback_col1:
                    # Accept/Reject checkbox
                    accepted = st.checkbox(
                        "βœ“ Accept this topic",
                        value=topic['accepted'] if topic['accepted'] is not None else False,
                        key=f"accept_{topic['group_id']}"
                    )

                with feedback_col2:
                    # Suggested alternate title
                    suggested_title = st.text_input(
                        "Suggested alternate title (optional)",
                        value=topic['suggested_title'] if topic['suggested_title'] else "",
                        key=f"title_{topic['group_id']}"
                    )

                # Reviewer comments
                reviewer_comments = st.text_area(
                    "Comments (optional)",
                    value=topic['reviewer_comments'] if topic['reviewer_comments'] else "",
                    height=100,
                    key=f"comments_{topic['group_id']}"
                )

                # Save button
                if st.button("πŸ’Ύ Save Feedback", key=f"save_{topic['group_id']}"):
                    try:
                        # Create a fresh client for the update operation
                        update_client = get_clickhouse_client()

                        # Update the database
                        update_query = f"""
                            ALTER TABLE {table_name}
                            UPDATE
                                accepted = %(accepted)s,
                                suggested_title = %(suggested_title)s,
                                reviewer_comments = %(reviewer_comments)s
                            WHERE group_id = %(group_id)s
                        """

                        update_client.command(update_query, parameters={
                            "accepted": accepted,
                            "suggested_title": suggested_title if suggested_title else None,
                            "reviewer_comments": reviewer_comments if reviewer_comments else None,
                            "group_id": topic['group_id']
                        })

                        st.success("βœ“ Feedback saved!")
                        st.rerun()  # Refresh to show updated data
                    except Exception as e:
                        st.error(f"Error saving feedback: {e}")

except Exception as e:
    st.error(f"Error loading data from ClickHouse: {e}")
    st.info(f"Troubleshooting steps:")
    st.markdown(f"""
    1. **Verify the table exists**: Check that `{table_name}` exists in ClickHouse
    2. **Check authentication**: Make sure your CH_PASS environment variable is set correctly
    3. **Verify database**: Confirm the database `{CH_DATABASE}` exists
    4. **Test connection**: Try connecting to ClickHouse directly to verify credentials

    If the table doesn't exist or is empty, you can:
    - Run `python transfer.py` to copy data from HuggingFace to ClickHouse
    - Or run `python gemini_topics_clickhouse.py` to generate new topics directly to ClickHouse

    (Make sure to set `brand = "{brand}"` and `content_type = "{content_type}"` in the script first)
    """)