gmedin committed on
Commit
b7d7186
·
verified ·
1 Parent(s): 3b44819

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +287 -0
app.py ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import json
4
+ import clickhouse_connect
5
+
6
+ # -------------------------------
7
+ # Config
8
+ # -------------------------------
9
# Settings are taken from the environment; only the database name has a default.
CH_PASS = os.getenv("CH_PASS")
CH_DATABASE = os.getenv("CH_DATABASE", "topic_tables")
APP_PASSWORD = os.getenv("APP_PASSWORD")

# Each required secret is paired with the message shown when it is missing.
_REQUIRED_SECRETS = (
    (CH_PASS, "CH_PASS environment variable not found. Please set it before running the app."),
    (APP_PASSWORD, "APP_PASSWORD environment variable not found. Please set it before running the app."),
)

# Report the first missing secret and halt the script right there.
for _secret_value, _missing_message in _REQUIRED_SECRETS:
    if not _secret_value:
        st.error(_missing_message)
        st.stop()
21
+
22
+ # -------------------------------
23
+ # ClickHouse connection
24
+ # -------------------------------
25
def get_clickhouse_client():
    """Create a new ClickHouse client.

    The host and user default to the values this app has always used; either
    can be overridden with the ``CH_HOST`` / ``CH_USER`` environment variables
    (an empty value falls back to the default). The password is the
    module-level ``CH_PASS``.

    Returns:
        The client object produced by ``clickhouse_connect.get_client``.
    """
    host = os.getenv("CH_HOST") or "td6vvza14q.us-east-2.aws.clickhouse.cloud"
    user = os.getenv("CH_USER") or "internal_tool_builder"
    return clickhouse_connect.get_client(
        host=host,
        user=user,
        password=CH_PASS,
        secure=True,
        connect_timeout=30,
        send_receive_timeout=60,
    )
35
+
36
+ # Brand and content type options
37
import hmac

# Brand and content type options offered in the sidebar.
BRANDS = ["drumeo", "pianote", "guitareo", "singeo"]
CONTENT_TYPES = ["song", "lesson"]

# -------------------------------
# Streamlit App
# -------------------------------
st.set_page_config(page_title="Custom Topics Manager", layout="wide")

# -------------------------------
# Password Protection
# -------------------------------
# The login flag lives in session state so it survives script reruns.
if "authenticated" not in st.session_state:
    st.session_state.authenticated = False

# Show the login form and stop the script until the password is accepted.
if not st.session_state.authenticated:
    st.title("Custom Topics Manager")
    st.subheader("Please enter the password to continue")

    with st.form("login_form"):
        password_input = st.text_input("Password", type="password")
        submit_button = st.form_submit_button("Login")

        if submit_button:
            # Constant-time comparison so the check does not leak how much of
            # the password matched; bytes are used because compare_digest
            # rejects non-ASCII str arguments.
            password_ok = hmac.compare_digest(
                password_input.encode("utf-8"),
                APP_PASSWORD.encode("utf-8"),
            )
            if password_ok:
                st.session_state.authenticated = True
                st.success("Authentication successful!")
                st.rerun()
            else:
                st.error("Incorrect password. Please try again.")

    st.stop()  # Stop execution here if not authenticated
71
+
72
+ # If we reach here, user is authenticated
73
st.title("Custom Topics Manager")

# Everything created inside this context is rendered in the sidebar.
with st.sidebar:
    # Brand and content type selection
    st.header("Settings")
    brand = st.selectbox("Select Brand", BRANDS, index=0)
    content_type = st.selectbox("Select Content Type", CONTENT_TYPES, index=0)

    # Logout clears the session flag and reruns, which lands on the login form.
    st.divider()
    if st.button("🔓 Logout"):
        st.session_state.authenticated = False
        st.rerun()

    # Debug info: shows whether a password is configured, never its value.
    st.divider()
    st.caption(f"ClickHouse password: {'✓' if CH_PASS else '✗'}")
    st.caption(f"Database: {CH_DATABASE}")

# -------------------------------
# Load data from ClickHouse
# -------------------------------
# Tables are named TOPIC_GROUPS_{BRAND}_{CONTENT_TYPE}S, e.g. TOPIC_GROUPS_DRUMEO_SONGS.
content_type_suffix = content_type + "s"
table_name = (
    f"{CH_DATABASE}.TOPIC_GROUPS_"
    f"{brand.upper()}_{content_type_suffix.upper()}"
)
98
# Load the topic groups for the selected table, resolve content titles, render
# each topic with its reviewer-feedback form, and write feedback back on save.
# Any failure while loading or rendering falls through to the troubleshooting
# panel in the final ``except``.
try:
    with st.spinner(f"Loading topics for {brand} ({content_type}s)..."):
        # Get ClickHouse client
        ch_client = get_clickhouse_client()

        # Query all rows from the table including feedback columns.
        # table_name is interpolated because identifiers cannot be bound as
        # %(name)s values.
        query = f"""
            SELECT group_id, brand, content_type, title, item_ids, created_at,
                   accepted, suggested_title, reviewer_comments
            FROM {table_name}
            ORDER BY created_at DESC
        """
        result = ch_client.query(query)

        # Convert to list of dictionaries for easier processing
        topics = [
            {
                "group_id": row[0],
                "brand": row[1],
                "content_type": row[2],
                "title": row[3],
                "item_ids": row[4],  # Already a JSON string
                "created_at": str(row[5]),
                "accepted": row[6],
                "suggested_title": row[7],
                "reviewer_comments": row[8],
            }
            for row in result.result_rows
        ]

    st.success(f"Loaded {len(topics)} topics from {table_name}")

    # -------------------------------
    # Fetch content titles from DIM_CONTENT
    # -------------------------------
    with st.spinner("Loading content titles..."):
        # Collect all unique content IDs from all topics
        all_content_ids = set()
        for topic in topics:
            try:
                all_content_ids.update(json.loads(topic["item_ids"]))
            except (TypeError, ValueError):
                # Unparseable item_ids: skip here; the per-topic display below
                # reports "Error parsing item IDs" for this topic.
                continue

        # Query DIM_CONTENT for titles
        content_titles = {}
        if all_content_ids:
            # The IDs are bound as a parameter rather than pasted into the SQL
            # text, so quotes inside a stored ID cannot break or alter the
            # query. A tuple is rendered by clickhouse_connect as a
            # parenthesised, escaped value list.
            dim_content_query = """
                SELECT CONTENT_ID, CONTENT_TITLE
                FROM snowflake_synced_tables.DIM_CONTENT
                WHERE CONTENT_ID IN %(content_ids)s
            """
            content_id_params = tuple(
                str(content_id) for content_id in all_content_ids
            )

            try:
                content_result = ch_client.query(
                    dim_content_query,
                    parameters={"content_ids": content_id_params},
                )
                for row in content_result.result_rows:
                    content_titles[str(row[0])] = row[1]

                st.success(f"Loaded titles for {len(content_titles)} content items")
            except Exception as e:
                # Titles are a nicety; keep going and show IDs only.
                st.warning(f"Could not load content titles: {e}")
                st.info("Continuing without titles...")

    # -------------------------------
    # Display topics
    # -------------------------------
    st.header(f"Topics for {brand.capitalize()} - {content_type.capitalize()}s")

    # Add search/filter
    search_query = st.text_input("Search topics by title", "")

    # Case-insensitive substring match; a NULL title is treated as empty so it
    # cannot crash the filter.
    search_needle = search_query.lower()
    filtered_topics = [
        topic
        for topic in topics
        if search_needle in (topic["title"] or "").lower()
    ]

    if not filtered_topics:
        st.warning("No topics match your search query.")
    else:
        st.write(f"Showing {len(filtered_topics)} of {len(topics)} topics")

        # Display each topic
        for topic in filtered_topics:
            group_id = topic["group_id"]

            with st.expander(f"**{topic['title']}** ({group_id})", expanded=False):
                col1, col2 = st.columns([1, 3])

                with col1:
                    st.write("**Metadata:**")
                    st.write(f"- Brand: `{topic['brand']}`")
                    st.write(f"- Content Type: `{topic['content_type']}`")
                    st.write(f"- Created: `{topic['created_at']}`")

                    # Parse item_ids from JSON string
                    try:
                        item_ids = json.loads(topic["item_ids"])
                        st.write(f"- **Items:** {len(item_ids)}")
                    except (TypeError, ValueError):
                        item_ids = []
                        st.error("Error parsing item IDs")

                with col2:
                    st.write("**Content Items:**")
                    if item_ids:
                        # Display as a numbered list with ID and title
                        for i, item_id in enumerate(item_ids, 1):
                            title = content_titles.get(str(item_id), "Title not found")
                            st.write(f"{i}. **{title}**")
                            st.write(f" `ID: {item_id}`")
                    else:
                        st.write("No items found")

                # Feedback section
                st.divider()
                st.write("**Reviewer Feedback:**")

                feedback_col1, feedback_col2 = st.columns([1, 2])

                with feedback_col1:
                    # Accept/Reject checkbox; a NULL column value starts unchecked.
                    accepted = st.checkbox(
                        "✓ Accept this topic",
                        value=bool(topic["accepted"]),
                        key=f"accept_{group_id}",
                    )

                with feedback_col2:
                    # Suggested alternate title
                    suggested_title = st.text_input(
                        "Suggested alternate title (optional)",
                        value=topic["suggested_title"] or "",
                        key=f"title_{group_id}",
                    )

                # Reviewer comments
                reviewer_comments = st.text_area(
                    "Comments (optional)",
                    value=topic["reviewer_comments"] or "",
                    height=100,
                    key=f"comments_{group_id}",
                )

                # Save button
                if st.button("💾 Save Feedback", key=f"save_{group_id}"):
                    try:
                        # Create a fresh client for the update operation
                        update_client = get_clickhouse_client()

                        # NOTE(review): ALTER TABLE ... UPDATE is a ClickHouse
                        # mutation, which is asynchronous by default, so the
                        # rerun below may still read the previous values.
                        # Consider the mutations_sync setting - confirm.
                        update_query = f"""
                            ALTER TABLE {table_name}
                            UPDATE
                                accepted = %(accepted)s,
                                suggested_title = %(suggested_title)s,
                                reviewer_comments = %(reviewer_comments)s
                            WHERE group_id = %(group_id)s
                        """

                        # Empty text fields are stored as NULL.
                        update_client.command(update_query, parameters={
                            "accepted": accepted,
                            "suggested_title": suggested_title if suggested_title else None,
                            "reviewer_comments": reviewer_comments if reviewer_comments else None,
                            "group_id": group_id,
                        })

                        st.success("✓ Feedback saved!")
                        st.rerun()  # Refresh to show updated data
                    except Exception as e:
                        st.error(f"Error saving feedback: {e}")

except Exception as e:
    st.error(f"Error loading data from ClickHouse: {e}")
    st.info("Troubleshooting steps:")
    st.markdown(f"""
    1. **Verify the table exists**: Check that `{table_name}` exists in ClickHouse
    2. **Check authentication**: Make sure your CH_PASS environment variable is set correctly
    3. **Verify database**: Confirm the database `{CH_DATABASE}` exists
    4. **Test connection**: Try connecting to ClickHouse directly to verify credentials

    If the table doesn't exist or is empty, you can:
    - Run `python transfer.py` to copy data from HuggingFace to ClickHouse
    - Or run `python gemini_topics_clickhouse.py` to generate new topics directly to ClickHouse

    (Make sure to set `brand = "{brand}"` and `content_type = "{content_type}"` in the script first)
    """)