gmedin committed on
Commit
3b44819
·
verified ·
1 Parent(s): 0f8a347

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -288
app.py DELETED
@@ -1,288 +0,0 @@
1
- import streamlit as st
2
- import os
3
- import json
4
- import clickhouse_connect
5
- import sys
6
-
7
- # -------------------------------
8
- # Config
9
- # -------------------------------
10
- CH_PASS = os.getenv("CH_PASS")
11
- CH_DATABASE = os.getenv("CH_DATABASE", "topic_tables")
12
- APP_PASSWORD = os.getenv("APP_PASSWORD")
13
-
14
- # Check if credentials are available
15
- if not CH_PASS:
16
- st.error("CH_PASS environment variable not found. Please set it before running the app.")
17
- st.stop()
18
-
19
- if not APP_PASSWORD:
20
- st.error("APP_PASSWORD environment variable not found. Please set it before running the app.")
21
- st.stop()
22
-
23
- # -------------------------------
24
- # ClickHouse connection
25
- # -------------------------------
26
- def get_clickhouse_client():
27
- """Create a new ClickHouse client."""
28
- return clickhouse_connect.get_client(
29
- host='td6vvza14q.us-east-2.aws.clickhouse.cloud',
30
- user='internal_tool_builder',
31
- password=CH_PASS,
32
- secure=True,
33
- connect_timeout=30,
34
- send_receive_timeout=60
35
- )
36
-
37
- # Brand and content type options
38
- BRANDS = ["drumeo", "pianote", "guitareo", "singeo"]
39
- CONTENT_TYPES = ["song", "lesson"]
40
-
41
- # -------------------------------
42
- # Streamlit App
43
- # -------------------------------
44
- st.set_page_config(page_title="Custom Topics Manager", layout="wide")
45
-
46
- # -------------------------------
47
- # Password Protection
48
- # -------------------------------
49
- # Initialize session state for authentication
50
- if "authenticated" not in st.session_state:
51
- st.session_state.authenticated = False
52
-
53
- # Show login form if not authenticated
54
- if not st.session_state.authenticated:
55
- st.title("Custom Topics Manager")
56
- st.subheader("Please enter the password to continue")
57
-
58
- # Create a form for password input
59
- with st.form("login_form"):
60
- password_input = st.text_input("Password", type="password")
61
- submit_button = st.form_submit_button("Login")
62
-
63
- if submit_button:
64
- if password_input == APP_PASSWORD:
65
- st.session_state.authenticated = True
66
- st.success("Authentication successful!")
67
- st.rerun()
68
- else:
69
- st.error("Incorrect password. Please try again.")
70
-
71
- st.stop() # Stop execution here if not authenticated
72
-
73
- # If we reach here, user is authenticated
74
- st.title("Custom Topics Manager")
75
-
76
- # Sidebar for brand and content type selection
77
- st.sidebar.header("Settings")
78
- brand = st.sidebar.selectbox("Select Brand", BRANDS, index=0)
79
- content_type = st.sidebar.selectbox("Select Content Type", CONTENT_TYPES, index=0)
80
-
81
- # Logout button
82
- st.sidebar.divider()
83
- if st.sidebar.button("🔓 Logout"):
84
- st.session_state.authenticated = False
85
- st.rerun()
86
-
87
- # Debug info
88
- st.sidebar.divider()
89
- st.sidebar.caption(f"ClickHouse password: {'✓' if CH_PASS else '✗'}")
90
- st.sidebar.caption(f"Database: {CH_DATABASE}")
91
-
92
- # -------------------------------
93
- # Load data from ClickHouse
94
- # -------------------------------
95
- # Table naming pattern: TOPIC_GROUPS_{BRAND}_{CONTENT_TYPE}S (e.g., TOPIC_GROUPS_DRUMEO_SONGS)
96
- content_type_suffix = f"{content_type}s"
97
- table_name = f"{CH_DATABASE}.TOPIC_GROUPS_{brand.upper()}_{content_type_suffix.upper()}"
98
-
99
- try:
100
- with st.spinner(f"Loading topics for {brand} ({content_type}s)..."):
101
- # Get ClickHouse client
102
- ch_client = get_clickhouse_client()
103
-
104
- # Query all rows from the table including feedback columns
105
- query = f"""
106
- SELECT group_id, brand, content_type, title, item_ids, created_at,
107
- accepted, suggested_title, reviewer_comments
108
- FROM {table_name}
109
- ORDER BY created_at DESC
110
- """
111
- result = ch_client.query(query)
112
-
113
- # Convert to list of dictionaries for easier processing
114
- topics = []
115
- for row in result.result_rows:
116
- topics.append({
117
- "group_id": row[0],
118
- "brand": row[1],
119
- "content_type": row[2],
120
- "title": row[3],
121
- "item_ids": row[4], # Already a JSON string
122
- "created_at": str(row[5]),
123
- "accepted": row[6],
124
- "suggested_title": row[7],
125
- "reviewer_comments": row[8]
126
- })
127
-
128
- st.success(f"Loaded {len(topics)} topics from {table_name}")
129
-
130
- # -------------------------------
131
- # Fetch content titles from DIM_CONTENT
132
- # -------------------------------
133
- with st.spinner("Loading content titles..."):
134
- # Collect all unique content IDs from all topics
135
- all_content_ids = set()
136
- for topic in topics:
137
- try:
138
- item_ids = json.loads(topic['item_ids'])
139
- all_content_ids.update(item_ids)
140
- except:
141
- pass
142
-
143
- # Query DIM_CONTENT for titles
144
- content_titles = {}
145
- if all_content_ids:
146
- # Convert to list and create IN clause
147
- ids_list = list(all_content_ids)
148
- ids_str = ",".join([f"'{id}'" for id in ids_list])
149
-
150
- dim_content_query = f"""
151
- SELECT CONTENT_ID, CONTENT_TITLE
152
- FROM snowflake_synced_tables.DIM_CONTENT
153
- WHERE CONTENT_ID IN ({ids_str})
154
- """
155
-
156
- try:
157
- content_result = ch_client.query(dim_content_query)
158
- for row in content_result.result_rows:
159
- content_titles[str(row[0])] = row[1]
160
-
161
- st.success(f"Loaded titles for {len(content_titles)} content items")
162
- except Exception as e:
163
- st.warning(f"Could not load content titles: {e}")
164
- st.info("Continuing without titles...")
165
-
166
- # -------------------------------
167
- # Display topics
168
- # -------------------------------
169
- st.header(f"Topics for {brand.capitalize()} - {content_type.capitalize()}s")
170
-
171
- # Add search/filter
172
- search_query = st.text_input("Search topics by title", "")
173
-
174
- # Filter topics based on search
175
- filtered_topics = []
176
- for topic in topics:
177
- if search_query.lower() in topic["title"].lower():
178
- filtered_topics.append(topic)
179
-
180
- if not filtered_topics:
181
- st.warning("No topics match your search query.")
182
- else:
183
- st.write(f"Showing {len(filtered_topics)} of {len(topics)} topics")
184
-
185
- # Display each topic
186
- for topic in filtered_topics:
187
-
188
- with st.expander(f"**{topic['title']}** ({topic['group_id']})", expanded=False):
189
- col1, col2 = st.columns([1, 3])
190
-
191
- with col1:
192
- st.write("**Metadata:**")
193
- st.write(f"- Brand: `{topic['brand']}`")
194
- st.write(f"- Content Type: `{topic['content_type']}`")
195
- st.write(f"- Created: `{topic['created_at']}`")
196
-
197
- # Parse item_ids from JSON string
198
- try:
199
- item_ids = json.loads(topic['item_ids'])
200
- st.write(f"- **Items:** {len(item_ids)}")
201
- except:
202
- item_ids = []
203
- st.error("Error parsing item IDs")
204
-
205
- with col2:
206
- st.write("**Content Items:**")
207
- if item_ids:
208
- # Display as a numbered list with ID and title
209
- for i, item_id in enumerate(item_ids, 1):
210
- title = content_titles.get(str(item_id), "Title not found")
211
- st.write(f"{i}. **{title}**")
212
- st.write(f" `ID: {item_id}`")
213
- else:
214
- st.write("No items found")
215
-
216
- # Feedback section
217
- st.divider()
218
- st.write("**Reviewer Feedback:**")
219
-
220
- feedback_col1, feedback_col2 = st.columns([1, 2])
221
-
222
- with feedback_col1:
223
- # Accept/Reject checkbox
224
- accepted = st.checkbox(
225
- "✓ Accept this topic",
226
- value=topic['accepted'] if topic['accepted'] is not None else False,
227
- key=f"accept_{topic['group_id']}"
228
- )
229
-
230
- with feedback_col2:
231
- # Suggested alternate title
232
- suggested_title = st.text_input(
233
- "Suggested alternate title (optional)",
234
- value=topic['suggested_title'] if topic['suggested_title'] else "",
235
- key=f"title_{topic['group_id']}"
236
- )
237
-
238
- # Reviewer comments
239
- reviewer_comments = st.text_area(
240
- "Comments (optional)",
241
- value=topic['reviewer_comments'] if topic['reviewer_comments'] else "",
242
- height=100,
243
- key=f"comments_{topic['group_id']}"
244
- )
245
-
246
- # Save button
247
- if st.button("💾 Save Feedback", key=f"save_{topic['group_id']}"):
248
- try:
249
- # Create a fresh client for the update operation
250
- update_client = get_clickhouse_client()
251
-
252
- # Update the database
253
- update_query = f"""
254
- ALTER TABLE {table_name}
255
- UPDATE
256
- accepted = %(accepted)s,
257
- suggested_title = %(suggested_title)s,
258
- reviewer_comments = %(reviewer_comments)s
259
- WHERE group_id = %(group_id)s
260
- """
261
-
262
- update_client.command(update_query, parameters={
263
- "accepted": accepted,
264
- "suggested_title": suggested_title if suggested_title else None,
265
- "reviewer_comments": reviewer_comments if reviewer_comments else None,
266
- "group_id": topic['group_id']
267
- })
268
-
269
- st.success("✓ Feedback saved!")
270
- st.rerun() # Refresh to show updated data
271
- except Exception as e:
272
- st.error(f"Error saving feedback: {e}")
273
-
274
- except Exception as e:
275
- st.error(f"Error loading data from ClickHouse: {e}")
276
- st.info(f"Troubleshooting steps:")
277
- st.markdown(f"""
278
- 1. **Verify the table exists**: Check that `{table_name}` exists in ClickHouse
279
- 2. **Check authentication**: Make sure your CH_PASS environment variable is set correctly
280
- 3. **Verify database**: Confirm the database `{CH_DATABASE}` exists
281
- 4. **Test connection**: Try connecting to ClickHouse directly to verify credentials
282
-
283
- If the table doesn't exist or is empty, you can:
284
- - Run `python transfer.py` to copy data from HuggingFace to ClickHouse
285
- - Or run `python gemini_topics_clickhouse.py` to generate new topics directly to ClickHouse
286
-
287
- (Make sure to set `brand = "{brand}"` and `content_type = "{content_type}"` in the script first)
288
- """)