Tesneem committed on
Commit
62fc9d5
·
verified ·
1 Parent(s): 9ec318a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +267 -113
app.py CHANGED
@@ -1,118 +1,272 @@
1
- import os
 
 
 
 
 
 
 
 
2
  import streamlit as st
3
- import tempfile
4
  from pymongo import MongoClient
5
- from datetime import datetime
6
- from pathlib import Path
7
- from document_chunker import DocumentChunker
8
- from urllib.parse import quote_plus
9
-
10
# === MongoDB connection via Hugging Face secrets ===
def _required_env(name):
    """Return environment variable *name*, failing with a clear message when unset.

    Raises:
        RuntimeError: if the variable is missing or empty.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(f"Missing required environment variable: {name}")
    return value


# Credentials are percent-encoded because they are embedded in the URI below.
user = quote_plus(_required_env("MONGO_USER"))
password = quote_plus(_required_env("MONGO_PASS"))
cluster = _required_env("MONGO_CLUSTER")
db_name = os.environ.get("MONGO_DB", "grant_docs")

# Certificate validation stays ON unless explicitly disabled; skipping it
# exposes the credentials above to man-in-the-middle interception.
# Set MONGO_TLS_ALLOW_INVALID_CERTS=true only for local debugging.
allow_invalid_certs = os.getenv("MONGO_TLS_ALLOW_INVALID_CERTS", "").strip().lower() in {"1", "true", "yes"}

mongo_uri = f"mongodb+srv://{user}:{password}@{cluster}/{db_name}?retryWrites=true&w=majority&tls=true"
client = MongoClient(
    mongo_uri,
    tls=True,
    tlsAllowInvalidCertificates=allow_invalid_certs,
    serverSelectionTimeoutMS=20000,
)
db = client[db_name]
st.set_page_config(page_title="Doc Chunker", layout="wide")
19
def gate_ui():
    """Render the password gate and report whether this session may proceed.

    The expected password is read from the ``APP_PASSWORD`` Streamlit secret,
    falling back to the ``APP_PASSWORD`` environment variable. When neither
    is set (or it is blank) the gate is disabled and every session is
    treated as authenticated.

    Returns:
        True if the session is authenticated, False while the login form is
        still being shown.
    """
    import hmac

    # Either source may yield None; normalise before strip() so that the
    # "no password configured" branch below is actually reachable.
    configured = st.secrets.get("APP_PASSWORD", os.getenv("APP_PASSWORD"))
    app_password = str(configured or "").strip()

    if "authed" not in st.session_state:
        st.session_state.authed = False

    if not app_password:
        st.session_state.authed = True
        return True
    if st.session_state.authed:
        return True

    st.title("🔒 Document Chunker Login")
    pwd = st.text_input("Enter password", type="password")
    if st.button("Login"):
        # Constant-time comparison avoids leaking the password via timing.
        if hmac.compare_digest(pwd.encode("utf-8"), app_password.encode("utf-8")):
            st.session_state.authed = True
            st.rerun()
        else:
            st.error("Incorrect password.")
    return False
38
- # === Streamlit UI ===
39
def main():
    """Run the Document Chunker page.

    Flow: password gate -> category selection -> file upload -> duplicate
    check on (title, category) -> chunking with DocumentChunker -> bulk
    insert into the ``final_chunks`` collection -> preview of the first
    three chunks.
    """
    if not gate_ui():
        return
    st.title("📄 Document Chunker & Uploader")

    new_category_option = "Create New Category"

    # NOTE(review): the original indentation is not visible in this view, so
    # which widgets sit inside the sidebar block is a best guess -- confirm.
    with st.sidebar:
        st.header("Settings")

        # Fetch collection names for dropdown
        try:
            existing_categories = db["final_chunks"].distinct("collection_category") or []
        except Exception:
            # Best effort: if the lookup fails the user can still type a new category.
            existing_categories = []
        existing_categories = sorted([c for c in existing_categories if c]) + [new_category_option]
        selected_category = st.selectbox(
            "Choose Category (collection_category)",
            existing_categories,
            # The "create" entry is always appended last, so it is the default.
            index=len(existing_categories) - 1,
        )
        if selected_category == new_category_option:
            selected_category = st.sidebar.text_input("Enter Category Name:")
            if not selected_category:
                st.warning("⚠️ Enter a category name to proceed.")
                st.stop()

        is_grant_app = st.toggle("Is this a Grant Application?", value=False)

    uploaded_file = st.file_uploader("Upload a DOCX, TXT, or PDF file", type=["docx", "txt", "pdf"])
    if not uploaded_file:
        return

    st.success(f"Uploaded `{uploaded_file.name}`")

    modified_time = datetime.now().isoformat()
    collection = db['final_chunks']
    already = collection.find_one({
        "metadata.title": uploaded_file.name,
        "collection_category": selected_category
    })
    if already:
        st.warning(f"⚠️ `{uploaded_file.name}` already exists in category `{selected_category}`. Skipping…")
        return

    st.write(" Processing with DocumentChunker...")
    # A private temporary directory keeps the original file name (and
    # extension) for the chunker, avoids collisions between sessions that
    # upload the same name, and is removed even if chunking fails.
    with tempfile.TemporaryDirectory() as workdir:
        temp_path = Path(workdir) / Path(uploaded_file.name).name
        temp_path.write_bytes(uploaded_file.getbuffer())
        chunks = DocumentChunker().process_document(str(temp_path))

    if not chunks:
        st.warning("⚠️ No chunks were generated.")
        return

    for chunk in chunks:
        chunk['collection_category'] = selected_category
        chunk.setdefault('metadata', {}).update({
            "title": uploaded_file.name,
            "uploaded_at": modified_time,
            "is_grant_app": is_grant_app,
        })
    # One bulk write instead of one round trip per chunk.
    collection.insert_many(list(chunks))

    st.success(f"✅ {len(chunks)} chunks inserted into `final_chunks` (category: `{selected_category}`)")

    # Show a few previews; tolerate missing metadata so that a preview problem
    # cannot crash the page after the insert has already succeeded.
    for i, c in enumerate(chunks[:3]):
        meta = c.get('metadata', {})
        st.subheader(f"Chunk {i+1}: {meta.get('header') or 'No Header'}")
        st.markdown(c.get('text', '')[:400] + "...")
        st.caption(f"Topics: {', '.join(meta.get('topics') or [])} | Category: {meta.get('category')}")
        # st.progress only accepts floats within [0.0, 1.0].
        score = float(meta.get('confidence_score') or 0.0)
        st.progress(min(max(score, 0.0), 1.0))

    if len(chunks) > 3:
        st.info(f"... and {len(chunks)-3} more chunks processed.")


if __name__ == "__main__":
    main()
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import json
3
+ import math
4
+ from datetime import datetime
5
+ from typing import Dict, List
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ import plotly.graph_objects as go
10
  import streamlit as st
 
11
  from pymongo import MongoClient
12
+
13
+ st.set_page_config(page_title="Student Skill Radar", layout="wide")
14
+
15
+ # ------------------- Constants -------------------
16
# Individual skill names; this order is also the order of the radar axes
# when the chart is drawn ungrouped.
SKILLS = [
    "Problem-Solving", "Critical Thinking", "Analytical Reasoning",
    "Adaptability", "Continuous Learning", "Creativity",
    "Communication", "Collaboration", "Community Engagement",
    "Emotional Intelligence", "Ethical Decision-Making",
    "Time Management",
    "Tech Aptitude",
]

# (display label, member skills) for every skill cluster. The pair order is
# the axis order of the grouped radar chart.
_SKILL_CLUSTERS = (
    (
        "Problem-Solving, Critical Thinking, Analytical Reasoning",
        ["Problem-Solving", "Critical Thinking", "Analytical Reasoning"],
    ),
    (
        "Adaptability, Continuous Learning, Creativity",
        ["Adaptability", "Continuous Learning", "Creativity"],
    ),
    ("Time Management", ["Time Management"]),
    (
        "Communication, Teamwork, Collaboration, Community Engagement",
        ["Communication", "Collaboration", "Community Engagement"],
    ),
    (
        "Emotional Intelligence, Ethical Decision Making",
        ["Emotional Intelligence", "Ethical Decision-Making"],
    ),
    ("Tech Aptitude", ["Tech Aptitude"]),
)

# Cluster label -> list of SKILLS entries averaged into that cluster.
SKILL_GROUPS = dict(_SKILL_CLUSTERS)
55
+
56
+ # ------------------- Helpers -------------------
57
def safe_mean(vals):
    """Return the arithmetic mean of *vals* as a float, skipping missing entries.

    Entries that are ``None`` or NaN are ignored. NaN is how pandas marks a
    missing value, so scores read from a DataFrame row would otherwise turn
    the whole average into NaN.

    Returns:
        The mean of the usable entries, or 0.0 when there are none.
    """
    usable = [v for v in vals if v is not None and not math.isnan(v)]
    return float(np.mean(usable)) if usable else 0.0
60
+
61
+
62
def to_frame(records: List[dict]) -> pd.DataFrame:
    """Flatten records into a DataFrame with one column per skill.

    Each record's ``"skills"`` mapping is expanded into the columns listed in
    SKILLS (in that order); keys not in SKILLS are dropped. All other record
    fields are kept as columns. A skill that no record mentions becomes a
    column of 0.0.

    Returns:
        An empty DataFrame when *records* is empty, otherwise the flattened
        frame with the ``"skills"`` column removed.
    """
    if not records:
        return pd.DataFrame()
    df = pd.DataFrame(records)
    # Expand skills into columns. Built from the records themselves so that a
    # record with a missing or null "skills" entry counts as "no scores"
    # instead of raising.
    skill_df = pd.DataFrame([rec.get("skills") or {} for rec in records])
    # fill_value supplies the 0.0 default for absent skills; a bare reindex
    # would create those columns filled with NaN.
    skill_df = skill_df.reindex(columns=SKILLS, fill_value=0.0)
    return pd.concat([df.drop(columns=["skills"], errors="ignore"), skill_df], axis=1)
73
+
74
+
75
def summarize_records(records: List[dict], level: str = "student") -> pd.DataFrame:
    """Average skill scores per label.

    The label is the student name, or ``"<student> — <source>"`` when *level*
    is ``"student+source"``.

    Returns:
        A frame with a ``label`` column plus one mean column per entry in
        SKILLS; the empty frame from ``to_frame`` when there are no records.
    """
    frame = to_frame(records)
    if frame.empty:
        return frame

    names = frame["student"].astype(str)
    if level == "student+source":
        names = names + " — " + frame["source"].astype(str)
    frame["label"] = names

    per_label = frame.groupby("label")[SKILLS].mean()
    return per_label.reset_index()
84
+
85
+
86
def aggregate_groups(row: pd.Series) -> Dict[str, float]:
    """Collapse one row of per-skill scores into per-cluster averages.

    For every cluster in SKILL_GROUPS the member scores are read from *row*
    (0.0 when a member is absent) and averaged with ``safe_mean``.

    Returns:
        A dict mapping cluster label to its average, in SKILL_GROUPS order.
    """
    return {
        group: safe_mean([float(row.get(member, 0.0)) for member in members])
        for group, members in SKILL_GROUPS.items()
    }
91
+
92
+
93
def polar_radar(df: pd.DataFrame, grouped: bool, title: str):
    """Build a radar (polar) chart with one filled trace per row of *df*.

    Args:
        df: Frame with a ``label`` column and one column per skill.
        grouped: When true, plot the SKILL_GROUPS cluster averages instead of
            the individual SKILLS.
        title: Chart title; an empty value falls back to "Skill Radar".

    Returns:
        A plotly Figure; an empty Figure when *df* has no rows.
    """
    if df.empty:
        return go.Figure()

    axes = list(SKILL_GROUPS.keys()) if grouped else SKILLS

    traces = []
    for _, row in df.iterrows():
        if grouped:
            cluster_scores = aggregate_groups(row)
            values = [cluster_scores[axis] for axis in axes]
        else:
            values = [float(row.get(axis, 0.0)) for axis in axes]
        # Repeat the first point at the end so the outline closes on itself.
        trace = go.Scatterpolar(
            r=values + [values[0]],
            theta=axes + [axes[0]],
            name=row["label"],
            fill="toself",
        )
        traces.append(trace)

    fig = go.Figure(traces)
    radial_axis = dict(range=[0, 1.0], tickvals=[0.2, 0.4, 0.6, 0.8])
    fig.update_layout(
        title=title or "Skill Radar",
        showlegend=True,
        polar=dict(radialaxis=radial_axis),
        margin=dict(l=30, r=30, t=60, b=30),
    )
    return fig
123
+
124
+
125
+ # ------------------- Data Loaders -------------------
126
@st.cache_data(show_spinner=False)
def parse_summary_files(files) -> pd.DataFrame:
    """Build one row of skill scores per uploaded per-student summary JSON file.

    Each file should contain a JSON object with a ``"Name"`` (or
    ``"Student"``) entry and an ``"Average Skill Scores"`` mapping. Skills
    that are absent or null in the mapping default to 0.0.

    Files that cannot be parsed as a JSON object are skipped with a warning
    instead of aborting the whole upload.

    Returns:
        A DataFrame with a ``label`` column plus one column per entry in
        SKILLS; empty when *files* is empty/None or nothing could be parsed.
    """
    rows = []
    for f in files or []:
        file_name = getattr(f, "name", "uploaded file")
        try:
            # Rewind first: the handle may already have been consumed.
            f.seek(0)
            # json.loads accepts str and bytes and detects the UTF-8/16/32
            # encoding (including a BOM) of a bytes payload by itself.
            data = json.loads(f.read())
        except ValueError as err:  # JSONDecodeError / UnicodeDecodeError
            st.warning(f"Skipping {file_name}: not valid JSON ({err}).")
            continue
        if not isinstance(data, dict):
            st.warning(f"Skipping {file_name}: expected a JSON object at the top level.")
            continue

        name = data.get("Name") or data.get("Student") or "Unknown"
        scores = data.get("Average Skill Scores")
        if not isinstance(scores, dict):
            scores = {}
        row = {"label": name}
        for k in SKILLS:
            # `or 0.0` also covers an explicit null score.
            row[k] = float(scores.get(k) or 0.0)
        rows.append(row)
    return pd.DataFrame(rows)
143
+
144
+
145
@st.cache_data(show_spinner=False)
def mongo_records(uri: str, db_name: str, coll_name: str, student: str | None, source: str | None, start: str | None, end: str | None) -> List[dict]:
    """Fetch skill records from MongoDB, optionally filtered.

    Args:
        uri, db_name, coll_name: Connection target; if any is empty the
            function returns ``[]`` without connecting.
        student, source: Exact-match filters; ``None``, ``""`` and ``"(All)"``
            mean "no filter".
        start, end: Inclusive bounds applied to the ``date`` field with
            ``$gte`` / ``$lte``; the values are passed through unchanged.

    Returns:
        A list of dicts with ``student``, ``source``, ``date`` (as stored) and
        a ``skills`` dict that holds a float for every entry in SKILLS
        (0.0 where the stored record has no usable value).

    Connection and query errors propagate to the caller. Results are cached
    by ``st.cache_data`` per argument combination.
    """
    if not (uri and db_name and coll_name):
        return []

    q = {}
    if student and student != "(All)":
        q["student"] = student
    if source and source != "(All)":
        q["source"] = source
    date_bounds = {}
    if start:
        date_bounds["$gte"] = start
    if end:
        date_bounds["$lte"] = end
    if date_bounds:
        q["date"] = date_bounds

    projection = {"_id": 0, "student": 1, "source": 1, "date": 1, "skills": 1}
    recs = []
    # The context manager closes the client (and its connection pool) once the
    # cursor has been drained, so calls do not leak connections.
    with MongoClient(uri, serverSelectionTimeoutMS=6000) as client:
        for r in client[db_name][coll_name].find(q, projection):
            stored = r.get("skills")
            skills = stored if isinstance(stored, dict) else {}
            # `or 0.0` also covers a score stored as null.
            r["skills"] = {k: float(skills.get(k) or 0.0) for k in SKILLS}
            recs.append(r)
    return recs
171
+
172
+
173
@st.cache_data(show_spinner=False)
def mongo_distinct(uri: str, db_name: str, coll_name: str, field: str) -> List[str]:
    """Return the sorted, non-blank string values of *field* in a collection.

    Returns ``[]`` when any connection setting is empty or when the lookup
    fails for any reason.

    NOTE(review): the result is cached by ``st.cache_data``, so an empty list
    produced by a transient failure is cached as well until the cache is
    cleared -- confirm that this is acceptable.
    """
    if not (uri and db_name and coll_name):
        return []
    try:
        # Context manager closes the client after the query.
        with MongoClient(uri, serverSelectionTimeoutMS=6000) as client:
            vals = client[db_name][coll_name].distinct(field)
    except Exception:
        # Deliberately best-effort: this only feeds sidebar dropdowns, and the
        # URI may still be incomplete while the user is typing it.
        return []
    return sorted(v for v in vals if isinstance(v, str) and v.strip())
184
+
185
+
186
+ # ------------------- UI -------------------
187
def _checked_date(raw: str, field: str) -> str | None:
    """Return *raw* normalised to zero-padded YYYY-MM-DD, or None if blank/invalid.

    Stored dates are documented as "YYYY-MM-DD" strings, so the range filter
    is a string comparison; a malformed bound would silently select the wrong
    records. Invalid input is reported in the sidebar and ignored.
    """
    raw = raw.strip()
    if not raw:
        return None
    try:
        return datetime.strptime(raw, "%Y-%m-%d").date().isoformat()
    except ValueError:
        st.sidebar.error(f"{field} must be in YYYY-MM-DD format; ignoring '{raw}'.")
        return None


st.title("Student Skill Radar — Streamlit")

with st.sidebar:
    st.subheader("Data Source")
    data_source = st.radio("Select source", ["Upload JSON summaries", "MongoDB"], index=0)
    use_groups = st.toggle("Grouped skills (skill clusters)", value=False)
    agg_level = st.selectbox("Aggregation level", ["student", "student+source"], index=0, help="How to average records before plotting")
    chart_title = st.text_input("Chart title", value="")

if data_source == "Upload JSON summaries":
    files = st.file_uploader("Upload 1+ summary JSON files", type=["json"], accept_multiple_files=True)
    df = parse_summary_files(files)

    # Student dropdown based on uploaded files
    labels = ["(All)"] + (sorted(df["label"].unique().tolist()) if not df.empty else [])
    selected = st.sidebar.selectbox("Select student", labels)

    if selected != "(All)" and not df.empty:
        df = df[df["label"] == selected]

else:
    st.sidebar.subheader("MongoDB Settings")
    default_uri = st.secrets.get("MONGO_URI", "")
    mongo_uri = st.sidebar.text_input("MongoDB URI", value=default_uri, type="password")
    db_name = st.sidebar.text_input("Database name", value="grant_docs")
    coll_name = st.sidebar.text_input("Collection name", value="doc_chunks")

    # Dynamic dropdowns from MongoDB
    students = ["(All)"] + mongo_distinct(mongo_uri, db_name, coll_name, "student")
    sources = ["(All)"] + mongo_distinct(mongo_uri, db_name, coll_name, "source")

    student_choice = st.sidebar.selectbox("Select student", students)
    source_choice = st.sidebar.selectbox("Select source/week", sources)

    c1, c2 = st.sidebar.columns(2)
    start_date = _checked_date(c1.text_input("Start date (YYYY-MM-DD)", value=""), "Start date")
    end_date = _checked_date(c2.text_input("End date (YYYY-MM-DD)", value=""), "End date")

    try:
        recs = mongo_records(mongo_uri, db_name, coll_name, student_choice, source_choice, start_date, end_date)
    except Exception as err:
        # UI boundary: show a readable message instead of a raw traceback.
        # Only the error type is shown so connection details are not echoed.
        st.error(f"Could not load records from MongoDB ({type(err).__name__}). Check the connection settings.")
        recs = []

    # Same label + per-skill averaging used everywhere else in this file.
    df = summarize_records(recs, agg_level)

# ------------------- Output -------------------
left, right = st.columns([2, 1])

with left:
    # polar_radar already returns an empty figure for an empty frame.
    fig = polar_radar(df, use_groups, chart_title)
    st.plotly_chart(fig, use_container_width=True)

with right:
    st.subheader("Averaged Scores")
    if df.empty:
        st.info("No data yet. Upload summaries or configure MongoDB, then select a student.")
    else:
        st.dataframe(df, use_container_width=True, height=450)
        # CSV download
        csv = df.to_csv(index=False).encode("utf-8")
        st.download_button("Download CSV", data=csv, file_name="skill_scores.csv", mime="text/csv")
252
+
253
+ # --------------- README (for reference in Space) ---------------
254
+ """
255
+ To deploy on Hugging Face Spaces:
256
+ 1) Create a new Space → SDK: Streamlit → Python.
257
+ 2) Add `app.py` and a `requirements.txt` listing its dependencies.
258
+ 3) (Optional) Add a Secret named `MONGO_URI` for your Mongo connection.
259
+
260
+ Accepted Schemas
261
+ - Summary JSON (per student):
262
+ {
263
+ "Name": "Student Name",
264
+ "Average Skill Scores": {"Problem-Solving": 0.6, ...}
265
+ }
266
+ - MongoDB record (per response):
267
+ {
268
+ "uid": "...", "student": "...", "source": "week_2", "date": "YYYY-MM-DD",
269
+ "prompt": "...", "answer": "...",
270
+ "skills": { "Problem-Solving": 0.6, "Collaboration": 0.7, ... }
271
+ }
272
+ """