stevafernandes committed on
Commit
dc544bb
Β·
verified Β·
1 Parent(s): a289e39

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +240 -151
app.py CHANGED
@@ -3,104 +3,121 @@ import pandas as pd
3
  import numpy as np
4
  import pickle
5
  import os
6
- from sentence_transformers import SentenceTransformer
7
- from sklearn.model_selection import train_test_split
8
- from sklearn.preprocessing import StandardScaler
9
- from sklearn.metrics.pairwise import cosine_similarity
10
- from sklearn.ensemble import RandomForestClassifier
11
- from sklearn.dummy import DummyRegressor
12
- import xgboost as xgb
13
  import re
14
- import warnings
15
- warnings.filterwarnings('ignore')
16
 
17
- # Initialize Streamlit - MUST BE AT THE TOP
18
  st.set_page_config(
19
  page_title="Medical School Personal Statement Analyzer",
20
  page_icon="πŸ₯",
21
  layout="wide"
22
  )
23
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  # Categories definition
25
  CATEGORIES = {
26
  'Spark': {
27
  'description': 'Opening that spurs interest in medicine',
28
  'keywords': ['growing up', 'childhood', 'family', 'realized', 'inspired', 'first',
29
  'beginning', 'early', 'experience that', 'moment', 'when I was'],
30
- 'patterns': [r'when I was \d+', r'at age \d+', r'since I was', r'as a child'],
31
  'rubric': {
32
  1: 'disconnected or confusing',
33
- 2: 'somewhat connected but unclear',
34
  3: 'connected and clear',
35
  4: 'engaging and logical flow'
36
- },
37
- 'rubric_features': {
38
- 'positive': ['engaging', 'logical', 'clear', 'compelling', 'authentic'],
39
- 'negative': ['disconnected', 'confusing', 'random', 'unclear', 'generic']
40
  }
41
  },
42
  'Healthcare Experience': {
43
  'description': 'Clinical/medical experiences',
44
  'keywords': ['shadowed', 'clinical', 'hospital', 'patient', 'doctor', 'physician',
45
  'medical', 'treatment', 'observed', 'volunteer', 'clinic'],
46
- 'patterns': [r'\d+ hours', r'volunteered at', r'shadowing', r'clinical experience'],
47
  'rubric': {
48
  1: 'passive, uninteresting, negative',
49
  2: 'bland but not problematic',
50
  3: 'interesting and relevant',
51
  4: 'vivid, active, thoughtful, memorable'
52
- },
53
- 'rubric_features': {
54
- 'positive': ['vivid', 'active', 'thoughtful', 'memorable', 'optimistic'],
55
- 'negative': ['passive', 'uninteresting', 'irrelevant', 'problematic']
56
  }
57
  },
58
  'Showing Doctor Qualities': {
59
  'description': 'Leadership and doctor qualities',
60
  'keywords': ['leadership', 'empathy', 'compassion', 'responsibility', 'communication',
61
  'advocate', 'caring', 'helping', 'service', 'volunteer'],
62
- 'patterns': [r'as (president|leader|captain)', r'I organized', r'I founded'],
63
  'rubric': {
64
  1: 'arrogant, immature, inaccurate',
65
  2: 'bland but not problematic',
66
  3: 'shows some understanding',
67
  4: 'realistic, mature, humble, clear'
68
- },
69
- 'rubric_features': {
70
- 'positive': ['realistic', 'self-aware', 'mature', 'humble', 'specific'],
71
- 'negative': ['arrogant', 'immature', 'overly confident', 'simplistic']
72
  }
73
  },
74
  'Spin': {
75
  'description': 'Connecting experiences to medical career',
76
  'keywords': ['learned', 'taught me', 'showed me', 'realized', 'understood',
77
  'because', 'therefore', 'this experience', 'prepared me'],
78
- 'patterns': [r'this .+ taught me', r'I learned that', r'prepared me for'],
79
  'rubric': {
80
  1: 'vague, simplistic, generic',
81
  2: 'some connection but generic',
82
  3: 'clear connection',
83
  4: 'direct, logical, specific argument'
84
- },
85
- 'rubric_features': {
86
- 'positive': ['direct', 'logical', 'specific', 'clear argument'],
87
- 'negative': ['brief', 'vague', 'simplistic', 'generic']
88
  }
89
  }
90
  }
91
 
92
- # Model paths
93
- MODEL_DIR = "trained_models"
94
-
95
- # Helper functions
96
- @st.cache_resource
97
- def load_transformer():
98
  try:
99
- return SentenceTransformer('all-MiniLM-L6-v2')
100
- except:
 
 
 
101
  return None
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  def extract_features(text, embedder):
 
104
  features = []
105
  text_lower = text.lower()
106
  words = text.split()
@@ -112,25 +129,32 @@ def extract_features(text, embedder):
112
  len(set(words)) / max(len(words), 1)
113
  ])
114
 
115
- # Category features
116
  for cat_name, cat_info in CATEGORIES.items():
117
  keyword_count = sum(1 for kw in cat_info['keywords'] if kw.lower() in text_lower)
118
  features.append(keyword_count / len(cat_info['keywords']))
119
 
120
  # Get embedding
121
- try:
122
- embedding = embedder.encode(text)
123
- if hasattr(embedding, 'cpu'):
124
- embedding = embedding.cpu().numpy()
125
- embedding = embedding.flatten()[:128] # Reduced size
126
- except:
 
 
 
127
  embedding = np.zeros(128)
128
 
129
  return np.concatenate([features, embedding])
130
 
131
- def train_simple_model(df, embedder):
 
 
 
 
132
  X = []
133
- y_labels = []
134
 
135
  for _, row in df.iterrows():
136
  if 'text' in row:
@@ -138,28 +162,36 @@ def train_simple_model(df, embedder):
138
  features = extract_features(text, embedder)
139
  X.append(features)
140
 
141
- # Find category
142
  label = 'Unknown'
143
  for cat in CATEGORIES.keys():
144
- if f"Code: {cat} Applied" in row:
145
- if row[f"Code: {cat} Applied"] in [True, 1, '1', 'true', 'True']:
 
146
  label = cat
147
  break
148
- y_labels.append(label)
 
 
 
149
 
150
  X = np.array(X)
151
 
152
- # Train classifier
153
  scaler = StandardScaler()
154
  X_scaled = scaler.fit_transform(X)
155
 
 
156
  clf = RandomForestClassifier(n_estimators=50, max_depth=5, random_state=42)
157
- clf.fit(X_scaled, y_labels)
158
 
159
  return scaler, clf
160
 
161
- def analyze_text(text, embedder, scaler, clf):
162
- # Split into paragraphs
 
 
 
163
  paragraphs = text.split('\n\n')
164
  paragraphs = [p.strip() for p in paragraphs if len(p.strip()) > 50]
165
 
@@ -183,120 +215,177 @@ def analyze_text(text, embedder, scaler, clf):
183
 
184
  return results
185
 
186
- # MAIN APP STARTS HERE
187
- st.title("πŸ₯ Medical School Personal Statement Analyzer")
188
- st.markdown("Analyze personal statements based on medical school rubrics")
189
-
190
- # Initialize session state
191
- if 'model_trained' not in st.session_state:
192
- st.session_state.model_trained = False
193
- if 'scaler' not in st.session_state:
194
- st.session_state.scaler = None
195
- if 'clf' not in st.session_state:
196
- st.session_state.clf = None
197
-
198
- # Load transformer
199
- embedder = load_transformer()
200
- if embedder is None:
201
- st.error("Failed to load model. Please refresh the page.")
202
- st.stop()
203
-
204
- # Tabs
205
- tab1, tab2, tab3 = st.tabs(["Train Model", "Analyze Statement", "View Rubrics"])
206
-
207
- with tab1:
208
- st.header("Step 1: Train the Model")
209
 
210
- st.markdown("Upload Excel files with coded personal statement excerpts")
 
 
 
 
 
 
 
 
211
 
212
- uploaded_file = st.file_uploader("Upload Training Data", type=['xlsx', 'csv'])
 
213
 
214
- if uploaded_file:
215
- try:
216
- if uploaded_file.name.endswith('.csv'):
217
- df = pd.read_csv(uploaded_file)
218
- else:
219
- df = pd.read_excel(uploaded_file)
220
-
221
- st.success(f"Loaded {len(df)} rows")
222
 
223
- # Process data
224
- processed_data = []
225
- for _, row in df.iterrows():
226
- text_col = None
227
- for col in ['Excerpt Copy', 'Excerpt', 'Text', 'Content']:
228
- if col in row and pd.notna(row[col]):
229
- text_col = col
230
- break
231
-
232
- if text_col:
233
- processed_data.append({
234
- 'text': str(row[text_col]),
235
- **{col: row[col] for col in row.index if 'Code:' in col}
236
- })
237
 
238
- if processed_data:
239
- train_df = pd.DataFrame(processed_data)
240
-
241
- if st.button("Train Model"):
242
- with st.spinner("Training..."):
243
- scaler, clf = train_simple_model(train_df, embedder)
244
- st.session_state.scaler = scaler
245
- st.session_state.clf = clf
246
- st.session_state.model_trained = True
247
- st.success("Model trained successfully!")
248
- else:
249
- st.error("No valid text data found")
250
-
251
- except Exception as e:
252
- st.error(f"Error: {str(e)}")
253
-
254
- with tab2:
255
- st.header("Step 2: Analyze Personal Statement")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
 
257
- if not st.session_state.model_trained:
258
- st.warning("Please train the model first in Step 1")
259
- else:
260
- text_input = st.text_area("Paste your personal statement:", height=300)
261
 
262
- if text_input and st.button("Analyze"):
 
 
 
 
 
 
 
 
 
263
  with st.spinner("Analyzing..."):
264
- results = analyze_text(
265
- text_input,
266
- embedder,
267
- st.session_state.scaler,
268
- st.session_state.clf
269
- )
 
 
 
270
 
271
  st.success("Analysis Complete!")
272
 
273
  # Summary
274
- st.subheader("Summary")
275
  categories_found = list(set([r['category'] for r in results if r['category'] != 'Unknown']))
276
- st.metric("Categories Found", f"{len(categories_found)}/4")
277
 
278
- # Details
279
- st.subheader("Segment Analysis")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  for result in results:
281
  with st.expander(f"Segment {result['segment']}: {result['category']}"):
282
- st.write(f"**Confidence:** {result['confidence']:.1%}")
283
- st.write(f"**Text:** {result['text']}")
 
 
 
284
 
285
  # Recommendations
286
- st.subheader("Recommendations")
287
  missing = [cat for cat in CATEGORIES.keys() if cat not in categories_found]
288
  if missing:
289
- st.warning("Missing categories:")
290
  for cat in missing:
291
- st.write(f"β€’ Add {cat}: {CATEGORIES[cat]['description']}")
292
-
293
- with tab3:
294
- st.header("Scoring Rubrics")
295
 
296
- for category, info in CATEGORIES.items():
297
- with st.expander(category):
298
- st.write(f"**Description:** {info['description']}")
299
- st.write("**Scoring:**")
300
- for score in [4, 3, 2, 1]:
301
- st.write(f"β€’ Score {score}: {info['rubric'][score]}")
302
- st.write(f"**Keywords:** {', '.join(info['keywords'][:5])}...")
 
 
 
 
 
 
 
 
 
 
 
 
3
  import numpy as np
4
  import pickle
5
  import os
 
 
 
 
 
 
 
6
  import re
7
+ from io import BytesIO
 
8
 
9
+ # Page config MUST be first
10
  st.set_page_config(
11
  page_title="Medical School Personal Statement Analyzer",
12
  page_icon="πŸ₯",
13
  layout="wide"
14
  )
15
 
16
+ # Import ML libraries after streamlit
17
+ try:
18
+ from sentence_transformers import SentenceTransformer
19
+ from sklearn.preprocessing import StandardScaler
20
+ from sklearn.ensemble import RandomForestClassifier
21
+ from sklearn.metrics.pairwise import cosine_similarity
22
+ import xgboost as xgb
23
+ ML_AVAILABLE = True
24
+ except ImportError as e:
25
+ ML_AVAILABLE = False
26
+ st.error(f"ML libraries not loaded: {e}")
27
+
28
  # Categories definition
29
  CATEGORIES = {
30
  'Spark': {
31
  'description': 'Opening that spurs interest in medicine',
32
  'keywords': ['growing up', 'childhood', 'family', 'realized', 'inspired', 'first',
33
  'beginning', 'early', 'experience that', 'moment', 'when I was'],
 
34
  'rubric': {
35
  1: 'disconnected or confusing',
36
+ 2: 'somewhat connected but unclear',
37
  3: 'connected and clear',
38
  4: 'engaging and logical flow'
 
 
 
 
39
  }
40
  },
41
  'Healthcare Experience': {
42
  'description': 'Clinical/medical experiences',
43
  'keywords': ['shadowed', 'clinical', 'hospital', 'patient', 'doctor', 'physician',
44
  'medical', 'treatment', 'observed', 'volunteer', 'clinic'],
 
45
  'rubric': {
46
  1: 'passive, uninteresting, negative',
47
  2: 'bland but not problematic',
48
  3: 'interesting and relevant',
49
  4: 'vivid, active, thoughtful, memorable'
 
 
 
 
50
  }
51
  },
52
  'Showing Doctor Qualities': {
53
  'description': 'Leadership and doctor qualities',
54
  'keywords': ['leadership', 'empathy', 'compassion', 'responsibility', 'communication',
55
  'advocate', 'caring', 'helping', 'service', 'volunteer'],
 
56
  'rubric': {
57
  1: 'arrogant, immature, inaccurate',
58
  2: 'bland but not problematic',
59
  3: 'shows some understanding',
60
  4: 'realistic, mature, humble, clear'
 
 
 
 
61
  }
62
  },
63
  'Spin': {
64
  'description': 'Connecting experiences to medical career',
65
  'keywords': ['learned', 'taught me', 'showed me', 'realized', 'understood',
66
  'because', 'therefore', 'this experience', 'prepared me'],
 
67
  'rubric': {
68
  1: 'vague, simplistic, generic',
69
  2: 'some connection but generic',
70
  3: 'clear connection',
71
  4: 'direct, logical, specific argument'
 
 
 
 
72
  }
73
  }
74
  }
75
 
76
+ def load_model():
77
+ """Load the sentence transformer model"""
78
+ if not ML_AVAILABLE:
79
+ return None
 
 
80
  try:
81
+ with st.spinner("Loading AI model..."):
82
+ model = SentenceTransformer('all-MiniLM-L6-v2')
83
+ return model
84
+ except Exception as e:
85
+ st.error(f"Failed to load model: {e}")
86
  return None
87
 
88
+ def analyze_text_simple(text):
89
+ """Simple keyword-based analysis without ML"""
90
+ paragraphs = text.split('\n\n')
91
+ paragraphs = [p.strip() for p in paragraphs if len(p.strip()) > 50]
92
+
93
+ if not paragraphs:
94
+ paragraphs = [text]
95
+
96
+ results = []
97
+ for i, para in enumerate(paragraphs):
98
+ para_lower = para.lower()
99
+
100
+ # Find best matching category
101
+ best_category = 'Unknown'
102
+ best_score = 0
103
+
104
+ for cat_name, cat_info in CATEGORIES.items():
105
+ score = sum(1 for kw in cat_info['keywords'] if kw.lower() in para_lower)
106
+ if score > best_score:
107
+ best_score = score
108
+ best_category = cat_name
109
+
110
+ results.append({
111
+ 'segment': i + 1,
112
+ 'category': best_category,
113
+ 'keyword_matches': best_score,
114
+ 'text': para[:200] + '...' if len(para) > 200 else para
115
+ })
116
+
117
+ return results
118
+
119
  def extract_features(text, embedder):
120
+ """Extract features for ML analysis"""
121
  features = []
122
  text_lower = text.lower()
123
  words = text.split()
 
129
  len(set(words)) / max(len(words), 1)
130
  ])
131
 
132
+ # Category keyword features
133
  for cat_name, cat_info in CATEGORIES.items():
134
  keyword_count = sum(1 for kw in cat_info['keywords'] if kw.lower() in text_lower)
135
  features.append(keyword_count / len(cat_info['keywords']))
136
 
137
  # Get embedding
138
+ if embedder:
139
+ try:
140
+ embedding = embedder.encode(text)
141
+ if hasattr(embedding, 'cpu'):
142
+ embedding = embedding.cpu().numpy()
143
+ embedding = embedding.flatten()[:128]
144
+ except:
145
+ embedding = np.zeros(128)
146
+ else:
147
  embedding = np.zeros(128)
148
 
149
  return np.concatenate([features, embedding])
150
 
151
+ def train_model(df, embedder):
152
+ """Train a simple classifier"""
153
+ if not ML_AVAILABLE:
154
+ return None, None
155
+
156
  X = []
157
+ y = []
158
 
159
  for _, row in df.iterrows():
160
  if 'text' in row:
 
162
  features = extract_features(text, embedder)
163
  X.append(features)
164
 
165
+ # Find category label
166
  label = 'Unknown'
167
  for cat in CATEGORIES.keys():
168
+ col_name = f"Code: {cat} Applied"
169
+ if col_name in row:
170
+ if row[col_name] in [True, 1, '1', 'true', 'True', 'yes', 'Yes']:
171
  label = cat
172
  break
173
+ y.append(label)
174
+
175
+ if not X:
176
+ return None, None
177
 
178
  X = np.array(X)
179
 
180
+ # Scale features
181
  scaler = StandardScaler()
182
  X_scaled = scaler.fit_transform(X)
183
 
184
+ # Train classifier
185
  clf = RandomForestClassifier(n_estimators=50, max_depth=5, random_state=42)
186
+ clf.fit(X_scaled, y)
187
 
188
  return scaler, clf
189
 
190
+ def analyze_with_model(text, embedder, scaler, clf):
191
+ """Analyze text using trained model"""
192
+ if not ML_AVAILABLE or not all([embedder, scaler, clf]):
193
+ return analyze_text_simple(text)
194
+
195
  paragraphs = text.split('\n\n')
196
  paragraphs = [p.strip() for p in paragraphs if len(p.strip()) > 50]
197
 
 
215
 
216
  return results
217
 
218
+ # Main App
219
+ def main():
220
+ st.title("πŸ₯ Medical School Personal Statement Analyzer")
221
+ st.markdown("Analyze personal statements based on medical school admission rubrics")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
 
223
+ # Initialize session state
224
+ if 'model_trained' not in st.session_state:
225
+ st.session_state['model_trained'] = False
226
+ if 'embedder' not in st.session_state:
227
+ st.session_state['embedder'] = None
228
+ if 'scaler' not in st.session_state:
229
+ st.session_state['scaler'] = None
230
+ if 'clf' not in st.session_state:
231
+ st.session_state['clf'] = None
232
 
233
+ # Tabs
234
+ tab1, tab2, tab3 = st.tabs(["πŸ“š Train Model", "πŸ“ Analyze Statement", "πŸ“Š View Rubrics"])
235
 
236
+ with tab1:
237
+ st.header("Train the AI Model")
238
+
239
+ if ML_AVAILABLE:
240
+ st.info("Upload an Excel file with coded personal statement excerpts to train the model.")
 
 
 
241
 
242
+ uploaded_file = st.file_uploader("Upload Training Data", type=['xlsx', 'csv'])
 
 
 
 
 
 
 
 
 
 
 
 
 
243
 
244
+ if uploaded_file:
245
+ try:
246
+ # Read file
247
+ if uploaded_file.name.endswith('.csv'):
248
+ df = pd.read_csv(uploaded_file)
249
+ else:
250
+ df = pd.read_excel(uploaded_file)
251
+
252
+ st.success(f"Loaded {len(df)} rows")
253
+
254
+ # Show sample of data
255
+ st.write("Sample of data:")
256
+ st.dataframe(df.head())
257
+
258
+ # Process data
259
+ processed_data = []
260
+ for _, row in df.iterrows():
261
+ # Find text column
262
+ text_col = None
263
+ for col in ['Excerpt Copy', 'Excerpt', 'Text', 'Content']:
264
+ if col in df.columns and pd.notna(row[col]):
265
+ text_col = col
266
+ break
267
+
268
+ if text_col:
269
+ row_data = {'text': str(row[text_col])}
270
+ # Add category columns
271
+ for col in df.columns:
272
+ if 'Code:' in col:
273
+ row_data[col] = row[col]
274
+ processed_data.append(row_data)
275
+
276
+ if processed_data:
277
+ train_df = pd.DataFrame(processed_data)
278
+ st.write(f"Found {len(train_df)} valid training samples")
279
+
280
+ if st.button("Train Model", type="primary"):
281
+ # Load embedder if needed
282
+ if st.session_state['embedder'] is None:
283
+ st.session_state['embedder'] = load_model()
284
+
285
+ if st.session_state['embedder']:
286
+ with st.spinner("Training model..."):
287
+ scaler, clf = train_model(train_df, st.session_state['embedder'])
288
+
289
+ if scaler and clf:
290
+ st.session_state['scaler'] = scaler
291
+ st.session_state['clf'] = clf
292
+ st.session_state['model_trained'] = True
293
+ st.success("βœ… Model trained successfully!")
294
+ else:
295
+ st.error("Training failed. Check your data format.")
296
+ else:
297
+ st.error("Could not load the AI model.")
298
+ else:
299
+ st.error("No valid text data found in the file.")
300
+
301
+ except Exception as e:
302
+ st.error(f"Error reading file: {e}")
303
+ else:
304
+ st.warning("ML libraries not available. Using keyword-based analysis only.")
305
 
306
+ with tab2:
307
+ st.header("Analyze Personal Statement")
 
 
308
 
309
+ analysis_method = "ML" if st.session_state['model_trained'] else "Keyword"
310
+ st.info(f"Using {analysis_method}-based analysis")
311
+
312
+ text_input = st.text_area(
313
+ "Paste your personal statement here:",
314
+ height=300,
315
+ placeholder="Enter your personal statement text..."
316
+ )
317
+
318
+ if text_input and st.button("Analyze", type="primary"):
319
  with st.spinner("Analyzing..."):
320
+ if st.session_state['model_trained']:
321
+ results = analyze_with_model(
322
+ text_input,
323
+ st.session_state['embedder'],
324
+ st.session_state['scaler'],
325
+ st.session_state['clf']
326
+ )
327
+ else:
328
+ results = analyze_text_simple(text_input)
329
 
330
  st.success("Analysis Complete!")
331
 
332
  # Summary
333
+ st.subheader("πŸ“Š Summary")
334
  categories_found = list(set([r['category'] for r in results if r['category'] != 'Unknown']))
 
335
 
336
+ col1, col2, col3 = st.columns(3)
337
+ with col1:
338
+ st.metric("Categories Found", f"{len(categories_found)}/4")
339
+ with col2:
340
+ st.metric("Segments Analyzed", len(results))
341
+ with col3:
342
+ quality = "Good" if len(categories_found) >= 3 else "Needs Work"
343
+ st.metric("Overall", quality)
344
+
345
+ # Category presence
346
+ st.subheader("πŸ“‹ Category Coverage")
347
+ for cat in CATEGORIES.keys():
348
+ if cat in categories_found:
349
+ st.write(f"βœ… **{cat}**: Found")
350
+ else:
351
+ st.write(f"❌ **{cat}**: Not detected")
352
+
353
+ # Segment details
354
+ st.subheader("πŸ“ Segment Analysis")
355
  for result in results:
356
  with st.expander(f"Segment {result['segment']}: {result['category']}"):
357
+ if 'confidence' in result:
358
+ st.write(f"**Confidence:** {result['confidence']:.1%}")
359
+ elif 'keyword_matches' in result:
360
+ st.write(f"**Keyword Matches:** {result['keyword_matches']}")
361
+ st.write(f"**Text Preview:** {result['text']}")
362
 
363
  # Recommendations
364
+ st.subheader("πŸ’‘ Recommendations")
365
  missing = [cat for cat in CATEGORIES.keys() if cat not in categories_found]
366
  if missing:
367
+ st.warning("**Missing Categories - Add content for:**")
368
  for cat in missing:
369
+ st.write(f"β€’ **{cat}**: {CATEGORIES[cat]['description']}")
370
+ st.write(f" Keywords: {', '.join(CATEGORIES[cat]['keywords'][:5])}...")
371
+ else:
372
+ st.success("Great! All categories are represented in your statement.")
373
 
374
+ with tab3:
375
+ st.header("Scoring Rubrics")
376
+ st.info("Understanding how each category is evaluated")
377
+
378
+ for category, info in CATEGORIES.items():
379
+ with st.expander(f"**{category}** - {info['description']}"):
380
+ st.write("**Scoring Criteria:**")
381
+ for score in [4, 3, 2, 1]:
382
+ quality = ['Poor', 'Below Average', 'Good', 'Excellent'][score-1]
383
+ st.write(f"β€’ **Score {score} ({quality}):** {info['rubric'][score]}")
384
+ st.write(f"\n**Key Terms:** {', '.join(info['keywords'])}")
385
+
386
+ # Run the app
387
+ if __name__ == "__main__":
388
+ main()
389
+ else:
390
+ # This ensures the app runs when imported by Streamlit
391
+ main()