jeffrey1963 committed on
Commit
0eff467
·
verified ·
1 Parent(s): 01260a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -94
app.py CHANGED
@@ -70,13 +70,11 @@ def _docx_to_table_and_text(fileobj) -> tuple[pd.DataFrame|None, str]:
70
  # try to find a depreciation table
71
  for t in doc.tables:
72
  rows = [[c.text.strip() for c in r.cells] for r in t.rows]
73
- if not rows:
74
  continue
75
  hdr = rows[0]
76
- # minimal shape like your HW: Year | Begin BV | Depreciation | Accum | End BV
77
  if len(hdr) >= 4 and any("year" in _norm_name(h) for h in hdr):
78
  df = pd.DataFrame(rows[1:], columns=hdr)
79
- # drop empty rows
80
  df = df[~(df.astype(str).apply(lambda r: "".join(r), axis=1).str.strip() == "")]
81
  if not df.empty:
82
  return df, all_text
@@ -97,7 +95,6 @@ def _table_from_ocr_text(text: str) -> pd.DataFrame|None:
97
  return None
98
  lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
99
 
100
- # Strict header (your class format)
101
  hdr_i = -1
102
  for i, ln in enumerate(lines):
103
  low = ln.lower()
@@ -105,7 +102,6 @@ def _table_from_ocr_text(text: str) -> pd.DataFrame|None:
105
  hdr_i = i
106
  break
107
  if hdr_i == -1:
108
- # looser: split by >=2 spaces or tabs, header row must have >=4 cols
109
  for i, ln in enumerate(lines):
110
  parts = re.split(r"\s{2,}|\t+", ln)
111
  low = ln.lower()
@@ -122,7 +118,6 @@ def _table_from_ocr_text(text: str) -> pd.DataFrame|None:
122
  if len(parts) == len(header):
123
  data.append(parts)
124
  else:
125
- # stop at first badly formatted line after we’ve started reading data
126
  if len(data) >= 1:
127
  break
128
  if not data:
@@ -144,17 +139,14 @@ def _normalize_depr_columns(df_in: pd.DataFrame) -> pd.DataFrame:
144
  out["Accum Dep"] = df[c_acc] if c_acc else pd.NA
145
  out["End BV"] = df[c_end] if c_end else pd.NA
146
 
147
- # numeric coerce for values except year (year also coerced)
148
  out["Year"] = pd.to_numeric(out["Year"], errors="coerce")
149
  for col in ["Begin BV","Depreciation","Accum Dep","End BV"]:
150
  out[col] = out[col].map(_coerce_numeric)
151
- # drop empty rows
152
  out = out[~out[["Begin BV","Depreciation","Accum Dep","End BV"]].isna().all(axis=1)].reset_index(drop=True)
153
  return out
154
 
155
  # Monday Aug 11 New helpers
156
  def build_sl_schedule(cost: float, salvage: float, life: int, start_year: int):
157
- """Return the straight-line depreciation schedule as a DataFrame."""
158
  dep = (cost - salvage) / life
159
  years = [start_year + i for i in range(life)]
160
  begin_bv, dep_col, accum, end_bv = [], [], [], []
@@ -179,10 +171,8 @@ def build_sl_schedule(cost: float, salvage: float, life: int, start_year: int):
179
  return out
180
 
181
  def audit_against_expected(expected: pd.DataFrame, actual: pd.DataFrame):
182
- """Row-by-row deltas (actual - expected). Assumes normalized col names."""
183
  if actual is None or actual.empty:
184
  return pd.DataFrame(), "No student table found to check."
185
- # align only overlapping years
186
  merged = expected.merge(
187
  actual[["Year","Begin BV","Depreciation","Accum Dep","End BV"]],
188
  on="Year", how="inner", suffixes=("_exp","_act")
@@ -192,7 +182,6 @@ def audit_against_expected(expected: pd.DataFrame, actual: pd.DataFrame):
192
  deltas = pd.DataFrame({"Year": merged["Year"]})
193
  for c in ["Begin BV","Depreciation","Accum Dep","End BV"]:
194
  deltas[c + " Δ"] = merged[f"{c}_act"] - merged[f"{c}_exp"]
195
- # first mismatch helper
196
  first_bad = None
197
  for _, r in deltas.iterrows():
198
  if any(abs(r[col]) > 1e-6 for col in deltas.columns if col.endswith("Δ")):
@@ -205,8 +194,6 @@ def audit_against_expected(expected: pd.DataFrame, actual: pd.DataFrame):
205
  )
206
  return deltas, msg
207
 
208
-
209
-
210
  # ---------- Gradio callbacks ----------
211
  def _params_tuple(p):
212
  p = p or {}
@@ -219,9 +206,7 @@ def _params_tuple(p):
219
 
220
  def handle_docx(file):
221
  if file is None:
222
- # header, params_json, norm_df, cost, salv, life, year, state_params, state_table
223
  return "(no file)", {}, pd.DataFrame(), 0.0, 0.0, 10, pd.Timestamp.now().year, {}, pd.DataFrame()
224
-
225
  df_raw, header = _docx_to_table_and_text(file.name if hasattr(file, "name") else file)
226
  params = _extract_params(header or "")
227
  df_norm = _normalize_depr_columns(df_raw) if df_raw is not None else None
@@ -230,7 +215,7 @@ def handle_docx(file):
230
  header or "(no text found)",
231
  params,
232
  (df_norm if df_norm is not None else pd.DataFrame()),
233
- cost, salv, life, year, # ⬅ push directly to the inputs
234
  params,
235
  (df_norm if df_norm is not None else pd.DataFrame()),
236
  )
@@ -250,36 +235,47 @@ def handle_image(img):
250
  params,
251
  df_raw,
252
  (df_norm if df_norm is not None else pd.DataFrame()),
253
- cost, salv, life, year, # ⬅ push directly to the inputs
254
  params,
255
  (df_norm if df_norm is not None else pd.DataFrame()),
256
  )
257
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
  # ---------- UI ----------
260
  with gr.Blocks(title="Jerry • HW Intake (Echo)") as demo:
261
  last_params = gr.State({})
262
  last_table = gr.State(pd.DataFrame())
263
  gr.Markdown("## Jerry (TA) – Homework Intake\nThis Space **only reads and echoes** your files.\nNext step will add solving & coaching.")
 
 
264
  with gr.Tab("Upload .docx"):
265
  docx_in = gr.File(file_types=[".docx"], label="Homework .docx")
266
  btn1 = gr.Button("Read")
267
  header_txt = gr.Textbox(label="Header/Text (for params)", lines=8)
268
  params_json = gr.JSON(label="Detected parameters")
269
  table_df = gr.Dataframe(label="Detected table (normalized)", interactive=False)
270
- btn1.click(
271
- handle_docx,
272
- inputs=docx_in,
273
- outputs=[
274
- header_txt, # text
275
- params_json, # json
276
- table_df, # norm table visible in tab 1
277
- in_cost, in_salv, in_life, in_year, # ⬅ autofill the four inputs
278
- last_params, # state
279
- last_table, # state
280
- ],
281
- )
282
 
 
283
  with gr.Tab("Upload Image (.png/.jpg)"):
284
  img_in = gr.Image(type="pil", label="Photo or screenshot of your table")
285
  btn2 = gr.Button("OCR")
@@ -287,20 +283,8 @@ with gr.Blocks(title="Jerry • HW Intake (Echo)") as demo:
287
  params_json2 = gr.JSON(label="Detected parameters")
288
  raw_df = gr.Dataframe(label="Raw table guess", interactive=False)
289
  norm_df = gr.Dataframe(label="Detected table (normalized)", interactive=False)
290
- btn2.click(
291
- handle_image,
292
- inputs=img_in,
293
- outputs=[
294
- ocr_txt, # raw OCR text
295
- params_json2, # json
296
- raw_df, # raw table
297
- norm_df, # norm table visible in tab 2
298
- in_cost, in_salv, in_life, in_year, # ⬅ autofill the four inputs
299
- last_params, # state
300
- last_table, # state
301
- ],
302
- )
303
 
 
304
  with gr.Tab("Straight-Line • Solve & Check"):
305
  gr.Markdown("Enter params (auto-filled if detected) → build the correct SL schedule → compare to your uploaded table.")
306
  with gr.Row():
@@ -308,67 +292,46 @@ with gr.Blocks(title="Jerry • HW Intake (Echo)") as demo:
308
  in_salv = gr.Number(label="Salvage", value=0.0)
309
  in_life = gr.Number(label="Life (years)", value=10, precision=0)
310
  in_year = gr.Number(label="Start year", value=2025, precision=0)
311
- # added here
312
- btn_use = gr.Button("Use detected params")
313
-
314
- def fill_from_state(p):
315
- p = p or {}
316
- return (
317
- float(p.get("cost", 0.0)),
318
- float(p.get("salvage", 0.0)),
319
- int(p.get("life", 10)),
320
- int(p.get("start_year", pd.Timestamp.now().year)),
321
- )
322
-
323
- btn_use.click(fill_from_state, inputs=last_params,
324
- outputs=[in_cost, in_salv, in_life, in_year])
325
 
 
326
  btn_build = gr.Button("Build expected schedule")
327
  expected_df = gr.Dataframe(label="Expected (SL) schedule", interactive=False)
328
  btn_check = gr.Button("Check against uploaded table")
329
  deltas_df = gr.Dataframe(label="Differences (student − expected)", interactive=False)
330
  coach_txt = gr.Markdown()
331
 
332
- def fill_from_state(p):
333
- # prefill controls when we have parsed params
334
- p = p or {}
335
- return (
336
- p.get("cost", 0.0),
337
- p.get("salvage", 0.0),
338
- p.get("life", 10),
339
- p.get("start_year", pd.Timestamp.now().year),
340
- )
341
-
342
- def build_cb(cost, salv, life, year):
343
- try:
344
- df = build_sl_schedule(float(cost), float(salv), int(life), int(year))
345
- except Exception as e:
346
- return pd.DataFrame([{"error": str(e)}])
347
- return df
348
-
349
- def check_cb(cost, salv, life, year, table):
350
- exp = build_sl_schedule(float(cost), float(salv), int(life), int(year))
351
- deltas, msg = audit_against_expected(exp, table if isinstance(table, pd.DataFrame) else pd.DataFrame())
352
- return deltas, msg
353
-
354
- # Wire up
355
- btn_build.click(build_cb, [in_cost, in_salv, in_life, in_year], [expected_df])
356
- btn_check.click(check_cb, [in_cost, in_salv, in_life, in_year, last_table], [deltas_df, coach_txt])
357
-
358
-
359
-
360
-
361
-
362
-
363
-
364
- # Auto-fill inputs whenever we parse new params
365
- def prefill_inputs(p): return fill_from_state(p)
366
-
367
 
368
-
 
 
 
 
 
 
 
 
 
 
 
 
369
 
 
 
 
370
 
371
-
372
  gr.Markdown("— Echo mode finished. When this looks good, we’ll plug in the SL solver + coaching.")
373
 
374
  if __name__ == "__main__":
 
70
  # try to find a depreciation table
71
  for t in doc.tables:
72
  rows = [[c.text.strip() for c in r.cells] for r in t.rows]
73
+ if not rows:
74
  continue
75
  hdr = rows[0]
 
76
  if len(hdr) >= 4 and any("year" in _norm_name(h) for h in hdr):
77
  df = pd.DataFrame(rows[1:], columns=hdr)
 
78
  df = df[~(df.astype(str).apply(lambda r: "".join(r), axis=1).str.strip() == "")]
79
  if not df.empty:
80
  return df, all_text
 
95
  return None
96
  lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
97
 
 
98
  hdr_i = -1
99
  for i, ln in enumerate(lines):
100
  low = ln.lower()
 
102
  hdr_i = i
103
  break
104
  if hdr_i == -1:
 
105
  for i, ln in enumerate(lines):
106
  parts = re.split(r"\s{2,}|\t+", ln)
107
  low = ln.lower()
 
118
  if len(parts) == len(header):
119
  data.append(parts)
120
  else:
 
121
  if len(data) >= 1:
122
  break
123
  if not data:
 
139
  out["Accum Dep"] = df[c_acc] if c_acc else pd.NA
140
  out["End BV"] = df[c_end] if c_end else pd.NA
141
 
 
142
  out["Year"] = pd.to_numeric(out["Year"], errors="coerce")
143
  for col in ["Begin BV","Depreciation","Accum Dep","End BV"]:
144
  out[col] = out[col].map(_coerce_numeric)
 
145
  out = out[~out[["Begin BV","Depreciation","Accum Dep","End BV"]].isna().all(axis=1)].reset_index(drop=True)
146
  return out
147
 
148
  # Monday Aug 11 New helpers
149
  def build_sl_schedule(cost: float, salvage: float, life: int, start_year: int):
 
150
  dep = (cost - salvage) / life
151
  years = [start_year + i for i in range(life)]
152
  begin_bv, dep_col, accum, end_bv = [], [], [], []
 
171
  return out
172
 
173
  def audit_against_expected(expected: pd.DataFrame, actual: pd.DataFrame):
 
174
  if actual is None or actual.empty:
175
  return pd.DataFrame(), "No student table found to check."
 
176
  merged = expected.merge(
177
  actual[["Year","Begin BV","Depreciation","Accum Dep","End BV"]],
178
  on="Year", how="inner", suffixes=("_exp","_act")
 
182
  deltas = pd.DataFrame({"Year": merged["Year"]})
183
  for c in ["Begin BV","Depreciation","Accum Dep","End BV"]:
184
  deltas[c + " Δ"] = merged[f"{c}_act"] - merged[f"{c}_exp"]
 
185
  first_bad = None
186
  for _, r in deltas.iterrows():
187
  if any(abs(r[col]) > 1e-6 for col in deltas.columns if col.endswith("Δ")):
 
194
  )
195
  return deltas, msg
196
 
 
 
197
  # ---------- Gradio callbacks ----------
198
  def _params_tuple(p):
199
  p = p or {}
 
206
 
207
  def handle_docx(file):
208
  if file is None:
 
209
  return "(no file)", {}, pd.DataFrame(), 0.0, 0.0, 10, pd.Timestamp.now().year, {}, pd.DataFrame()
 
210
  df_raw, header = _docx_to_table_and_text(file.name if hasattr(file, "name") else file)
211
  params = _extract_params(header or "")
212
  df_norm = _normalize_depr_columns(df_raw) if df_raw is not None else None
 
215
  header or "(no text found)",
216
  params,
217
  (df_norm if df_norm is not None else pd.DataFrame()),
218
+ cost, salv, life, year,
219
  params,
220
  (df_norm if df_norm is not None else pd.DataFrame()),
221
  )
 
235
  params,
236
  df_raw,
237
  (df_norm if df_norm is not None else pd.DataFrame()),
238
+ cost, salv, life, year,
239
  params,
240
  (df_norm if df_norm is not None else pd.DataFrame()),
241
  )
242
 
243
+ def fill_from_state(p):
244
+ p = p or {}
245
+ return (
246
+ float(p.get("cost", 0.0)),
247
+ float(p.get("salvage", 0.0)),
248
+ int(p.get("life", 10)),
249
+ int(p.get("start_year", pd.Timestamp.now().year)),
250
+ )
251
+
252
+ def build_cb(cost, salv, life, year):
253
+ try:
254
+ df = build_sl_schedule(float(cost), float(salv), int(life), int(year))
255
+ except Exception as e:
256
+ return pd.DataFrame([{"error": str(e)}])
257
+ return df
258
+
259
+ def check_cb(cost, salv, life, year, table):
260
+ exp = build_sl_schedule(float(cost), float(salv), int(life), int(year))
261
+ deltas, msg = audit_against_expected(exp, table if isinstance(table, pd.DataFrame) else pd.DataFrame())
262
+ return deltas, msg
263
 
264
  # ---------- UI ----------
265
  with gr.Blocks(title="Jerry • HW Intake (Echo)") as demo:
266
  last_params = gr.State({})
267
  last_table = gr.State(pd.DataFrame())
268
  gr.Markdown("## Jerry (TA) – Homework Intake\nThis Space **only reads and echoes** your files.\nNext step will add solving & coaching.")
269
+
270
+ # --- Tab 1: DOCX ---
271
  with gr.Tab("Upload .docx"):
272
  docx_in = gr.File(file_types=[".docx"], label="Homework .docx")
273
  btn1 = gr.Button("Read")
274
  header_txt = gr.Textbox(label="Header/Text (for params)", lines=8)
275
  params_json = gr.JSON(label="Detected parameters")
276
  table_df = gr.Dataframe(label="Detected table (normalized)", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
277
 
278
+ # --- Tab 2: Image ---
279
  with gr.Tab("Upload Image (.png/.jpg)"):
280
  img_in = gr.Image(type="pil", label="Photo or screenshot of your table")
281
  btn2 = gr.Button("OCR")
 
283
  params_json2 = gr.JSON(label="Detected parameters")
284
  raw_df = gr.Dataframe(label="Raw table guess", interactive=False)
285
  norm_df = gr.Dataframe(label="Detected table (normalized)", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
286
 
287
+ # --- Tab 3: Solve & Check ---
288
  with gr.Tab("Straight-Line • Solve & Check"):
289
  gr.Markdown("Enter params (auto-filled if detected) → build the correct SL schedule → compare to your uploaded table.")
290
  with gr.Row():
 
292
  in_salv = gr.Number(label="Salvage", value=0.0)
293
  in_life = gr.Number(label="Life (years)", value=10, precision=0)
294
  in_year = gr.Number(label="Start year", value=2025, precision=0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
 
296
+ btn_use = gr.Button("Use detected params")
297
  btn_build = gr.Button("Build expected schedule")
298
  expected_df = gr.Dataframe(label="Expected (SL) schedule", interactive=False)
299
  btn_check = gr.Button("Check against uploaded table")
300
  deltas_df = gr.Dataframe(label="Differences (student − expected)", interactive=False)
301
  coach_txt = gr.Markdown()
302
 
303
+ # ---------- Wire events AFTER all components exist ----------
304
+ btn1.click(
305
+ handle_docx,
306
+ inputs=docx_in,
307
+ outputs=[
308
+ header_txt, # text
309
+ params_json, # json
310
+ table_df, # normalized table (tab 1)
311
+ in_cost, in_salv, in_life, in_year, # autofill inputs
312
+ last_params, # state
313
+ last_table, # state
314
+ ],
315
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
316
 
317
+ btn2.click(
318
+ handle_image,
319
+ inputs=img_in,
320
+ outputs=[
321
+ ocr_txt, # raw OCR text
322
+ params_json2, # json
323
+ raw_df, # raw table
324
+ norm_df, # normalized table (tab 2)
325
+ in_cost, in_salv, in_life, in_year, # autofill inputs
326
+ last_params, # state
327
+ last_table, # state
328
+ ],
329
+ )
330
 
331
+ btn_use.click(fill_from_state, inputs=last_params, outputs=[in_cost, in_salv, in_life, in_year])
332
+ btn_build.click(build_cb, [in_cost, in_salv, in_life, in_year], [expected_df])
333
+ btn_check.click(check_cb, [in_cost, in_salv, in_life, in_year, last_table], [deltas_df, coach_txt])
334
 
 
335
  gr.Markdown("— Echo mode finished. When this looks good, we’ll plug in the SL solver + coaching.")
336
 
337
  if __name__ == "__main__":