Spaces:
Sleeping
Sleeping
# app.py — Study Buddy (transparent TF-IDF MCQ helper)
| # Hugging Face Space: Gradio app (no API keys required) | |
| from dataclasses import dataclass | |
| from typing import List, Tuple | |
| import re | |
| import numpy as np | |
| import gradio as gr | |
| # scikit-learn | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| # -------------------------- | |
| # Default corpus (your text) | |
| # -------------------------- | |
# Built-in fallback corpus as (doc_id, text) pairs. parse_corpus_to_notes
# returns this list when the user's text yields no section of usable length.
# The dashes / minus signs below were restored from mis-decoded characters;
# a garbled "10?15" would be indexed as one junk token instead of "10" and "15".
DEFAULT_NOTES: List[Tuple[str, str]] = [
    ("drought_planning", """
Drought planning should consider risk, cash flow, forage, and animal welfare.
Short term options include purchasing supplemental feed, leasing additional pasture, or modestly destocking (e.g., 10–15%).
Leasing pasture helps preserve home forage and avoids overgrazing, often with lower upfront cash than large feed purchases.
Waiting for rain without a contingency is risky due to price and yield volatility.
"""),
    ("depreciation_basics", """
Straight line depreciation allocates (Cost − Salvage) evenly over an asset's useful life.
Annual Depreciation = (Cost − Salvage) / Life.
Ending book value should not fall below the salvage value.
"""),
    ("stocking_rate", """
Rotational grazing with adequate rest improves pasture condition and resilience.
During drought or slow growth, reduce stocking rate by 10–15% to protect ground cover and avoid overuse.
Short term leasing of nearby pasture can also relieve pressure and stabilize production.
"""),
    ("budgeting_cashflow", """
Enterprise budgets compare revenues and costs per activity; sensitivity analysis helps evaluate risk.
When cash flow is tight, options with lower upfront cost or flexible payment terms can be preferable.
"""),
]
# Initial content of the editable "Your notes" textbox. Lines that start with
# three or more '#' open a new section whose heading becomes the doc_id.
# Dashes / minus signs were restored from mis-decoded characters.
DEFAULT_CORPUS_TEXT = """# Study Buddy Corpus (edit me)
# Format suggestions:
# - You can leave this as simple text. Paragraphs/sentences become "passages".
# - Optionally add section headings like: ### drought_planning
### drought_planning
Drought planning should consider risk, cash flow, forage, and animal welfare.
Short term options include purchasing supplemental feed, leasing additional pasture, or modestly destocking (e.g., 10–15%).
Leasing pasture helps preserve home forage and avoids overgrazing, often with lower upfront cash than large feed purchases.
Waiting for rain without a contingency is risky due to price and yield volatility.
### depreciation_basics
Straight line depreciation allocates (Cost − Salvage) evenly over an asset's useful life.
Annual Depreciation = (Cost − Salvage) / Life.
Ending book value should not fall below the salvage value.
### stocking_rate
Rotational grazing with adequate rest improves pasture condition and resilience.
During drought or slow growth, reduce stocking rate by 10–15% to protect ground cover and avoid overuse.
Short term leasing of nearby pasture can also relieve pressure and stabilize production.
### budgeting_cashflow
Enterprise budgets compare revenues and costs per activity; sensitivity analysis helps evaluate risk.
When cash flow is tight, options with lower upfront cost or flexible payment terms can be preferable.
"""
| # -------------------------- | |
| # Passage splitting & index | |
| # -------------------------- | |
# A passage ends at a newline or at a run of periods that is followed by
# whitespace or the end of the text. Periods inside "e.g.," or "2.5" are not
# followed by whitespace, so they no longer cut a sentence in half.
_PASSAGE_BREAK = re.compile(r'(?:\.+(?=\s|$)|\n)+')


def split_passages(text: str, min_len: int = 30) -> List[str]:
    """Split *text* into sentence-like passages.

    Args:
        text: Raw note text.
        min_len: Minimum length (after stripping whitespace) for a piece to
            be kept; shorter fragments are discarded as noise.

    Returns:
        The stripped passages in document order, without the terminating
        period. Empty input yields an empty list.
    """
    pieces = (piece.strip() for piece in _PASSAGE_BREAK.split(text))
    return [piece for piece in pieces if len(piece) >= min_len]
def parse_corpus_to_notes(corpus_text: str) -> List[Tuple[str, str]]:
    """Turn the textarea content into ``(doc_id, text)`` sections.

    A line made of three or more ``#`` followed by a title starts a new
    section; the title is lower-cased and every run of characters outside
    ``[a-z0-9_]`` becomes ``_`` to form the doc_id. Text that appears before
    the first heading is filed under the doc_id ``notes``.

    Sections whose stripped text is shorter than 30 characters are dropped.
    If nothing survives, ``DEFAULT_NOTES`` is returned instead.
    """
    heading_re = re.compile(r'^\s*#{3,}\s*(.+?)\s*$')
    sections: List[Tuple[str, str]] = []
    doc_id = "notes"
    pending: List[str] = []

    for raw_line in corpus_text.splitlines():
        heading = heading_re.match(raw_line)
        if heading is None:
            pending.append(raw_line)
            continue
        # A heading closes the section collected so far (if any lines exist).
        if pending:
            sections.append((doc_id, "\n".join(pending).strip()))
            pending = []
        doc_id = re.sub(r'[^a-z0-9_]+', '_', heading.group(1).strip().lower())

    # Close the trailing section.
    if pending:
        sections.append((doc_id, "\n".join(pending).strip()))

    usable = [(sec_id, body) for sec_id, body in sections if len(body.strip()) >= 30]
    return usable or DEFAULT_NOTES
@dataclass
class PassageIndex:
    """Fitted TF-IDF model together with the passages it was fitted on.

    The index is created positionally as
    ``PassageIndex(vectorizer, X, ids, texts)``; the ``@dataclass`` decorator
    generates that constructor (without it the class accepts no arguments).
    """

    # Vectorizer fitted on `texts`; reused to transform each query.
    vectorizer: TfidfVectorizer
    # Result of `vectorizer.fit_transform(texts)`, one row per passage.
    # NOTE(review): scikit-learn returns a SciPy sparse matrix here rather
    # than a dense ndarray — confirm whether the annotation should change.
    X: np.ndarray
    # ids[i] is the doc_id of the section that texts[i] was split from.
    ids: List[str]
    # The passages themselves, parallel to `ids` and to the rows of `X`.
    texts: List[str]
def build_corpus_passages(notes: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """Expand ``(doc_id, text)`` notes into ``(doc_id, passage)`` pairs.

    Each note's text is run through ``split_passages``; every resulting
    passage is paired with the doc_id of the note it came from, preserving
    note order and passage order.
    """
    return [
        (doc_id, passage)
        for doc_id, body in notes
        for passage in split_passages(body)
    ]
def _fit_tfidf(texts: List[str]):
    """Fit a unigram+bigram TF-IDF model on *texts*.

    ``max_df=0.9`` is tried first so that terms present in (almost) every
    passage are pruned. scikit-learn turns a float ``max_df`` into a document
    count of ``max_df * n_docs``; for a single passage that is 0.9, which is
    below ``min_df=1`` and makes ``fit_transform`` raise ``ValueError``. The
    same exception is raised when pruning leaves no terms at all. In those
    cases the fit is retried with ``max_df=1.0`` (no pruning).

    Returns:
        ``(vectorizer, matrix)`` — the fitted vectorizer and the matrix
        returned by ``fit_transform``.

    Raises:
        ValueError: if fitting fails even without pruning (e.g. the texts
            contain no tokens).
    """
    last_error = None
    for max_df in (0.9, 1.0):
        vectorizer = TfidfVectorizer(
            lowercase=True, ngram_range=(1, 2), max_df=max_df, min_df=1
        )
        try:
            return vectorizer, vectorizer.fit_transform(texts)
        except ValueError as err:
            last_error = err
    raise last_error


def build_index_from_corpus(corpus_text: str) -> PassageIndex:
    """Parse *corpus_text*, split it into passages and fit a TF-IDF index.

    Args:
        corpus_text: Raw content of the notes textbox.

    Returns:
        A ``PassageIndex`` whose ``ids``/``texts`` are parallel lists of
        doc_ids and passages. When no passage survives splitting, a single
        ``"placeholder"`` passage is indexed so the vectorizer still has a
        document to fit on.
    """
    notes = parse_corpus_to_notes(corpus_text)
    passages = build_corpus_passages(notes)
    ids = [doc_id for doc_id, _ in passages]
    texts = [passage for _, passage in passages]
    if not texts:
        # Ensure the vectorizer has at least one document.
        ids, texts = ["placeholder"], ["placeholder"]
    vec, X = _fit_tfidf(texts)
    return PassageIndex(vec, X, ids, texts)
| # -------------------------- | |
| # MCQ scoring | |
| # -------------------------- | |
@dataclass
class OptionScore:
    """Score of one answer option together with its supporting passages.

    Instances are created positionally as
    ``OptionScore(label, text, total, evidence)``; the ``@dataclass``
    decorator generates that constructor (without it the class accepts no
    arguments).
    """

    # Option label shown to the user, e.g. "A".
    label: str
    # The option's text.
    text: str
    # Aggregate score used to rank the options.
    total: float
    # (similarity, passage) pairs that produced `total`.
    evidence: List[Tuple[float, str]]
def score_option(index: PassageIndex, question: str, option_text: str, top_k=3) -> Tuple[float, List[Tuple[float, str]]]:
    """Score one answer option against the indexed passages.

    The question and the option are joined into a single query, transformed
    with the index's vectorizer, and compared to every passage by cosine
    similarity.

    Returns:
        ``(total, evidence)`` where ``evidence`` holds the ``top_k`` most
        similar passages as ``(similarity, passage)`` pairs, best first, and
        ``total`` is the mean of those similarities (``0.0`` when there is
        no evidence).
    """
    query_text = " ".join((question, option_text)).strip()
    query_vec = index.vectorizer.transform([query_text])
    similarities = cosine_similarity(query_vec, index.X)[0]

    # Passage rows ordered from most to least similar.
    ranked_rows = np.argsort(similarities)[::-1]
    evidence = [
        (float(similarities[row]), index.texts[row])
        for row in ranked_rows[:top_k]
    ]
    if not evidence:
        return 0.0, evidence

    mean_similarity = float(np.mean([score for score, _ in evidence]))
    return mean_similarity, evidence
def answer_mcq(index: PassageIndex, question: str, options: List[Tuple[str, str]], top_k=3) -> OptionScore:
    """Pick the best-supported option for a multiple-choice question.

    Every ``(label, text)`` option is scored with ``score_option``; the one
    with the highest total wins, and earlier options win ties. With no
    options a dummy ``OptionScore("-", "(no options)", 0.0, [])`` is
    returned.
    """
    candidates = [
        OptionScore(label, text, *score_option(index, question, text, top_k=top_k))
        for label, text in options
    ]
    if not candidates:
        return OptionScore("-", "(no options)", 0.0, [])

    # A stable descending sort keeps the first of several equal totals in front.
    ranked = sorted(candidates, key=lambda cand: cand.total, reverse=True)
    return ranked[0]
| # -------------------------- | |
| # Gradio UI logic | |
| # -------------------------- | |
def rebuild_index(corpus_text: str) -> Tuple["PassageIndex", str]:
    """Re-index the notes textbox content (handler for the rebuild button).

    Returns:
        ``(index, status)`` — the freshly built index and a status line
        reporting how many passages were indexed.
    """
    idx = build_index_from_corpus(corpus_text)
    # NOTE(review): the leading glyph was restored from a mis-decoded
    # character; confirm that a check mark is the intended symbol.
    return idx, f"✅ Corpus indexed (passages = {len(idx.texts)})"
def run_mcq(idx: "PassageIndex", question: str, opts: List[str], top_k: int):
    """Answer a multiple-choice question and format the result for the UI.

    Args:
        idx: Index to search.
        question: The question text.
        opts: Option texts in display order. Blank entries are skipped but
            still consume a letter, so the labels match the input boxes.
        top_k: Number of evidence passages averaged per option.

    Returns:
        ``(headline, evidence_text)``. When the question is blank or no
        option is filled in, a prompt message and an empty string.
    """
    # Labels are derived from the position (A, B, C, ...), the same way the
    # demo tab labels its options, so more than six options cannot overrun a
    # fixed label list.
    pairs = [
        (chr(ord("A") + i), o.strip())
        for i, o in enumerate(opts or [])
        if o and o.strip()
    ]
    if not (question or "").strip() or not pairs:
        return "Enter a question and at least one option.", ""

    best = answer_mcq(idx, question, pairs, top_k=top_k)

    # NOTE(review): the arrow and bullet glyphs were restored from
    # mis-decoded characters; confirm the arrow is the intended symbol.
    head = f"→ PICK: {best.label}) {best.text} [score={best.total:.4f}]"
    ev = "\n".join(f" • ({s:.4f}) {p}" for s, p in best.evidence)
    return head, f"Evidence passages:\n{ev}"
# Index over the built-in corpus, built once at import time. It seeds the
# per-session state and serves the demo tab; run_mcq only reads from it.
_DEFAULT_INDEX = build_index_from_corpus(DEFAULT_CORPUS_TEXT)

# Sample questions for the demo tab: key -> (question, options).
_DEMO_MCQS = {
    "Q1": (
        "Given elevated drought risk and limited cash, which short term strategy is best?",
        [
            "Purchase a large quantity of supplemental feed immediately",
            "Wait two months and hope for rain",
            "Lease additional pasture short term to relieve pressure",
            "Increase stocking rate to maintain output",
        ],
    ),
    "Q2": (
        "Under straight line depreciation, what is the annual depreciation formula?",
        [
            "Annual Depreciation = Cost / Life",
            "Annual Depreciation = (Cost − Salvage) / Life",
            "Annual Depreciation = (Cost − Salvage) × Life",
            "Annual Depreciation equals Ending BV − Beginning BV",
        ],
    ),
    "Q3": (
        "During slow pasture growth in a rotational system, which action is most conservative?",
        [
            "Reduce stocking rate by around 10–15%",
            "Wait for rain without changing anything",
            "Graze all paddocks continuously to keep animals fed",
            "Overseed high traffic paddocks only",
        ],
    ),
}

with gr.Blocks(title="Study Buddy — Transparent MCQ Helper") as demo:
    gr.Markdown("# Study Buddy — Transparent MCQ Helper\nA tiny TF-IDF + cosine similarity bot. Paste your notes, build an index, then ask a multiple-choice question. The bot shows the top evidence it used.")

    # Tab 1: edit the notes and rebuild the index held in session state.
    with gr.Tab("Corpus"):
        corpus_text = gr.Textbox(value=DEFAULT_CORPUS_TEXT, lines=20, label="Your notes (editable)")
        build_btn = gr.Button("Rebuild Corpus Index")
        status = gr.Markdown()
        state_idx = gr.State(_DEFAULT_INDEX)
        build_btn.click(fn=rebuild_index, inputs=corpus_text, outputs=[state_idx, status])

    # Tab 2: ask a question with up to six options against the current index.
    with gr.Tab("Ask MCQ"):
        q = gr.Textbox(lines=3, label="Question")
        with gr.Row():
            oA = gr.Textbox(label="Option A")
            oB = gr.Textbox(label="Option B")
        with gr.Row():
            oC = gr.Textbox(label="Option C")
            oD = gr.Textbox(label="Option D")
        with gr.Accordion("Add more options (optional)", open=False):
            with gr.Row():
                oE = gr.Textbox(label="Option E")
                oF = gr.Textbox(label="Option F")
        topk = gr.Slider(1, 5, value=3, step=1, label="Evidence passages to average (k)")
        go = gr.Button("Answer")
        pick = gr.Markdown()
        evidence = gr.Markdown()

        def _run(idx, q, a, b, c, d, e, f, k):
            """Collect the six option boxes into a list and answer the MCQ."""
            # The slider value is cast to int before it is used as top_k.
            return run_mcq(idx, q, [a, b, c, d, e, f], int(k))

        go.click(_run, inputs=[state_idx, q, oA, oB, oC, oD, oE, oF, topk], outputs=[pick, evidence])

    # Tab 3: canned questions answered against the built-in corpus.
    with gr.Tab("Demo (3 sample MCQs)"):
        gr.Markdown("Click a button to load a sample Q with options.")
        demo_out = gr.Markdown()

        def load_demo(which):
            """Answer the sample question *which* and render it as Markdown.

            Any key other than "Q1" or "Q2" selects the third sample.
            """
            q, opts = _DEMO_MCQS.get(which, _DEMO_MCQS["Q3"])
            head, ev = run_mcq(_DEFAULT_INDEX, q, opts, top_k=3)
            listing = "\n".join(f"- {chr(65 + i)}) {opt}" for i, opt in enumerate(opts))
            return f"**Q:** {q}\n\n" + listing + f"\n\n{head}\n\n{ev}"

        with gr.Row():
            b1 = gr.Button("Load demo Q1")
            b2 = gr.Button("Load demo Q2")
            b3 = gr.Button("Load demo Q3")
        b1.click(lambda: load_demo("Q1"), outputs=demo_out)
        b2.click(lambda: load_demo("Q2"), outputs=demo_out)
        b3.click(lambda: load_demo("Q3"), outputs=demo_out)

if __name__ == "__main__":
    demo.launch()