File size: 9,686 Bytes
a8cf7aa
 
 
 
 
 
 
 
 
 
 
 
 
 
6798fdd
a8cf7aa
 
 
 
6798fdd
 
 
a8cf7aa
 
 
20a6a2d
a8cf7aa
 
 
 
 
 
 
20a6a2d
a8cf7aa
 
 
 
 
 
 
20a6a2d
a8cf7aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d6754e1
 
 
 
a8cf7aa
 
 
d6754e1
d9cd3f3
d6754e1
 
d9cd3f3
 
 
d6754e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a8cf7aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6798fdd
 
 
 
 
 
 
 
 
 
 
a8cf7aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
"""
ui_strings.py — All user-facing copy for the v1 Query tab.

Kept in one place so wording can be iterated on without touching app.py.
"""

from __future__ import annotations


# ── Header ─────────────────────────────────────────────────────────────
# Header copy: the app title and the tagline that accompanies it.

APP_TITLE: str = "Ask your school a question."

APP_TAGLINE: str = (
    "Ask a question in plain English "
    "about attendance, grades, or discipline. "
    "We'll write the query "
    "and bring you the answer."
)

# ── Domain sections (starter questions) ────────────────────────────────
# One entry per topic area. Every entry is a dict with exactly two keys:
#   "title"     – the section heading
#   "questions" – natural-language starter questions, phrased the way a
#                 non-technical user (secretary, principal, VP) would
#                 actually ask them.

# (heading, starter questions) pairs, in the order the sections are listed.
_STARTER_QUESTIONS_BY_HEADING: tuple[tuple[str, list[str]], ...] = (
    (
        "Chronic absenteeism trends",
        [
            "How many students were chronically absent this year?",
            "Which school has the worst attendance?",
            "Show me the chronically absent students at Lincoln Elementary.",
        ],
    ),
    (
        "Discipline instances by school",
        [
            "How many discipline incidents happened at Washington Middle this year?",
            "What kinds of incidents are most common at Jefferson High?",
            "Show me total suspension days by school.",
        ],
    ),
    (
        "Grades and academic comparisons",
        [
            "What's the average GPA for chronically absent students?",
            "How do GPAs compare across schools this year?",
            "Which grade level has the highest average GPA?",
        ],
    ),
)

DOMAIN_SECTIONS: list[dict] = [
    {"title": heading, "questions": prompts}
    for heading, prompts in _STARTER_QUESTIONS_BY_HEADING
]


# ── Input helpers ──────────────────────────────────────────────────────
# Helper copy that accompanies the question input.

# Lists the subject areas a question may cover.
INPUT_HELPER: str = (
    "You can ask about: "
    "schools, school years (like 2023-2024), grade levels, students, "
    "attendance, discipline incidents, and grades."
)

# Prompt for the case where no question has been entered.
INPUT_PROMPT_EMPTY: str = (
    "Type a question above, "
    "or pick one from the suggestions to get started."
)

# HTML fragment. The leading link calls openAboutModal() through a
# javascript: URL; that function is not defined in this module.
FIRST_VISIT_NUDGE: str = (
    '<a href="javascript:openAboutModal()" class="first-visit-link">'
    "First time here?</a>"
    " Type a question in plain English, "
    "or click any of the suggestions above to see how it works."
)

# ── About / FAQ modal content (HTML) ─────────────────────────────────
# These strings may carry inline HTML (e.g. <em>), so they are markup
# fragments rather than plain text.
ABOUT_MODAL_TITLE: str = "About Local First Education Data Framework"

# Opening paragraph: what the tool is, in two sentences.
ABOUT_MODAL_INTRO: str = (
    "Local First Education Data Framework (LFED) is a local-first education "
    "data assistant. "
    "It lets school admins ask plain-English questions about district data "
    "and get answers instantly — without sending anything to the cloud."
)

# Question → SQL → result walkthrough; the sample question is wrapped in <em>.
ABOUT_MODAL_HOW_IT_WORKS: str = (
    "You type a question like "
    "<em>“What’s the average GPA for chronically absent students in 2023-2024?”</em> "
    "A language model running on this machine turns it into a read-only SQL query, "
    "runs it against an in-memory DuckDB database, "
    "and returns the result as a sentence and a table."
)

# Privacy statement for the modal.
ABOUT_MODAL_PRIVACY: str = (
    "Everything stays on this machine. "
    "Your questions, the generated query, and the results "
    "are not sent anywhere, stored, or logged. "
    "When you close the page, the conversation is gone."
)

# Capability bullets for the About modal. Items may contain inline HTML
# (<strong>). Multi-fragment items are parenthesised so that a missing comma
# cannot silently merge two bullets.
ABOUT_MODAL_WHAT_IT_IS_BULLETS: list[str] = [
    (
        "Ask attendance, discipline, grade, enrollment, and demographic "
        "questions in plain English."
    ),
    "Get a plain-English summary plus a sortable table of results.",
    (
        "Inspect the generated SQL with "
        "<strong>Show me how this was computed</strong>."
    ),
    "Download any result table to CSV.",
    (
        "Run entirely on your own hardware — "
        "no API keys or internet required (local build)."
    ),
]

# Limitation bullets for the About modal: what the tool does not do.
ABOUT_MODAL_WHAT_IT_ISNT_BULLETS: list[str] = [
    "It does not change any data — all queries are read-only.",
    (
        "It is not a replacement for your student information system; "
        "it is a question-answering layer on top."
    ),
    "It does not know individual students by name — only by anonymized ID.",
    "It does not store questions between sessions.",
]

# FAQ entries for the About modal. Each entry is a dict with a "q"
# (question) and an "a" (answer) string. Answers are wrapped roughly one
# sentence or clause per fragment to make wording edits easier to diff.
ABOUT_MODAL_FAQ: list[dict[str, str]] = [
    {
        "q": "What data can I ask about?",
        "a": (
            "Five synthetic school tables: students, enrollment, attendance, "
            "discipline, and grades. "
            "You can ask about schools, school years, grade levels, demographics, "
            "absences, incidents, and academic performance."
        ),
    },
    {
        "q": "Why do I sometimes need to name a school or year?",
        "a": (
            "The model is fine-tuned on school-data questions, "
            "but it still needs enough context to write a safe, correct query. "
            "Naming a school and school year usually produces the most reliable results."
        ),
    },
    {
        "q": "What model is running?",
        "a": (
            "This demo runs a fine-tuned Qwen2.5-Coder-14B "
            "(QLoRA adapter on a bnb-4-bit base) "
            "via Transformers on Hugging Face ZeroGPU. "
            "The local build uses the same fine-tune as a GGUF in llama.cpp."
        ),
    },
    {
        "q": "Can I trust the SQL it writes?",
        "a": (
            "Every generated query is validated: "
            "it must be SELECT-only, reference known tables and columns, "
            "and avoid forbidden tokens. "
            "If validation fails, you get a clear message instead of a result."
        ),
    },
    {
        "q": "Can I use my own real school data?",
        "a": (
            "This Space runs on deterministic synthetic seed data. "
            "The local-first build can be pointed at your own DuckDB or Parquet files "
            "while keeping the same read-only guardrails."
        ),
    },
]

# Label for the modal's close control.
ABOUT_MODAL_CLOSE: str = "Close"

# Rephrasing tip for when the model misreads a question.
ABOUT_MODAL_HINT: str = (
    "Tip: If the model misinterprets a question, "
    "rephrase it and include a specific school name and school year."
)


# ── Summary templates ──────────────────────────────────────────────────
# Plain-English one-liners used as the headline above the result table.
# Values use brace placeholders (str.format syntax); the placeholder names
# each template expects are noted beside it.
# NOTE(review): "by_school" and "generic" are worded in the plural only, so
# n == 1 reads "1 schools" / "1 rows" unless the caller special-cases it.

SUMMARY_TEMPLATES: dict[str, str] = {
    # expects: value
    "single_value": "The answer is **{value}**.",
    # expects: label, value
    "single_pair": "**{label}**: {value}.",
    # expects: n
    "by_school": "Here's the breakdown across {n} schools.",
    # expects: n
    "generic": "Here are the {n} rows that match.",
}


# ── Error rephrasings ──────────────────────────────────────────────────
# Map substrings of raw error messages → user-friendly message.
# Order matters: more specific markers first. Dicts preserve insertion
# order, so a first-match scan over .items() tries the entries top to bottom.
#
# The entries are therefore arranged from narrowest to broadest marker:
# a raw message that mentions both a specific cause ("forbidden",
# "missing from clause") and a broad category ("validation", "model")
# resolves to the specific explanation instead of the generic one.
# NOTE(review): assumes the consumer returns the first marker found in
# iteration order, and (since every key is lowercase) that it lowercases the
# raw message before matching — confirm against the caller.

ERROR_REPHRASINGS: dict[str, str] = {
    # Narrowest marker: a multi-word phrase.
    "missing from clause": (
        "I couldn't figure out where to look. "
        "Try naming what you want to see and which school or year."
    ),
    # A specific rejection reason; must win over the broader "validation".
    "forbidden": (
        "That question would ask for something I don't allow (like changing data). "
        "This tool is read-only, so try asking a question instead."
    ),
    "timeout": (
        "That took too long to look up. "
        "Try narrowing your question to a specific school or school year."
    ),
    # Broad categories last, so they only catch what nothing above matched.
    "validation": (
        "I couldn't turn that question into a query I trust. "
        "Try rephrasing it more simply — for example, name a school and a school year."
    ),
    "model": (
        "I'm having trouble understanding that question. "
        "Try rephrasing it the way you'd say it out loud."
    ),
}


# ── Result UI copy ─────────────────────────────────────────────────────

# Label for the control that reveals the generated SQL.
SQL_DISCLOSURE_LABEL: str = "Show me how this was computed"

# Template with a single brace placeholder: summary.
PREVIOUS_RIBBON_TEMPLATE: str = "Your previous answer: {summary}"


# ── Footer + explainer ─────────────────────────────────────────────────

# One-sentence summaries: what the tool is, and what it is not.
WHAT_THIS_IS_ONE_LINER: str = (
    "A way to ask questions about your school's data "
    "in plain English, that runs on your own machine."
)

WHAT_THIS_IS_NOT_ONE_LINER: str = (
    "It's not connected to the internet, "
    "it doesn't store your questions between sessions, "
    "and it isn't a replacement for your student information system."
)

# Bulleted lists revealed by the 'Read the full explainer' footer button.
# One string per bullet.
WHAT_THIS_IS: list[str] = [
    "A way to ask questions about your school's data in plain English.",
    "A read-only tool — it answers questions, it doesn't change anything.",
    "Something you can run on a single school computer, no internet required.",
    "A model that's been fine-tuned on common school-data questions.",
]

WHAT_THIS_IS_NOT: list[str] = [
    "It does not connect to the internet or send data anywhere.",
    "It does not store your questions between sessions.",
    "It does not replace your student information system — it's a layer on top.",
    "It does not know about individual students by name, only by ID.",
]

# Longer walkthrough of the question → query → answer flow. The quoted
# label is wrapped in curly double quotes, written as \u201c / \u201d escapes.
HOW_IT_WORKS: str = (
    "You ask a question in plain English. "
    "A language model (running on this machine) translates your question "
    "into a database query, and a small in-memory database runs it. "
    "You see the answer as a sentence and a table. "
    "If you want to see exactly what was looked up, "
    "click \u201cShow me how this was computed\u201d under any result."
)

# Longer privacy statement, one sentence per fragment.
PRIVACY_EXPLAINER: str = (
    "Everything happens on the machine you're using right now. "
    "Your questions, the generated query, and the results never leave this network. "
    "No accounts, no analytics, no telemetry. "
    "When you close this page, the conversation is gone."
)