Files changed (1) hide show
  1. app.py +90 -116
app.py CHANGED
@@ -6,173 +6,166 @@ from PIL import Image
6
  import pytesseract
7
  import fitz
8
  import pathlib
9
-
 
10
 
11
  def resolve_token(ui_token):
12
  if ui_token and ui_token.strip():
13
  return ui_token.strip()
14
-
15
- env_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
16
  if env_token:
17
  return env_token.strip()
18
-
19
  return None
20
 
21
-
22
  SUPPORTED_EXT = (
23
  ".pdf", ".docx", ".txt", ".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff"
24
  )
25
 
26
-
27
  def extract_text_from_file(filepath):
28
  if not filepath:
29
  return ""
30
-
31
  if isinstance(filepath, dict) and "name" in filepath:
32
  filepath = filepath["name"]
33
-
34
  ext = pathlib.Path(filepath).suffix.lower()
35
-
36
  try:
37
  if ext == ".pdf":
38
  doc = fitz.open(filepath)
39
- return "\n".join([page.get_text() for page in doc])
40
-
41
  elif ext == ".docx":
42
  doc = Document(filepath)
43
- return "\n".join([p.text for p in doc.paragraphs])
44
-
45
  elif ext == ".txt":
46
  with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
47
  return f.read()
48
-
49
  elif ext in (".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff"):
50
  img = Image.open(filepath)
51
  return pytesseract.image_to_string(img)
52
-
53
  else:
54
  return "Unsupported file type"
55
-
56
  except Exception as e:
57
  return f"Error reading file: {str(e)}"
58
 
59
-
60
  MODELS = {
61
- "Qwen 3.5 0.8B (Fastest)": "Qwen/Qwen3.5-0.8B",
62
- "DeepSeek R1 1.5B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
63
- "Qwen 3.5 2B (Balanced Speed)": "Qwen/Qwen3.5-2B",
64
- "Gemma 4 5B": "google/gemma-4-E2B-it",
65
- "Qwen 3.5 4B": "Qwen/Qwen3.5-4B",
66
- "Gemma 4 9B": "google/gemma-4-9b-it",
67
- "DeepSeek Qwen 7B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
68
- "DeepSeek R1 Llama 8B": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
69
- "Gemma 8B": "google/gemma-4-E4B-it",
70
- "Qwen 3.5 9B (Balanced Thinking)": "Qwen/Qwen3.5-9B",
71
- "Qwen 3.6 27B (Best)": "Qwen/Qwen3.6-27B",
72
  }
73
 
74
  ALL_MODEL_NAMES = list(MODELS.keys())
75
 
76
- SYSTEM_MSG = """You are a precise educational assistant.
77
- Follow instructions strictly.
78
- Use simple language.
79
- Be structured.
80
- Avoid repetition.
81
- Output markdown.
82
- """
83
-
84
 
85
- def make_prompts(topic):
86
  base = f"""
87
- Topic:
88
- {topic}
89
- Instructions:
90
- - Be clear and concise
91
- - Use bullet points
92
- - Avoid long paragraphs
 
 
93
  """
94
-
95
  return {
96
  "lesson": base + """
97
- Create a lesson plan with:
98
- 1. Objectives (3-5 points)
99
- 2. Short introduction
100
- 3. Key concepts (bullet points)
101
- 4. 2 simple examples
102
- 5. 1 case study (short)
103
- 6. 1 classroom activity
104
- 7. 5 assessment questions
105
  """,
106
  "qa": base + """
107
- Generate 10 short exam questions with answers.
108
- Keep answers brief (2-3 lines each).
 
 
 
 
 
109
  """,
110
  "mcq": base + """
111
- Generate 10 MCQs:
112
- - 4 options each
113
- - mark correct answer
 
 
 
 
114
  """,
115
  "summary": base + """
116
- Write a 200-word summary.
117
- Use simple sentences.
 
 
 
118
  """
119
  }
120
 
121
-
122
  def stream_llm(model_id, prompt, hf_token):
123
  if not hf_token:
124
  yield "โŒ No Hugging Face API key found."
125
  return
126
-
127
  try:
128
- client = InferenceClient(model=model_id, token=hf_token)
129
-
130
- stream = client.text_generation(
131
- SYSTEM_MSG + "\n\n" + prompt,
132
- max_new_tokens=1024,
 
 
 
133
  temperature=0.7,
134
  top_p=0.95,
135
- repetition_penalty=1.1,
136
  stream=True,
137
  )
138
-
139
  partial = ""
140
  for chunk in stream:
141
- if hasattr(chunk, "token"):
142
- token_text = chunk.token.text
143
- else:
144
- token_text = str(chunk)
145
-
146
- partial += token_text
147
- yield partial
148
-
149
  except Exception as e:
150
- err = str(e)
151
- if "401" in err or "Unauthorized" in err:
152
- yield f"โŒ Invalid Hugging Face API Key {err}"
153
- else:
154
- yield f"โŒ API Error:\n{err}"
155
-
156
 
157
  def generate_content(text, file, model_label, token):
 
158
  file_text = extract_text_from_file(file) if file else ""
159
  syllabus = (text + "\n\n" + file_text).strip()
160
-
161
  if not syllabus:
162
  yield ("Provide topic or file", "", "", "")
163
  return
164
-
165
  prompts = make_prompts(syllabus)
166
  model_id = MODELS[model_label]
167
-
168
  outputs = ["", "", "", ""]
169
  keys = ["lesson", "qa", "mcq", "summary"]
170
-
 
 
 
 
 
171
  for i, key in enumerate(keys):
172
- for chunk in stream_llm(model_id, prompts[key], resolve_token(token)):
173
- outputs[i] = chunk
174
- yield tuple(outputs)
175
-
 
 
 
176
 
177
  CSS = """
178
  body,.gradio-container{
@@ -180,48 +173,29 @@ body,.gradio-container{
180
  }
181
  """
182
 
183
- with gr.Blocks(css=CSS) as demo:
184
  gr.Markdown("# ๐ŸŽ“ AI Study Material Generator (Streaming)")
185
-
186
  with gr.Row():
187
  with gr.Column():
188
- text_input = gr.Textbox(
189
- placeholder="Paste syllabus or topic",
190
- lines=6
191
- )
192
- file_input = gr.File(label="Upload syllabus file")
193
-
194
  with gr.Column():
195
- model_selector = gr.Dropdown(
196
- choices=ALL_MODEL_NAMES,
197
- value=ALL_MODEL_NAMES[0],
198
- label="Model"
199
- )
200
-
201
- token_box = gr.Textbox(
202
- label="HF API Key (optional)",
203
- type="password"
204
- )
205
-
206
  btn = gr.Button("Generate")
207
-
208
  with gr.Tabs():
209
  with gr.TabItem("Lesson Plan"):
210
  lesson = gr.Markdown()
211
-
212
- with gr.TabItem("Q&A"):
213
  qa = gr.Markdown()
214
-
215
  with gr.TabItem("MCQ"):
216
  mcq = gr.Markdown()
217
-
218
  with gr.TabItem("Summary"):
219
  summary = gr.Markdown()
220
-
221
  btn.click(
222
  fn=generate_content,
223
  inputs=[text_input, file_input, model_selector, token_box],
224
  outputs=[lesson, qa, mcq, summary]
225
  )
226
 
227
- demo.launch()
 
6
  import pytesseract
7
  import fitz
8
  import pathlib
9
+ import threading
10
+ import time
11
 
12
def resolve_token(ui_token):
    """Return the Hugging Face API token to use, or ``None`` if there is none.

    A non-blank token typed into the UI always wins. Otherwise the
    environment is consulted, first ``hf`` and then the fallback names
    ``HF_TOKEN`` and ``HUGGINGFACEHUB_API_TOKEN``.

    Args:
        ui_token: Token string from the UI; may be ``None`` or blank.

    Returns:
        The stripped token, or ``None`` when every source is missing or
        contains only whitespace.
    """
    if ui_token and ui_token.strip():
        return ui_token.strip()

    for var_name in ("hf", "HF_TOKEN", "HUGGINGFACEHUB_API_TOKEN"):
        # Strip before testing so a whitespace-only variable counts as unset
        # instead of being returned as an empty string.
        candidate = (os.getenv(var_name) or "").strip()
        if candidate:
            return candidate

    return None
19
 
 
20
# Lowercase file extensions (with leading dot) accepted for syllabus uploads.
SUPPORTED_EXT = (
    ".pdf",
    ".docx",
    ".txt",
    ".png",
    ".jpg",
    ".jpeg",
    ".webp",
    ".bmp",
    ".tiff",
)
23
 
 
24
def extract_text_from_file(filepath):
    """Extract plain text from an uploaded file.

    The handler is chosen by file extension (case-insensitive): PDFs are read
    page by page with ``fitz``, ``.docx`` paragraph by paragraph with
    ``Document``, ``.txt`` as UTF-8 (undecodable bytes ignored), and image
    formats are run through ``pytesseract`` OCR.

    Args:
        filepath: Path to the file, or a dict holding the path under the
            ``"name"`` key. Any falsy value means "no file".

    Returns:
        The extracted text; ``""`` when no file was given;
        ``"Unsupported file type"`` for an unrecognised extension; or
        ``"Error reading file: <reason>"`` when reading fails. Failures are
        reported in-band and never raised.
    """
    if not filepath:
        return ""

    if isinstance(filepath, dict) and "name" in filepath:
        filepath = filepath["name"]

    try:
        # Inside the try so a value that is not path-like is reported like
        # any other read failure instead of escaping as a TypeError.
        ext = pathlib.Path(filepath).suffix.lower()

        if ext == ".pdf":
            # Context manager closes the document once the text is joined.
            with fitz.open(filepath) as doc:
                return "\n".join(page.get_text() for page in doc)

        if ext == ".docx":
            doc = Document(filepath)
            return "\n".join(p.text for p in doc.paragraphs)

        if ext == ".txt":
            with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                return f.read()

        if ext in (".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff"):
            # Close the image handle as soon as OCR has finished.
            with Image.open(filepath) as img:
                return pytesseract.image_to_string(img)

        return "Unsupported file type"

    except Exception as e:
        return f"Error reading file: {e}"
47
 
 
48
# Maps the label shown in the model dropdown to the model id sent to the
# inference API.
# NOTE(review): the ":nscale" / ":together" / ":novita" suffixes presumably
# select an inference provider, and the first entry has none -- confirm
# against the huggingface_hub documentation.
MODELS = {
    "DeepSeek-Qwen 1.5B (Fastest)": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    "Qwen 3 4B (Good Speed)": "Qwen/Qwen3-4B-Thinking-2507:nscale",
    "Gemma 3 4B": "google/gemma-3n-E4B-it:together",
    "DeepSeek-Llama 8B (Recommended)": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B:nscale",
    "Qwen 3 8B": "Qwen/Qwen3-8B:nscale",
    "OpenAI GPT OSS 120B": "openai/gpt-oss-120b:novita",
    "Gemma 4 31B (Best)": "google/gemma-4-31B-it:novita",
}

# Dropdown labels in declaration order.
ALL_MODEL_NAMES = list(MODELS)
59
 
60
# System prompt sent as the "system" message of every chat completion.
# NOTE(review): the diff view this was recovered from collapses leading
# whitespace; each rule is assumed to start at column 0 -- confirm.
SYSTEM_MSG = """You are an expert Academic Instructor and Curriculum Designer, highly familiar with SASTRA University's academic structure.
- STRICTLY adhere to the provided syllabus/topic. Do not invent concepts, formulas, or fake exam papers.
- Use clean, structured Markdown. Enforce exact formatting where specified.
- Keep outputs practical, concise, and immediately usable. Avoid overcomplication.
- SASTRA RESOURCES: Actively identify and recommend SASTRA-specific resources. Mention SASTRA Material Hub, Material Base, previous year CIA/End-Sem papers, and standard textbooks prescribed by SASTRA faculty. Frame teaching points and questions to match SASTRA's typical assessment patterns (CIA 1, CIA 2, End Semester).
- GENERAL RESOURCES: If a specific standard textbook or verified platform (like NPTEL, standard authors) is widely used for this topic, cite it accurately.
- NEVER hallucinate citations or paper questions. If unsure about a specific SASTRA paper, generalize based on typical university patterns while maintaining the SASTRA difficulty curve."""
 
67
 
68
def make_prompts(syllabus):
    """Build the four user prompts generated for one syllabus.

    Every prompt starts with the same preamble (the syllabus text plus the
    general requirements) followed by task-specific instructions.

    Args:
        syllabus: Syllabus or topic text. It is interpolated verbatim, so
            braces inside it are kept as-is.

    Returns:
        A dict with the keys ``"lesson"``, ``"qa"``, ``"mcq"`` and
        ``"summary"`` (in that order), each mapping to a full prompt string.
    """
    # NOTE(review): the diff view this was recovered from collapses leading
    # whitespace; prompt lines are assumed to start at column 0 -- confirm.
    base = f"""
Academic Context:
{syllabus}

General Requirements:
- Maintain strict alignment with the syllabus
- Format strictly in Markdown
- Keep explanations concise, practical, and exam/teaching ready
- Cite SASTRA-specific resources (Material Hub, Material Base, previous papers) or widely recognized academic platforms
"""

    # Task-specific instructions; plain (non-f) strings, so the literal
    # [placeholders] below are never interpolated.
    tasks = {
        "lesson": """
Generate a period-wise teaching schedule (Lesson Plan):
- Assume 10-12 periods of 45-50 minutes each.
- Output MUST be a single Markdown table with exactly these columns:
| Period | Topic/Subtopic | Key Teaching Points | SASTRA Resource/Activity Hint |
- In the 'SASTRA Resource/Activity Hint' column, specify relevant SASTRA Material Hub modules, standard prescribed book chapters, or typical CIA question types for that topic.
- Keep descriptions brief. Focus on logical progression.
- Reserve the final period for SASTRA end-semester pattern revision.
- Do not add any introductory or concluding text outside the table.
""",
        "qa": """
Generate 8 exam-style short questions with precise answers:
- Mirror SASTRA CIA and End-Semester phrasing and difficulty.
- Cover definitions, direct applications, derivations, and 1-step reasoning.
- Format strictly:
**Q1.** [Question]
**A:** [Concise answer, max 2 lines]
- If the question matches a known pattern from SASTRA Material Base or a prescribed textbook exercise, note it briefly at the end of the answer.
""",
        "mcq": """
Generate 8 multiple-choice questions:
- 4 options (A-D), one clearly correct, three plausible distractors.
- Format strictly:
**Q1.** [Stem]
A) ... B) ... C) ... D) ...
**Correct:** [Letter] | **Why:** [1-line rationale]
- Focus on core concepts, formula application, and common student mistakes seen in SASTRA exams.
""",
        "summary": """
Produce a rapid-revision summary:
- List 5 core concepts/formulas
- Provide 3 high-yield takeaways for SASTRA exams
- Include 1 quick self-check question based on typical CIA patterns
- Keep strictly under 150 words. Use bullet points only. No paragraphs.
""",
    }

    return {key: base + instructions for key, instructions in tasks.items()}
116
 
 
117
def stream_llm(model_id, prompt, hf_token):
    """Stream one chat completion, yielding the accumulated text so far.

    Each yielded value is the whole response received up to that point (not
    just the newest fragment), so a consumer can simply display the latest
    value.

    Args:
        model_id: Model identifier passed to the chat-completion call.
        prompt: User message; ``SYSTEM_MSG`` is sent as the system message.
        hf_token: Hugging Face API token. When falsy, a single explanatory
            message is yielded and no request is made.

    Yields:
        The growing response text. If the request fails, a final value
        containing any text already received followed by the error message.
        Errors are reported in-band and never raised.
    """
    if not hf_token:
        yield "❌ No Hugging Face API key found."
        return

    # Defined before the try so the error handler can keep what was streamed.
    partial = ""
    try:
        client = InferenceClient(token=hf_token)
        stream = client.chat.completions.create(
            model=model_id,
            messages=[
                {"role": "system", "content": SYSTEM_MSG},
                {"role": "user", "content": prompt},
            ],
            max_tokens=2048,
            temperature=0.7,
            top_p=0.95,
            stream=True,
        )

        for chunk in stream:
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta
            # Content can be absent or None on some chunks; skip those.
            token = getattr(delta, "content", None) if delta else None
            if token:
                partial += token
                yield partial

    except Exception as e:
        error = f"❌ API Error:\n{e}"
        # Do not discard text that already reached the user when the stream
        # breaks part-way through.
        yield f"{partial}\n\n{error}" if partial else error
 
 
 
 
 
143
 
144
def generate_content(text, file, model_label, token):
    """Generate lesson plan, Q&A, MCQs and summary concurrently.

    The four prompts are streamed in parallel worker threads; this generator
    polls their progress and yields a snapshot whenever something changed.

    Args:
        text: Syllabus/topic text from the UI; ``None`` is treated as empty.
        file: Uploaded file (anything ``extract_text_from_file`` accepts),
            or a falsy value when nothing was uploaded.
        model_label: A key of ``MODELS``.
        token: API token typed in the UI; resolved via ``resolve_token``.

    Yields:
        4-tuples ``(lesson, qa, mcq, summary)`` holding the text produced so
        far. Input problems are reported as a message in the first slot.
    """
    file_text = extract_text_from_file(file) if file else ""

    # extract_text_from_file reports failures in-band; show them to the user
    # instead of sending the error text to the model as if it were a syllabus.
    if file_text == "Unsupported file type" or file_text.startswith("Error reading file: "):
        yield (file_text, "", "", "")
        return

    syllabus = ((text or "") + "\n\n" + file_text).strip()
    if not syllabus:
        yield ("Provide topic or file", "", "", "")
        return

    model_id = MODELS.get(model_label)
    if model_id is None:
        yield (f"Unknown model: {model_label}", "", "", "")
        return

    hf_token = resolve_token(token)
    prompts = make_prompts(syllabus)

    keys = ("lesson", "qa", "mcq", "summary")
    outputs = [""] * len(keys)
    lock = threading.Lock()

    def run_stream(i, key):
        # Each worker owns exactly one slot of `outputs`.
        for chunk in stream_llm(model_id, prompts[key], hf_token):
            with lock:
                outputs[i] = chunk

    # Daemon threads: a stream whose consumer has gone away must not keep
    # the process alive at shutdown.
    threads = [
        threading.Thread(target=run_stream, args=(i, key), daemon=True)
        for i, key in enumerate(keys)
    ]
    for t in threads:
        t.start()

    last = None
    while any(t.is_alive() for t in threads):
        time.sleep(0.1)
        with lock:
            snapshot = tuple(outputs)
        # Only push an update when a worker actually produced new text.
        if snapshot != last:
            last = snapshot
            yield snapshot

    # Workers may have written between the last poll and their exit.
    with lock:
        final = tuple(outputs)
    if final != last:
        yield final
169
 
170
  CSS = """
171
  body,.gradio-container{
 
173
  }
174
  """
175
 
176
# Gradio UI: inputs on top, a Generate button, and one tab per generated
# document.
# NOTE(review): the diff view this was recovered from drops indentation, so
# the nesting below (in particular the button sitting outside the input row)
# is inferred -- confirm against the real file.
with gr.Blocks() as demo:
    gr.Markdown("# 🎓 AI Study Material Generator (Streaming)")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(placeholder="Paste syllabus or topic", lines=6)
            file_input = gr.File(label="Upload syllabus file", file_types=list(SUPPORTED_EXT))

        with gr.Column():
            model_selector = gr.Dropdown(
                choices=ALL_MODEL_NAMES,
                value="DeepSeek-Llama 8B (Recommended)",
                label="Model",
            )
            token_box = gr.Textbox(label="HF API Key (optional)", type="password")

    btn = gr.Button("Generate")

    with gr.Tabs():
        with gr.TabItem("Lesson Plan"):
            lesson = gr.Markdown()

        with gr.TabItem("Question and Answer"):
            qa = gr.Markdown()

        with gr.TabItem("MCQ"):
            mcq = gr.Markdown()

        with gr.TabItem("Summary"):
            summary = gr.Markdown()

    # generate_content is a generator, so each yielded 4-tuple updates the
    # four Markdown panes in this order.
    btn.click(
        fn=generate_content,
        inputs=[text_input, file_input, model_selector, token_box],
        outputs=[lesson, qa, mcq, summary],
    )

# NOTE(review): passing css to launch() (rather than to gr.Blocks) needs a
# Gradio release whose launch() accepts it -- confirm the pinned version.
demo.launch(css=CSS)