Upload folder using huggingface_hub

#2
Files changed (2) hide show
  1. app.py +430 -92
  2. requirements.txt +11 -4
app.py CHANGED
@@ -1,92 +1,430 @@
1
- import gradio as gr
2
- import torch
3
- import os
4
- from transformers import pipeline
5
- from huggingface_hub import login
6
-
7
- login(token=os.getenv("hf_tok"))
8
-
9
- MODEL_NAME = "google/gemma-2b-it"
10
- generator = pipeline(
11
- "text-generation",
12
- model=MODEL_NAME,
13
- torch_dtype=torch.float16,
14
- device_map="auto",
15
- )
16
-
17
- def build_prompt(syllabus, level, output_type):
18
- instruction = ""
19
- if output_type == "Lesson Material":
20
- instruction = f"""Create detailed lesson material based on the syllabus below.
21
- Level: {level}
22
-
23
- Syllabus:
24
- {syllabus}
25
-
26
- Include:
27
- - Concept explanations
28
- - Key ideas
29
- - Examples
30
- - Classroom discussion points"""
31
- elif output_type == "Questions and Answers":
32
- instruction = f"""Generate 10 descriptive university-level questions with answers.
33
- Level: {level}
34
-
35
- Syllabus:
36
- {syllabus}
37
-
38
- Format clearly as:
39
- Question
40
- Answer"""
41
- else:
42
- instruction = f"""Generate 10 multiple choice questions.
43
- Level: {level}
44
-
45
- Syllabus:
46
- {syllabus}
47
-
48
- Format:
49
- Question
50
- A)
51
- B)
52
- C)
53
- D)
54
- Correct Answer:"""
55
-
56
- messages = [
57
- {"role": "system", "content": "You are an expert university teacher."},
58
- {"role": "user", "content": instruction}
59
- ]
60
- return generator.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
61
-
62
- def generate_content(syllabus, level, output_type):
63
- prompt = build_prompt(syllabus, level, output_type)
64
-
65
- output = generator(
66
- prompt,
67
- max_new_tokens=800,
68
- temperature=0.7,
69
- top_p=0.9,
70
- do_sample=True,
71
- pad_token_id=generator.tokenizer.eos_token_id
72
- )
73
-
74
- text = output[0]["generated_text"]
75
- return text.replace(prompt, "").strip()
76
-
77
- with gr.Blocks() as demo:
78
- gr.Markdown("# AI Syllabus Teaching Assistant")
79
-
80
- syllabus = gr.Textbox(label="Paste your syllabus", lines=10)
81
- level = gr.Dropdown(["School", "Undergraduate", "Postgraduate"], value="Undergraduate", label="Level")
82
- output_type = gr.Radio(["Lesson Material", "Questions and Answers", "MCQs"], value="Lesson Material", label="Generate")
83
- generate_btn = gr.Button("Generate")
84
- output = gr.Textbox(label="AI Output", lines=20)
85
-
86
- generate_btn.click(
87
- generate_content,
88
- inputs=[syllabus, level, output_type],
89
- outputs=output
90
- )
91
-
92
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import threading
4
+ import pathlib
5
+ import base64
6
+ import urllib.parse
7
+
8
+ # ──────────────────────────────────────────────
9
+ # FILE TEXT EXTRACTION
10
+ # ──────────────────────────────────────────────
11
+
12
# File extensions the extractor understands (documents + OCR-able images).
SUPPORTED_EXT = (".pdf", ".docx", ".doc", ".txt",
                 ".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff")


def extract_text_from_file(filepath: str) -> str:
    """Extract plain text from PDF, DOCX, TXT, or image files.

    Args:
        filepath: Path to the uploaded file (may be empty/None).

    Returns:
        The extracted text, "" for a falsy path, or a "⚠️ ..." message
        string on failure — this function never raises, so the caller can
        merge the result straight into the prompt/preview.
    """
    if not filepath:
        return ""
    ext = pathlib.Path(filepath).suffix.lower()
    try:
        # ── PDF ──
        if ext == ".pdf":
            import fitz  # pymupdf
            # Use a context manager so the document handle is always closed
            # (the original opened it and never closed it — a leak when many
            # files are previewed in one session).
            with fitz.open(filepath) as doc:
                return "\n".join(page.get_text() for page in doc).strip()

        # ── Word (.docx / .doc) ──
        elif ext in (".docx", ".doc"):
            # NOTE(review): python-docx parses only OOXML (.docx); a legacy
            # binary .doc raises here and falls through to the error message.
            from docx import Document
            doc = Document(filepath)
            return "\n".join(p.text for p in doc.paragraphs if p.text.strip()).strip()

        # ── Plain text ──
        elif ext == ".txt":
            with open(filepath, "r", encoding="utf-8", errors="replace") as f:
                return f.read().strip()

        # ── Images (OCR via pytesseract) ──
        elif ext in (".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff"):
            try:
                import pytesseract
                from PIL import Image
                img = Image.open(filepath)
                return pytesseract.image_to_string(img).strip()
            except Exception as ocr_err:
                # OCR needs the native Tesseract binary, not just the pip
                # package — point the user at the installer.
                return (
                    f"⚠️ OCR failed: {ocr_err}\n"
                    "Ensure Tesseract-OCR is installed: https://github.com/UB-Mannheim/tesseract/wiki"
                )

        else:
            return f"⚠️ Unsupported file type: {ext}"

    except Exception as e:
        return f"⚠️ Could not extract text: {e}"
56
+
57
+
58
+ # ──────────────────────────────────────────────
59
+ # MODEL CONFIGURATIONS (all run via transformers pipeline)
60
+ # ──────────────────────────────────────────────
61
+ #
62
+ # Grouped by RAM tier so users can pick what fits their machine.
63
+ # Models are downloaded from HF Hub on first use and cached locally.
64
+
65
# Model catalogue shown in the dropdown.  Keys are the human-readable labels
# (with approximate RAM needs), values are the Hugging Face Hub repo IDs.
# Grouped into three RAM tiers so users can pick what fits their machine;
# weights are downloaded from the Hub on first use and cached locally.
MODELS = {
    # Tier 1: Fast (<2 GB RAM)
    "⚑ Qwen2.5-0.5B [~1 GB | Fastest]": "Qwen/Qwen2.5-0.5B-Instruct",
    "πŸ’« Qwen2.5-1.5B [~2 GB | Fast]": "Qwen/Qwen2.5-1.5B-Instruct",
    # Tier 2: Balanced (2-4 GB RAM)
    "πŸ”¬ DeepSeek-R1-Distill 1.5B [~2 GB]": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    "🦩 Llama-3.2-1B-Instruct [~2 GB]": "meta-llama/Llama-3.2-1B-Instruct",
    "🦩 Llama-3.2-3B-Instruct [~4 GB]": "meta-llama/Llama-3.2-3B-Instruct",
    "πŸ’Ά Phi-3-mini-4k [~4 GB | Strong]": "microsoft/Phi-3-mini-4k-instruct",
    # Tier 3: Quality (4-8 GB RAM)
    "πŸ’Ž Gemma-2-2B-it [~3 GB | Google]": "google/gemma-2-2b-it",
    "πŸ”₯ Qwen2.5-3B [~4 GB | Balanced]": "Qwen/Qwen2.5-3B-Instruct",
    "πŸ₯‡ Llama-3.1-8B-Instruct [~8 GB]": "meta-llama/Llama-3.1-8B-Instruct",
    "πŸ₯‡ Qwen2.5-7B [~8 GB | Best quality]": "Qwen/Qwen2.5-7B-Instruct",
}

# Dropdown choices, in catalogue order.
ALL_MODEL_NAMES = list(MODELS)
82
+
83
# ──────────────────────────────────────────────
# PIPELINE CACHE (lazy-loaded, thread-safe)
# ──────────────────────────────────────────────
_pipeline_cache: dict = {}
_pipeline_lock = threading.Lock()


def get_pipeline(model_id: str, hf_token: str = ""):
    """Download (on first use) and cache a transformers text-generation pipeline.

    Args:
        model_id: Hugging Face Hub repo ID of the model.
        hf_token: Optional HF access token for gated models.

    Returns:
        A ``(pipeline, None)`` tuple on success, or ``(None, error_message)``
        on failure.  Failures are NOT cached, so transient errors (e.g. a
        network blip during download) can be retried on the next call.
    """
    # The lock is held for the whole load so concurrent requests for the same
    # model don't download it twice; other requests simply wait.
    with _pipeline_lock:
        if model_id not in _pipeline_cache:
            try:
                # Imported lazily (and inside the try) so a missing
                # transformers install surfaces as an error message, not a crash.
                from transformers import pipeline, AutoTokenizer
                token = hf_token.strip() if hf_token else None
                tok = AutoTokenizer.from_pretrained(model_id, token=token)
                pipe = pipeline(
                    "text-generation",
                    model=model_id,
                    tokenizer=tok,
                    device_map="cpu",
                    # `torch_dtype` is the kwarg accepted across the whole
                    # transformers>=4.44 range pinned in requirements.txt; the
                    # bare `dtype` kwarg used before is only understood by much
                    # newer releases and is misrouted/ignored by older ones.
                    torch_dtype="auto",
                    trust_remote_code=True,
                    token=token,
                )
                # Some checkpoints ship generation_config.max_length=20, which
                # would truncate output; clear it so max_new_tokens wins.
                pipe.model.generation_config.max_length = None
                _pipeline_cache[model_id] = pipe
            except Exception as e:
                return None, str(e)
        return _pipeline_cache[model_id], None
113
+
114
+
115
+ # ──────────────────────────────────────────────
116
+ # INFERENCE
117
+ # ──────────────────────────────────────────────
118
+
119
# Shared system message prepended to every request.
SYSTEM_MSG = (
    "You are an expert educational assistant. "
    "Always respond with clean, well-structured Markdown text."
)


def ask_llm(model_label: str, prompt: str, hf_token: str = "") -> str:
    """Run *prompt* through the pipeline for the selected model label.

    Args:
        model_label: A key of ``MODELS`` (as chosen in the dropdown).
        prompt: The user prompt to answer.
        hf_token: Optional HF token forwarded to the loader.

    Returns:
        The assistant reply as Markdown, or a Markdown-formatted "❌ ..."
        error message if loading or inference fails — callers render either
        directly, so this function never raises.
    """
    model_id = MODELS[model_label]
    pipe, err = get_pipeline(model_id, hf_token)
    if err:
        return (
            f"❌ **Failed to load `{model_id}`:**\n```\n{err}\n```\n\n"
            "*Tip: Check your internet connection or choose a smaller model.*"
        )
    if pipe is None:
        return "❌ **Pipeline error: Pipeline object is None.**"
    try:
        messages = [
            {"role": "system", "content": SYSTEM_MSG},
            {"role": "user", "content": prompt},
        ]
        # Generation params go in the call (not the pipeline constructor) to
        # avoid constructor deprecation warnings.  50256 is a GPT-2-style EOS
        # fallback for tokenizers that define none.
        eos = (
            pipe.tokenizer.eos_token_id
            if (pipe.tokenizer and pipe.tokenizer.eos_token_id is not None)
            else 50256
        )
        out = pipe(messages, max_new_tokens=1024, pad_token_id=eos)
        generated = out[0]["generated_text"]
        if isinstance(generated, list):
            # Chat-template output — the last message is the assistant reply.
            return generated[-1]["content"]
        # Plain-string fallback.  BUG FIX: the original sliced off
        # len(str(messages)) characters, but str(messages) is the repr of the
        # message dicts, not the rendered prompt, so it chopped an arbitrary
        # prefix off the reply.  Strip the echoed prompt text itself instead.
        text = str(generated)
        if prompt in text:
            text = text.split(prompt, 1)[-1]
        return text.strip()
    except Exception as e:
        return f"❌ **Inference error:**\n```\n{str(e)}\n```"
156
+
157
+
158
+ # ──────────────────────────────────────────────
159
+ # PROMPTS
160
+ # ──────────────────────────────────────────────
161
+
162
def make_prompts(syllabus: str) -> dict:
    """Build the five task prompts for a syllabus/topic.

    Returns a dict with keys ``lesson``, ``qa``, ``mcq``, ``mindmap`` and
    ``infographic``; each value is a complete prompt string ending with the
    syllabus text.
    """
    # Every prompt ends with the same topic block — build it once.
    topic_block = f"\n\nSyllabus/Topic:\n{syllabus}"

    lesson = (
        "Create comprehensive, engaging lesson materials for the following syllabus/topic. "
        "Use clear ## headings, bullet points, bold key terms, and concise explanations "
        "suitable for a student." + topic_block
    )
    qa = (
        "Generate 8 important exam-style questions with detailed model answers based on "
        "this syllabus/topic. Number each Q&A pair clearly." + topic_block
    )
    mcq = (
        "Generate 8 multiple-choice questions based on this syllabus/topic. "
        "Each question must have 4 options (A–D). After all questions, list the correct "
        "answers with a brief explanation." + topic_block
    )
    mindmap = (
        "Create a high-level Flowchart or Mindmap for the following syllabus/topic using Mermaid.js syntax.\n"
        "STRICT RULES:\n"
        "- Output ONLY the mermaid code block (```mermaid ... ```).\n"
        "- Use 'graph TD' (for flowcharts) or 'mindmap' structure.\n"
        "- This will be converted into a static picture, so keep labels clear.\n"
        "- No introductory text, no explanation outside the block.\n"
        "- Avoid special characters in node labels." + topic_block
    )
    infographic = (
        "Create a highly visual text-based cheat sheet / infographic for this syllabus/topic. "
        "Use emojis, ASCII section dividers, tables, bullet points, and bold highlights "
        "to make it easy to scan, remember, and share." + topic_block
    )

    return {
        "lesson": lesson,
        "qa": qa,
        "mcq": mcq,
        "mindmap": mindmap,
        "infographic": infographic,
    }
194
+
195
+
196
+ # ──────────────────────────────────────────────
197
+ # MAIN GENERATION FUNCTION (progressive generator)
198
+ # ──────────────────────────────────────────────
199
+
200
def render_mermaid_as_image(text: str) -> str:
    """Extract a ```mermaid``` block from *text* and turn it into a mermaid.ink image link.

    Returns:
        A Markdown image tag pointing at mermaid.ink when a mermaid block is
        found; *text* unchanged when none is found; on encoding failure, a
        warning plus the raw mermaid block so nothing is lost.
    """
    import re
    import json

    # Look for a ```mermaid ... ``` fenced block.
    match = re.search(r'```mermaid\s+(.*?)\s+```', text, re.DOTALL)
    if not match:
        return text  # Return raw text if no block is found

    mermaid_code = match.group(1).strip()

    try:
        payload = {
            "code": mermaid_code,
            "mermaid": {"theme": "default"},
            "updateEditor": False,
            "autoSync": True,
            "updateDiagram": True,
        }
        # BUG FIX: mermaid.ink expects URL-SAFE base64 in the path.  Standard
        # base64 can emit '+' and '/', which corrupt the URL and produce
        # broken images for many diagrams.
        encoded = base64.urlsafe_b64encode(
            json.dumps(payload).encode("utf-8")
        ).decode("utf-8")
        image_url = f"https://mermaid.ink/img/{encoded}?type=webp"

        # Return ONLY the image tag ("picture only") — the tab renders it.
        return f"![Flowchart/Mindmap]({image_url})"
    except Exception as e:
        return f"*⚠️ Failed to render flowchart as image: {e}*\n\n```mermaid\n{mermaid_code}\n```"
228
+
229
def generate_content(syllabus_text: str, uploaded_file, model_label: str, hf_token: str):
    """Progressively generate all five study-material sections.

    A generator: yields a 5-tuple of Markdown strings (lesson, qa, mcq,
    mindmap, cheat sheet) after each section completes so the UI tabs update
    one at a time.
    """
    # Combine the pasted text with any text extracted from an uploaded file.
    extracted = extract_text_from_file(uploaded_file) if uploaded_file else ""
    topic = (syllabus_text.strip() + "\n\n" + extracted).strip()

    if not topic:
        yield ("⚠️ Please paste a syllabus/topic **or** upload a file.", "", "", "", "")
        return

    banner = f"*Model: **`{MODELS[model_label]}`***"
    prompt_map = make_prompts(topic)

    WAIT = "⏳ Waiting…"
    steps = [
        ("πŸ“– Generating Lesson Material… (1/5)", "lesson"),
        ("❓ Generating Q&A… (2/5)", "qa"),
        ("βœ… Generating MCQs… (3/5)", "mcq"),
        ("🧠 Generating Mindmap… (4/5)", "mindmap"),
        ("πŸ“Š Generating Cheat Sheet… (5/5)", "infographic"),
    ]

    # Initial frame: first tab shows its status, the rest show a placeholder.
    outputs = [banner + "\n\n" + steps[0][0]] + [WAIT] * (len(steps) - 1)
    yield tuple(outputs)

    for idx, (_status, key) in enumerate(steps):
        text = ask_llm(model_label, prompt_map[key], hf_token)

        # The mindmap tab renders a picture, not the raw mermaid source.
        if key == "mindmap":
            text = render_mermaid_as_image(text)

        outputs[idx] = banner + "\n\n" + text
        # Flip the next tab from "waiting" to its in-progress status.
        if idx + 1 < len(steps):
            outputs[idx + 1] = steps[idx + 1][0]
        yield tuple(outputs)
265
+
266
+
267
+ # ──────────────────────────────────────────────
268
+ # GRADIO UI
269
+ # ──────────────────────────────────────────────
270
+
271
+ CSS = """
272
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
273
+
274
+ body, .gradio-container {
275
+ font-family: 'Inter', sans-serif !important;
276
+ }
277
+
278
+ .app-header {
279
+ background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
280
+ border-radius: 16px;
281
+ padding: 28px 32px;
282
+ margin-bottom: 8px;
283
+ border: 1px solid rgba(99,102,241,0.3);
284
+ }
285
+
286
+ .app-header h1 {
287
+ font-size: 2rem !important;
288
+ font-weight: 700 !important;
289
+ background: linear-gradient(90deg, #818cf8, #c084fc, #38bdf8);
290
+ -webkit-background-clip: text;
291
+ -webkit-text-fill-color: transparent;
292
+ margin-bottom: 6px !important;
293
+ }
294
+
295
+ .app-header p {
296
+ color: #94a3b8 !important;
297
+ font-size: 0.95rem;
298
+ }
299
+
300
+ .generate-btn {
301
+ background: linear-gradient(135deg, #6366f1, #8b5cf6) !important;
302
+ border: none !important;
303
+ border-radius: 12px !important;
304
+ font-weight: 600 !important;
305
+ font-size: 1rem !important;
306
+ transition: all 0.2s ease !important;
307
+ width: 100% !important;
308
+ }
309
+
310
+ .generate-btn:hover {
311
+ transform: translateY(-2px) !important;
312
+ box-shadow: 0 8px 25px rgba(99,102,241,0.4) !important;
313
+ }
314
+ """
315
+
316
+ with gr.Blocks() as demo:
317
+
318
+ # ── Header ──
319
+ gr.HTML("""
320
+ <div class="app-header">
321
+ <h1>πŸŽ“ AI Study Material Generator</h1>
322
+ <p>Generate lesson notes, Q&amp;A, MCQs, a Mindmap, and a Cheat Sheet from any topic
323
+ or syllabus β€” using SOTA open-source LLMs running entirely on your CPU via
324
+ <code>transformers.pipeline</code>. No API key needed.</p>
325
+ </div>
326
+ """)
327
+
328
+ # ── Input Row ──
329
+ with gr.Row(equal_height=False):
330
+
331
+ # Left: syllabus input (paste OR upload)
332
+ with gr.Column(scale=4):
333
+ with gr.Tabs():
334
+ with gr.TabItem("✏️ Paste Text"):
335
+ syllabus_input = gr.Textbox(
336
+ show_label=False,
337
+ placeholder=(
338
+ "Paste your syllabus, topic, or any content here…\n"
339
+ "e.g. The Water Cycle, Neural Networks, World War II, Photosynthesis"
340
+ ),
341
+ lines=7,
342
+ )
343
+ with gr.TabItem("πŸ“‚ Upload File"):
344
+ gr.Markdown(
345
+ "Upload a **PDF**, **Word (.docx)**, **plain text (.txt)**, "
346
+ "or **image** (PNG / JPG / WEBP) β€” text is extracted automatically."
347
+ )
348
+ file_input = gr.File(
349
+ label="Upload syllabus file",
350
+ file_types=[".pdf", ".docx", ".doc", ".txt",
351
+ ".png", ".jpg", ".jpeg", ".webp", ".bmp"],
352
+ file_count="single",
353
+ )
354
+ file_preview = gr.Textbox(
355
+ label="Extracted text preview",
356
+ lines=4,
357
+ interactive=False,
358
+ placeholder="Text extracted from the file will appear here…",
359
+ )
360
+ # Live preview when file is uploaded
361
+ file_input.change(
362
+ fn=lambda f: extract_text_from_file(f) if f else "",
363
+ inputs=file_input,
364
+ outputs=file_preview,
365
+ )
366
+
367
+ # Right: model selector + generate button
368
+ with gr.Column(scale=2):
369
+ model_selector = gr.Dropdown(
370
+ choices=ALL_MODEL_NAMES,
371
+ value=ALL_MODEL_NAMES[0],
372
+ label="πŸ€– Model (all run locally via pipeline)",
373
+ info=(
374
+ "Tier 1 = fastest / least RAM. "
375
+ "Tier 3 = best quality / needs 6–8 GB RAM. "
376
+ "Models download on first use."
377
+ ),
378
+ )
379
+ gr.Markdown(
380
+ "<small>πŸ’‘ **Llama** & **Gemma** models may require a Hugging Face login "
381
+ "(`huggingface-cli login`) or a Token to download.</small>"
382
+ )
383
+ hf_token_input = gr.Textbox(
384
+ label="πŸ”‘ HF Token (optional)",
385
+ info="Required for gated models. Your token stays private.",
386
+ type="password",
387
+ placeholder="hf_...",
388
+ )
389
+ generate_btn = gr.Button(
390
+ "⚑ Generate Study Materials",
391
+ variant="primary",
392
+ size="lg",
393
+ elem_classes=["generate-btn"],
394
+ )
395
+
396
+ gr.HTML("<hr style='margin:8px 0; border-color:rgba(99,102,241,0.2)'>")
397
+
398
+ # ── Output Tabs ──
399
+ with gr.Tabs():
400
+ with gr.TabItem("πŸ“– Lesson Material"):
401
+ lesson_output = gr.Markdown(value="*Results will appear here after generation.*")
402
+ with gr.TabItem("❓ Q & A"):
403
+ qa_output = gr.Markdown(value="*Results will appear here after generation.*")
404
+ with gr.TabItem("βœ… MCQs"):
405
+ mcq_output = gr.Markdown(value="*Results will appear here after generation.*")
406
+ with gr.TabItem("🧠 Mindmap"):
407
+ gr.Markdown("*The diagram is generated as an image (powered by mermaid.ink).*")
408
+ mindmap_output = gr.Markdown(value="*Results will appear here after generation.*")
409
+ with gr.TabItem("πŸ“Š Cheat Sheet"):
410
+ infographic_output = gr.Markdown(value="*Results will appear here after generation.*")
411
+
412
+ # ── Footer ──
413
+ gr.HTML("""
414
+ <div style='text-align:center; color:#64748b; font-size:0.8rem; margin-top:12px;'>
415
+ Built with πŸ€— Gradio Β· Hugging Face Transformers β€” 100% open-source Β· runs offline on CPU
416
+ </div>
417
+ """)
418
+
419
+ # ── Wire up button ──
420
+ generate_btn.click(
421
+ fn=generate_content,
422
+ inputs=[syllabus_input, file_input, model_selector, hf_token_input],
423
+ outputs=[lesson_output, qa_output, mcq_output, mindmap_output, infographic_output],
424
+ )
425
+
426
+ if __name__ == "__main__":
427
+ demo.launch(
428
+ theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"),
429
+ css=CSS,
430
+ )
requirements.txt CHANGED
@@ -1,4 +1,11 @@
1
- gradio
2
- transformers
3
- torch
4
- accelerate
 
 
 
 
 
 
 
 
1
+ gradio>=4.0.0
2
+ huggingface_hub>=0.23.0
3
+ transformers>=4.44.0
4
+ torch
5
+ accelerate
6
+ sentencepiece
7
+ protobuf
8
+ pymupdf
9
+ python-docx
10
+ pytesseract
11
+ Pillow