yoursdvniel committed on
Commit
5977914
·
verified ·
1 Parent(s): efc826f

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +297 -4
main.py CHANGED
@@ -2,16 +2,20 @@ from flask import Flask, request, jsonify
2
  from flask_cors import CORS
3
  import json
4
  from datetime import datetime
5
- from typing import Optional, Dict, Any
6
  import re
7
- from typing import List
 
 
 
 
8
 
9
  from firestore_client import get_firestore_client
10
  from gemini_client import ask_gpt
11
  from prompt_instructions import build_system_message
12
- from role_access import get_allowed_collections # (currently unused but kept)
13
  from data_fetcher import fetch_data_from_firestore
14
- from data_planner import determine_data_requirements # 🧠 Gemini planner
15
  from resolver import resolve_user_context
16
  from schema_utils import has_field, resolve_field
17
 
@@ -335,6 +339,187 @@ def _calculate_progress_suggestion(intervention: Dict[str, Any], ai_result: Dict
335
  "overTargetBy": 0,
336
  }
337
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  # -- route ---------------------------------------------------------------
339
 
340
  @app.route('/chat', methods=['POST'])
@@ -674,6 +859,114 @@ def analyze_intervention_update():
674
  "error": "Failed to analyse intervention update"
675
  }), 500
676
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
677
 
678
  if __name__ == "__main__":
679
  app.run(host="0.0.0.0", port=7860)
 
2
  from flask_cors import CORS
3
  import json
4
  from datetime import datetime
5
+ from typing import Optional, Dict, Any, List
6
  import re
7
+ import os
8
+ import io
9
+
10
+ from pypdf import PdfReader
11
+ from docx import Document
12
 
13
  from firestore_client import get_firestore_client
14
  from gemini_client import ask_gpt
15
  from prompt_instructions import build_system_message
16
+ from role_access import get_allowed_collections
17
  from data_fetcher import fetch_data_from_firestore
18
+ from data_planner import determine_data_requirements
19
  from resolver import resolve_user_context
20
  from schema_utils import has_field, resolve_field
21
 
 
339
  "overTargetBy": 0,
340
  }
341
 
342
+ ALLOWED_COURSE_SOURCE_EXTENSIONS = {"pdf", "docx"}
343
+ MAX_SOURCE_TEXT_CHARS = 60000
344
+
345
+
346
+ def _allowed_course_source(filename: str) -> bool:
347
+ if not filename or "." not in filename:
348
+ return False
349
+ return filename.rsplit(".", 1)[1].lower() in ALLOWED_COURSE_SOURCE_EXTENSIONS
350
+
351
+
352
+ def _clean_extracted_text(text: str) -> str:
353
+ if not text:
354
+ return ""
355
+ text = text.replace("\x00", " ")
356
+ text = re.sub(r"[ \t]+", " ", text)
357
+ text = re.sub(r"\n{3,}", "\n\n", text)
358
+ return text.strip()
359
+
360
+
361
def _extract_text_from_pdf_bytes(file_bytes: bytes) -> str:
    """Extract and clean the text of every page of an in-memory PDF.

    A page whose text extraction raises contributes an empty string, so one
    bad page does not discard the rest of the document. Page texts are joined
    with a blank line and passed through ``_clean_extracted_text``.
    """

    def _page_text(page) -> str:
        # Best-effort per page: a failure is treated as "no text on this page".
        try:
            return page.extract_text() or ""
        except Exception:
            return ""

    document = PdfReader(io.BytesIO(file_bytes))
    combined = "\n\n".join(_page_text(page) for page in document.pages)
    return _clean_extracted_text(combined)
372
+
373
+
374
def _extract_text_from_docx_bytes(file_bytes: bytes) -> str:
    """Extract and clean paragraph and table text from an in-memory DOCX file.

    Non-empty paragraphs come first, one per line. Each table row follows as
    one line, with its non-empty cell texts joined by " | ". The result is
    passed through ``_clean_extracted_text``.
    """
    document = Document(io.BytesIO(file_bytes))

    stripped_paragraphs = ((para.text or "").strip() for para in document.paragraphs)
    collected: List[str] = [content for content in stripped_paragraphs if content]

    for table in document.tables:
        for row in table.rows:
            cell_texts = [(cell.text or "").strip() for cell in row.cells]
            joined = " | ".join(value for value in cell_texts if value)
            if joined:
                collected.append(joined)

    return _clean_extracted_text("\n".join(collected))
390
+
391
+
392
+ def _extract_course_source_text(filename: str, file_bytes: bytes) -> str:
393
+ ext = filename.rsplit(".", 1)[1].lower()
394
+
395
+ if ext == "pdf":
396
+ return _extract_text_from_pdf_bytes(file_bytes)
397
+
398
+ if ext == "docx":
399
+ return _extract_text_from_docx_bytes(file_bytes)
400
+
401
+ raise ValueError("Unsupported file type")
402
+
403
+
404
def _truncate_source_text(text: str, limit: int = MAX_SOURCE_TEXT_CHARS) -> str:
    """Return *text* unchanged if it fits in *limit* characters, else its first *limit* characters."""
    fits = len(text) <= limit
    return text if fits else text[:limit]
408
+
409
+
410
+ def _build_course_outline_prompt(source_text: str, filename: str) -> str:
411
+ return f"""
412
+ You are designing a practical learning course outline from source material.
413
+
414
+ Return STRICT JSON only with this exact shape:
415
+ {{
416
+ "courseTitle": "string",
417
+ "courseDescription": "string",
418
+ "difficulty": "beginner|intermediate|advanced",
419
+ "category": "string",
420
+ "courseType": "string",
421
+ "estimatedTotalDuration": "string",
422
+ "learningObjectives": ["string"],
423
+ "modules": [
424
+ {{
425
+ "type": "lesson|quiz|assignment|review",
426
+ "title": "string",
427
+ "description": "string",
428
+ "duration": "e.g. 20m or 1h",
429
+ "content": "only for lesson when useful",
430
+ "assignmentPrompt": "only for assignment when useful",
431
+ "answerKey": "only for assignment when useful",
432
+ "questions": [
433
+ {{
434
+ "question": "string",
435
+ "options": ["string", "string", "string", "string"],
436
+ "correctAnswer": 0
437
+ }}
438
+ ]
439
+ }}
440
+ ],
441
+ "warnings": ["string"]
442
+ }}
443
+
444
+ Rules:
445
+ - Build a course outline grounded in the uploaded document.
446
+ - Prefer 4 to 12 modules unless the source strongly suggests otherwise.
447
+ - Most modules should be lessons.
448
+ - Include quizzes only where knowledge checks make sense.
449
+ - Include assignments only when there is something practical to apply.
450
+ - Include review modules only when useful for recap.
451
+ - Every lesson must have a realistic duration estimate.
452
+ - estimatedTotalDuration must reflect the sum of lesson durations approximately.
453
+ - Keep titles practical and clean.
454
+ - Do not invent niche facts that are not supported by the source.
455
+ - If the document is too thin, still produce a usable outline and add a warning.
456
+ - If the content looks like a scanned PDF with poor extraction, say so in warnings.
457
+
458
+ Filename: {filename}
459
+
460
+ Source document text:
461
+ {source_text}
462
+ """.strip()
463
+
464
+
465
+ def _normalize_outline_json(ai_result: Dict[str, Any]) -> Dict[str, Any]:
466
+ raw_modules = ai_result.get("modules") or []
467
+ out_modules = []
468
+
469
+ for idx, mod in enumerate(raw_modules):
470
+ mtype = str(mod.get("type") or "lesson").strip().lower()
471
+ if mtype not in ["lesson", "quiz", "assignment", "review"]:
472
+ mtype = "lesson"
473
+
474
+ base = {
475
+ "id": f"module-{idx + 1}",
476
+ "type": mtype,
477
+ "title": str(mod.get("title") or f"Module {idx + 1}").strip(),
478
+ "description": str(mod.get("description") or "").strip(),
479
+ }
480
+
481
+ if mtype == "lesson":
482
+ base["duration"] = str(mod.get("duration") or "20m").strip()
483
+ base["content"] = str(mod.get("content") or "").strip()
484
+ base["videoUrls"] = []
485
+ base["imageUrls"] = []
486
+
487
+ elif mtype == "quiz":
488
+ questions = []
489
+ for qidx, q in enumerate(mod.get("questions") or []):
490
+ options = q.get("options") or []
491
+ while len(options) < 4:
492
+ options.append("")
493
+ questions.append({
494
+ "id": f"q-{idx + 1}-{qidx + 1}",
495
+ "question": str(q.get("question") or "").strip(),
496
+ "options": [str(x or "").strip() for x in options[:4]],
497
+ "correctAnswer": int(q.get("correctAnswer") or 0),
498
+ })
499
+ base["questions"] = questions
500
+
501
+ elif mtype == "assignment":
502
+ base["assignmentPrompt"] = str(mod.get("assignmentPrompt") or "").strip()
503
+ base["answerKey"] = str(mod.get("answerKey") or "").strip()
504
+
505
+ out_modules.append(base)
506
+
507
+ return {
508
+ "courseTitle": str(ai_result.get("courseTitle") or "").strip(),
509
+ "courseDescription": str(ai_result.get("courseDescription") or "").strip(),
510
+ "difficulty": str(ai_result.get("difficulty") or "beginner").strip().lower(),
511
+ "category": str(ai_result.get("category") or "General").strip(),
512
+ "courseType": str(ai_result.get("courseType") or "Foundational").strip(),
513
+ "estimatedTotalDuration": str(ai_result.get("estimatedTotalDuration") or "").strip(),
514
+ "learningObjectives": [
515
+ str(x).strip() for x in (ai_result.get("learningObjectives") or []) if str(x).strip()
516
+ ],
517
+ "modules": out_modules,
518
+ "warnings": [
519
+ str(x).strip() for x in (ai_result.get("warnings") or []) if str(x).strip()
520
+ ],
521
+ }
522
+
523
  # -- route ---------------------------------------------------------------
524
 
525
  @app.route('/chat', methods=['POST'])
 
859
  "error": "Failed to analyse intervention update"
860
  }), 500
861
 
862
@app.route('/generate-course-outline', methods=['POST'])
def generate_course_outline():
    """
    Generate a course outline from an uploaded PDF or DOCX document.

    multipart/form-data:
    - file: pdf or docx
    - role: operations | consultant | admin | ...
    - companyCode: ...
    - userId: ...

    role, companyCode and userId are only checked for presence here; this
    handler performs no authorisation with them.

    Response:
    {
      "reply": "Course outline generated successfully",
      "outline": {
        "courseTitle": "...",
        "courseDescription": "...",
        "difficulty": "...",
        "category": "...",
        "courseType": "...",
        "estimatedTotalDuration": "...",
        "learningObjectives": [],
        "modules": [],
        "warnings": []
      },
      "meta": {
        "filename": "...",
        "contentType": "...",
        "extractedChars": 12345,
        "truncated": false
      }
    }

    Errors:
    - 400: missing form field or file, unsupported extension, empty file, or
      a file from which no text could be extracted (including a file the
      parser rejects).
    - 502: the model reply did not yield a non-empty JSON object.
    - 500: any other failure.
    """
    try:
        role = request.form.get('role')
        company_code = request.form.get('companyCode')
        user_id = request.form.get('userId')
        uploaded = request.files.get('file')

        if not role or not company_code or not user_id:
            return jsonify({
                "error": "Missing role, companyCode, or userId"
            }), 400

        if uploaded is None:
            return jsonify({
                "error": "Missing file"
            }), 400

        filename = uploaded.filename or ""
        if not _allowed_course_source(filename):
            return jsonify({
                "error": "Only PDF and DOCX files are supported"
            }), 400

        file_bytes = uploaded.read()
        if not file_bytes:
            return jsonify({
                "error": "Uploaded file is empty"
            }), 400

        try:
            extracted_text = _extract_course_source_text(filename, file_bytes)
        except Exception:
            # A corrupt or unparseable upload is a problem with the request,
            # so it gets the same 400 as a file that yields no text.
            app.logger.exception("generate_course_outline_extract_failed")
            extracted_text = ""

        if not extracted_text:
            return jsonify({
                "error": "Could not extract readable text from the uploaded file"
            }), 400

        truncated_text = _truncate_source_text(extracted_text)
        was_truncated = len(truncated_text) < len(extracted_text)

        system_msg = {
            "role": "system",
            "content": (
                "You generate practical LMS course outlines from uploaded documents. "
                "Return strict JSON only."
            )
        }

        user_msg = {
            "role": "user",
            "content": _build_course_outline_prompt(truncated_text, filename)
        }

        ai_raw = ask_gpt([system_msg, user_msg])
        ai_result = _extract_json_block(ai_raw)

        # NOTE(review): _extract_json_block is defined elsewhere; this guard
        # covers it returning None, an empty dict or a non-object value, so a
        # blank outline is never reported as a success.
        if not isinstance(ai_result, dict) or not ai_result:
            return jsonify({
                "error": "The AI response did not contain a usable course outline"
            }), 502

        outline = _normalize_outline_json(ai_result)

        if was_truncated:
            outline.setdefault("warnings", []).append(
                "The source document was long, so only the first portion was used to generate this outline."
            )

        return jsonify({
            "reply": "Course outline generated successfully",
            "outline": outline,
            "meta": {
                "filename": filename,
                "contentType": uploaded.content_type,
                "extractedChars": len(extracted_text),
                "truncated": was_truncated,
            }
        })

    except Exception:
        # Route boundary: log with traceback and return a generic message so
        # internal details are not exposed to the client.
        app.logger.exception("generate_course_outline_failed")
        return jsonify({
            "error": "Failed to generate course outline from file"
        }), 500
969
+
970
 
971
  if __name__ == "__main__":
972
  app.run(host="0.0.0.0", port=7860)