Spaces:

yoursdvniel
/

SmartInc-API

Running

App Files Files Community

yoursdvniel committed 7 days ago

Commit

5977914

verified ·

1 Parent(s): efc826f

Update main.py

Browse files

Files changed (1) hide show

main.py +297 -4

main.py CHANGED Viewed

@@ -2,16 +2,20 @@ from flask import Flask, request, jsonify
 from flask_cors import CORS
 import json
 from datetime import datetime
-from typing import Optional, Dict, Any
 import re
-from typing import List
 from firestore_client import get_firestore_client
 from gemini_client import ask_gpt
 from prompt_instructions import build_system_message
-from role_access import get_allowed_collections  # (currently unused but kept)
 from data_fetcher import fetch_data_from_firestore
-from data_planner import determine_data_requirements  # 🧠 Gemini planner
 from resolver import resolve_user_context
 from schema_utils import has_field, resolve_field
@@ -335,6 +339,187 @@ def _calculate_progress_suggestion(intervention: Dict[str, Any], ai_result: Dict
         "overTargetBy": 0,
     }
 # -- route ---------------------------------------------------------------
 @app.route('/chat', methods=['POST'])
@@ -674,6 +859,114 @@ def analyze_intervention_update():
             "error": "Failed to analyse intervention update"
         }), 500
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)

 from flask_cors import CORS
 import json
 from datetime import datetime
+from typing import Optional, Dict, Any, List
 import re
+import os
+import io
+from pypdf import PdfReader
+from docx import Document
 from firestore_client import get_firestore_client
 from gemini_client import ask_gpt
 from prompt_instructions import build_system_message
+from role_access import get_allowed_collections
 from data_fetcher import fetch_data_from_firestore
+from data_planner import determine_data_requirements
 from resolver import resolve_user_context
 from schema_utils import has_field, resolve_field
         "overTargetBy": 0,
     }
# Lower-case file extensions (without the dot) accepted as course source uploads.
ALLOWED_COURSE_SOURCE_EXTENSIONS = {"docx", "pdf"}
# Default cap on how many extracted characters are kept for the model prompt.
MAX_SOURCE_TEXT_CHARS = 60_000
+def _allowed_course_source(filename: str) -> bool:
+    if not filename or "." not in filename:
+        return False
+    return filename.rsplit(".", 1)[1].lower() in ALLOWED_COURSE_SOURCE_EXTENSIONS
+def _clean_extracted_text(text: str) -> str:
+    if not text:
+        return ""
+    text = text.replace("\x00", " ")
+    text = re.sub(r"[ \t]+", " ", text)
+    text = re.sub(r"\n{3,}", "\n\n", text)
+    return text.strip()
def _extract_text_from_pdf_bytes(file_bytes: bytes) -> str:
    """Extract and clean the text of every page in an in-memory PDF.

    Page texts are joined with a blank line between them and passed through
    ``_clean_extracted_text``. A page whose extraction raises contributes an
    empty string instead of failing the whole document.
    """

    def _page_text(page) -> str:
        # Best-effort per page: one bad page must not abort the others.
        try:
            return page.extract_text() or ""
        except Exception:
            return ""

    pdf = PdfReader(io.BytesIO(file_bytes))
    page_texts = [_page_text(pg) for pg in pdf.pages]
    return _clean_extracted_text("\n\n".join(page_texts))
def _extract_text_from_docx_bytes(file_bytes: bytes) -> str:
    """Extract and clean the text of an in-memory DOCX document.

    All non-blank paragraphs are emitted first, one per line, followed by one
    line per non-empty table row. Within a row, non-blank cell texts are
    joined with ``" | "``. The result goes through ``_clean_extracted_text``.
    """
    document = Document(io.BytesIO(file_bytes))

    paragraph_texts = ((para.text or "").strip() for para in document.paragraphs)
    collected: List[str] = [txt for txt in paragraph_texts if txt]

    for tbl in document.tables:
        for row in tbl.rows:
            cell_texts = [(cell.text or "").strip() for cell in row.cells]
            joined = " | ".join(txt for txt in cell_texts if txt)
            if joined:
                collected.append(joined)

    return _clean_extracted_text("\n".join(collected))
+def _extract_course_source_text(filename: str, file_bytes: bytes) -> str:
+    ext = filename.rsplit(".", 1)[1].lower()
+    if ext == "pdf":
+        return _extract_text_from_pdf_bytes(file_bytes)
+    if ext == "docx":
+        return _extract_text_from_docx_bytes(file_bytes)
+    raise ValueError("Unsupported file type")
def _truncate_source_text(text: str, limit: int = MAX_SOURCE_TEXT_CHARS) -> str:
    """Cap ``text`` at ``limit`` characters, keeping the leading portion.

    Text that already fits within ``limit`` is returned unchanged.
    """
    fits = len(text) <= limit
    return text if fits else text[:limit]
+def _build_course_outline_prompt(source_text: str, filename: str) -> str:
+    return f"""
+You are designing a practical learning course outline from source material.
+Return STRICT JSON only with this exact shape:
+{{
+  "courseTitle": "string",
+  "courseDescription": "string",
+  "difficulty": "beginner|intermediate|advanced",
+  "category": "string",
+  "courseType": "string",
+  "estimatedTotalDuration": "string",
+  "learningObjectives": ["string"],
+  "modules": [
+    {{
+      "type": "lesson|quiz|assignment|review",
+      "title": "string",
+      "description": "string",
+      "duration": "e.g. 20m or 1h",
+      "content": "only for lesson when useful",
+      "assignmentPrompt": "only for assignment when useful",
+      "answerKey": "only for assignment when useful",
+      "questions": [
+        {{
+          "question": "string",
+          "options": ["string", "string", "string", "string"],
+          "correctAnswer": 0
+        }}
+      ]
+    }}
+  ],
+  "warnings": ["string"]
+}}
+Rules:
+- Build a course outline grounded in the uploaded document.
+- Prefer 4 to 12 modules unless the source strongly suggests otherwise.
+- Most modules should be lessons.
+- Include quizzes only where knowledge checks make sense.
+- Include assignments only when there is something practical to apply.
+- Include review modules only when useful for recap.
+- Every lesson must have a realistic duration estimate.
+- estimatedTotalDuration must reflect the sum of lesson durations approximately.
+- Keep titles practical and clean.
+- Do not invent niche facts that are not supported by the source.
+- If the document is too thin, still produce a usable outline and add a warning.
+- If the content looks like a scanned PDF with poor extraction, say so in warnings.
+Filename: {filename}
+Source document text:
+{source_text}
+""".strip()
+def _normalize_outline_json(ai_result: Dict[str, Any]) -> Dict[str, Any]:
+    raw_modules = ai_result.get("modules") or []
+    out_modules = []
+    for idx, mod in enumerate(raw_modules):
+        mtype = str(mod.get("type") or "lesson").strip().lower()
+        if mtype not in ["lesson", "quiz", "assignment", "review"]:
+            mtype = "lesson"
+        base = {
+            "id": f"module-{idx + 1}",
+            "type": mtype,
+            "title": str(mod.get("title") or f"Module {idx + 1}").strip(),
+            "description": str(mod.get("description") or "").strip(),
+        }
+        if mtype == "lesson":
+            base["duration"] = str(mod.get("duration") or "20m").strip()
+            base["content"] = str(mod.get("content") or "").strip()
+            base["videoUrls"] = []
+            base["imageUrls"] = []
+        elif mtype == "quiz":
+            questions = []
+            for qidx, q in enumerate(mod.get("questions") or []):
+                options = q.get("options") or []
+                while len(options) < 4:
+                    options.append("")
+                questions.append({
+                    "id": f"q-{idx + 1}-{qidx + 1}",
+                    "question": str(q.get("question") or "").strip(),
+                    "options": [str(x or "").strip() for x in options[:4]],
+                    "correctAnswer": int(q.get("correctAnswer") or 0),
+                })
+            base["questions"] = questions
+        elif mtype == "assignment":
+            base["assignmentPrompt"] = str(mod.get("assignmentPrompt") or "").strip()
+            base["answerKey"] = str(mod.get("answerKey") or "").strip()
+        out_modules.append(base)
+    return {
+        "courseTitle": str(ai_result.get("courseTitle") or "").strip(),
+        "courseDescription": str(ai_result.get("courseDescription") or "").strip(),
+        "difficulty": str(ai_result.get("difficulty") or "beginner").strip().lower(),
+        "category": str(ai_result.get("category") or "General").strip(),
+        "courseType": str(ai_result.get("courseType") or "Foundational").strip(),
+        "estimatedTotalDuration": str(ai_result.get("estimatedTotalDuration") or "").strip(),
+        "learningObjectives": [
+            str(x).strip() for x in (ai_result.get("learningObjectives") or []) if str(x).strip()
+        ],
+        "modules": out_modules,
+        "warnings": [
+            str(x).strip() for x in (ai_result.get("warnings") or []) if str(x).strip()
+        ],
+    }
 # -- route ---------------------------------------------------------------
 @app.route('/chat', methods=['POST'])
             "error": "Failed to analyse intervention update"
         }), 500
@app.route('/generate-course-outline', methods=['POST'])
def generate_course_outline():
    """
    Generate a course outline from an uploaded PDF or DOCX document.

    Request (multipart/form-data):
      - file: pdf or docx
      - role: operations | consultant | admin | ...
      - companyCode: ...
      - userId: ...

    role, companyCode and userId are only checked for presence here; this
    handler does not otherwise use them.

    Response (200):
    {
      "reply": "Course outline generated successfully",
      "outline": {
        "courseTitle": "...",
        "courseDescription": "...",
        "difficulty": "...",
        "category": "...",
        "courseType": "...",
        "estimatedTotalDuration": "...",
        "learningObjectives": [],
        "modules": [],
        "warnings": []
      },
      "meta": {
        "filename": "...",
        "contentType": "...",
        "extractedChars": 12345,
        "truncated": false
      }
    }

    Errors are returned as {"error": "..."}:
      - 400 when a form field or the file is missing, the extension is not
        pdf/docx, the file is empty, or no readable text can be extracted
        (including files the parser cannot open).
      - 500 for any other failure, e.g. the model call or an unusable model
        reply.
    """
    try:
        role = request.form.get('role')
        company_code = request.form.get('companyCode')
        user_id = request.form.get('userId')
        uploaded = request.files.get('file')
        if not role or not company_code or not user_id:
            return jsonify({
                "error": "Missing role, companyCode, or userId"
            }), 400
        if uploaded is None:
            return jsonify({
                "error": "Missing file"
            }), 400
        filename = uploaded.filename or ""
        if not _allowed_course_source(filename):
            return jsonify({
                "error": "Only PDF and DOCX files are supported"
            }), 400
        file_bytes = uploaded.read()
        if not file_bytes:
            return jsonify({
                "error": "Uploaded file is empty"
            }), 400
        try:
            extracted_text = _extract_course_source_text(filename, file_bytes)
        except Exception as e:
            # A file the parser cannot open is a problem with the upload, not
            # with the server, so route it to the same 400 as "no readable
            # text" instead of letting it become a generic 500.
            print("generate_course_outline_extract_failed:", e)
            extracted_text = ""
        if not extracted_text:
            return jsonify({
                "error": "Could not extract readable text from the uploaded file"
            }), 400
        truncated_text = _truncate_source_text(extracted_text)
        was_truncated = len(truncated_text) < len(extracted_text)
        system_msg = {
            "role": "system",
            "content": (
                "You generate practical LMS course outlines from uploaded documents. "
                "Return strict JSON only."
            )
        }
        user_msg = {
            "role": "user",
            "content": _build_course_outline_prompt(truncated_text, filename)
        }
        ai_raw = ask_gpt([system_msg, user_msg])
        ai_result = _extract_json_block(ai_raw)
        if not isinstance(ai_result, dict):
            # Fail with a clear log line rather than an incidental error deep
            # inside normalisation; the outer handler turns this into the 500.
            raise ValueError("AI response did not contain a JSON object")
        outline = _normalize_outline_json(ai_result)
        if was_truncated:
            outline.setdefault("warnings", []).append(
                "The source document was long, so only the first portion was used to generate this outline."
            )
        return jsonify({
            "reply": "Course outline generated successfully",
            "outline": outline,
            "meta": {
                "filename": filename,
                "contentType": uploaded.content_type,
                "extractedChars": len(extracted_text),
                "truncated": was_truncated,
            }
        })
    except Exception as e:
        print("generate_course_outline_failed:", e)
        return jsonify({
            "error": "Failed to generate course outline from file"
        }), 500
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)