yoursdvniel committed on
Commit
9fed675
·
verified ·
1 Parent(s): 9ed036f

Reset to old endpoints

Browse files
Files changed (1) hide show
  1. main.py +0 -226
main.py CHANGED
@@ -520,160 +520,6 @@ def _normalize_outline_json(ai_result: Dict[str, Any]) -> Dict[str, Any]:
520
  ],
521
  }
522
 
523
- ALLOWED_INTAKE_EXTENSIONS = {"pdf", "docx", "txt"}
524
- MAX_INTAKE_DOC_CHARS = 25000
525
-
526
-
527
- def _allowed_intake_source(filename: str) -> bool:
528
- return bool(filename and "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_INTAKE_EXTENSIONS)
529
-
530
-
531
- def _extract_text_from_txt_bytes(file_bytes: bytes) -> str:
532
- return _clean_extracted_text(file_bytes.decode("utf-8", errors="ignore"))
533
-
534
-
535
- def _extract_intake_document_text(filename: str, file_bytes: bytes) -> str:
536
- ext = filename.rsplit(".", 1)[1].lower()
537
-
538
- if ext == "pdf":
539
- return _extract_text_from_pdf_bytes(file_bytes)
540
-
541
- if ext == "docx":
542
- return _extract_text_from_docx_bytes(file_bytes)
543
-
544
- if ext == "txt":
545
- return _extract_text_from_txt_bytes(file_bytes)
546
-
547
- raise ValueError("Unsupported file type")
548
-
549
-
550
- def _build_sme_intake_prompt(payload: Dict[str, Any]) -> str:
551
- return f"""
552
- You are helping an SME complete an incubation application.
553
-
554
- The user may write in English, Shona, Ndebele, Zulu, or a mixture.
555
- Understand the mixed language, but return clean STRICT JSON only.
556
-
557
- Return this exact JSON shape:
558
- {{
559
- "detectedLanguages": ["English"],
560
- "profilePatch": {{
561
- "participantName": null,
562
- "gender": null,
563
- "idNumber": null,
564
- "phone": null,
565
- "beneficiaryName": null,
566
- "sector": null,
567
- "natureOfBusiness": null,
568
- "beeLevel": null,
569
- "youthOwnedPercent": null,
570
- "femaleOwnedPercent": null,
571
- "blackOwnedPercent": null,
572
- "dateOfRegistration": null,
573
- "yearsOfTrading": null,
574
- "registrationNumber": null,
575
- "businessAddress": null,
576
- "city": null,
577
- "postalCode": null,
578
- "province": null,
579
- "hub": null,
580
- "location": null
581
- }},
582
- "smeAssessment": {{
583
- "urgencyLevel": "low|medium|high|urgent",
584
- "urgencyScore": 0,
585
- "businessStage": "idea|startup|early_growth|established|stagnant|struggling|distressed|growth_ready|unknown",
586
- "riskLevel": "low|medium|high",
587
- "priorityInterventionAreas": [
588
- {
589
- "area": "Financial Compliance|HSE & Labour Compliance|Legal Advisory Services|Market Linkages|PDS|Wellness Services|Training Academy|Marketing and Communication|ROM",
590
- "urgency": "low|medium|high|urgent",
591
- "reason": "",
592
- "suggestedInterventions": []
593
- }
594
- ],
595
- "classificationReasons": [],
596
- "redFlags": [],
597
- "growthSignals": []
598
- }},
599
- "applicationPatch": {{
600
- "motivation": null,
601
- "challenges": null,
602
- "facebook": null,
603
- "instagram": null,
604
- "linkedIn": null,
605
- "profile": {{}}
606
- }},
607
- "businessStory": {{
608
- "summary": "",
609
- "polished": "",
610
- "strengths": [],
611
- "risks": []
612
- }},
613
- "documentFindings": [
614
- {{
615
- "documentType": "Certified ID Copy|Proof of Address|B-BBEE Certificate|Tax PIN|CIPC|Management Accounts|Three Months Bank Statements|Unknown",
616
- "filename": "",
617
- "extractedFields": {{}},
618
- "confidence": 0,
619
- "warnings": []
620
- }}
621
- ],
622
- "missingFields": [
623
- {{
624
- "field": "",
625
- "section": "profile|application|program",
626
- "label": "",
627
- "question": "",
628
- "reason": ""
629
- }}
630
- ],
631
- "missingDocuments": [
632
- {{
633
- "type": "",
634
- "reason": ""
635
- }}
636
- ],
637
- "nextQuestion": "",
638
- "confidence": 0,
639
- "warnings": []
640
- }}
641
-
642
- Rules:
643
- - Do not invent values.
644
- - Use documents for official values like ID number, registration number, business name, registration date, address, B-BBEE level, tax details.
645
- - If a document contradicts typed text, keep the document value and add a warning.
646
- - Only return fields that are reasonably supported.
647
- - For programQuestions, fill applicationPatch.profile using the question id as the key.
648
- - Ask one best next question in nextQuestion.
649
- - If nothing important is missing, nextQuestion must be empty.
650
- - Use South African SMME/incubation context.
651
-
652
- SME assessment rules:
653
- - urgencyScore must be 0 to 100.
654
- - urgent = 80-100, high = 60-79, medium = 35-59, low = 0-34.
655
- - Classify businessStage using the available facts:
656
- - idea/startup: newly registered, little or no trading/revenue, business still forming.
657
- - early_growth: trading with some traction but systems/compliance/markets still weak.
658
- - established: trading for several years with stable operations.
659
- - stagnant: revenue, staff, market access, or growth has not improved for a long period.
660
- - struggling: serious operational, compliance, cash flow, market, staffing, or documentation gaps.
661
- - distressed: severe survival risk, legal/financial/compliance crisis, or inability to trade normally.
662
- - growth_ready: stable/compliant business needing market access, finance, procurement, branding, or scale support.
663
- - unknown: not enough evidence.
664
- - Rate urgency higher when there are signs of missing compliance documents, no CIPC/tax/B-BBEE, no bank statements, weak financial records, no contracts, labour/HSE exposure, stagnant revenue, low market access, legal risks, poor business planning, or severe founder/business challenges.
665
- - Priority intervention areas must align with Lepharo departments:
666
- ROM, HSE & Labour Compliance, Financial Compliance, PDS, Market Linkages, Legal Advisory Services, Wellness Services, Training Academy, Marketing and Communication.
667
- - classificationReasons must explain why the stage and urgency were selected.
668
- - redFlags must list serious risks.
669
- - growthSignals must list positive signs.
670
- - suggestedInterventions must be practical intervention titles, not vague advice.
671
-
672
- Payload:
673
- {json.dumps(payload, ensure_ascii=False)}
674
- """.strip()
675
-
676
-
677
  # -- route ---------------------------------------------------------------
678
 
679
  @app.route('/chat', methods=['POST'])
@@ -1121,78 +967,6 @@ def generate_course_outline():
1121
  "error": "Failed to generate course outline from file"
1122
  }), 500
1123
 
1124
- @app.route('/analyze-sme-application-intake', methods=['POST'])
1125
- def analyze_sme_application_intake():
1126
- try:
1127
- role = request.form.get("role")
1128
- company_code = request.form.get("companyCode")
1129
- user_id = request.form.get("userId")
1130
- program_id = request.form.get("programId")
1131
- program_name = request.form.get("programName")
1132
- raw_story = request.form.get("rawStory") or ""
1133
-
1134
- profile_values = json.loads(request.form.get("profileValuesJson") or "{}")
1135
- application_values = json.loads(request.form.get("applicationValuesJson") or "{}")
1136
- program_questions = json.loads(request.form.get("programQuestionsJson") or "[]")
1137
- required_documents = json.loads(request.form.get("requiredDocumentsJson") or "[]")
1138
-
1139
- if not role or not company_code or not user_id:
1140
- return jsonify({"error": "Missing role, companyCode, or userId"}), 400
1141
-
1142
- uploaded_files = request.files.getlist("files")
1143
- document_payloads = []
1144
-
1145
- for uploaded in uploaded_files:
1146
- filename = uploaded.filename or ""
1147
-
1148
- if not _allowed_intake_source(filename):
1149
- document_payloads.append({
1150
- "filename": filename,
1151
- "text": "",
1152
- "warning": "Unsupported file type"
1153
- })
1154
- continue
1155
-
1156
- file_bytes = uploaded.read()
1157
- extracted = _extract_intake_document_text(filename, file_bytes)
1158
- document_payloads.append({
1159
- "filename": filename,
1160
- "contentType": uploaded.content_type,
1161
- "text": _truncate_source_text(extracted, MAX_INTAKE_DOC_CHARS)
1162
- })
1163
-
1164
- payload = {
1165
- "role": role,
1166
- "companyCode": company_code,
1167
- "userId": user_id,
1168
- "programId": program_id,
1169
- "programName": program_name,
1170
- "rawStory": raw_story,
1171
- "currentProfileValues": profile_values,
1172
- "currentApplicationValues": application_values,
1173
- "programQuestions": program_questions,
1174
- "requiredDocuments": required_documents,
1175
- "documents": document_payloads
1176
- }
1177
-
1178
- system_msg = {
1179
- "role": "system",
1180
- "content": "You extract SME application data. Return strict JSON only."
1181
- }
1182
-
1183
- user_msg = {
1184
- "role": "user",
1185
- "content": _build_sme_intake_prompt(payload)
1186
- }
1187
-
1188
- ai_raw = ask_gpt([system_msg, user_msg])
1189
- parsed = _extract_json_block(ai_raw)
1190
-
1191
- return jsonify(to_jsonable(parsed))
1192
-
1193
- except Exception as e:
1194
- print("analyze_sme_application_intake_failed:", e)
1195
- return jsonify({"error": "Failed to analyse SME application intake"}), 500
1196
 
1197
  if __name__ == "__main__":
1198
  app.run(host="0.0.0.0", port=7860)
 
520
  ],
521
  }
522
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
523
  # -- route ---------------------------------------------------------------
524
 
525
  @app.route('/chat', methods=['POST'])
 
967
  "error": "Failed to generate course outline from file"
968
  }), 500
969
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
970
 
971
  if __name__ == "__main__":
972
  app.run(host="0.0.0.0", port=7860)