Spaces:
Sleeping
Sleeping
Reset to old endpoints
Browse files
main.py
CHANGED
|
@@ -520,160 +520,6 @@ def _normalize_outline_json(ai_result: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 520 |
],
|
| 521 |
}
|
| 522 |
|
| 523 |
-
# File extensions (lower-case, without the leading dot) accepted for intake uploads.
ALLOWED_INTAKE_EXTENSIONS = {"docx", "pdf", "txt"}

# Character limit handed to the truncation helper for each document's extracted text.
MAX_INTAKE_DOC_CHARS = 25_000
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
def _allowed_intake_source(filename: str) -> bool:
    """Return True when *filename* ends in an extension listed in ALLOWED_INTAKE_EXTENSIONS.

    The extension is the text after the last dot, compared case-insensitively.
    Empty names and names without a dot are rejected.
    """
    if not filename:
        return False

    _stem, dot, extension = filename.rpartition(".")
    if not dot:
        # No dot at all, so there is no extension to check.
        return False

    return extension.lower() in ALLOWED_INTAKE_EXTENSIONS
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
def _extract_text_from_txt_bytes(file_bytes: bytes) -> str:
    """Decode raw .txt bytes as UTF-8 and pass the result through _clean_extracted_text.

    Byte sequences that are not valid UTF-8 are dropped rather than raising.
    """
    decoded_text = file_bytes.decode("utf-8", errors="ignore")
    return _clean_extracted_text(decoded_text)
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
def _extract_intake_document_text(filename: str, file_bytes: bytes) -> str:
    """Extract text from an uploaded intake document, dispatching on its extension.

    Args:
        filename: Name of the uploaded file; only the text after the last dot
            is used, compared case-insensitively.
        file_bytes: Raw content of the upload.

    Returns:
        Whatever the matching extractor returns for ``file_bytes``.

    Raises:
        ValueError: If the filename has no extension or the extension is not
            pdf, docx, or txt.
    """
    # rpartition never raises: a name without a dot yields an empty separator,
    # which is treated as "no extension" and falls through to the ValueError
    # below instead of surfacing as an IndexError.
    _stem, dot, ext = (filename or "").rpartition(".")
    ext = ext.lower() if dot else ""

    if ext == "pdf":
        return _extract_text_from_pdf_bytes(file_bytes)

    if ext == "docx":
        return _extract_text_from_docx_bytes(file_bytes)

    if ext == "txt":
        return _extract_text_from_txt_bytes(file_bytes)

    raise ValueError("Unsupported file type")
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
def _build_sme_intake_prompt(payload: Dict[str, Any]) -> str:
    """Build the user prompt for the SME application intake analysis.

    The prompt contains the instructions, the JSON shape the model must
    return, the extraction and assessment rules, and finally ``payload``
    serialized as JSON (non-ASCII characters are kept as-is).

    Args:
        payload: JSON-serializable request data to append under "Payload:".

    Returns:
        The prompt text with leading and trailing whitespace stripped.
    """
    # This is an f-string, so every literal brace of the JSON shape must be
    # doubled ({{ / }}). A single brace opens a replacement field, which is
    # only intended for the json.dumps(...) call at the end.
    return f"""
You are helping an SME complete an incubation application.

The user may write in English, Shona, Ndebele, Zulu, or a mixture.
Understand the mixed language, but return clean STRICT JSON only.

Return this exact JSON shape:
{{
  "detectedLanguages": ["English"],
  "profilePatch": {{
    "participantName": null,
    "gender": null,
    "idNumber": null,
    "phone": null,
    "beneficiaryName": null,
    "sector": null,
    "natureOfBusiness": null,
    "beeLevel": null,
    "youthOwnedPercent": null,
    "femaleOwnedPercent": null,
    "blackOwnedPercent": null,
    "dateOfRegistration": null,
    "yearsOfTrading": null,
    "registrationNumber": null,
    "businessAddress": null,
    "city": null,
    "postalCode": null,
    "province": null,
    "hub": null,
    "location": null
  }},
  "smeAssessment": {{
    "urgencyLevel": "low|medium|high|urgent",
    "urgencyScore": 0,
    "businessStage": "idea|startup|early_growth|established|stagnant|struggling|distressed|growth_ready|unknown",
    "riskLevel": "low|medium|high",
    "priorityInterventionAreas": [
      {{
        "area": "Financial Compliance|HSE & Labour Compliance|Legal Advisory Services|Market Linkages|PDS|Wellness Services|Training Academy|Marketing and Communication|ROM",
        "urgency": "low|medium|high|urgent",
        "reason": "",
        "suggestedInterventions": []
      }}
    ],
    "classificationReasons": [],
    "redFlags": [],
    "growthSignals": []
  }},
  "applicationPatch": {{
    "motivation": null,
    "challenges": null,
    "facebook": null,
    "instagram": null,
    "linkedIn": null,
    "profile": {{}}
  }},
  "businessStory": {{
    "summary": "",
    "polished": "",
    "strengths": [],
    "risks": []
  }},
  "documentFindings": [
    {{
      "documentType": "Certified ID Copy|Proof of Address|B-BBEE Certificate|Tax PIN|CIPC|Management Accounts|Three Months Bank Statements|Unknown",
      "filename": "",
      "extractedFields": {{}},
      "confidence": 0,
      "warnings": []
    }}
  ],
  "missingFields": [
    {{
      "field": "",
      "section": "profile|application|program",
      "label": "",
      "question": "",
      "reason": ""
    }}
  ],
  "missingDocuments": [
    {{
      "type": "",
      "reason": ""
    }}
  ],
  "nextQuestion": "",
  "confidence": 0,
  "warnings": []
}}

Rules:
- Do not invent values.
- Use documents for official values like ID number, registration number, business name, registration date, address, B-BBEE level, tax details.
- If a document contradicts typed text, keep the document value and add a warning.
- Only return fields that are reasonably supported.
- For programQuestions, fill applicationPatch.profile using the question id as the key.
- Ask one best next question in nextQuestion.
- If nothing important is missing, nextQuestion must be empty.
- Use South African SMME/incubation context.

SME assessment rules:
- urgencyScore must be 0 to 100.
- urgent = 80-100, high = 60-79, medium = 35-59, low = 0-34.
- Classify businessStage using the available facts:
  - idea/startup: newly registered, little or no trading/revenue, business still forming.
  - early_growth: trading with some traction but systems/compliance/markets still weak.
  - established: trading for several years with stable operations.
  - stagnant: revenue, staff, market access, or growth has not improved for a long period.
  - struggling: serious operational, compliance, cash flow, market, staffing, or documentation gaps.
  - distressed: severe survival risk, legal/financial/compliance crisis, or inability to trade normally.
  - growth_ready: stable/compliant business needing market access, finance, procurement, branding, or scale support.
  - unknown: not enough evidence.
- Rate urgency higher when there are signs of missing compliance documents, no CIPC/tax/B-BBEE, no bank statements, weak financial records, no contracts, labour/HSE exposure, stagnant revenue, low market access, legal risks, poor business planning, or severe founder/business challenges.
- Priority intervention areas must align with Lepharo departments:
  ROM, HSE & Labour Compliance, Financial Compliance, PDS, Market Linkages, Legal Advisory Services, Wellness Services, Training Academy, Marketing and Communication.
- classificationReasons must explain why the stage and urgency were selected.
- redFlags must list serious risks.
- growthSignals must list positive signs.
- suggestedInterventions must be practical intervention titles, not vague advice.

Payload:
{json.dumps(payload, ensure_ascii=False)}
""".strip()
|
| 675 |
-
|
| 676 |
-
|
| 677 |
# -- route ---------------------------------------------------------------
|
| 678 |
|
| 679 |
@app.route('/chat', methods=['POST'])
|
|
@@ -1121,78 +967,6 @@ def generate_course_outline():
|
|
| 1121 |
"error": "Failed to generate course outline from file"
|
| 1122 |
}), 500
|
| 1123 |
|
| 1124 |
-
def _read_intake_json_field(field_name: str, empty_default: str) -> Any:
    """Decode one JSON-encoded form field of the current request.

    ``empty_default`` is the JSON text used when the field is missing or empty.
    Raises ``ValueError`` (``json.JSONDecodeError``) when the field is not valid JSON.
    """
    return json.loads(request.form.get(field_name) or empty_default)


def _intake_document_payload(uploaded: Any) -> Dict[str, Any]:
    """Turn one uploaded file into the document entry that is sent to the model.

    Unsupported file types and files whose text cannot be extracted produce an
    entry with empty ``text`` and a ``warning`` instead of raising.
    """
    filename = uploaded.filename or ""

    if not _allowed_intake_source(filename):
        return {
            "filename": filename,
            "text": "",
            "warning": "Unsupported file type"
        }

    try:
        extracted = _extract_intake_document_text(filename, uploaded.read())
    except Exception as e:
        # One corrupt or unreadable upload should not fail the whole analysis;
        # report it the same way unsupported files are reported.
        print("analyze_sme_application_intake_document_failed:", filename, e)
        return {
            "filename": filename,
            "contentType": uploaded.content_type,
            "text": "",
            "warning": "Could not extract text from this document"
        }

    return {
        "filename": filename,
        "contentType": uploaded.content_type,
        "text": _truncate_source_text(extracted, MAX_INTAKE_DOC_CHARS)
    }


@app.route('/analyze-sme-application-intake', methods=['POST'])
def analyze_sme_application_intake():
    """Analyse an SME application intake submitted as multipart form data.

    Form fields read: ``role``, ``companyCode``, ``userId`` (all required),
    ``programId``, ``programName``, ``rawStory``, and the JSON-encoded
    ``profileValuesJson``, ``applicationValuesJson``, ``programQuestionsJson``
    and ``requiredDocumentsJson``. Uploaded documents are read from ``files``.

    Returns:
        200 with the JSON-able form of the parsed model reply, 400 when a
        required field is missing or a JSON field is malformed, and 500 on any
        other failure.
    """
    try:
        role = request.form.get("role")
        company_code = request.form.get("companyCode")
        user_id = request.form.get("userId")
        program_id = request.form.get("programId")
        program_name = request.form.get("programName")
        raw_story = request.form.get("rawStory") or ""

        # Check the required identifiers first so an incomplete request is
        # rejected before any JSON decoding or file reading happens.
        if not role or not company_code or not user_id:
            return jsonify({"error": "Missing role, companyCode, or userId"}), 400

        try:
            profile_values = _read_intake_json_field("profileValuesJson", "{}")
            application_values = _read_intake_json_field("applicationValuesJson", "{}")
            program_questions = _read_intake_json_field("programQuestionsJson", "[]")
            required_documents = _read_intake_json_field("requiredDocumentsJson", "[]")
        except ValueError:
            # Malformed client input is a bad request, not a server error.
            return jsonify({"error": "Invalid JSON in form fields"}), 400

        document_payloads = [
            _intake_document_payload(uploaded)
            for uploaded in request.files.getlist("files")
        ]

        payload = {
            "role": role,
            "companyCode": company_code,
            "userId": user_id,
            "programId": program_id,
            "programName": program_name,
            "rawStory": raw_story,
            "currentProfileValues": profile_values,
            "currentApplicationValues": application_values,
            "programQuestions": program_questions,
            "requiredDocuments": required_documents,
            "documents": document_payloads
        }

        system_msg = {
            "role": "system",
            "content": "You extract SME application data. Return strict JSON only."
        }

        user_msg = {
            "role": "user",
            "content": _build_sme_intake_prompt(payload)
        }

        # NOTE(review): ask_gpt presumably returns the raw model reply as text and
        # _extract_json_block pulls the JSON object out of it - confirm against
        # their definitions elsewhere in this file.
        ai_raw = ask_gpt([system_msg, user_msg])
        parsed = _extract_json_block(ai_raw)

        return jsonify(to_jsonable(parsed))

    except Exception as e:
        print("analyze_sme_application_intake_failed:", e)
        return jsonify({"error": "Failed to analyse SME application intake"}), 500
|
| 1196 |
|
| 1197 |
if __name__ == "__main__":
    # Started directly as a script: serve the app on all interfaces, port 7860.
    app.run(port=7860, host="0.0.0.0")
|
|
|
|
| 520 |
],
|
| 521 |
}
|
| 522 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 523 |
# -- route ---------------------------------------------------------------
|
| 524 |
|
| 525 |
@app.route('/chat', methods=['POST'])
|
|
|
|
| 967 |
"error": "Failed to generate course outline from file"
|
| 968 |
}), 500
|
| 969 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 970 |
|
| 971 |
if __name__ == "__main__":
    # Started directly as a script: serve the app on all interfaces, port 7860.
    app.run(port=7860, host="0.0.0.0")
|