Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import os
|
| 2 |
import re
|
|
|
|
| 3 |
|
| 4 |
import openai
|
| 5 |
from dotenv import load_dotenv
|
|
@@ -14,7 +15,6 @@ from pydantic import BaseModel
|
|
| 14 |
load_dotenv()
|
| 15 |
api_key = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
|
| 16 |
|
| 17 |
-
# You can change the model later if needed.
|
| 18 |
model_link = "meta-llama/Meta-Llama-3-8B-Instruct"
|
| 19 |
base_url = "https://router.huggingface.co/v1"
|
| 20 |
|
|
@@ -27,7 +27,12 @@ class Message(BaseModel):
|
|
| 27 |
|
| 28 |
@app.get("/", response_class=HTMLResponse)
|
| 29 |
async def read_root():
|
| 30 |
-
return "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
|
| 33 |
# =========================
|
|
@@ -163,7 +168,7 @@ def detect_intent(text: str) -> str:
|
|
| 163 |
):
|
| 164 |
return "HUMAN_AGENT"
|
| 165 |
|
| 166 |
-
# 2) Specific course types first
|
| 167 |
if any(x in t for x in ["online", "اونلاين", "اون لاين", "zoom", "زووم", "من البيت"]):
|
| 168 |
return "ONLINE_COURSES"
|
| 169 |
|
|
@@ -227,7 +232,7 @@ def detect_intent(text: str) -> str:
|
|
| 227 |
return "OTHER"
|
| 228 |
|
| 229 |
|
| 230 |
-
def detect_course_subtype(text: str) -> str
|
| 231 |
t = normalize_arabic(text)
|
| 232 |
|
| 233 |
if any(x in t for x in ["express", "اكسبريس", "سريع", "super intensive"]):
|
|
@@ -340,8 +345,6 @@ def sanitize_reply(text: str) -> str:
|
|
| 340 |
"يهتم بيك": "تحب",
|
| 341 |
"اقترحك": "أقترح لك",
|
| 342 |
"يمكنني": "أقدر",
|
| 343 |
-
"سأقوم": "ه",
|
| 344 |
-
"سوف": "ه",
|
| 345 |
"العميل": "إنت",
|
| 346 |
"المستخدم": "إنت",
|
| 347 |
}
|
|
@@ -349,21 +352,16 @@ def sanitize_reply(text: str) -> str:
|
|
| 349 |
for old, new in replacements.items():
|
| 350 |
text = text.replace(old, new)
|
| 351 |
|
| 352 |
-
# Remove bullets spam
|
| 353 |
text = re.sub(r"[•\-]{2,}", "-", text)
|
| 354 |
|
| 355 |
-
# Remove extra empty lines
|
| 356 |
lines = [line.strip() for line in text.splitlines() if line.strip()]
|
| 357 |
lines = lines[:5]
|
| 358 |
-
|
| 359 |
text = "\n".join(lines)
|
| 360 |
|
| 361 |
-
# Avoid too much English-heavy reply
|
| 362 |
english_words = re.findall(r"[A-Za-z]{4,}", text)
|
| 363 |
if len(english_words) > 8:
|
| 364 |
return fallback_customer_service_reply()
|
| 365 |
|
| 366 |
-
# Prevent invented addresses / prices / schedules if model sneaks them in
|
| 367 |
blocked_patterns = [
|
| 368 |
r"\b\d+\s*جنيه\b",
|
| 369 |
r"\b\d+\s*egp\b",
|
|
@@ -371,9 +369,11 @@ def sanitize_reply(text: str) -> str:
|
|
| 371 |
r"\bالمعاد\b",
|
| 372 |
r"\bالساعة\b",
|
| 373 |
r"\bالسعر\b",
|
|
|
|
| 374 |
r"\bالأسعار\b",
|
| 375 |
r"\bفرع\s+\w+",
|
| 376 |
]
|
|
|
|
| 377 |
lowered = normalize_arabic(text)
|
| 378 |
for pattern in blocked_patterns:
|
| 379 |
if re.search(pattern, lowered, flags=re.IGNORECASE):
|
|
@@ -497,13 +497,11 @@ def process_text(user_text: str):
|
|
| 497 |
|
| 498 |
intent = detect_intent(user_text)
|
| 499 |
|
| 500 |
-
# 1) Rule-based first for known intents
|
| 501 |
answer = generate_rule_based_reply(intent, user_text)
|
| 502 |
if answer:
|
| 503 |
answer = sanitize_reply(answer)
|
| 504 |
return {"ok": True, "intent": intent, "reply": answer}
|
| 505 |
|
| 506 |
-
# 2) LLM only for unknown / out-of-scope style messages
|
| 507 |
llm_answer = process_with_llm(user_text)
|
| 508 |
return {"ok": True, "intent": intent, "reply": llm_answer}
|
| 509 |
|
|
@@ -514,7 +512,14 @@ def process_text(user_text: str):
|
|
| 514 |
|
| 515 |
@app.post("/processtext")
|
| 516 |
async def receive_updates(request: Request):
|
| 517 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 518 |
print("Received Update:", data)
|
| 519 |
|
| 520 |
result = process_text(data.get("message", ""))
|
|
|
|
| 1 |
import os
|
| 2 |
import re
|
| 3 |
+
from typing import Optional
|
| 4 |
|
| 5 |
import openai
|
| 6 |
from dotenv import load_dotenv
|
|
|
|
| 15 |
load_dotenv()
|
| 16 |
api_key = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
|
| 17 |
|
|
|
|
| 18 |
model_link = "meta-llama/Meta-Llama-3-8B-Instruct"
|
| 19 |
base_url = "https://router.huggingface.co/v1"
|
| 20 |
|
|
|
|
| 27 |
|
| 28 |
@app.get("/", response_class=HTMLResponse)
|
| 29 |
async def read_root():
|
| 30 |
+
return "Welcome to Up to 12 Chat Processor"
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
@app.get("/health")
|
| 34 |
+
async def health():
|
| 35 |
+
return {"ok": True, "service": "up"}
|
| 36 |
|
| 37 |
|
| 38 |
# =========================
|
|
|
|
| 168 |
):
|
| 169 |
return "HUMAN_AGENT"
|
| 170 |
|
| 171 |
+
# 2) Specific course types first
|
| 172 |
if any(x in t for x in ["online", "اونلاين", "اون لاين", "zoom", "زووم", "من البيت"]):
|
| 173 |
return "ONLINE_COURSES"
|
| 174 |
|
|
|
|
| 232 |
return "OTHER"
|
| 233 |
|
| 234 |
|
| 235 |
+
def detect_course_subtype(text: str) -> Optional[str]:
|
| 236 |
t = normalize_arabic(text)
|
| 237 |
|
| 238 |
if any(x in t for x in ["express", "اكسبريس", "سريع", "super intensive"]):
|
|
|
|
| 345 |
"يهتم بيك": "تحب",
|
| 346 |
"اقترحك": "أقترح لك",
|
| 347 |
"يمكنني": "أقدر",
|
|
|
|
|
|
|
| 348 |
"العميل": "إنت",
|
| 349 |
"المستخدم": "إنت",
|
| 350 |
}
|
|
|
|
| 352 |
for old, new in replacements.items():
|
| 353 |
text = text.replace(old, new)
|
| 354 |
|
|
|
|
| 355 |
text = re.sub(r"[•\-]{2,}", "-", text)
|
| 356 |
|
|
|
|
| 357 |
lines = [line.strip() for line in text.splitlines() if line.strip()]
|
| 358 |
lines = lines[:5]
|
|
|
|
| 359 |
text = "\n".join(lines)
|
| 360 |
|
|
|
|
| 361 |
english_words = re.findall(r"[A-Za-z]{4,}", text)
|
| 362 |
if len(english_words) > 8:
|
| 363 |
return fallback_customer_service_reply()
|
| 364 |
|
|
|
|
| 365 |
blocked_patterns = [
|
| 366 |
r"\b\d+\s*جنيه\b",
|
| 367 |
r"\b\d+\s*egp\b",
|
|
|
|
| 369 |
r"\bالمعاد\b",
|
| 370 |
r"\bالساعة\b",
|
| 371 |
r"\bالسعر\b",
|
| 372 |
+
r"\bالاسعار\b",
|
| 373 |
r"\bالأسعار\b",
|
| 374 |
r"\bفرع\s+\w+",
|
| 375 |
]
|
| 376 |
+
|
| 377 |
lowered = normalize_arabic(text)
|
| 378 |
for pattern in blocked_patterns:
|
| 379 |
if re.search(pattern, lowered, flags=re.IGNORECASE):
|
|
|
|
| 497 |
|
| 498 |
intent = detect_intent(user_text)
|
| 499 |
|
|
|
|
| 500 |
answer = generate_rule_based_reply(intent, user_text)
|
| 501 |
if answer:
|
| 502 |
answer = sanitize_reply(answer)
|
| 503 |
return {"ok": True, "intent": intent, "reply": answer}
|
| 504 |
|
|
|
|
| 505 |
llm_answer = process_with_llm(user_text)
|
| 506 |
return {"ok": True, "intent": intent, "reply": llm_answer}
|
| 507 |
|
|
|
|
| 512 |
|
| 513 |
@app.post("/processtext")
|
| 514 |
async def receive_updates(request: Request):
|
| 515 |
+
try:
|
| 516 |
+
data = await request.json()
|
| 517 |
+
except Exception:
|
| 518 |
+
return JSONResponse(
|
| 519 |
+
content={"ok": False, "error": "Invalid JSON payload"},
|
| 520 |
+
status_code=400
|
| 521 |
+
)
|
| 522 |
+
|
| 523 |
print("Received Update:", data)
|
| 524 |
|
| 525 |
result = process_text(data.get("message", ""))
|