# sarf / app.py
# Boulbaba's picture
# Upload app.py
# b193db3 verified
from flask import Flask, request, Response
from pyaramorph import Analyzer
import json
import os
# Flask application object; the @app.route decorators in this module register
# their handlers on it, and the __main__ guard runs it.
app = Flask(__name__)
# Single Analyzer instance created once at import time and reused by every
# call to analyze().
analyzer = Analyzer()
# Arabic diacritics / characters used for arudi normalization.
# Every character is written as a \u escape so the values cannot be corrupted
# when this source file is re-encoded; each LONG_VOWELS key must be exactly one
# code point because build_arudi_key looks characters up one at a time.
SHORT_VOWELS = "\u064e\u064f\u0650"  # fatha, damma, kasra
TANWEEN = "\u064b\u064c\u064d"  # fathatan, dammatan, kasratan
SUKUN = "\u0652"  # sukun
SHADDA = "\u0651"  # shadda

# Long-vowel letters, all mapped to the same marker "M".
LONG_VOWELS = {
    "\u0627": "M",  # alif
    "\u0649": "M",  # alif maqsura
    "\u0622": "M",  # alif with madda above
    "\u0648": "M",  # waw
    "\u064a": "M",  # ya
}

# Accepted values of the "need" field of an /analyze request.
VALID_NEEDS = {"sarf", "aroudh"}
def json_response(payload, status=200):
    """Wrap *payload* in a UTF-8 JSON HTTP response.

    The payload is serialized with ``ensure_ascii=False`` so non-ASCII text
    is emitted as-is instead of as ``\\uXXXX`` escapes.

    Args:
        payload: JSON-serializable object to send as the response body.
        status: HTTP status code of the response (200 by default).

    Returns:
        A ``Response`` whose content type is
        ``application/json; charset=utf-8``.
    """
    body = json.dumps(payload, ensure_ascii=False)
    return Response(
        body,
        content_type="application/json; charset=utf-8",
        status=status,
    )
def extract_solution_only(solution_text: str) -> str:
    """Return the first line of an analysis entry without its ``solution:`` label.

    Only the first line of the stripped text is considered; any following
    lines are ignored.

    Args:
        solution_text: Raw text of one analysis entry; may be empty or None.

    Returns:
        The first line, stripped, with one leading ``solution:`` label
        removed if present. An empty string is returned for empty, None or
        whitespace-only input.
    """
    if not solution_text:
        return ""

    lines = solution_text.strip().splitlines()
    if not lines:
        # Whitespace-only input strips down to "", whose splitlines() is [];
        # indexing it would raise IndexError.
        return ""

    first_line = lines[0].strip()
    label = "solution:"
    if first_line.startswith(label):
        # Drop only the leading label; later occurrences are kept.
        return first_line[len(label):].strip()
    return first_line
def build_arudi_key(word: str) -> str:
    """
    Build a metrical/prosodic key used to deduplicate solutions.

    Each character of *word* is translated independently and the pieces
    are concatenated:
    - any character of SHORT_VOWELS (fatha/damma/kasra) -> V
    - any character of TANWEEN -> VN (kept distinct from short vowels)
    - SUKUN -> S
    - SHADDA -> D
    - any key of LONG_VOWELS (the long-vowel letters) -> its mapped
      marker (M)
    - every other character (consonants, spaces, ...) is kept as-is
    """

    def marker_for(char):
        # The order of the checks mirrors the rule list in the docstring.
        if char in SHORT_VOWELS:
            return "V"
        if char in TANWEEN:
            return "VN"
        if char == SUKUN:
            return "S"
        if char == SHADDA:
            return "D"
        # Long vowels collapse to their marker; anything else passes through.
        return LONG_VOWELS.get(char, char)

    return "".join(marker_for(char) for char in word)
def format_sarf_results(results):
    """Convert analyzer word blocks into ``{"word", "solutions"}`` dicts.

    Args:
        results: Iterable of word blocks. Each block is a sequence whose
            first item is a header string (the text ``"analysis for: "``
            is removed from it) and whose remaining items are solution
            strings. Empty blocks are skipped.

    Returns:
        One dict per non-empty block, holding the cleaned header under
        ``"word"`` and the stripped, non-blank solution strings under
        ``"solutions"``.
    """

    def to_entry(block):
        header = block[0].replace("analysis for: ", "").strip()
        stripped = (raw.strip() for raw in block[1:])
        return {
            "word": header,
            "solutions": [text for text in stripped if text],
        }

    return [to_entry(block) for block in results if block]
def format_aroudh_results(results):
    """Convert analyzer word blocks into prosody-deduplicated solutions.

    Args:
        results: Iterable of word blocks. Each block is a sequence whose
            first item is a header string (the text ``"analysis for: "``
            is removed from it) and whose remaining items are raw solution
            texts. Empty blocks are skipped.

    Returns:
        One dict per non-empty block with the cleaned header under
        ``"word"`` and, under ``"solutions"``, a list of
        ``{"solution": text}`` dicts. Each text is reduced with
        extract_solution_only; among texts sharing the same
        build_arudi_key only the first one is kept, in order of first
        appearance.
    """
    formatted = []
    for block in results:
        if not block:
            continue

        # Insertion-ordered dict: arudi key -> first solution that produced it.
        first_by_key = {}
        for raw_solution in block[1:]:
            cleaned = extract_solution_only(raw_solution)
            if cleaned:
                first_by_key.setdefault(build_arudi_key(cleaned), cleaned)

        formatted.append({
            "word": block[0].replace("analysis for: ", "").strip(),
            "solutions": [{"solution": text} for text in first_by_key.values()],
        })
    return formatted
@app.route("/", methods=["GET"])
def home():
    """Describe the service: running message, supported needs and default need."""
    service_info = {
        "success": True,
        "message": "PyAraMorph API is running",
        "supported_needs": ["sarf", "aroudh"],
        "default_need": "sarf",
    }
    return json_response(service_info)
@app.route("/health", methods=["GET"])
def health():
    """Report that the service is up; always answers with status "ok"."""
    status_payload = dict(success=True, status="ok")
    return json_response(status_payload)
@app.route("/analyze", methods=["POST"])
def analyze():
    """Analyze the posted text and return the formatted results as JSON.

    Request body (JSON object):
        text: Text to analyze; required, must be a non-blank string.
        need: "sarf" (default when missing or empty) or "aroudh";
            surrounding whitespace and letter case are ignored.

    Responses:
        200: ``{"success": True, "need", "text", "results"}`` where
            ``results`` comes from format_sarf_results or
            format_aroudh_results depending on ``need``.
        400: ``text`` is missing, blank or not a string, or ``need`` is not
            one of VALID_NEEDS.
        500: the analyzer or a formatter raised; the exception text is
            returned in ``message``.
    """
    body = request.get_json(silent=True)
    # A valid JSON body may be a list, string, number, ... and only an object
    # has fields to read; anything else is treated like an empty object so the
    # caller gets the regular 400 instead of an AttributeError.
    data = body if isinstance(body, dict) else {}

    raw_text = data.get("text") or ""
    if not isinstance(raw_text, str):
        return json_response({
            "success": False,
            "message": "text must be a string",
        }, 400)
    text = raw_text.strip()
    if not text:
        return json_response({
            "success": False,
            "message": "text is required",
        }, 400)

    raw_need = data.get("need") or "sarf"
    # A non-string need can never be valid; None falls through to the 400 below.
    need = raw_need.strip().lower() if isinstance(raw_need, str) else None
    if need not in VALID_NEEDS:
        return json_response({
            "success": False,
            "message": 'need must be either "sarf" or "aroudh"',
        }, 400)

    try:
        results = analyzer.analyze_text(text)
        if need == "sarf":
            formatted_results = format_sarf_results(results)
        else:
            formatted_results = format_aroudh_results(results)
    except Exception as e:
        # Top-level boundary: keep the traceback in the server log and still
        # answer with JSON rather than the framework's default error page.
        app.logger.exception("analysis failed")
        return json_response({
            "success": False,
            "message": str(e),
        }, 500)

    return json_response({
        "success": True,
        "need": need,
        "text": text,
        "results": formatted_results,
    })
if __name__ == "__main__":
    # Script entry point: listen on all interfaces, on the port named by the
    # PORT environment variable (7860 when it is not set), with debug off.
    port = int(os.getenv("PORT", "7860"))
    app.run(debug=False, port=port, host="0.0.0.0")