"""Small Flask API exposing pyaramorph analysis in two output flavours.

Endpoints:
    GET  /         -- service banner with the supported ``need`` values.
    GET  /health   -- liveness probe.
    POST /analyze  -- analyze the JSON field ``text``; ``need`` selects the
                      output format ("sarf" or "aroudh", default "sarf").
"""

import json
import os

from flask import Flask, request, Response
from pyaramorph import Analyzer

app = Flask(__name__)
analyzer = Analyzer()

# Arabic diacritics / characters used for arudi normalization
SHORT_VOWELS = "\u064e\u064f\u0650"  # fatha, damma, kasra
TANWEEN = "\u064b\u064c\u064d"  # fathatan, dammatan, kasratan
SUKUN = "\u0652"
SHADDA = "\u0651"

# Every long-vowel letter collapses to the same marker "M".
LONG_VOWELS = {
    "ا": "M",
    "ى": "M",
    "آ": "M",
    "و": "M",
    "ي": "M",
}

VALID_NEEDS = {"sarf", "aroudh"}

# Prefix stripped from the first entry of each analyzer word block.
_HEADER_PREFIX = "analysis for: "
# Prefix stripped from the first line of a solution text.
_SOLUTION_PREFIX = "solution:"


def json_response(payload, status=200):
    """Serialize *payload* to a UTF-8 JSON ``Response``.

    ``ensure_ascii=False`` keeps non-ASCII (Arabic) text readable in the
    body instead of emitting ``\\uXXXX`` escapes.

    Args:
        payload: Any JSON-serializable object.
        status: HTTP status code for the response.

    Returns:
        A ``flask.Response`` with an ``application/json`` content type.
    """
    return Response(
        json.dumps(payload, ensure_ascii=False),
        status=status,
        content_type="application/json; charset=utf-8",
    )


def extract_solution_only(solution_text: str) -> str:
    """Return the first line of *solution_text* without its "solution:" prefix.

    Args:
        solution_text: Raw solution text; may be empty, ``None`` or
            whitespace-only.

    Returns:
        The stripped first line, with one leading ``"solution:"`` removed
        when present. An empty string when there is no usable content.
    """
    if not solution_text:
        return ""

    lines = solution_text.strip().splitlines()
    if not lines:
        # Whitespace-only input: previously this indexed an empty list and
        # raised IndexError.
        return ""

    first_line = lines[0].strip()
    if first_line.startswith(_SOLUTION_PREFIX):
        return first_line.replace(_SOLUTION_PREFIX, "", 1).strip()
    return first_line


def build_arudi_key(word: str) -> str:
    """Build a metrical/prosodic key for deduplication.

    Rules:
    - fatha/damma/kasra are considered equivalent -> V
    - tanween has a special status and is preserved distinctly -> VN
    - long-vowel letters (the keys of ``LONG_VOWELS``) are considered
      equivalent -> M
    - sukun is preserved -> S
    - shadda is preserved -> D
    - every other character is preserved as-is

    Args:
        word: The (vocalized) word to normalize.

    Returns:
        The normalized key; two words with the same key are treated as
        duplicates by ``format_aroudh_results``.
    """
    normalized = []
    for ch in word:
        if ch in SHORT_VOWELS:
            normalized.append("V")
        elif ch in TANWEEN:
            normalized.append("VN")
        elif ch == SUKUN:
            normalized.append("S")
        elif ch == SHADDA:
            normalized.append("D")
        elif ch in LONG_VOWELS:
            normalized.append(LONG_VOWELS[ch])
        else:
            normalized.append(ch)
    return "".join(normalized)


def _word_from_header(header: str) -> str:
    """Strip the "analysis for: " marker from a word-block header."""
    return header.replace(_HEADER_PREFIX, "").strip()


def format_sarf_results(results):
    """Format analyzer output for the "sarf" need.

    Args:
        results: Iterable of word blocks. Each non-empty block is indexed as
            ``[header, solution_text, ...]`` -- presumably the shape returned
            by ``Analyzer.analyze_text``; verify against pyaramorph.

    Returns:
        A list of ``{"word": ..., "solutions": [...]}`` dicts, one per
        non-empty block, where ``solutions`` holds every non-blank solution
        text, stripped.
    """
    formatted_results = []
    for word_block in results:
        if not word_block:
            continue

        solutions = [
            solution_text.strip()
            for solution_text in word_block[1:]
            if solution_text.strip()
        ]
        formatted_results.append({
            "word": _word_from_header(word_block[0]),
            "solutions": solutions,
        })
    return formatted_results


def format_aroudh_results(results):
    """Format analyzer output for the "aroudh" need.

    Only the first line of each solution is kept (see
    ``extract_solution_only``), and solutions sharing the same key from
    ``build_arudi_key`` are collapsed to the first one encountered.

    Args:
        results: Iterable of word blocks, same shape as accepted by
            ``format_sarf_results``.

    Returns:
        A list of ``{"word": ..., "solutions": [{"solution": ...}, ...]}``
        dicts, one per non-empty block.
    """
    formatted_results = []
    for word_block in results:
        if not word_block:
            continue

        unique_solutions = []
        seen_arudi_keys = set()
        for solution_text in word_block[1:]:
            clean_solution = extract_solution_only(solution_text)
            if not clean_solution:
                continue

            arudi_key = build_arudi_key(clean_solution)
            if arudi_key in seen_arudi_keys:
                continue

            seen_arudi_keys.add(arudi_key)
            unique_solutions.append({"solution": clean_solution})

        formatted_results.append({
            "word": _word_from_header(word_block[0]),
            "solutions": unique_solutions,
        })
    return formatted_results


@app.route("/", methods=["GET"])
def home():
    """Return a banner describing the service and its supported needs."""
    return json_response({
        "success": True,
        "message": "PyAraMorph API is running",
        "supported_needs": ["sarf", "aroudh"],
        "default_need": "sarf",
    })


@app.route("/health", methods=["GET"])
def health():
    """Liveness probe; always reports ``ok``."""
    return json_response({"success": True, "status": "ok"})


@app.route("/analyze", methods=["POST"])
def analyze():
    """Analyze the posted text and return results in the requested format.

    Expects a JSON object with:
        text: non-empty string to analyze (required).
        need: "sarf" or "aroudh" (optional, defaults to "sarf").

    Returns:
        200 with ``{"success": True, "need", "text", "results"}`` on success;
        400 when ``text``/``need`` is missing or invalid;
        500 with the exception message when analysis fails.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # Missing/invalid JSON, or a JSON value that is not an object
        # (e.g. a list): treat as an empty request rather than crashing
        # on ``.get``.
        data = {}

    text = data.get("text") or ""
    need = data.get("need") or "sarf"

    # Truthy non-string values used to raise AttributeError on ``.strip()``
    # and surface as an unhandled 500; reject them as client errors.
    if not isinstance(text, str):
        return json_response({
            "success": False,
            "message": "text must be a string",
        }, 400)
    if not isinstance(need, str):
        return json_response({
            "success": False,
            "message": 'need must be either "sarf" or "aroudh"',
        }, 400)

    text = text.strip()
    need = need.strip().lower()

    if not text:
        return json_response({
            "success": False,
            "message": "text is required",
        }, 400)

    if need not in VALID_NEEDS:
        return json_response({
            "success": False,
            "message": 'need must be either "sarf" or "aroudh"',
        }, 400)

    try:
        results = analyzer.analyze_text(text)
        if need == "sarf":
            formatted_results = format_sarf_results(results)
        else:
            formatted_results = format_aroudh_results(results)
    except Exception as e:
        # Top-level boundary: keep the JSON error contract for clients but
        # record the traceback so failures are diagnosable.
        app.logger.exception("analysis failed")
        return json_response({
            "success": False,
            "message": str(e),
        }, 500)

    return json_response({
        "success": True,
        "need": need,
        "text": text,
        "results": formatted_results,
    })


if __name__ == "__main__":
    port = int(os.environ.get("PORT", "7860"))
    app.run(host="0.0.0.0", port=port, debug=False)