# Space: psugam/sanskrit-parser-api
# Status: Sleeping
# File size: 3,027 Bytes

from flask import Flask, request, jsonify
from flask_cors import CORS
import process_sanskrit as ps
import re
import json

# Flask application object; the route decorators below register on it.
app = Flask(__name__)
# Enable cross-origin requests through Flask-CORS; no origin or route
# restrictions are configured here, so the extension's defaults apply.
CORS(app)

def clean_definitions(content):
    """
    Flatten nested dictionary data into a flat list of strings.

    Strings have every run of whitespace collapsed to a single space and are
    trimmed; strings that end up empty are dropped. HTML tags are left
    untouched. Lists are walked in order and dicts by their values; any
    other type contributes nothing.
    """
    if isinstance(content, str):
        # Collapse whitespace only -- markup such as <tags> must survive.
        collapsed = re.sub(r'\s+', ' ', content).strip()
        return [collapsed] if collapsed else []

    if isinstance(content, list):
        children = content
    elif isinstance(content, dict):
        children = content.values()
    else:
        # Unsupported leaf types (None, numbers, tuples, ...) are ignored.
        return []

    return [text for child in children for text in clean_definitions(child)]

@app.route('/split')
def split_word():
    """
    Split the ``word`` query parameter into components using ``ps.split``.

    Responses:
        400 ``{"error": "No word"}`` when ``word`` is missing or empty.
        200 ``{"is_compound": bool, "components": list}`` otherwise, where
        ``is_compound`` is true when more than one non-empty component
        remains. If splitting raises, the error is printed and the word is
        reported as a single, non-compound component (still HTTP 200).
    """
    word = request.args.get('word')
    if not word:
        return jsonify({"error": "No word"}), 400
    try:
        split_result = ps.split(word)
        if split_result and isinstance(split_result[0], list):
            # Nested result: the first inner list supplies the components.
            components = split_result[0]
        else:
            # Flat result is used as-is; an empty result falls back to the word.
            components = split_result if split_result else [word]
        # Drop empty/falsy pieces before counting.
        components = [c for c in components if c]
        is_compound = len(components) > 1
        return jsonify({"is_compound": is_compound, "components": components})
    except Exception as e:
        # Best-effort endpoint: keep answering with the unsplit word, but
        # record the failure (same print-style logging as /meaning) instead
        # of discarding it silently.
        print(f"Split error: {e}")
        return jsonify({"is_compound": False, "components": [word]})

@app.route('/meaning')
def get_meaning():
    """
    Look up the ``word`` query parameter with ``ps.process`` and return the
    results grouped by definition.

    ``ps.process`` is called with the 'mw', 'ap90', 'cae' and 'bhs' sources.
    Entries whose cleaned definitions are identical are merged into a single
    result: grammar tag sets not yet present are appended, and each distinct
    type is joined onto the ``type`` string with " / ". The first entry of a
    group supplies ``stem``.

    Responses:
        400 ``{"error": "No word"}`` when ``word`` is missing or empty.
        200 JSON list (possibly empty) of objects with keys ``stem``,
        ``type``, ``detected_tags`` and ``definitions``.
        500 ``{"error": <message>}`` if processing raises.
    """
    word = request.args.get('word')
    if not word:
        # Same contract as /split: reject a missing word up front rather than
        # handing None to the library.
        return jsonify({"error": "No word"}), 400
    try:
        raw_results = ps.process(word, 'mw', 'ap90', 'cae', 'bhs')
        if not raw_results:
            return jsonify([])

        # Keyed by the canonical (sorted-key) JSON form of the cleaned
        # definitions, so entries with identical definitions share a slot.
        grouped_results = {}

        for entry in raw_results:
            # Positions as consumed here: 0 = stem, 1 = type,
            # 2 = grammar tag sets, 6 = per-source dictionary data.
            stem = entry[0]
            word_type = entry[1] or "morphology"
            grammar = entry[2] or [["form recognized"]]
            dict_data = entry[6]

            formatted_defs = {}
            for source, content in dict_data.items():
                cleaned = clean_definitions(content)
                if cleaned:
                    formatted_defs[source] = cleaned

            def_key = json.dumps(formatted_defs, sort_keys=True)

            existing = grouped_results.get(def_key)
            if existing is None:
                grouped_results[def_key] = {
                    "stem": stem,
                    "type": word_type,
                    # Copy: later merges append to this list, and that must
                    # not mutate the list owned by the ps.process result.
                    "detected_tags": list(grammar),
                    "definitions": formatted_defs,
                }
                continue

            for tag_set in grammar:
                if tag_set not in existing["detected_tags"]:
                    existing["detected_tags"].append(tag_set)
            # Compare against the individual recorded types; a plain substring
            # test on the joined string would treat a type as already present
            # whenever it is a substring of a recorded one.
            if word_type not in existing["type"].split(" / "):
                existing["type"] += f" / {word_type}"

        return jsonify(list(grouped_results.values()))
    except Exception as e:
        print(f"Meaning error: {e}")
        return jsonify({"error": str(e)}), 500

# Start Flask's built-in server only when this file is executed directly,
# not when it is imported.
if __name__ == '__main__':
    # NOTE(review): debug=True turns on Flask's debug mode; confirm this
    # entry point is never used to serve the app publicly.
    app.run(debug=True)