Spaces:

psugam
/

sanskrit-parser-api

Sleeping

App Files Files Community

sanskrit-parser-api / app.py

psugam

Update app.py

6a1a740 verified about 1 month ago

raw

history blame contribute delete

3.03 kB

	from flask import Flask, request, jsonify
	from flask_cors import CORS
	import process_sanskrit as ps
	import re
	import json

	# Module-level Flask application object; the route decorators below register
	# their handlers on it.
	app = Flask(__name__)
	# Wrap the app with flask-cors. No options are passed, so the library's
	# default CORS policy applies to every route.
	CORS(app)

	def clean_definitions(content):
	    """
	    Flatten arbitrarily nested dictionary data into a flat list of strings.

	    Strings have every run of whitespace collapsed to a single space and are
	    trimmed; strings that end up empty are dropped. HTML tags are left
	    untouched. Lists are walked in order, dicts are walked by value (keys
	    are ignored), and any other type contributes nothing.
	    """
	    # Leaf case: a single piece of text.
	    if isinstance(content, str):
	        normalized = re.sub(r'\s+', ' ', content).strip()
	        return [normalized] if normalized else []

	    # Container cases: pick the children to recurse into.
	    if isinstance(content, list):
	        children = content
	    elif isinstance(content, dict):
	        children = content.values()
	    else:
	        # Neither text nor a supported container: nothing to collect.
	        return []

	    return [text for child in children for text in clean_definitions(child)]

	@app.route('/split')
	def split_word():
	    """
	    Split the ``word`` query parameter into its components via ``ps.split``.

	    Responses:
	        400 ``{"error": "No word"}`` when ``word`` is missing or empty.
	        200 ``{"is_compound": bool, "components": [...]}`` otherwise, where
	        ``is_compound`` is true when more than one non-empty component was
	        found. If splitting fails or yields nothing usable, the word itself
	        is returned as the single component (best-effort fallback).
	    """
	    word = request.args.get('word')
	    if not word:
	        return jsonify({"error": "No word"}), 400
	    try:
	        split_result = ps.split(word)
	        if isinstance(split_result, str):
	            # Defensive: a bare string would otherwise be iterated character
	            # by character below. NOTE(review): confirm whether ps.split can
	            # actually return a plain string.
	            candidates = [split_result]
	        elif split_result and isinstance(split_result[0], list):
	            # Nested result: use the first inner list as the components.
	            candidates = split_result[0]
	        else:
	            candidates = split_result or [word]
	        # Drop empty pieces; if nothing is left, fall back to the word itself
	        # so the response shape matches the other fallbacks.
	        components = [c for c in candidates if c] or [word]
	        return jsonify({"is_compound": len(components) > 1, "components": components})
	    except Exception as e:
	        # Deliberate best-effort: still answer with the unsplit word, but log
	        # the failure instead of swallowing it silently.
	        print(f"Split error: {e}")
	        return jsonify({"is_compound": False, "components": [word]})

	@app.route('/meaning')
	def get_meaning():
	    """
	    Look up meanings for the ``word`` query parameter via ``ps.process``.

	    ``ps.process`` is called with 'mw', 'ap90', 'cae' and 'bhs' (presumably
	    dictionary identifiers). Each returned entry is read positionally:
	    index 0 is the stem, 1 the word type, 2 the grammar tag sets and 6 the
	    per-source dictionary data. Entries whose cleaned definitions are
	    identical are merged into one result that accumulates all distinct tag
	    sets and word types.

	    Responses:
	        400 ``{"error": "No word"}`` when ``word`` is missing or empty.
	        200 a JSON list of ``{"stem", "type", "detected_tags",
	        "definitions"}`` objects (``[]`` when nothing was found).
	        500 ``{"error": <message>}`` when processing raises.
	    """
	    word = request.args.get('word')
	    if not word:
	        # Same validation and message as /split, instead of letting a missing
	        # word reach ps.process.
	        return jsonify({"error": "No word"}), 400
	    try:
	        raw_results = ps.process(word, 'mw', 'ap90', 'cae', 'bhs')
	        if not raw_results:
	            return jsonify([])

	        # Keyed by the canonical JSON of the cleaned definitions, so entries
	        # with identical definitions collapse into one result.
	        grouped_results = {}

	        for entry in raw_results:
	            stem = entry[0]
	            word_type = entry[1] or "morphology"
	            grammar = entry[2] or [["form recognized"]]
	            dict_data = entry[6]

	            formatted_defs = {}
	            for source, content in dict_data.items():
	                cleaned = clean_definitions(content)
	                if cleaned:
	                    formatted_defs[source] = cleaned

	            def_key = json.dumps(formatted_defs, sort_keys=True)
	            existing = grouped_results.get(def_key)

	            if existing is None:
	                grouped_results[def_key] = {
	                    "stem": stem,
	                    "type": word_type,
	                    # Copy so later merges append to our own list rather than
	                    # mutating the list that belongs to raw_results.
	                    # Assumes entry[2] is a list of tag sets, as the fallback
	                    # value suggests.
	                    "detected_tags": list(grammar),
	                    "definitions": formatted_defs,
	                }
	                continue

	            for tag_set in grammar:
	                if tag_set not in existing["detected_tags"]:
	                    existing["detected_tags"].append(tag_set)
	            # Compare whole type labels: a plain substring test would treat
	            # e.g. "noun" as already present in "pronoun".
	            if word_type not in existing["type"].split(" / "):
	                existing["type"] += f" / {word_type}"

	        return jsonify(list(grouped_results.values()))
	    except Exception as e:
	        print(f"Meaning error: {e}")
	        return jsonify({"error": str(e)}), 500

	if __name__ == '__main__':
	    # Script entry point: start Flask's built-in development server.
	    import os

	    # Debug mode enables the Werkzeug interactive debugger, which allows
	    # arbitrary code execution from the browser, so it must not be on by
	    # default. Opt in explicitly with FLASK_DEBUG=1 for local development.
	    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
	    app.run(debug=debug_enabled)