psugam committed on
Commit
6a1a740
·
verified ·
1 Parent(s): 61a4cfd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -73
app.py CHANGED
@@ -1,74 +1,90 @@
1
- from flask import Flask, request, jsonify
2
- from flask_cors import CORS
3
- import process_sanskrit as ps
4
- import re
5
-
6
- app = Flask(__name__)
7
- CORS(app)
8
-
9
def clean_dictionary_html(content):
    """Strip HTML tags from dictionary data while keeping its nesting.

    Args:
        content: A string, or an arbitrarily nested list/tuple/dict of
            such values.

    Returns:
        * ``str`` input: the text with every ``<...>`` tag removed, runs of
          whitespace collapsed to a single space, and the ends trimmed.
        * ``list``/``tuple`` input: a list of the cleaned items.
        * ``dict`` input: a dict with the same keys and cleaned values.
        * ``None``: an empty string, so that a missing value does not show
          up as the literal text ``"None"``.
        * Anything else: ``str(content)``.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        without_tags = re.sub(r'<[^>]*>', '', content)
        return re.sub(r'\s+', ' ', without_tags).strip()
    # Tuples are treated like lists; falling through to str() would return
    # their repr (e.g. "('a', 'b')") as if it were a definition.
    if isinstance(content, (list, tuple)):
        return [clean_dictionary_html(item) for item in content]
    if isinstance(content, dict):
        return {key: clean_dictionary_html(value) for key, value in content.items()}
    return str(content)
18
-
19
@app.route('/split')
def split_word():
    """Split the ``word`` query parameter into its components.

    Responses:
        * 400 ``{"error": "No word"}`` when ``word`` is missing or empty.
        * 200 ``{"is_compound": bool, "components": [...]}`` otherwise.
          ``is_compound`` is true when more than one non-empty component
          was produced.

    Any exception raised while splitting is logged and answered with the
    unsplit word as the single component (best-effort fallback, still 200).
    """
    word = request.args.get('word')
    if not word:
        return jsonify({"error": "No word"}), 400
    try:
        split_result = ps.split(word)

        if isinstance(split_result, str):
            # A bare string must not be iterated character by character.
            candidates = [split_result]
        elif split_result and isinstance(split_result[0], (list, tuple)):
            # The result may be a list of alternative splits
            # (e.g. [['p1', 'p2', 'p3']]); the first alternative is used.
            candidates = split_result[0]
        else:
            candidates = split_result or [word]

        # Drop empty pieces; if nothing is left, fall back to the word itself
        # so the response never carries an empty component list.
        components = [part for part in candidates if part] or [word]
        return jsonify({
            "is_compound": len(components) > 1,
            "components": components,
        })
    except Exception:
        # Deliberate best-effort: report the word as unsplit, but keep the
        # traceback in the application log.
        app.logger.exception("Split error for word %r", word)
        return jsonify({"is_compound": False, "components": [word]})
41
-
42
@app.route('/meaning')
def get_meaning():
    """Look up the ``word`` query parameter and return its analyses.

    The word is passed to ``ps.process`` together with the dictionary codes
    ``'mw'``, ``'ap90'``, ``'cae'`` and ``'bhs'``.

    Responses:
        * 400 ``{"error": "No word"}`` when ``word`` is missing or empty.
        * 200 ``[]`` when the lookup yields nothing.
        * 200 list of objects with keys ``stem``, ``type``,
          ``detected_tags`` and ``definitions`` (HTML stripped), one per
          result entry.
        * 500 ``{"error": "<message>"}`` when processing raises.
    """
    word = request.args.get('word')
    if not word:
        # Reject a missing word up front instead of letting the lookup fail
        # on None and report it as a server error.
        return jsonify({"error": "No word"}), 400
    try:
        raw_results = ps.process(word, 'mw', 'ap90', 'cae', 'bhs')
        if not raw_results:
            return jsonify([])

        # Entry layout as used here: [0] stem, [1] word type,
        # [2] grammatical tags, [6] dictionary data.
        final_output = [
            {
                "stem": entry[0],
                "type": entry[1] or "morphology",
                "detected_tags": entry[2] or [["form recognized"]],
                "definitions": clean_dictionary_html(entry[6]),
            }
            for entry in raw_results
        ]
        return jsonify(final_output)
    except Exception as e:
        app.logger.exception("Meaning error for word %r", word)
        return jsonify({"error": str(e)}), 500
72
-
73
- if __name__ == '__main__':
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  app.run(debug=True)
 
1
+ from flask import Flask, request, jsonify
2
+ from flask_cors import CORS
3
+ import process_sanskrit as ps
4
+ import re
5
+ import json
6
+
7
+ app = Flask(__name__)
8
+ CORS(app)
9
+
10
def clean_definitions(content):
    """Flatten nested dictionary data into a flat list of strings.

    Whitespace inside each string is normalized (runs collapsed to one
    space, ends trimmed) but HTML tags are left untouched.

    Args:
        content: A string, or an arbitrarily nested list/tuple/dict of
            strings. For dicts only the values are visited; keys are
            discarded.

    Returns:
        A list of the non-empty normalized strings, in traversal order.
        Values of any other type (``None``, numbers, ...) contribute
        nothing.
    """
    if isinstance(content, str):
        normalized = re.sub(r'\s+', ' ', content).strip()
        return [normalized] if normalized else []

    if isinstance(content, dict):
        children = content.values()
    elif isinstance(content, (list, tuple)):
        # Tuples are flattened like lists rather than silently dropped.
        children = content
    else:
        return []

    flattened = []
    for child in children:
        flattened.extend(clean_definitions(child))
    return flattened
28
+
29
@app.route('/split')
def split_word():
    """Split the ``word`` query parameter into its components.

    Responses:
        * 400 ``{"error": "No word"}`` when ``word`` is missing or empty.
        * 200 ``{"is_compound": bool, "components": [...]}`` otherwise.
          ``is_compound`` is true when more than one non-empty component
          was produced.

    Any exception raised while splitting is logged and answered with the
    unsplit word as the single component (best-effort fallback, still 200).
    """
    word = request.args.get('word')
    if not word:
        return jsonify({"error": "No word"}), 400
    try:
        split_result = ps.split(word)

        if isinstance(split_result, str):
            # A bare string must not be iterated character by character.
            candidates = [split_result]
        elif split_result and isinstance(split_result[0], (list, tuple)):
            # The result may be a list of alternative splits; the first
            # alternative is used.
            candidates = split_result[0]
        else:
            candidates = split_result or [word]

        # Drop empty pieces; if nothing is left, fall back to the word itself
        # so the response never carries an empty component list.
        components = [part for part in candidates if part] or [word]
        return jsonify({
            "is_compound": len(components) > 1,
            "components": components,
        })
    except Exception:
        # Deliberate best-effort: report the word as unsplit, but record the
        # failure (with traceback) instead of swallowing it silently.
        app.logger.exception("Split error for word %r", word)
        return jsonify({"is_compound": False, "components": [word]})
44
+
45
@app.route('/meaning')
def get_meaning():
    """Look up the ``word`` query parameter and return grouped analyses.

    The word is passed to ``ps.process`` together with the dictionary codes
    ``'mw'``, ``'ap90'``, ``'cae'`` and ``'bhs'``. Result entries that share
    the same stem and the same cleaned definitions are merged into one
    object whose ``detected_tags`` is the union of their tag sets and whose
    ``type`` joins the distinct word types with ``" / "``.

    Responses:
        * 400 ``{"error": "No word"}`` when ``word`` is missing or empty.
        * 200 ``[]`` when the lookup yields nothing.
        * 200 list of objects with keys ``stem``, ``type``,
          ``detected_tags`` and ``definitions`` (a mapping of dictionary
          source to a flat list of definition strings; sources with no
          text are omitted).
        * 500 ``{"error": "<message>"}`` when processing raises.
    """
    word = request.args.get('word')
    if not word:
        # Reject a missing word up front instead of letting the lookup fail
        # on None and report it as a server error.
        return jsonify({"error": "No word"}), 400
    try:
        raw_results = ps.process(word, 'mw', 'ap90', 'cae', 'bhs')
        if not raw_results:
            return jsonify([])

        grouped_results = {}

        # Entry layout as used here: [0] stem, [1] word type,
        # [2] grammatical tag sets, [6] mapping of dictionary source -> data.
        for entry in raw_results:
            stem = entry[0]
            word_type = entry[1] or "morphology"
            grammar = entry[2] or [["form recognized"]]

            formatted_defs = {}
            for source, content in entry[6].items():
                cleaned = clean_definitions(content)
                if cleaned:
                    formatted_defs[source] = cleaned

            # The stem is part of the key so that unrelated stems whose
            # definitions happen to be identical (e.g. both empty) are not
            # collapsed into a single result.
            group_key = json.dumps([stem, formatted_defs], sort_keys=True)

            existing = grouped_results.get(group_key)
            if existing is None:
                grouped_results[group_key] = {
                    "stem": stem,
                    "type": word_type,
                    # Copy the tag list: it is appended to when later entries
                    # merge in, and the library's own data must stay intact.
                    "detected_tags": (
                        list(grammar)
                        if isinstance(grammar, (list, tuple))
                        else grammar
                    ),
                    "definitions": formatted_defs,
                }
                continue

            for tag_set in grammar:
                if tag_set not in existing["detected_tags"]:
                    existing["detected_tags"].append(tag_set)
            # Compare against whole type names; a substring test would treat
            # "noun" as already present in "pronoun".
            if word_type not in existing["type"].split(" / "):
                existing["type"] += f" / {word_type}"

        return jsonify(list(grouped_results.values()))
    except Exception as e:
        app.logger.exception("Meaning error for word %r", word)
        return jsonify({"error": str(e)}), 500
88
+
89
+ if __name__ == '__main__':
90
  app.run(debug=True)