Spaces:

Juna190825
/

karuleste

Sleeping

App Files Files Community

Juna190825 committed on Sep 11, 2025

Commit

f9984cf

verified ·

1 Parent(s): 3693a28

Update app.py

Browse files

Files changed (1) hide show

app.py +124 -0

app.py CHANGED Viewed

@@ -4,6 +4,130 @@ import stanza
 # nlp = stanza.Pipeline('en', processors='tokenize,mwt,pos,lemma,depparse,ner,constituency,coref,sentiment', force=True, verbose=True)
 nlp = stanza.Pipeline('en', processors='tokenize,pos,lemma,constituency', force=True, verbose=True)
 from utils import *
 from datasets import load_dataset

 # nlp = stanza.Pipeline('en', processors='tokenize,mwt,pos,lemma,depparse,ner,constituency,coref,sentiment', force=True, verbose=True)
 nlp = stanza.Pipeline('en', processors='tokenize,pos,lemma,constituency', force=True, verbose=True)
+##################################################################
+# @title def analyze_sentence3
+def analyze_sentence3(sentence, existing_rules, debug=False):
+    """Analyzes a sentence and suggests missing rules, being punctuation-inclusive for 'S' nodes."""
+    print(f"\n🔍 Analyzing: '{sentence}'")
+    print("=" * 50)
+    doc = nlp(sentence)
+    tree = doc.sentences[0].constituency
+    suggestions = {}
+    def is_punctuation_label(label):
+        """Helper function to identify punctuation POS tags."""
+        return label in ['.', ',', ':', ';', '!', '?', '\'', '"', '`', '-', '--', '#', '$', '(', ')', '[', ']', '{', '}']
+    def has_at_least_two_children(child_labels):
+        """Check if there are at least 2 non-None children."""
+        if child_labels is None:
+            return False
+        # Count actual children (excluding None values if any)
+        count = len([label for label in child_labels if label is not None])
+        return count >= 2
+    def _traverse(node):
+        if not node.is_leaf():
+            if node.label not in EXCLUDE_NODES:
+                child_labels = tuple(child.label for child in node.children)
+                # Only process if node has 2+ real children
+                if has_at_least_two_children(child_labels):
+                    # SPECIAL HANDLING FOR 'S' NODES (include punctuation)
+                    if node.label == 'S':
+                        pattern = (node.label, child_labels)
+                        if pattern not in existing_rules and pattern not in suggestions:
+                            example_text = ' '.join(node.leaf_labels())
+                            suggestions[pattern] = example_text
+                    # STANDARD HANDLING FOR OTHER NODES
+                    else:
+                        # Suggest rule with all children (including punctuation)
+                        pattern_all = (node.label, child_labels)
+                        if pattern_all not in existing_rules and pattern_all not in suggestions:
+                            example_text = ' '.join(node.leaf_labels())
+                            suggestions[pattern_all] = example_text
+                        # If last child is punctuation, suggest rule without it
+                        if (is_punctuation_label(child_labels[-1]) and
+                            not is_punctuation_label(node.label)):
+                            non_punct_child_labels = child_labels[:-1]
+                            # Only suggest if we still have 2+ real children after removing punctuation
+                            if has_at_least_two_children(non_punct_child_labels):
+                                pattern_without_punct = (node.label, non_punct_child_labels)
+                                if (pattern_without_punct not in existing_rules and
+                                    pattern_without_punct not in suggestions):
+                                    example_text = ' '.join(node.leaf_labels())
+                                    suggestions[pattern_without_punct] = example_text
+            for child in node.children:
+                _traverse(child)
+    _traverse(tree)
+    # Only suggest wildcard rules for labels that don't have any specific patterns suggested
+    # and appear multiple times in the tree (to avoid suggesting wildcards for rare patterns)
+    label_counts = {}
+    def _count_labels(node):
+        if not node.is_leaf() and node.label not in EXCLUDE_NODES:
+            label_counts[node.label] = label_counts.get(node.label, 0) + 1
+            for child in node.children:
+                _count_labels(child)
+    _count_labels(tree)
+    # Suggest wildcard rules only for labels that appear multiple times and don't have specific rules
+    for label, count in label_counts.items():
+        if count >= 2:  # Only suggest wildcard for labels that appear multiple times
+            wildcard_pattern = (label, None)
+            has_specific_pattern = any(pattern[0] == label for pattern in suggestions.keys())
+            if (not has_specific_pattern and
+                wildcard_pattern not in existing_rules and
+                wildcard_pattern not in suggestions):
+                # Find an example for this label
+                def _find_example(node, target_label):
+                    if not node.is_leaf() and node.label == target_label:
+                        return ' '.join(node.leaf_labels())
+                    for child in node.children:
+                        result = _find_example(child, target_label)
+                        if result:
+                            return result
+                    return None
+                example_text = _find_example(tree, label)
+                if example_text:
+                    suggestions[wildcard_pattern] = example_text
+    # To exclude nodes that have '.' at the end of the list of child nodes
+    suggestions = {k: v for k, v in suggestions.items() if k[1][-1] != "."}
+    suggestions_dict = {}
+    if suggestions:
+        print("💡 Suggested new rules:")
+        for pattern, example in suggestions.items():
+            if pattern[1] is None:  # Wildcard rule
+                value = f"lambda node, *children: ...   # e.g., '{example}'"
+                print(f"  {pattern}: lambda node, *children: ...   # e.g., '{example}'")
+            else:
+                child_vars = ', '.join([f'c{i}' for i in range(len(pattern[1]))])
+                value = f"lambda node, {child_vars}: ...   # e.g., '{example}'"
+                print(f"  {pattern}: lambda node, {child_vars}: ...   # e.g., '{example}'")
+            suggestions_dict[pattern] = value
+    else:
+        print("✅ All patterns already handled!")
+    return suggestions, suggestions_dict
 from utils import *
 from datasets import load_dataset