Spaces:

GenAIDevTOProd
/

slotmatch

Running

App Files Files Community

GenAIDevTOProd committed on Aug 14, 2025

Commit

8983b2d

verified ·

1 Parent(s): f8c143d

Upload folder using huggingface_hub

Browse files

Files changed (8) hide show

README.md +66 -8
__init__.py +1 -0
examples/test_slotmatch.py +21 -0
extractor.py +53 -0
pyproject.toml +3 -0
schema.py +28 -0
setup.py +21 -0
utils.py +22 -0

README.md CHANGED Viewed

@@ -1,10 +1,68 @@
 ---
-title: Slotmatch
-emoji: 🏃
-colorFrom: blue
-colorTo: yellow
-sdk: static
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+# SlotMatch
+**SlotMatch** is a lightweight Python package for extracting structured key-value pairs from unstructured or noisy LLM outputs. It supports regex-based parsing, fuzzy key recovery, schema validation, and confidence scoring. Perfect for production RAG, chatbot, and NLU pipelines.
 ---
+## Installation
+```bash
+pip install slotmatch
+```
+## Features
+- Regex-based value extraction
+- Fuzzy key mapping (e.g., intnt → intent)
+- Schema validation for expected keys and types
+- Type coercion (str, int, float, bool)
+- Confidence scoring (regex = high, fuzzy = partial, fallback = 0)
+- Lightweight, no external dependencies
+## Usage
+```python
+from slotmatch import SlotExtractor
+schema = {
+    "name": str,
+    "intent": str,
+    "destination": str
+}
+llm_output = '''
+Hi, I'm Alice.
+{
+  "intnt": "book_flight",
+  "dest": "NYC",
+  "name": "Alice"
+}
+'''
+extractor = SlotExtractor(schema)
+print(extractor.extract(llm_output))
+```
+## Output
+```python
+{
+  'name': {'value': 'Alice', 'confidence': 1.0},
+  'intent': {'value': 'book_flight', 'confidence': 0.64},  # approximate
+  'destination': {'value': None, 'confidence': 0.0}
+}
+```
+## Example Use Cases
+- Post-processing LLM outputs (chatbots, assistants, tools)
+- Extracting form fields or user intents
+- Structuring data for downstream APIs or storage
+- Integrating LLMs with business logic (field validation, routing)
+## License
+This project is licensed under the MIT License.

__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .extractor import SlotExtractor

examples/test_slotmatch.py ADDED Viewed

	@@ -0,0 +1,21 @@

+from slotmatch import SlotExtractor
+from pprint import pprint
# Slots the example expects, each declared as a string value.
schema = dict.fromkeys(("name", "intent", "destination"), str)

# Sample model reply: free text followed by a JSON-like object whose keys are
# partly misspelled ("intnt") or abbreviated ("dest").
llm_output = '''
Hi, I’m Alice. I’d like to book a flight to New York.
{
  "intnt": "book_flight",
  "dest": "New York",
  "name": "Alice"
}
'''

# Build the extractor, run it on the sample, and pretty-print the slots.
extractor = SlotExtractor(schema=schema)
result = extractor.extract(text=llm_output)
pprint(result)

extractor.py ADDED Viewed

	@@ -0,0 +1,53 @@

+from slotmatch.schema import SchemaValidator
+from slotmatch.utils import extract_value_by_regex, fuzzy_match_key, compute_confidence
class SlotExtractor:
    """Extract schema-defined slots from free-form text.

    For every key in the schema the extractor tries, in order:

    1. an exact lookup of the key via ``extract_value_by_regex``;
    2. a fuzzy lookup, where the closest key-like token found in the text
       (chosen by ``fuzzy_match_key``) is looked up instead;
    3. a fallback entry with value ``None`` and confidence ``0.0``.
    """

    def __init__(self, schema: dict):
        """Validate *schema* and cache its keys.

        Args:
            schema: Mapping of slot name to expected type. It is handed to
                ``SchemaValidator``, whose validation errors propagate.
        """
        self.validator = SchemaValidator(schema)
        self.schema = self.validator.get_schema()
        self.schema_keys = list(self.schema.keys())

    def extract(self, text: str) -> dict:
        """Return one ``{"value": ..., "confidence": ...}`` entry per schema key.

        Args:
            text: Raw text to search for ``key: "value"`` style pairs.

        Returns:
            A dict keyed by every schema key. Exact hits carry
            ``compute_confidence("regex")``; fuzzy hits carry
            ``compute_confidence("fuzzy")`` scaled by the similarity score;
            misses carry value ``None`` and confidence ``0.0``.
        """
        slots = {}

        # The key-like tokens in the text do not depend on the slot being
        # filled, so scan once instead of once per schema key.
        # Tokens that are themselves schema keys are excluded from fuzzy
        # matching: they belong to their own slot, and matching e.g. "name"
        # against a present "names" would copy that slot's value here.
        known_keys = set(self.schema_keys)
        candidate_keys = [
            key for key in self._get_all_keys_from_text(text)
            if key not in known_keys
        ]

        for expected_key in self.schema_keys:
            expected_type = self.schema[expected_key]

            # 1. Exact key.
            raw_value = extract_value_by_regex(text, expected_key)
            if raw_value is not None:
                slots[expected_key] = {
                    "value": self._coerce_type(raw_value, expected_type),
                    "confidence": compute_confidence("regex"),
                }
                continue

            # 2. Closest misspelled/variant key found in the text.
            fuzzy_key, score = fuzzy_match_key(expected_key, candidate_keys)
            if fuzzy_key:
                raw_value = extract_value_by_regex(text, fuzzy_key)
                if raw_value is not None:
                    slots[expected_key] = {
                        "value": self._coerce_type(raw_value, expected_type),
                        "confidence": compute_confidence("fuzzy") * score,
                    }
                    continue

            # 3. Nothing usable found.
            slots[expected_key] = {"value": None, "confidence": 0.0}

        return slots

    def _get_all_keys_from_text(self, text: str) -> list:
        """Return the distinct key-like tokens in *text*, in first-seen order.

        A token is a run of word characters or hyphens, optionally quoted,
        that is followed by ``:`` or ``=``.
        """
        import re

        pattern = r'["\']?([\w-]+)["\']?\s*[:=]'
        # dict.fromkeys de-duplicates while keeping a deterministic order.
        return list(dict.fromkeys(re.findall(pattern, text)))

    def _coerce_type(self, value, expected_type):
        """Convert *value* to *expected_type*, returning it unchanged on failure.

        Booleans are parsed from text: ``true``, ``yes`` and ``1`` (any case,
        surrounding whitespace ignored) are ``True``; any other string is
        ``False``.
        """
        try:
            if expected_type is bool:
                return value.strip().lower() in {'true', 'yes', '1'}
            return expected_type(value)
        except (ValueError, TypeError, AttributeError):
            # Best effort: keep the raw value rather than fail the extraction.
            # Only conversion errors are caught so that KeyboardInterrupt and
            # genuine bugs still surface.
            return value

pyproject.toml ADDED Viewed

	@@ -0,0 +1,3 @@

+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"

schema.py ADDED Viewed

	@@ -0,0 +1,28 @@

# slotmatch/schema.py

# The only value types a schema entry is allowed to declare.
ALLOWED_TYPES = {str, int, float, bool}


class SchemaValidationError(Exception):
    """Signal that a schema failed validation."""
class SchemaValidator:
    """Check that a schema maps string keys to supported value types."""

    def __init__(self, schema: dict):
        """Store *schema* and validate it immediately.

        Raises:
            SchemaValidationError: If the schema is not a dict, a key is not
                a string, or a declared type is not in ``ALLOWED_TYPES``.
        """
        self.schema = schema
        self._validate_schema()

    def _validate_schema(self):
        """Raise ``SchemaValidationError`` on the first invalid entry."""
        if not isinstance(self.schema, dict):
            raise SchemaValidationError("Schema must be a dictionary.")
        for key, expected_type in self.schema.items():
            if not isinstance(key, str):
                raise SchemaValidationError(f"Key '{key}' must be a string.")
            # Compare by identity rather than `in ALLOWED_TYPES`: set
            # membership hashes the value, so an unhashable entry such as
            # [str] would escape as TypeError instead of a validation error.
            if not any(expected_type is allowed for allowed in ALLOWED_TYPES):
                raise SchemaValidationError(
                    f"Unsupported type '{expected_type}' for key '{key}'. "
                    f"Supported types: {ALLOWED_TYPES}"
                )

    def get_schema(self):
        """Return the validated schema (the same dict object that was passed in)."""
        return self.schema

setup.py ADDED Viewed

	@@ -0,0 +1,21 @@

+from setuptools import setup, find_packages
# Read the long description up front: the context manager closes the file
# deterministically, and the explicit encoding keeps the build independent of
# the platform default (the README contains non-ASCII characters).
with open("README.md", encoding="utf-8") as readme_file:
    long_description = readme_file.read()

setup(
    name='slotmatch',
    version='0.1.0',
    author='Your Name',
    author_email='your.email@example.com',
    description='Extract structured key-value pairs from unstructured LLM output using regex, fuzzy matching, and schema validation.',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://huggingface.co/GenAIDevTOProd',
    packages=find_packages(),
    include_package_data=True,
    python_requires='>=3.7',
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
    # No runtime dependencies.
    install_requires=[],
)

utils.py ADDED Viewed

	@@ -0,0 +1,22 @@

+import re
+import difflib
def extract_value_by_regex(text: str, key: str):
    """Return the quoted value that follows *key* in *text*, or ``None``.

    Recognises ``key: "value"`` / ``key = 'value'`` pairs where the key may
    itself be wrapped in single or double quotes.

    Args:
        text: Text to search.
        key: Literal key name; regex metacharacters in it are escaped.

    Returns:
        The first non-empty quoted value found for the key, without its
        quotes, or ``None`` when there is no match.

    NOTE: only quoted values are recognised; a bare value such as
    ``"age": 30`` does not match.
    """
    pattern = (
        # The lookbehind stops the key from matching as the tail of a longer
        # key (e.g. "name" inside "username" or "first-name").
        rf'(?<![\w-])["\']?{re.escape(key)}["\']?\s*[:=]\s*'
        # The value must close with the quote style it opened with, so an
        # apostrophe inside a double-quoted value no longer truncates it.
        r'(?:"([^"]+)"|\'([^\']+)\')'
    )
    match = re.search(pattern, text)
    if match is None:
        return None
    double_quoted, single_quoted = match.groups()
    return double_quoted if double_quoted is not None else single_quoted
def fuzzy_match_key(input_key: str, schema_keys: list, cutoff: float = 0.75):
    """Find the entry of *schema_keys* most similar to *input_key*.

    Args:
        input_key: The key to look for.
        schema_keys: Candidate keys to compare against.
        cutoff: Minimum similarity passed to ``difflib.get_close_matches``.

    Returns:
        ``(best_candidate, similarity)`` where similarity is the
        ``SequenceMatcher`` ratio between *input_key* and the candidate, or
        ``(None, 0.0)`` when no candidate reaches the cutoff.
    """
    closest = difflib.get_close_matches(input_key, schema_keys, n=1, cutoff=cutoff)
    if not closest:
        return None, 0.0
    winner = closest[0]
    similarity = difflib.SequenceMatcher(None, input_key, winner).ratio()
    return winner, similarity
def compute_confidence(method: str):
    """Return the base confidence for an extraction *method*.

    Known methods are ``"regex"``, ``"fuzzy"`` and ``"semantic"``; any other
    method name yields ``0.0``.
    """
    base_scores = {
        "regex": 1.0,
        "fuzzy": 0.7,
        "semantic": 0.5
    }
    try:
        return base_scores[method]
    except KeyError:
        return 0.0