Upload 23 files
Browse files- .gitignore +3 -0
- LICENSE +3 -0
- README.md +103 -3
- config/graph_schema.json +10 -0
- config/model_config.yaml +3 -0
- data/examples.json +9 -0
- data/synthetic_intents.json +6 -0
- model_card.md +7 -0
- notebooks/intentgraph_demo.ipynb +1 -0
- requirements.txt +1 -0
- scripts/export_model.py +5 -0
- scripts/preprocess_data.py +5 -0
- src/__init__.py +1 -0
- src/graph_builder.py +10 -0
- src/graph_updater.py +5 -0
- src/inference.py +15 -0
- src/intent_extractor.py +7 -0
- src/tokenizer.py +3 -0
- tests/test_graph_building.py +6 -0
- tests/test_intent_extraction.py +6 -0
- training/evaluate.py +5 -0
- training/loss.py +2 -0
- training/train.py +5 -0
.gitignore
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
.env
|
| 3 |
+
*.pyc
|
LICENSE
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Apache License
|
| 2 |
+
Version 2.0, January 2004
|
| 3 |
+
http://www.apache.org/licenses/
|
README.md
CHANGED
|
@@ -1,3 +1,103 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# IntentGraphLM 🚀
|
| 2 |
+
|
| 3 |
+
**IntentGraphLM** is an open-source language model pipeline that converts raw natural language into **dynamic intent graphs**, enabling smarter agents, workflow orchestration, and multi-step task planning.
|
| 4 |
+
|
| 5 |
+
This project is designed to be **lightweight, extensible, and Hugging Face–ready**, with room to grow into agentic AI systems.
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## 🔍 What Problem Does It Solve?
|
| 10 |
+
|
| 11 |
+
Traditional intent detection returns flat labels.
|
| 12 |
+
IntentGraphLM goes further by:
|
| 13 |
+
|
| 14 |
+
- Extracting **multiple intents**
|
| 15 |
+
- Modeling **relationships between intents**
|
| 16 |
+
- Representing them as a **directed graph**
|
| 17 |
+
- Enabling **dynamic updates** during conversations
|
| 18 |
+
|
| 19 |
+
---
|
| 20 |
+
|
| 21 |
+
## ✨ Key Features
|
| 22 |
+
|
| 23 |
+
- 🧠 Intent extraction from raw text
|
| 24 |
+
- 🕸️ Graph-based intent representation
|
| 25 |
+
- 🔁 Dynamic intent graph updates
|
| 26 |
+
- ⚡ Lightweight, dependency-minimal design
|
| 27 |
+
- 🤗 Hugging Face–compatible inference pipeline
|
| 28 |
+
- 🧩 Modular and extensible architecture
|
| 29 |
+
|
| 30 |
+
---
|
| 31 |
+
|
| 32 |
+
## 📂 Project Structure
|
| 33 |
+
|
| 34 |
+
```
|
| 35 |
+
intentgraph-lm/
|
| 36 |
+
├── config/
|
| 37 |
+
├── data/
|
| 38 |
+
├── src/
|
| 39 |
+
├── training/
|
| 40 |
+
├── scripts/
|
| 41 |
+
├── tests/
|
| 42 |
+
├── notebooks/
|
| 43 |
+
├── README.md
|
| 44 |
+
├── model_card.md
|
| 45 |
+
├── requirements.txt
|
| 46 |
+
└── LICENSE
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
---
|
| 50 |
+
|
| 51 |
+
## ⚙️ Installation
|
| 52 |
+
|
| 53 |
+
```bash
|
| 54 |
+
pip install -r requirements.txt
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
---
|
| 58 |
+
|
| 59 |
+
## 🚀 Quick Usage
|
| 60 |
+
|
| 61 |
+
```python
|
| 62 |
+
from src import IntentGraphPipeline
|
| 63 |
+
|
| 64 |
+
pipeline = IntentGraphPipeline()
|
| 65 |
+
|
| 66 |
+
output = pipeline("Book a flight and then reserve a hotel")
|
| 67 |
+
|
| 68 |
+
print(output)
|
| 69 |
+
```
|
| 70 |
+
|
| 71 |
+
---
|
| 72 |
+
|
| 73 |
+
## 🧠 How It Works
|
| 74 |
+
|
| 75 |
+
1. Tokenization
|
| 76 |
+
2. Intent Extraction
|
| 77 |
+
3. Graph Building
|
| 78 |
+
4. Dynamic Updates
|
| 79 |
+
|
| 80 |
+
---
|
| 81 |
+
|
| 82 |
+
## 🧪 Testing
|
| 83 |
+
|
| 84 |
+
```bash
|
| 85 |
+
pytest tests/
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
---
|
| 89 |
+
|
| 90 |
+
## 🔮 Future Scope
|
| 91 |
+
|
| 92 |
+
- Transformer-based intent extraction
|
| 93 |
+
- Hierarchical intent graphs
|
| 94 |
+
- Agent tool planning
|
| 95 |
+
- Multilingual support
|
| 96 |
+
- Hugging Face pipeline release
|
| 97 |
+
- Gradio demo space
|
| 98 |
+
|
| 99 |
+
---
|
| 100 |
+
|
| 101 |
+
## 📜 License
|
| 102 |
+
|
| 103 |
+
Apache License 2.0
|
config/graph_schema.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"node_types": [
|
| 3 |
+
"intent"
|
| 4 |
+
],
|
| 5 |
+
"edge_types": [
|
| 6 |
+
"depends_on",
|
| 7 |
+
"follows"
|
| 8 |
+
],
|
| 9 |
+
"weighted": true
|
| 10 |
+
}
|
config/model_config.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_name: intentgraph-lm
|
| 2 |
+
max_intents: 5
|
| 3 |
+
confidence_threshold: 0.5
|
data/examples.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"text": "Book a flight and then reserve a hotel",
|
| 4 |
+
"intents": [
|
| 5 |
+
"book_flight",
|
| 6 |
+
"reserve_hotel"
|
| 7 |
+
]
|
| 8 |
+
}
|
| 9 |
+
]
|
data/synthetic_intents.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
"book_flight",
|
| 3 |
+
"reserve_hotel",
|
| 4 |
+
"order_food",
|
| 5 |
+
"check_weather"
|
| 6 |
+
]
|
model_card.md
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language: en
|
| 3 |
+
license: apache-2.0
|
| 4 |
+
pipeline_tag: text-to-graph
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
# IntentGraphLM
|
notebooks/intentgraph_demo.ipynb
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"cells": [], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
|
requirements.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
networkx
|
scripts/export_model.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def export():
    """Print the export status message.

    Placeholder entry point: it only reports that an export is happening
    and returns ``None``.
    """
    status = 'Exporting model'
    print(status)


if __name__ == '__main__':
    export()
|
scripts/preprocess_data.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def preprocess():
    """Print the preprocessing status message.

    Placeholder entry point: it only reports that preprocessing is
    happening and returns ``None``.
    """
    status = 'Preprocessing data'
    print(status)


if __name__ == '__main__':
    preprocess()
|
src/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .inference import IntentGraphPipeline
|
src/graph_builder.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import networkx as nx
|
| 2 |
+
|
| 3 |
+
class GraphBuilder:
    """Builds a directed chain graph from an ordered list of intents."""

    def build(self, intents):
        """Return a ``networkx.DiGraph`` linking each intent to the next one.

        Args:
            intents: Indexable sequence of hashable intent labels, in the
                order they should be chained.

        Returns:
            A directed graph with one node per intent (attribute
            ``type='intent'``) and an edge of ``weight=1.0`` from every
            intent to its successor in the sequence.
        """
        graph = nx.DiGraph()
        for position, intent in enumerate(intents):
            graph.add_node(intent, type='intent')
            if position == 0:
                # The first intent has no predecessor to link from.
                continue
            predecessor = intents[position - 1]
            graph.add_edge(predecessor, intent, weight=1.0)
        return graph
|
src/graph_updater.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class GraphUpdater:
    """Adds newly observed intents to an existing intent graph."""

    def update(self, graph, new_intent):
        """Ensure ``new_intent`` is a node of ``graph`` and return the graph.

        Args:
            graph: Graph object supporting ``in`` membership tests and
                ``add_node(node, **attrs)``.
            new_intent: Intent label to insert.

        Returns:
            The same ``graph`` object. It is mutated in place only when the
            intent was not already present; no edges are added.
        """
        if new_intent in graph:
            # Already known: leave the existing node and its attributes alone.
            return graph
        graph.add_node(new_intent, type='intent')
        return graph
|
src/inference.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .tokenizer import SimpleTokenizer
|
| 2 |
+
from .intent_extractor import IntentExtractor
|
| 3 |
+
from .graph_builder import GraphBuilder
|
| 4 |
+
|
| 5 |
+
class IntentGraphPipeline:
    """End-to-end pipeline: text -> tokens -> intents -> intent graph."""

    def __init__(self):
        """Create the tokenizer, extractor and graph builder stages."""
        self.tokenizer = SimpleTokenizer()
        self.extractor = IntentExtractor()
        self.builder = GraphBuilder()

    def __call__(self, text: str):
        """Run the full pipeline on ``text``.

        Args:
            text: Raw input sentence.

        Returns:
            A dict with three keys: ``'intents'`` (the extractor output),
            ``'nodes'`` (list of the graph's nodes) and ``'edges'`` (list of
            the graph's edges).
        """
        intents = self.extractor.extract(self.tokenizer.tokenize(text))
        graph = self.builder.build(intents)
        result = {'intents': intents}
        result['nodes'] = list(graph.nodes)
        result['edges'] = list(graph.edges)
        return result
|
src/intent_extractor.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class IntentExtractor:
    """Keyword-based intent extractor.

    A token counts as an intent when it exactly equals one of the known
    action keywords.
    """

    # Action verbs recognised as intents. Kept as a tuple so membership is
    # decided by equality and tokens do not need to be hashable.
    _INTENT_KEYWORDS = ('book', 'reserve', 'order', 'check')

    def extract(self, tokens):
        """Return the distinct intent keywords found in ``tokens``.

        Args:
            tokens: Iterable of tokens. Matching is exact and
                case-sensitive, so callers should normalise case first.

        Returns:
            A list of the matched keywords without duplicates, in order of
            first appearance.
        """
        matches = (token for token in tokens if token in self._INTENT_KEYWORDS)
        # dict.fromkeys removes duplicates while keeping first-seen order.
        # Going through a set instead would return the intents in arbitrary
        # order, and consumers that chain intents in list order would then
        # produce unpredictable edge directions.
        return list(dict.fromkeys(matches))
|
src/tokenizer.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class SimpleTokenizer:
    """Whitespace tokenizer that lower-cases text and trims punctuation."""

    def tokenize(self, text: str):
        """Split ``text`` into lower-case word tokens.

        Punctuation attached to the start or end of a word is removed, so
        "Book," and "hotel." become "book" and "hotel". Characters inside a
        word (such as the apostrophe in "don't") are kept, and tokens made
        only of punctuation are dropped.

        Args:
            text: Raw input string.

        Returns:
            A list of non-empty, lower-cased tokens in their original order.
        """
        # Function-scope import keeps this block self-contained.
        import string

        trimmed = (word.strip(string.punctuation) for word in text.lower().split())
        # Without trimming, "book," would never equal the keyword "book" in
        # an exact-match lookup downstream.
        return [word for word in trimmed if word]
|
tests/test_graph_building.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.graph_builder import GraphBuilder
|
| 2 |
+
|
| 3 |
+
def test_graph():
    """Building from two distinct intents yields a graph with two nodes."""
    graph = GraphBuilder().build(['a', 'b'])
    node_count = len(graph.nodes)
    assert node_count == 2
|
tests/test_intent_extraction.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.intent_extractor import IntentExtractor
|
| 2 |
+
|
| 3 |
+
def test_extract():
    """The keyword 'book' is reported as an intent when present in the tokens."""
    found = IntentExtractor().extract(['book', 'flight'])
    assert 'book' in found
|
training/evaluate.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def evaluate():
    """Print the evaluation placeholder message.

    No evaluation is performed; the function only prints and returns
    ``None``.
    """
    status = 'Evaluation placeholder'
    print(status)


if __name__ == '__main__':
    evaluate()
|
training/loss.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def loss_fn(pred, target):
    """Return the absolute difference between the lengths of the inputs.

    Args:
        pred: Sized collection of predicted items.
        target: Sized collection of expected items.

    Returns:
        A non-negative int: ``|len(pred) - len(target)|``. Only the sizes
        are compared, never the contents.
    """
    size_gap = len(pred) - len(target)
    return -size_gap if size_gap < 0 else size_gap
|
training/train.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def train():
    """Print the training placeholder message.

    No training is performed; the function only prints and returns
    ``None``.
    """
    status = 'Training placeholder'
    print(status)


if __name__ == '__main__':
    train()
|