Commit 757decb
Parent(s): cc75613

simplify agent

Files changed:
- agent_graph.png +0 -0
- flow.svg +0 -1
- poetry.lock +250 -3
- pyproject.toml +2 -1
- src/agent.py +46 -264
- src/nodes/__init__.py +0 -19
- src/nodes/audio_processor.py +0 -257
- src/nodes/chat.py +24 -171
- src/nodes/final_response.py +0 -299
- src/nodes/planner.py +21 -308
- src/nodes/processor.py +24 -0
- src/nodes/prompts.py +0 -440
- src/nodes/router.py +0 -167
- src/nodes/script_generator.py +0 -412
- src/nodes/validator.py +23 -265
- src/state.py +10 -45
agent_graph.png
ADDED
flow.svg
DELETED
poetry.lock
CHANGED

@@ -45,6 +45,22 @@ doc = ["Sphinx (>=8.2,<9.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)",
 test = ["anyio[trio]", "blockbuster (>=1.5.23)", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1) ; python_version >= \"3.10\"", "uvloop (>=0.21) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\" and python_version < \"3.14\""]
 trio = ["trio (>=0.26.1)"]
 
+[[package]]
+name = "asttokens"
+version = "3.0.0"
+description = "Annotate AST trees with source code positions"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2"},
+    {file = "asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7"},
+]
+
+[package.extras]
+astroid = ["astroid (>=2,<4)"]
+test = ["astroid (>=2,<4)", "pytest", "pytest-cov", "pytest-xdist"]
+
 [[package]]
 name = "audioop-lts"
 version = "0.2.1"
@@ -306,12 +322,24 @@ description = "Cross-platform colored terminal text."
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
 groups = ["main"]
-markers = "platform_system == \"Windows\""
+markers = "platform_system == \"Windows\" or sys_platform == \"win32\""
 files = [
     {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
     {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
 ]
 
+[[package]]
+name = "decorator"
+version = "5.2.1"
+description = "Decorators for Humans"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a"},
+    {file = "decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360"},
+]
+
 [[package]]
 name = "distro"
 version = "1.9.0"
@@ -338,6 +366,21 @@ files = [
 [package.dependencies]
 python-dotenv = "*"
 
+[[package]]
+name = "executing"
+version = "2.2.0"
+description = "Get the currently executing AST node of a frame, and other information"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa"},
+    {file = "executing-2.2.0.tar.gz", hash = "sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755"},
+]
+
+[package.extras]
+tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich ; python_version >= \"3.11\""]
+
 [[package]]
 name = "fastapi"
 version = "0.115.12"
@@ -729,6 +772,73 @@ files = [
 [package.extras]
 all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"]
 
+[[package]]
+name = "ipython"
+version = "9.3.0"
+description = "IPython: Productive Interactive Computing"
+optional = false
+python-versions = ">=3.11"
+groups = ["main"]
+files = [
+    {file = "ipython-9.3.0-py3-none-any.whl", hash = "sha256:1a0b6dd9221a1f5dddf725b57ac0cb6fddc7b5f470576231ae9162b9b3455a04"},
+    {file = "ipython-9.3.0.tar.gz", hash = "sha256:79eb896f9f23f50ad16c3bc205f686f6e030ad246cc309c6279a242b14afe9d8"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "sys_platform == \"win32\""}
+decorator = "*"
+ipython-pygments-lexers = "*"
+jedi = ">=0.16"
+matplotlib-inline = "*"
+pexpect = {version = ">4.3", markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""}
+prompt_toolkit = ">=3.0.41,<3.1.0"
+pygments = ">=2.4.0"
+stack_data = "*"
+traitlets = ">=5.13.0"
+
+[package.extras]
+all = ["ipython[doc,matplotlib,test,test-extra]"]
+black = ["black"]
+doc = ["docrepr", "exceptiongroup", "intersphinx_registry", "ipykernel", "ipython[test]", "matplotlib", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "sphinx_toml (==0.0.4)", "typing_extensions"]
+matplotlib = ["matplotlib"]
+test = ["packaging", "pytest", "pytest-asyncio (<0.22)", "testpath"]
+test-extra = ["curio", "ipykernel", "ipython[test]", "jupyter_ai", "matplotlib (!=3.2.0)", "nbclient", "nbformat", "numpy (>=1.23)", "pandas", "trio"]
+
+[[package]]
+name = "ipython-pygments-lexers"
+version = "1.1.1"
+description = "Defines a variety of Pygments lexers for highlighting IPython code."
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "ipython_pygments_lexers-1.1.1-py3-none-any.whl", hash = "sha256:a9462224a505ade19a605f71f8fa63c2048833ce50abc86768a0d81d876dc81c"},
+    {file = "ipython_pygments_lexers-1.1.1.tar.gz", hash = "sha256:09c0138009e56b6854f9535736f4171d855c8c08a563a0dcd8022f78355c7e81"},
+]
+
+[package.dependencies]
+pygments = "*"
+
+[[package]]
+name = "jedi"
+version = "0.19.2"
+description = "An autocompletion tool for Python that can be used for text editors."
+optional = false
+python-versions = ">=3.6"
+groups = ["main"]
+files = [
+    {file = "jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9"},
+    {file = "jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0"},
+]
+
+[package.dependencies]
+parso = ">=0.8.4,<0.9.0"
+
+[package.extras]
+docs = ["Jinja2 (==2.11.3)", "MarkupSafe (==1.1.1)", "Pygments (==2.8.1)", "alabaster (==0.7.12)", "babel (==2.9.1)", "chardet (==4.0.0)", "commonmark (==0.8.1)", "docutils (==0.17.1)", "future (==0.18.2)", "idna (==2.10)", "imagesize (==1.2.0)", "mock (==1.0.1)", "packaging (==20.9)", "pyparsing (==2.4.7)", "pytz (==2021.1)", "readthedocs-sphinx-ext (==2.1.4)", "recommonmark (==0.5.0)", "requests (==2.25.1)", "six (==1.15.0)", "snowballstemmer (==2.1.0)", "sphinx (==1.8.5)", "sphinx-rtd-theme (==0.4.3)", "sphinxcontrib-serializinghtml (==1.1.4)", "sphinxcontrib-websupport (==1.2.4)", "urllib3 (==1.26.4)"]
+qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"]
+testing = ["Django", "attrs", "colorama", "docopt", "pytest (<9.0.0)"]
+
 [[package]]
 name = "jinja2"
 version = "3.1.6"
@@ -1162,6 +1272,21 @@ files = [
     {file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"},
 ]
 
+[[package]]
+name = "matplotlib-inline"
+version = "0.1.7"
+description = "Inline Matplotlib backend for Jupyter"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca"},
+    {file = "matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90"},
+]
+
+[package.dependencies]
+traitlets = "*"
+
 [[package]]
 name = "mcp"
 version = "1.9.3"
@@ -1523,6 +1648,38 @@ sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-d
 test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"]
 xml = ["lxml (>=4.9.2)"]
 
+[[package]]
+name = "parso"
+version = "0.8.4"
+description = "A Python Parser"
+optional = false
+python-versions = ">=3.6"
+groups = ["main"]
+files = [
+    {file = "parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18"},
+    {file = "parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d"},
+]
+
+[package.extras]
+qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"]
+testing = ["docopt", "pytest"]
+
+[[package]]
+name = "pexpect"
+version = "4.9.0"
+description = "Pexpect allows easy control of interactive console applications."
+optional = false
+python-versions = "*"
+groups = ["main"]
+markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""
+files = [
+    {file = "pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523"},
+    {file = "pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f"},
+]
+
+[package.dependencies]
+ptyprocess = ">=0.5"
+
 [[package]]
 name = "pillow"
 version = "11.2.1"
@@ -1623,6 +1780,49 @@ tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "ole
 typing = ["typing-extensions ; python_version < \"3.10\""]
 xmp = ["defusedxml"]
 
+[[package]]
+name = "prompt-toolkit"
+version = "3.0.51"
+description = "Library for building powerful interactive command lines in Python"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "prompt_toolkit-3.0.51-py3-none-any.whl", hash = "sha256:52742911fde84e2d423e2f9a4cf1de7d7ac4e51958f648d9540e0fb8db077b07"},
+    {file = "prompt_toolkit-3.0.51.tar.gz", hash = "sha256:931a162e3b27fc90c86f1b48bb1fb2c528c2761475e57c9c06de13311c7b54ed"},
+]
+
+[package.dependencies]
+wcwidth = "*"
+
+[[package]]
+name = "ptyprocess"
+version = "0.7.0"
+description = "Run a subprocess in a pseudo terminal"
+optional = false
+python-versions = "*"
+groups = ["main"]
+markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""
+files = [
+    {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"},
+    {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"},
+]
+
+[[package]]
+name = "pure-eval"
+version = "0.2.3"
+description = "Safely evaluate AST nodes without side effects"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0"},
+    {file = "pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42"},
+]
+
+[package.extras]
+tests = ["pytest"]
+
 [[package]]
 name = "pycparser"
 version = "2.22"
@@ -1813,7 +2013,6 @@ description = "Pygments is a syntax highlighting package written in Python."
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
-markers = "sys_platform != \"emscripten\""
 files = [
     {file = "pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c"},
     {file = "pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f"},
@@ -2317,6 +2516,26 @@ examples = ["aiosqlite (>=0.21.0)", "fastapi (>=0.115.12)", "sqlalchemy[asyncio,
 granian = ["granian (>=2.3.1)"]
 uvicorn = ["uvicorn (>=0.34.0)"]
 
+[[package]]
+name = "stack-data"
+version = "0.6.3"
+description = "Extract data from python stack frames and tracebacks for informative displays"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695"},
+    {file = "stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9"},
+]
+
+[package.dependencies]
+asttokens = ">=2.1.0"
+executing = ">=1.2.0"
+pure-eval = "*"
+
+[package.extras]
+tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"]
+
 [[package]]
 name = "starlette"
 version = "0.46.2"
@@ -2433,6 +2652,22 @@ notebook = ["ipywidgets (>=6)"]
 slack = ["slack-sdk"]
 telegram = ["requests"]
 
+[[package]]
+name = "traitlets"
+version = "5.14.3"
+description = "Traitlets Python configuration system"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f"},
+    {file = "traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7"},
+]
+
+[package.extras]
+docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"]
+test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<8.2)", "pytest-mock", "pytest-mypy-testing"]
+
 [[package]]
 name = "typer"
 version = "0.16.0"
@@ -2529,6 +2764,18 @@ h11 = ">=0.8"
 [package.extras]
 standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.13)", "websockets (>=10.4)"]
 
+[[package]]
+name = "wcwidth"
+version = "0.2.13"
+description = "Measures the displayed width of unicode strings in a terminal"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"},
+    {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"},
+]
+
 [[package]]
 name = "websockets"
 version = "15.0.1"
@@ -2857,4 +3104,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.13,<4.0"
-content-hash = "
+content-hash = "1c1c843aa68874643d9202518e0a9f2b71885314c4007df9916b81ffb66a7d0d"
pyproject.toml
CHANGED

@@ -13,7 +13,8 @@ dependencies = [
     "langchain-openai (>=0.3.21,<0.4.0)",
     "langchain-mcp-adapters (>=0.1.7,<0.2.0)",
    "dotenv (>=0.9.9,<0.10.0)",
-    "langchain (>=0.3.25,<0.4.0)"
+    "langchain (>=0.3.25,<0.4.0)",
+    "ipython (>=9.3.0,<10.0.0)"
 ]
 
src/agent.py
CHANGED

@@ -1,77 +1,16 @@
 import asyncio
-from typing import Dict, Any, TypedDict, Annotated, List
 from dotenv import load_dotenv
 
-from langchain_core.messages import BaseMessage, AIMessage
 from langchain_mcp_adapters.client import MultiServerMCPClient
-from langgraph.graph import StateGraph, END
-from langgraph.graph.message import add_messages
-from langgraph.checkpoint.memory import MemorySaver
-
-from .nodes import (
-    router_node,
-    script_generator_node,
-    planner_node,
-    audio_processor_node,
-    validator_node,
-    final_response_node
-)
-from .nodes.chat import chat_node
-
-
-class AudioProcessingState(TypedDict):
-    """State schema for the audio processing graph."""
-
-    # Chat history
-    messages: Annotated[List[BaseMessage], add_messages]
-
-    # Audio files provided by user
-    audio_files: List[str]
-
-    # User's processing request
-    user_request: str
-
-    # Processing type determined by router
-    processing_type: str
-
-    # Generated scripts with timestamps
-    scripts: Dict[str, Any]
-
-    # Execution plan created by planner
-    execution_plan: List[Dict[str, Any]]
-
-    # Processing results
-    processed_files: Dict[str, str]
-
-    # Processing steps completed
-    completed_steps: List[str]
-
-    # Final output
-    final_audio_url: str
-    final_response: str
-
-    # Error handling
-    errors: List[str]
-    needs_reprocessing: bool
-
-    # Metadata
-    processing_metadata: Dict[str, Any]
+from langgraph.graph import StateGraph, END, START
+
+from .state import AgentState
+from .nodes.chat import chat_node, chat_node_router
+from .nodes.planner import planner_node
+from .nodes.processor import processor_node
+from .nodes.validator import validator_node, validator_node_router
 
 class AudioAgent:
-    """
-    Advanced LangGraph-based audio processing agent with custom nodes.
-
-    Handles audio file processing through a sophisticated workflow:
-    1. Router - Determines processing type
-    2. Chat or Audio Processing Pipeline
-    3. Script Generation - Creates timestamped transcripts
-    4. Planning - Creates execution plan
-    5. Processing - Executes audio tools
-    6. Validation - Checks results and determines reprocessing
-    7. Final Response - Formats output for user
-    """
-
     def __init__(
         self,
         model_name: str = "gpt-4o",
@@ -80,229 +19,72 @@ class AudioAgent:
         load_dotenv()
         self.model_name = model_name
         self.server_url = server_url
+        self.graph = None
 
-        # SSE client for audio tools
         self._client = MultiServerMCPClient({
             "audio-tools": {"url": self.server_url, "transport": "sse"}
         })
 
-        self._graph = None
-        self._tools = []
-
     @property
     def is_initialized(self) -> bool:
-        return self.
+        return self.graph is not None
 
-    async def initialize(self) -> None:
-        """Initialize the LangGraph workflow with audio tools."""
-        if self.is_initialized:
-            return
-
-        if not self._tools:
-            raise RuntimeError("No tools available from MCP server")
-
-        self._graph = self._build_graph()
+    async def _build_graph(self) -> None:
+        """Build the LangGraph workflow."""
 
-        workflow = StateGraph(AudioProcessingState)
+        _graph = StateGraph(AgentState)
 
-        # Add nodes
-        workflow.add_node("router", self._router_async)
-        workflow.add_node("chat", self._chat_with_tools)
-        workflow.add_node("script_generator", self._script_generator_with_tools)
-        workflow.add_node("planner", self._planner_async)
-        workflow.add_node("audio_processor", self._audio_processor_with_tools)
-        workflow.add_node("validator", self._validator_async)
-        workflow.add_node("response_formatter", self._final_response_async)
-
-        # Set entry point
-        workflow.set_entry_point("router")
-
-        # Add conditional edges based on processing type
-        workflow.add_conditional_edges(
-            "router",
-            self._route_processing_type,
+        _graph.add_node("chat", chat_node)
+        _graph.add_conditional_edges(
+            "chat",
+            chat_node_router,
             {
-                "
-                "
-                "dialogue_generation": "script_generator"
+                "planner": "planner",
+                "end": END
             }
         )
+
+        _graph.add_node("planner", planner_node)
+        _graph.add_edge("planner", "audio_processor")
+
+        _graph.add_node("audio_processor", processor_node)
+        _graph.add_edge("audio_processor", "validator")
 
-        # Validation flow with conditional reprocessing
-        workflow.add_conditional_edges(
+        _graph.add_node("validator", validator_node)
+        _graph.add_conditional_edges(
             "validator",
-
+            validator_node_router,
             {
-                "
-                "
+                "chat": "chat",
+                "planner": "planner"
             }
         )
 
-        # Final response leads to end
-        workflow.add_edge("response_formatter", END)
-
-        # Compile with memory for conversation history
-        memory = MemorySaver()
-        return workflow.compile(checkpointer=memory)
+        _graph.add_edge(START, "chat")
+        _graph.add_edge("chat", END)
+        self.graph = _graph.compile()
 
-    ""
-
-    async def
-    """
-
-    async def _audio_processor_with_tools(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Audio processor node with tools access."""
-        return await audio_processor_node(state, self._tools)
-
-    async def _validator_async(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Async validator node wrapper."""
-        return await validator_node(state)
-
-    async def _router_async(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Async router node wrapper."""
-        return await router_node(state)
-
-    async def _planner_async(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Async planner node wrapper."""
-        return await planner_node(state)
-
-    async def _final_response_async(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Async final response node wrapper."""
-        return await final_response_node(state)
-
-    def _route_processing_type(self, state: Dict[str, Any]) -> str:
-        """Route based on processing type."""
-        return state.get("processing_type", "chat")
-
-    def _check_reprocessing_need(self, state: Dict[str, Any]) -> str:
-        """Check if reprocessing is needed."""
-        if state.get("needs_reprocessing", False):
-            return "reprocess"
-        return "complete"
-
-    def process_user_input(self, user_input: str) -> Dict[str, Any]:
-        """Process user input and create initial state."""
-        from langchain_core.messages import HumanMessage
-
-        return {
-            "messages": [HumanMessage(content=user_input)],
-            "audio_files": [],
-            "user_request": "",
-            "processing_type": "",
-            "scripts": {},
-            "execution_plan": [],
-            "processed_files": {},
-            "completed_steps": [],
-            "final_audio_url": "",
-            "final_response": "",
-            "errors": [],
-            "needs_reprocessing": False,
-            "processing_metadata": {}
-        }
-
-    async def chat(self, prompt: str) -> Dict[str, Any]:
-        """
-        One-shot chat: returns the full processing result.
-        """
-        if not self.is_initialized:
-            await self.initialize()
-
-        config = {"configurable": {"thread_id": "audio_agent_session"}}
-        initial_state = self.process_user_input(prompt)
-
-        result = await self._graph.ainvoke(initial_state, config)
-        return result
-
-    async def stream_chat(self, prompt: str):
-        """
-        Streaming chat: yields intermediate results as processing continues.
-        """
-        if not self.is_initialized:
-            await self.initialize()
+    async def initialize(self) -> None:
+        """Initialize the LangGraph workflow with audio tools."""
+        if self.is_initialized:
+            return
 
-        # Special handling for chat-only requests to enable streaming
-        processing_type = None
-
-        # First, run the router to determine processing type
-        router_result = await self._graph.ainvoke(initial_state, config)
-        processing_type = router_result.get("processing_type", "")
+        self.tools = await self._client.get_tools()
+        if not self.tools:
+            raise RuntimeError("No tools available from MCP server")
 
-        if processing_type == "chat":
-            # For chat requests, use direct streaming from the chat node
-            from .nodes.chat import stream_chat_response
-            messages = initial_state.get("messages", [])
-
-            accumulated_content = ""
-            async for chunk in stream_chat_response(messages, self._tools):
-                accumulated_content += chunk
-                yield chunk, "chat"
-
-            # Update the state with the final response
-            final_state = router_result.copy()
-            final_state["messages"].append(AIMessage(content=accumulated_content))
-            final_state["final_response"] = accumulated_content
-
-        else:
-            # For audio processing, use the normal graph streaming
-            async for chunk in self._graph.astream(initial_state, config):
-                # Extract the node name and content
-                for node_name, node_output in chunk.items():
-                    if node_name == "__end__":
-                        continue
-
-                    # Get the latest message if available
-                    messages = node_output.get("messages", [])
-                    if messages and hasattr(messages[-1], 'content'):
-                        content = messages[-1].content
-                        if content:
-                            yield content, node_name
-
-                    # Also yield final audio URL if available
-                    final_audio_url = node_output.get("final_audio_url", "")
-                    if final_audio_url:
-                        yield f"\n🎵 **Audio Ready**: [{final_audio_url}]({final_audio_url})", node_name
+        await self._build_graph()
 
 async def main():
     """Test the agent with various scenarios."""
     agent = AudioAgent()
-
-    print()
-
-    # Test 2: Audio processing request
-    print("=== Test 2: Audio Processing ===")
-    audio_request = "Process this audio file https://example.com/audio.mp3 - remove filler words and normalize volume"
-
-    print("Streaming response:")
-    async for content, node in agent.stream_chat(audio_request):
-        print(f"[{node}] {content[:100]}..." if len(content) > 100 else f"[{node}] {content}")
-    print()
+    await agent.initialize()
+
+    res = agent.graph.invoke({
+        "user_input": "I want to edit my audio file",
+    })
+
+    print(res)
 
 if __name__ == "__main__":
     asyncio.run(main())
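For readers less familiar with LangGraph, here is a minimal, self-contained sketch of the pattern the new _build_graph uses: nodes are plain functions over a shared state, add_conditional_edges dispatches on the string a router function returns, and compile() yields a runnable graph. This is an illustration only; DemoState and the node bodies below are hypothetical stand-ins, not the repo's AgentState or its actual nodes.

# Minimal LangGraph sketch of the conditional-edge pattern used above.
# DemoState and the node logic are hypothetical placeholders.
from typing import TypedDict

from langgraph.graph import StateGraph, START, END


class DemoState(TypedDict):
    user_input: str
    requires_processing: bool
    result: str


def chat(state: DemoState) -> DemoState:
    # A real node would call an LLM here; this just flags audio requests.
    needs_work = "audio" in state["user_input"].lower()
    return {**state, "requires_processing": needs_work}


def process(state: DemoState) -> DemoState:
    return {**state, "result": "processed: " + state["user_input"]}


def route_after_chat(state: DemoState) -> str:
    # The returned key selects the outgoing edge in add_conditional_edges.
    return "process" if state["requires_processing"] else "end"


graph = StateGraph(DemoState)
graph.add_node("chat", chat)
graph.add_node("process", process)
graph.add_edge(START, "chat")
graph.add_conditional_edges("chat", route_after_chat, {"process": "process", "end": END})
graph.add_edge("process", END)

app = graph.compile()
print(app.invoke({"user_input": "edit my audio file", "requires_processing": False, "result": ""}))

Invoking the compiled graph with an initial state dict, as main() does in the diff above, runs the entry node first and then follows whichever edge the router selects.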
src/nodes/__init__.py
DELETED

@@ -1,19 +0,0 @@
-"""
-Audio processing graph nodes.
-"""
-
-from .router import router_node
-from .script_generator import script_generator_node
-from .planner import planner_node
-from .audio_processor import audio_processor_node
-from .validator import validator_node
-from .final_response import final_response_node
-
-__all__ = [
-    "router_node",
-    "script_generator_node",
-    "planner_node",
-    "audio_processor_node",
-    "validator_node",
-    "final_response_node"
-]
src/nodes/audio_processor.py
DELETED

@@ -1,257 +0,0 @@
-"""
-Intelligent LLM-powered audio processor for executing planned processing steps.
-"""
-
-from typing import Dict, Any, List
-from langchain_core.messages import AIMessage, SystemMessage
-from langchain_openai import ChatOpenAI
-import json
-import re
-
-from .prompts import (
-    LLM_PROCESSING_DECISION_PROMPT_TEMPLATE,
-    LLM_PROCESSING_SUMMARY_PROMPT_TEMPLATE,
-)
-
-
-async def audio_processor_node(state: Dict[str, Any], tools: list) -> Dict[str, Any]:
-    """
-    Execute audio processing plan with intelligent LLM-guided decisions.
-    """
-
-    execution_plan = state.get("execution_plan", [])
-    user_request = state.get("user_request", "")
-    processed_files = state.get("processed_files", {})
-    completed_steps = state.get("completed_steps", [])
-    errors = state.get("errors", [])
-
-    if not execution_plan:
-        return create_no_plan_response(state)
-
-    # Create tool lookup
-    tool_lookup = {tool.name: tool for tool in tools}
-
-    # Track current file URLs through processing
-    current_file_urls = {}
-
-    # Execute plan with LLM guidance
-    for i, step in enumerate(execution_plan):
-        # Get LLM decision for this step
-        should_execute, adapted_params = await get_llm_processing_decision(
-            step, user_request, current_file_urls, completed_steps, errors, list(tool_lookup.keys())
-        )
-
-        if not should_execute:
-            completed_steps.append(f"⏭️ Skipped: {step.get('description', 'unknown')}")
-            continue
-
-        # Execute the step
-        step_result = await execute_processing_step(
-            step, adapted_params, tool_lookup, current_file_urls
-        )
-
-        if step_result["success"]:
-            if step_result["new_file_url"]:
-                original_file = step_result["original_file"]
-                current_file_urls[original_file] = step_result["new_file_url"]
-                processed_files[original_file] = step_result["new_file_url"]
-            completed_steps.append(f"✅ {step_result['description']}")
-        else:
-            errors.append(step_result["error"])
-            completed_steps.append(f"❌ Failed: {step.get('description', 'unknown')}")
-
-    # Create processing summary
-    processing_summary = await create_llm_processing_summary(
-        user_request, completed_steps, errors, processed_files
-    )
-
-    messages = state.get("messages", [])
-    messages.append(AIMessage(content=processing_summary))
-
-    return {
-        "processed_files": processed_files,
-        "completed_steps": completed_steps,
-        "errors": errors,
-        "needs_reprocessing": len(errors) > 0 and len(completed_steps) > 0,
-        "final_audio_url": get_primary_output_file(processed_files),
-        "messages": messages
-    }
-
-
-async def get_llm_processing_decision(
-    step: Dict[str, Any],
-    user_request: str,
-    current_file_urls: Dict[str, str],
-    completed_steps: List[str],
-    errors: List[str],
-    available_tools: List[str]
-) -> tuple:
-    """Use LLM to decide whether to execute step and with what parameters."""
-
-    llm = ChatOpenAI(model="gpt-4o", temperature=0.2)
-
-    prompt = LLM_PROCESSING_DECISION_PROMPT_TEMPLATE.format(
-        tool_name=step.get('tool', 'unknown'),
-        description=step.get('description', 'No description'),
-        planned_parameters=json.dumps(step.get('params', {}), indent=2),
-        user_request=user_request,
-        completed_steps_count=len(completed_steps),
-        error_count=len(errors),
-        available_tools=', '.join(available_tools),
-        current_file_urls=json.dumps(current_file_urls, indent=2),
-        recent_activity="\n".join(completed_steps[-3:]) if completed_steps else "No steps completed yet"
-    )
-
-    try:
-        response = await llm.ainvoke([SystemMessage(content=prompt)])
-        content = response.content.strip()
-
-        if content.startswith("SKIP"):
-            return False, {}
-        elif content.startswith("EXECUTE"):
-            lines = content.split('\n')
-            if len(lines) > 1 and lines[1].strip() != "NO_CHANGES":
-                try:
-                    adapted_params = json.loads(lines[1])
-                    return True, adapted_params
-                except json.JSONDecodeError:
-                    return True, {}
-            return True, {}
-        else:
-            return True, {}  # Default to execute if unclear
-
-    except Exception as e:
-        return True, {}  # Default to execute on error
-
-
-async def execute_processing_step(
-    step: Dict[str, Any],
-    adapted_params: Dict[str, Any],
-    tool_lookup: Dict[str, Any],
-    current_file_urls: Dict[str, str]
-) -> Dict[str, Any]:
-    """Execute a processing step with the given parameters."""
-
-    tool_name = step.get("tool", "")
-    params = step.get("params", {}).copy()
-    params.update(adapted_params)  # Apply LLM adaptations
-
-    if tool_name not in tool_lookup:
-        return {
-            "success": False,
-            "error": f"Tool '{tool_name}' not available",
-            "description": f"Failed to find tool {tool_name}",
-            "original_file": params.get("audio_file", ""),
-            "new_file_url": None
-        }
-
-    try:
-        # Update file URL if this file has been processed before
-        original_file = params.get("audio_file", "")
-        if original_file in current_file_urls:
-            params["audio_file"] = current_file_urls[original_file]
-
-        # Execute the tool
-        tool = tool_lookup[tool_name]
-        result = await tool.ainvoke(params)
-
-        # Extract new file URL from result
-        new_file_url = extract_file_url_from_result(result, params["audio_file"])
-
-        return {
-            "success": True,
-            "description": f"{tool_name}: {step.get('description', '')}",
-            "original_file": original_file,
-            "new_file_url": new_file_url if new_file_url != params["audio_file"] else None,
-            "result": result
-        }
-
-    except Exception as e:
-        return {
-            "success": False,
-            "error": f"{tool_name} failed: {str(e)}",
-            "description": f"Failed {tool_name}",
-            "original_file": params.get("audio_file", ""),
-            "new_file_url": None
-        }
-
-
-async def create_llm_processing_summary(
-    user_request: str,
-    completed_steps: List[str],
-    errors: List[str],
-    processed_files: Dict[str, str]
-) -> str:
-    """Create LLM-generated processing summary."""
-
-    llm = ChatOpenAI(model="gpt-4o", temperature=0.1)
-
-    prompt = LLM_PROCESSING_SUMMARY_PROMPT_TEMPLATE.format(
-        user_request=user_request,
-        completed_steps_count=len(completed_steps),
-        error_count=len(errors),
-        processed_files_count=len(processed_files),
-        step_details="\n".join(completed_steps[-5:]) if completed_steps else "No steps completed",
-        processed_files=json.dumps(processed_files, indent=2) if processed_files else "No files processed",
-        errors="\n".join(errors) if errors else "No errors"
-    )
-
-    try:
-        response = await llm.ainvoke([SystemMessage(content=prompt)])
-        return f"🎛️ **Processing Summary**\n\n{response.content}"
-    except Exception as e:
-        # Fallback summary
-        if processed_files:
-            return f"🎛️ **Processing Complete**\n\nSuccessfully processed {len(processed_files)} file(s) with {len(completed_steps)} steps completed."
-        else:
-            return f"⚠️ **Processing Issues**\n\nEncountered {len(errors)} error(s) during processing. Please check the issues above."
-
-
-def extract_file_url_from_result(result, original_file: str) -> str:
-    """Extract the new file URL from tool result."""
-
-    if hasattr(result, 'artifact') and result.artifact:
-        if hasattr(result.artifact, 'url'):
-            return result.artifact.url
-        elif hasattr(result.artifact, 'path'):
-            return result.artifact.path
-
-    if hasattr(result, 'content'):
-        content = result.content
-        # Look for URLs in the content
-        url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+\.(mp3|wav|m4a|flac|aac|ogg)'
-        urls = re.findall(url_pattern, content, re.IGNORECASE)
-        if urls:
-            return urls[0]
-
-    return original_file
-
-
-def get_primary_output_file(processed_files: Dict[str, str]) -> str:
-    """Get the primary output file URL."""
-
-    if not processed_files:
-        return ""
-
-    # If there's a combined file, prioritize that
-    for original, processed in processed_files.items():
-        if "combined" in processed.lower():
-            return processed
-
-    # Otherwise return the first processed file
-    return list(processed_files.values())[0]
-
-
-def create_no_plan_response(state: Dict[str, Any]) -> Dict[str, Any]:
-    """Handle case when no execution plan is available."""
-
-    messages = state.get("messages", [])
-    messages.append(AIMessage(content="❌ **No Execution Plan**: Cannot process audio without a plan."))
-
-    return {
-        "processed_files": {},
-        "completed_steps": [],
-        "errors": ["No execution plan available"],
-        "messages": messages,
-        "needs_reprocessing": False
-    }
src/nodes/chat.py
CHANGED
|
@@ -1,181 +1,34 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Chat node for handling general questions and conversations using streaming LLM.
|
| 3 |
-
"""
|
| 4 |
-
|
| 5 |
-
from typing import Dict, Any, List, AsyncGenerator
|
| 6 |
-
from langchain_core.messages import AIMessage, SystemMessage
|
| 7 |
from langchain_openai import ChatOpenAI
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
|
| 10 |
-
CHAT_SYSTEM_PROMPT_BASE,
|
| 11 |
-
CHAT_SYSTEM_PROMPT_TOOLS_HEADER,
|
| 12 |
-
CHAT_SYSTEM_PROMPT_GUIDELINES,
|
| 13 |
-
)
|
| 14 |
-
|
| 15 |
-
# Export the streaming function for direct use
|
| 16 |
-
__all__ = ["chat_node", "stream_chat_response"]
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
async def chat_node(state: Dict[str, Any], tools: List = None) -> Dict[str, Any]:
|
| 20 |
"""
|
| 21 |
-
Handle general
|
| 22 |
-
Returns the complete response after streaming is done.
|
| 23 |
"""
|
| 24 |
-
|
| 25 |
-
messages = state.get("messages", [])
|
| 26 |
-
|
| 27 |
-
if not messages:
|
| 28 |
-
return {
|
| 29 |
-
"messages": messages,
|
| 30 |
-
"final_response": "No messages to process."
|
| 31 |
-
}
|
| 32 |
-
|
| 33 |
-
# Generate streaming response and collect it
|
| 34 |
-
response_content = ""
|
| 35 |
-
async for chunk in stream_chat_response(messages, tools):
|
| 36 |
-
response_content += chunk
|
| 37 |
-
|
| 38 |
-
# Add AI response to messages
|
| 39 |
-
messages.append(AIMessage(content=response_content))
|
| 40 |
-
|
| 41 |
-
return {
|
| 42 |
-
"messages": messages,
|
| 43 |
-
"final_response": response_content
|
| 44 |
-
}
|
| 45 |
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
"""
|
| 49 |
-
Stream chat response chunks as they're generated by the LLM.
|
| 50 |
-
This is the core streaming function that yields content incrementally.
|
| 51 |
-
"""
|
| 52 |
-
|
| 53 |
-
# Create system message with tool information
|
| 54 |
-
system_message = create_system_message_with_tools(tools or [])
|
| 55 |
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
llm = ChatOpenAI(model="gpt-4o", temperature=0.7)
|
| 61 |
-
|
| 62 |
-
response_content = ""
|
| 63 |
-
full_response = None
|
| 64 |
-
|
| 65 |
-
# Stream the main response
|
| 66 |
-
if tools:
|
| 67 |
-
llm_with_tools = llm.bind_tools(tools)
|
| 68 |
-
|
| 69 |
-
async for chunk in llm_with_tools.astream(llm_messages):
|
| 70 |
-
if chunk.content:
|
| 71 |
-
response_content += chunk.content
|
| 72 |
-
yield chunk.content # Yield each chunk as it comes
|
| 73 |
-
|
| 74 |
-
# Keep track of the complete response for tool calls
|
| 75 |
-
full_response = chunk
|
| 76 |
-
else:
|
| 77 |
-
async for chunk in llm.astream(llm_messages):
|
| 78 |
-
if chunk.content:
|
| 79 |
-
response_content += chunk.content
|
| 80 |
-
yield chunk.content # Yield each chunk as it comes
|
| 81 |
-
|
| 82 |
-
full_response = chunk
|
| 83 |
-
|
| 84 |
-
# Handle tool calls if any (after main streaming is complete)
|
| 85 |
-
if full_response and hasattr(full_response, 'tool_calls') and full_response.tool_calls:
|
| 86 |
-
tool_results_content = await handle_tool_calls(full_response, tools)
|
| 87 |
-
if tool_results_content:
|
| 88 |
-
yield tool_results_content
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
def create_system_message_with_tools(tools: List) -> SystemMessage:
|
| 92 |
-
"""Create a comprehensive system message that includes tool information."""
|
| 93 |
-
|
| 94 |
-
# Basic system prompt
|
| 95 |
-
system_content = CHAT_SYSTEM_PROMPT_BASE
|
| 96 |
-
|
| 97 |
-
# Add tool descriptions if available
|
| 98 |
-
if tools:
|
| 99 |
-
system_content += CHAT_SYSTEM_PROMPT_TOOLS_HEADER
|
| 100 |
-
|
| 101 |
-
for tool in tools:
|
| 102 |
-
tool_name = getattr(tool, 'name', 'Unknown Tool')
|
| 103 |
-
tool_description = getattr(tool, 'description', 'No description available')
|
| 104 |
-
|
| 105 |
-
# Get tool parameters
|
| 106 |
-
tool_args = getattr(tool, 'args_schema', None)
|
| 107 |
-
if tool_args and hasattr(tool_args, 'schema'):
|
| 108 |
-
schema = tool_args.schema()
|
| 109 |
-
properties = schema.get('properties', {})
|
| 110 |
-
|
| 111 |
-
system_content += f"\n**{tool_name}**:\n"
|
| 112 |
-
system_content += f"- Description: {tool_description}\n"
|
| 113 |
-
|
| 114 |
-
if properties:
|
| 115 |
-
system_content += "- Parameters:\n"
|
| 116 |
-
for param_name, param_info in properties.items():
|
| 117 |
-
param_type = param_info.get('type', 'unknown')
|
| 118 |
-
param_desc = param_info.get('description', 'No description')
|
| 119 |
-
system_content += f" • {param_name} ({param_type}): {param_desc}\n"
|
| 120 |
-
|
| 121 |
-
system_content += "\n"
|
| 122 |
-
|
| 123 |
-
system_content += CHAT_SYSTEM_PROMPT_GUIDELINES
|
| 124 |
-
|
| 125 |
-
return SystemMessage(content=system_content)
|
| 126 |
-
|
| 127 |
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
tool_name = tool_call["name"]
|
| 136 |
-
tool_args = tool_call["args"]
|
| 137 |
-
|
| 138 |
-
if tool_name in tool_lookup:
|
| 139 |
-
try:
|
| 140 |
-
tool = tool_lookup[tool_name]
|
| 141 |
-
result = await tool.ainvoke(tool_args)
|
| 142 |
-
|
| 143 |
-
# Format the tool result for display
|
| 144 |
-
tool_result_text = format_tool_result(tool_name, tool_args, result)
|
| 145 |
-
tool_results.append(tool_result_text)
|
| 146 |
-
|
| 147 |
-
except Exception as e:
|
| 148 |
-
error_msg = f"❌ Tool '{tool_name}' failed: {str(e)}"
|
| 149 |
-
tool_results.append(error_msg)
|
| 150 |
-
else:
|
| 151 |
-
error_msg = f"❌ Tool '{tool_name}' not available"
|
| 152 |
-
tool_results.append(error_msg)
|
| 153 |
-
|
| 154 |
-
return "\n\n" + "\n\n".join(tool_results) if tool_results else ""
-
-
-def format_tool_result(tool_name, tool_args, result) -> str:
-    """Format a single tool invocation result for display."""
-
-    formatted_result = f"\n\n🔧 **Tool Demo: {tool_name}**\n"
-
-    # Show parameters used
-    if tool_args:
-        formatted_result += "**Parameters used:**\n"
-        for key, value in tool_args.items():
-            formatted_result += f"- {key}: {value}\n"
-
-    # Show result
-    formatted_result += "\n**Result:**\n"
-
-    if hasattr(result, 'content'):
-        formatted_result += f"{result.content}"
-    elif hasattr(result, 'artifact'):
-        if hasattr(result.artifact, 'url'):
-            formatted_result += f"🎵 Audio processed: {result.artifact.url}"
-        else:
-            formatted_result += f"{result.artifact}"
-    else:
-        formatted_result += f"{result}"
-
-    return formatted_result
 from langchain_openai import ChatOpenAI
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.runnables import RunnableParallel
+from src.state import AgentState
+from operator import itemgetter
 
+def chat_node(state: AgentState) -> AgentState:
     """
+    Handle general questions and conversations using streaming LLM.
     """
 
+    llm = ChatOpenAI(model="gpt-4.1")
+    llm = llm.with_structured_output(AgentState)
 
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", "You are a helpful assistant that can answer questions and help with tasks."),
+        ("user", "Current state: {state}")
+    ])
 
+    chain = (
+        RunnableParallel({
+            "state": itemgetter("state")
+        })
+        | prompt
+        | llm
+    )
 
+    return chain.invoke({"state": state})
 
+def chat_node_router(state: AgentState) -> str:
+    if state.requires_processing:
+        return "audio_processor"
     else:
+        return "end"
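A minimal sketch of how the simplified nodes could be wired together with LangGraph. This is not part of the commit: the graph presumably lives in src/agent.py, and the "audio_processor" node mapping is an assumption based on the string returned by chat_node_router.

from langgraph.graph import StateGraph, END

from src.state import AgentState
from src.nodes.chat import chat_node, chat_node_router
from src.nodes.processor import processor_node

# Build a two-node graph: chat first, then (optionally) processing.
graph = StateGraph(AgentState)
graph.add_node("chat", chat_node)
graph.add_node("audio_processor", processor_node)  # assumed node name
graph.set_entry_point("chat")
# chat_node_router returns "audio_processor" or "end" based on the state.
graph.add_conditional_edges("chat", chat_node_router, {
    "audio_processor": "audio_processor",
    "end": END,
})
graph.add_edge("audio_processor", END)
app = graph.compile()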
src/nodes/final_response.py DELETED
@@ -1,299 +0,0 @@
-"""
-Intelligent LLM-powered final response formatter for comprehensive user communication.
-"""
-
-from typing import Dict, Any, List
-from langchain_core.messages import AIMessage, SystemMessage, HumanMessage
-from langchain_openai import ChatOpenAI
-from langchain_core.output_parsers import PydanticOutputParser
-from pydantic import BaseModel, Field
-import json
-
-from .prompts import (
-    FINAL_RESPONSE_SYSTEM_PROMPT,
-    FINAL_RESPONSE_USER_PROMPT_TEMPLATE,
-)
-
-
-class FinalResponse(BaseModel):
-    """Structured final response from LLM analysis."""
-
-    response_title: str = Field(description="Engaging title for the response")
-    main_message: str = Field(description="Primary message about what was accomplished")
-    processed_files_summary: List[str] = Field(description="Summary of each processed file with download info")
-    key_improvements: List[str] = Field(description="Key improvements and enhancements made")
-    quality_assessment: str = Field(description="Assessment of final quality and success")
-    user_recommendations: List[str] = Field(description="Personalized recommendations for the user")
-    next_steps: str = Field(description="Suggested next steps or call to action")
-    technical_summary: str = Field(description="Brief technical summary of what was done")
-
-
-async def final_response_node(state: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Generate intelligent, personalized final response using LLM analysis.
-
-    The LLM creates a comprehensive response that:
-    - Summarizes what was accomplished
-    - Highlights key improvements and results
-    - Provides download links for processed files
-    - Offers personalized recommendations
-    - Suggests appropriate next steps
-    """
-
-    processing_type = state.get("processing_type", "")
-    processed_files = state.get("processed_files", {})
-    scripts = state.get("scripts", {})
-    errors = state.get("errors", [])
-    processing_metadata = state.get("processing_metadata", {})
-    user_request = state.get("user_request", "")
-    completed_steps = state.get("completed_steps", [])
-    execution_plan = state.get("execution_plan", [])
-
-    # For chat responses, use existing final_response
-    if processing_type == "chat":
-        final_response = state.get("final_response", "")
-        if not final_response:
-            # Generate a chat response if none exists
-            final_response = await create_chat_final_response(user_request, processing_metadata)
-    else:
-        # Generate intelligent audio processing response
-        llm_response = await create_intelligent_final_response_with_llm(
-            user_request, processing_type, processed_files, scripts,
-            errors, processing_metadata, completed_steps, execution_plan
-        )
-
-        final_response = format_llm_response(llm_response, processed_files)
-
-    # Add final response to messages if not already present
-    messages = state.get("messages", [])
-    if not any(msg.content == final_response for msg in messages if hasattr(msg, 'content')):
-        messages.append(AIMessage(content=final_response))
-
-    # Set final audio URL if available
-    final_audio_url = get_final_audio_url(processed_files, processing_type)
-
-    return {
-        "final_response": final_response,
-        "final_audio_url": final_audio_url,
-        "messages": messages
-    }
-
-
-async def create_intelligent_final_response_with_llm(
-    user_request: str,
-    processing_type: str,
-    processed_files: Dict[str, str],
-    scripts: Dict[str, Any],
-    errors: List[str],
-    processing_metadata: Dict[str, Any],
-    completed_steps: List[str],
-    execution_plan: List[Dict[str, Any]]
-) -> FinalResponse:
-    """Use LLM to create intelligent, personalized final response."""
-
-    system_message = create_final_response_system_message()
-    user_message = create_final_response_user_message(
-        user_request, processing_type, processed_files, scripts,
-        errors, processing_metadata, completed_steps, execution_plan
-    )
-
-    llm = ChatOpenAI(model="gpt-4o", temperature=0.3)
-    parser = PydanticOutputParser(pydantic_object=FinalResponse)
-
-    prompt_messages = [
-        SystemMessage(content=system_message.content),
-        HumanMessage(content=user_message),
-        HumanMessage(content=parser.get_format_instructions())
-    ]
-
-    try:
-        response = await llm.ainvoke(prompt_messages)
-        final_response = parser.parse(response.content)
-        return final_response
-    except Exception as e:
-        # Fallback response
-        return create_fallback_final_response(user_request, processed_files, errors)
-
-
-def create_final_response_system_message() -> SystemMessage:
-    """Create system message for final response generation."""
-    return SystemMessage(content=FINAL_RESPONSE_SYSTEM_PROMPT)
-
-
-def create_final_response_user_message(
-    user_request: str,
-    processing_type: str,
-    processed_files: Dict[str, str],
-    scripts: Dict[str, Any],
-    errors: List[str],
-    processing_metadata: Dict[str, Any],
-    completed_steps: List[str],
-    execution_plan: List[Dict[str, Any]]
-) -> str:
-    """Create user message for final response generation."""
-
-    # Analyze processing context
-    processing_summary = analyze_processing_context(
-        processed_files, scripts, processing_metadata, completed_steps
-    )
-
-    return FINAL_RESPONSE_USER_PROMPT_TEMPLATE.format(
-        user_request=user_request,
-        processing_type=processing_type,
-        processed_files_count=len(processed_files),
-        completed_steps_count=len(completed_steps),
-        error_count=len(errors),
-        processed_files=json.dumps(processed_files, indent=2) if processed_files else "No files processed",
-        processing_summary=processing_summary,
-        plan_steps=len(execution_plan),
-        tools_used=list(set([step.get('tool', 'unknown') for step in execution_plan])),
-        completed_steps="\n".join(completed_steps[-5:]) if completed_steps else "No steps completed",
-        errors="\n".join(errors) if errors else "No errors encountered",
-        processing_metadata=json.dumps(processing_metadata, indent=2) if processing_metadata else "No additional metadata"
-    )
-
-
-def analyze_processing_context(
-    processed_files: Dict[str, str],
-    scripts: Dict[str, Any],
-    processing_metadata: Dict[str, Any],
-    completed_steps: List[str]
-) -> str:
-    """Analyze processing context to inform final response."""
-
-    analysis = "**Processing Analysis:**\n"
-
-    # File analysis
-    analysis += f"- Files processed: {len(processed_files)}\n"
-    if processed_files:
-        for original, processed in processed_files.items():
-            original_name = original.split('/')[-1] if '/' in original else original
-            processed_name = processed.split('/')[-1] if '/' in processed else processed
-            analysis += f"  • {original_name} → {processed_name}\n"
-
-    # Script analysis
-    if scripts:
-        total_transcript_length = sum(len(script.get("transcript", "")) for script in scripts.values())
-        total_filler_words = sum(len(script.get("filler_words", [])) for script in scripts.values())
-        analysis += f"- Total transcript length: {total_transcript_length} characters\n"
-        analysis += f"- Filler words detected: {total_filler_words}\n"
-
-    # Quality assessment
-    quality_score = processing_metadata.get("quality_score", 0)
-    if quality_score > 0:
-        analysis += f"- Estimated quality score: {quality_score:.1%}\n"
-
-    # Processing insights
-    final_analysis = processing_metadata.get("final_analysis", {})
-    if final_analysis:
-        analysis += f"- AI assessment: {final_analysis.get('success_assessment', 'N/A')}\n"
-        quality_improvements = final_analysis.get("quality_improvements", [])
-        if quality_improvements:
-            analysis += f"- Key improvements: {', '.join(quality_improvements[:3])}\n"
-
-    # Step analysis
-    successful_steps = len([step for step in completed_steps if step.startswith("✅")])
-    analysis += f"- Successful steps: {successful_steps}/{len(completed_steps)}\n"
-
-    return analysis
-
-
-def create_fallback_final_response(
-    user_request: str,
-    processed_files: Dict[str, str],
-    errors: List[str]
-) -> FinalResponse:
-    """Create fallback response if LLM generation fails."""
-
-    if processed_files:
-        return FinalResponse(
-            response_title="Audio Processing Complete",
-            main_message=f"Successfully processed {len(processed_files)} audio file(s) according to your request.",
-            processed_files_summary=[f"{original.split('/')[-1]}: [Download]({processed})" for original, processed in processed_files.items()],
-            key_improvements=["Audio processing completed", "Files enhanced and optimized"],
-            quality_assessment="Processing completed successfully",
-            user_recommendations=["Download your processed files", "Review the results"],
-            next_steps="Your enhanced audio files are ready for download. Let me know if you need any adjustments!",
-            technical_summary=f"Applied audio processing workflow to {len(processed_files)} file(s)"
-        )
-    else:
-        return FinalResponse(
-            response_title="Processing Attempt Complete",
-            main_message="Audio processing encountered some challenges.",
-            processed_files_summary=[],
-            key_improvements=[],
-            quality_assessment="Processing was not successful",
-            user_recommendations=["Check your audio file URLs", "Try a simpler processing request"],
-            next_steps="Please check the errors above and try again with valid audio files.",
-            technical_summary=f"Processing attempted but encountered {len(errors)} error(s)"
-        )
-
-
-def format_llm_response(llm_response: FinalResponse, processed_files: Dict[str, str]) -> str:
-    """Format the LLM response into final markdown response."""
-
-    response = f"🎵 **{llm_response.response_title}**\n\n"
-
-    # Main message
-    response += f"{llm_response.main_message}\n\n"
-
-    # Processed files with actual download links
-    if processed_files:
-        response += "**🎵 Your Processed Audio Files:**\n"
-        for original, processed in processed_files.items():
-            filename = original.split('/')[-1] if '/' in original else original
-            response += f"- **{filename}**: [Download]({processed})\n"
-        response += "\n"
-
-    # Key improvements
-    if llm_response.key_improvements:
-        response += "**✨ Key Improvements:**\n"
-        for improvement in llm_response.key_improvements:
-            response += f"- {improvement}\n"
-        response += "\n"
-
-    # Quality assessment
-    response += f"**🎯 Quality Assessment:** {llm_response.quality_assessment}\n\n"
-
-    # Recommendations
-    if llm_response.user_recommendations:
-        response += "**💡 Recommendations:**\n"
-        for rec in llm_response.user_recommendations:
-            response += f"- {rec}\n"
-        response += "\n"
-
-    # Technical summary
-    if llm_response.technical_summary:
-        response += f"**🔧 Technical Summary:** {llm_response.technical_summary}\n\n"
-
-    # Next steps
-    response += f"**🚀 Next Steps:** {llm_response.next_steps}"
-
-    return response
-
-
-async def create_chat_final_response(user_request: str, processing_metadata: Dict[str, Any]) -> str:
-    """Create final response for chat interactions."""
-
-    # For chat, create a simple acknowledgment
-    return f"I've provided information about our audio processing capabilities. Is there anything specific you'd like to know more about or any audio files you'd like me to help process?"
-
-
-def get_final_audio_url(processed_files: Dict[str, str], processing_type: str) -> str:
-    """Get the final audio URL to return to the user."""
-
-    if not processed_files:
-        return ""
-
-    # For dialogue generation, look for combined file
-    if processing_type == "dialogue_generation":
-        for original, processed in processed_files.items():
-            if "combined" in processed or "dialogue" in processed:
-                return processed
-
-    # For single file processing, return the processed file
-    if len(processed_files) == 1:
-        return list(processed_files.values())[0]
-
-    # For multiple files, return the first one
-    return list(processed_files.values())[0] if processed_files else ""
src/nodes/planner.py CHANGED
@@ -1,311 +1,24 @@
-"""
-Intelligent LLM-powered planner for creating optimal audio processing execution plans.
-"""
-
-from typing import Dict, Any, List
-from langchain_core.messages import AIMessage, SystemMessage, HumanMessage
 from langchain_openai import ChatOpenAI
-from langchain_core.output_parsers import PydanticOutputParser
-from pydantic import BaseModel, Field
-import json
-from .prompts import PLANNER_SYSTEM_PROMPT, PLANNER_USER_PROMPT_TEMPLATE
-
-
-class ExecutionStep(BaseModel):
-    """Single step in an execution plan."""
-
-    step_id: str
-    tool_name: str
-    parameters: Dict[str, Any]
-    description: str
-    reasoning: str
-    priority: str
-
-
-class ExecutionPlan(BaseModel):
-    """Structured execution plan from LLM analysis."""
-
-    steps: List[ExecutionStep] = Field(description="Ordered list of execution steps")
-    strategy: str = Field(description="Overall strategy and approach")
-    expected_outcomes: List[str] = Field(description="What outcomes to expect from this plan")
-    estimated_duration: str = Field(description="Estimated time to complete")
-    risks_and_mitigations: List[str] = Field(description="Potential issues and how to handle them")
-
-
-async def planner_node(state: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Create intelligent execution plan using LLM analysis.
-
-    The LLM analyzes:
-    - User request and intent
-    - Available audio files and their characteristics
-    - Transcript insights and quality
-    - Available tools and capabilities
-    - Processing type and requirements
-    """
-
-    user_request = state.get("user_request", "")
-    audio_files = state.get("audio_files", [])
-    scripts = state.get("scripts", {})
-    processing_type = state.get("processing_type", "")
-    processing_metadata = state.get("processing_metadata", {})
-
-    if not audio_files:
-        return create_no_files_plan_response(state)
-
-    # Use LLM to create intelligent execution plan
-    execution_plan = await create_execution_plan_with_llm(
-        user_request, audio_files, scripts, processing_type, processing_metadata
-    )
-
-    # Convert to the format expected by audio processor
-    formatted_plan = convert_plan_to_execution_format(execution_plan)
-
-    # Create plan summary message
-    plan_summary = create_plan_summary_message(execution_plan, formatted_plan)
-    messages = state.get("messages", [])
-    messages.append(AIMessage(content=plan_summary))
-
-    return {
-        "execution_plan": formatted_plan,
-        "messages": messages,
-        "processing_metadata": {
-            **processing_metadata,
-            "execution_strategy": execution_plan.strategy,
-            "expected_outcomes": execution_plan.expected_outcomes,
-            "plan_metadata": execution_plan.dict()
-        }
-    }
-
-
-async def create_execution_plan_with_llm(
-    user_request: str,
-    audio_files: List[str],
-    scripts: Dict[str, Any],
-    processing_type: str,
-    processing_metadata: Dict[str, Any]
-) -> ExecutionPlan:
-    """Use LLM to create intelligent execution plan."""
-
-    system_message = create_planning_system_message()
-    user_message_content = create_planning_user_message(
-        user_request, audio_files, scripts, processing_type, processing_metadata
-    )
-
-    llm = ChatOpenAI(model="gpt-4o", temperature=0.2)
-    parser = PydanticOutputParser(pydantic_object=ExecutionPlan)
-
-    prompt_messages = [
-        system_message,
-        HumanMessage(content=user_message_content),
-        HumanMessage(content=parser.get_format_instructions())
-    ]
-
-    try:
-        response = await llm.ainvoke(prompt_messages)
-        plan = parser.parse(response.content)
-        return plan
-    except Exception as e:
-        # Fallback to simple plan
-        return create_fallback_execution_plan(user_request, audio_files, processing_type)
-
-
-def create_planning_system_message() -> SystemMessage:
-    """Create system message for execution planning."""
-    return SystemMessage(content=PLANNER_SYSTEM_PROMPT)
-
-
-def create_planning_user_message(
-    user_request: str,
-    audio_files: List[str],
-    scripts: Dict[str, Any],
-    processing_type: str,
-    processing_metadata: Dict[str, Any]
-) -> str:
-    """Create user message for execution planning."""
-
-    # Analyze transcript data
-    transcript_summary = analyze_transcript_data(scripts)
-    file_list = "\n".join([f"- {file.split('/')[-1]}" for file in audio_files])
-    processing_context = json.dumps(processing_metadata, indent=2) if processing_metadata else "No additional context"
-
-    return PLANNER_USER_PROMPT_TEMPLATE.format(
-        user_request=user_request,
-        processing_type=processing_type,
-        file_count=len(audio_files),
-        file_list=file_list,
-        transcript_summary=transcript_summary,
-        processing_context=processing_context,
-    )
-
-
-def analyze_transcript_data(scripts: Dict[str, Any]) -> str:
-    """Analyze transcript data to inform planning decisions."""
-
-    if not scripts:
-        return "No transcript data available"
-
-    summary = ""
-    total_filler_words = 0
-    quality_scores = []
-    insights = []
-
-    for file_url, script_data in scripts.items():
-        filename = file_url.split('/')[-1] if '/' in file_url else file_url
-        transcript = script_data.get("transcript", "")
-        filler_words = script_data.get("filler_words", [])
-        quality_score = script_data.get("quality_score", 0)
-        file_insights = script_data.get("insights", [])
-
-        total_filler_words += len(filler_words)
-        if quality_score > 0:
-            quality_scores.append(quality_score)
-        insights.extend(file_insights)
-
-        summary += f"\n- **{filename}**: {len(transcript)} chars, {len(filler_words)} fillers"
-        if quality_score > 0:
-            summary += f", {quality_score:.1%} quality"
-
-    # Overall analysis
-    avg_quality = sum(quality_scores) / len(quality_scores) if quality_scores else 0
-    summary += f"\n\n**Overall Analysis:**"
-    summary += f"\n- Total filler words across all files: {total_filler_words}"
-    summary += f"\n- Average transcript quality: {avg_quality:.1%}" if avg_quality > 0 else ""
-
-    if insights:
-        summary += f"\n- Key insights: {', '.join(insights[:3])}"
-
-    # Planning recommendations
-    if total_filler_words > 10:
-        summary += f"\n- **Recommendation**: High filler word count suggests need for silence trimming and cutting"
-    if avg_quality < 0.7:
-        summary += f"\n- **Recommendation**: Lower quality transcript suggests audio may need normalization"
-
-    return summary
-
-
-def convert_plan_to_execution_format(execution_plan: ExecutionPlan) -> List[Dict[str, Any]]:
-    """Convert LLM execution plan to format expected by audio processor."""
-
-    formatted_steps = []
-
-    for step in execution_plan.steps:
-        formatted_step = {
-            "step": step.step_id,
-            "tool": step.tool_name,
-            "params": step.parameters,
-            "description": step.description,
-            "reasoning": step.reasoning,
-            "priority": step.priority
-        }
-        formatted_steps.append(formatted_step)
-
-    return formatted_steps
-
-
-def create_fallback_execution_plan(
-    user_request: str,
-    audio_files: List[str],
-    processing_type: str
-) -> ExecutionPlan:
-    """Create fallback execution plan if LLM planning fails."""
-
-    steps = []
-
-    for i, audio_file in enumerate(audio_files):
-        # Basic processing steps
-        steps.extend([
-            ExecutionStep(
-                step_id=f"update_info_{i}",
-                tool_name="update_audio_info",
-                parameters={"audio_file": audio_file},
-                description=f"Update audio information",
-                reasoning="Essential for proper file handling",
-                priority="high"
-            ),
-            ExecutionStep(
-                step_id=f"trim_silence_{i}",
-                tool_name="apply_silence_trimming",
-                parameters={"audio_file": audio_file, "threshold_db": -40},
-                description="Remove silence and quiet sections",
-                reasoning="Improves audio quality and reduces file size",
-                priority="medium"
-            ),
-            ExecutionStep(
-                step_id=f"normalize_{i}",
-                tool_name="apply_normalization",
-                parameters={"audio_file": audio_file, "target_level": -3},
-                description="Normalize audio levels",
-                reasoning="Ensures consistent volume levels",
-                priority="medium"
-            )
-        ])
-
-    return ExecutionPlan(
-        steps=steps,
-        strategy="Fallback plan: basic audio enhancement with silence removal and normalization",
-        expected_outcomes=["Cleaner audio", "Consistent levels", "Reduced file size"],
-        estimated_duration="2-5 minutes",
-        risks_and_mitigations=["Minimal risk with basic processing steps"]
     )
 
-
-def create_plan_summary_message(execution_plan: ExecutionPlan, formatted_plan: List[Dict[str, Any]]) -> str:
-    """Create comprehensive plan summary message."""
-
-    summary = "🎯 **Intelligent Execution Plan Created**\n\n"
-
-    # Strategy
-    summary += f"**📋 Strategy:** {execution_plan.strategy}\n\n"
-
-    # Plan overview
-    summary += f"**📊 Plan Overview:**\n"
-    summary += f"- Total steps: {len(execution_plan.steps)}\n"
-    summary += f"- Estimated duration: {execution_plan.estimated_duration}\n"
-
-    # Priority breakdown
-    high_priority = len([s for s in execution_plan.steps if s.priority == "high"])
-    medium_priority = len([s for s in execution_plan.steps if s.priority == "medium"])
-    low_priority = len([s for s in execution_plan.steps if s.priority == "low"])
-
-    summary += f"- Priority breakdown: {high_priority} high, {medium_priority} medium, {low_priority} low\n\n"
-
-    # Key steps
-    summary += "**🔧 Key Processing Steps:**\n"
-    for i, step in enumerate(execution_plan.steps[:5], 1):  # Show first 5 steps
-        summary += f"{i}. **{step.tool_name}**: {step.description}\n"
-
-    if len(execution_plan.steps) > 5:
-        summary += f"... and {len(execution_plan.steps) - 5} more steps\n"
-
-    summary += "\n"
-
-    # Expected outcomes
-    if execution_plan.expected_outcomes:
-        summary += "**🎯 Expected Outcomes:**\n"
-        for outcome in execution_plan.expected_outcomes[:3]:
-            summary += f"- {outcome}\n"
-        summary += "\n"
-
-    # Risks and mitigations
-    if execution_plan.risks_and_mitigations:
-        summary += "**⚠️ Risk Management:**\n"
-        for risk in execution_plan.risks_and_mitigations[:2]:
-            summary += f"- {risk}\n"
-        summary += "\n"
-
-    summary += "✅ **Ready to execute intelligent plan...**"
-    return summary
-
-
-def create_no_files_plan_response(state: Dict[str, Any]) -> Dict[str, Any]:
-    """Handle case when no audio files are available for planning."""
-
-    messages = state.get("messages", [])
-    messages.append(AIMessage(content="❌ **No Planning Possible**: No audio files available to process."))
-
-    return {
-        "execution_plan": [],
-        "messages": messages,
-        "errors": ["No audio files available for execution planning"]
-    }
 from langchain_openai import ChatOpenAI
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.runnables import RunnableParallel
+from src.state import AgentState
+from operator import itemgetter
+
+def planner_node(state: AgentState) -> AgentState:
+    llm = ChatOpenAI(model="gpt-4.1")
+    llm = llm.with_structured_output(AgentState)
+
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", "You are planner that finds what user wants to do and how can we achieve it. Generate a comprehensive plan for the user."),
+        ("user", "{state}")
+    ])
+
+    chain = (
+        RunnableParallel({
+            "state": itemgetter("state")
+        })
+        | prompt
+        | llm
     )
 
+    return chain.invoke({"state": state})
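One design note on this chain (and the identical ones in chat.py and processor.py): the RunnableParallel stage only re-maps the "state" key that invoke() already supplies onto itself. Assuming nothing else consumes the parallel output, a simpler equivalent is sketched below; this is an observation about the pattern, not code from the commit.

from operator import itemgetter

from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

from src.state import AgentState

llm = ChatOpenAI(model="gpt-4.1").with_structured_output(AgentState)
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are planner that finds what user wants to do and how can we achieve it. Generate a comprehensive plan for the user."),
    ("user", "{state}"),
])

# The prompt pulls "state" straight from the invoke() payload, so the
# RunnableParallel({"state": itemgetter("state")}) stage can be dropped:
chain = prompt | llm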
src/nodes/processor.py ADDED
@@ -0,0 +1,24 @@
+from langchain_openai import ChatOpenAI
+from langchain_core.prompts import ChatPromptTemplate
+from src.state import AgentState
+from operator import itemgetter
+from langchain_core.runnables import RunnableParallel
+
+def processor_node(state: AgentState) -> AgentState:
+    llm = ChatOpenAI(model="gpt-4.1")
+    llm = llm.with_structured_output(AgentState)
+
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", "You are processor that processes the plan and generates a final response to the user."),
+        ("user", "Current state: {state}")
+    ])
+
+    chain = (
+        RunnableParallel({
+            "state": itemgetter("state")
+        })
+        | prompt
+        | llm
+    )
+
+    return chain.invoke({"state": state})
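chat_node, planner_node, and processor_node now share the same prompt-to-structured-output boilerplate, differing only in their system prompt. A possible consolidation is a small node factory; this is a sketch of a follow-up refactor, not part of this commit.

from operator import itemgetter

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel
from langchain_openai import ChatOpenAI

from src.state import AgentState

def make_structured_node(system_prompt: str):
    """Build a graph node that maps AgentState -> AgentState via one LLM call."""
    llm = ChatOpenAI(model="gpt-4.1").with_structured_output(AgentState)
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        ("user", "Current state: {state}"),
    ])
    chain = RunnableParallel({"state": itemgetter("state")}) | prompt | llm

    def node(state: AgentState) -> AgentState:
        return chain.invoke({"state": state})

    return node

# Example: processor_node rebuilt from the factory.
processor_node = make_structured_node(
    "You are processor that processes the plan and generates a final response to the user."
)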
src/nodes/prompts.py DELETED
@@ -1,440 +0,0 @@
-PLANNER_SYSTEM_PROMPT = """You are an expert audio processing strategist and execution planner. Your job is to create optimal, step-by-step execution plans for audio processing tasks.
-
-**Available Audio Processing Tools:**
-
-1. **Information & Metadata Tools:**
-   - update_audio_info: Updates general audio file information
-   - update_duration_info: Updates audio duration and timing information
-   - update_transcription_info: Updates transcription-related metadata
-
-2. **Core Processing Tools:**
-   - process_cut_audio: Cuts/trims audio to specific time ranges (params: audio_file, _start_time, _end_time)
-   - apply_normalization: Normalizes audio levels (params: audio_file, target_level)
-   - apply_volume_adjustment: Adjusts volume by gain amount (params: audio_file, gain_db)
-   - apply_speed_adjustment: Changes playback speed (params: audio_file, speed_factor)
-   - apply_fades: Adds fade in/out effects (params: audio_file, fade_in_ms, fade_out_ms)
-   - apply_reverse: Reverses audio playback (params: audio_file)
-   - apply_silence_trimming: Removes silence/quiet sections (params: audio_file, threshold_db)
-
-**Planning Principles:**
-
-1. **Context-Aware**: Consider the user's specific goals, not just keywords
-2. **Quality-First**: Prioritize steps that will most improve the final result
-3. **Efficient**: Order steps logically to minimize processing time and quality loss
-4. **Robust**: Include metadata updates and error-handling steps
-5. **Adaptive**: Tailor approach based on transcript insights and file characteristics
-
-**Step Ordering Best Practices:**
-- Start with metadata updates (audio_info, duration_info)
-- Apply destructive edits first (cutting, trimming)
-- Then apply enhancement (normalization, volume, speed)
-- Finish with aesthetic touches (fades, effects)
-
-**User Intent Analysis:**
-- "Clean up" / "improve" = silence trimming + normalization + possible filler removal
-- "Remove filler words" = intelligent cutting based on transcript analysis
-- "Cut" / "trim" = precise time-based cutting
-- "Louder" / "quieter" = volume adjustment
-- "Faster" / "slower" = speed adjustment
-- "Professional" = normalization + fades + silence trimming
-
-Be intelligent about combining the user's explicit requests with transcript insights to create a comprehensive plan that achieves their goals."""
-
-
-PLANNER_USER_PROMPT_TEMPLATE = """
-**Planning Request for Audio Processing**
-
-**User's Original Request:**
-{user_request}
-
-**Processing Type:** {processing_type}
-
-**Audio Files to Process:** {file_count} files
-{file_list}
-
-**Transcript Analysis:**
-{transcript_summary}
-
-**Processing Context:**
-{processing_context}
-
-**Planning Requirements:**
-
-1. **Analyze the user's true intent** - what do they actually want to achieve?
-2. **Consider transcript insights** - filler words, quality issues, content characteristics
-3. **Create step-by-step execution plan** - specific tools with exact parameters
-4. **Optimize for quality and efficiency** - best order for operations
-5. **Include appropriate metadata steps** - ensure proper file handling
-6. **Plan for potential issues** - what could go wrong and how to handle it
-
-**Key Questions to Address:**
-- What's the primary goal of this processing?
-- Which transcript insights should influence the plan?
-- What's the optimal order of operations?
-- What parameters will achieve the best results?
-- How can we ensure high-quality output?
-
-Create a comprehensive execution plan that intelligently combines the user's requests with the insights from the transcript analysis.
-"""
-
-# Prompts for script_generator.py
-
-SCRIPT_GENERATOR_SYSTEM_PROMPT = """You are an expert audio transcription strategist. Your job is to create optimal plans for transcribing audio files based on user needs and available tools.
-
-**Available Tool Types:**
-- transcribe_audio_sync: Main transcription tool for converting audio to text
-- update_transcription_info: Updates transcription metadata and info
-
-**Planning Considerations:**
-- Order files by complexity/priority
-- Choose appropriate tools based on file characteristics
-- Consider user's specific goals (filler removal, cutting, quality improvement)
-- Anticipate potential challenges (multiple speakers, background noise, etc.)
-- Plan analysis goals that align with user intent
-
-**Your planning should be:**
-- Strategic: Consider the best order and approach
-- Practical: Use available tools effectively
-- Goal-oriented: Focus on what the user actually needs
-- Robust: Anticipate and prepare for common issues
-
-Be intelligent about the user's intent - if they want to remove filler words, prioritize filler detection. If they want to cut audio, focus on timestamp accuracy."""
-
-SCRIPT_GENERATOR_USER_PROMPT_TEMPLATE = """
-**Audio Files to Process:** {file_count} files
-{file_list}
-
-**User's Request:** {user_request}
-
-**Available Tools:** {available_tools}
-
-Create an optimal transcription plan that:
-1. Determines the best order to process these files
-2. Selects appropriate tools for the task
-3. Defines analysis goals that align with the user's needs
-4. Anticipates potential challenges
-5. Provides clear reasoning for the approach
-
-Consider the user's intent and optimize for their specific goals.
-"""
-
-ANALYZE_TRANSCRIPT_PROMPT_TEMPLATE = """
-Analyze this audio transcript and provide structured insights:
-
-**Audio File:** {audio_file}
-**Transcript:** {transcript_content}
-
-Please provide analysis in JSON format with these fields:
-- "timestamps": Array of objects with start/end times and text segments (estimate based on content)
-- "filler_words": Array of detected filler words with positions and context
-- "quality_score": Float 0-1 indicating transcript quality
-- "insights": Array of key insights about the content
-- "speaker_analysis": Information about speakers if detectable
-- "content_summary": Brief summary of what the audio contains
-
-Focus on practical insights that would help with audio processing decisions.
-"""
-
-ANALYZE_TRANSCRIPTION_RESULTS_SYSTEM_PROMPT = """You are an expert audio transcription analyst. Analyze the transcription results and provide insights about success, quality, and recommendations for next steps."""
-
-ANALYZE_TRANSCRIPTION_RESULTS_USER_PROMPT_TEMPLATE = """
-**User's Original Request:** {user_request}
-
-**Analysis Goals:** {analysis_goals}
-
-**Transcription Results:**
-- Successfully transcribed: {success_count} files
-- Failed transcriptions: {failure_count} files
-- Errors: {errors}
-
-**Script Details:**
-{script_details}
-
-Provide analysis of the transcription quality, success rate, and specific recommendations for audio processing based on these results.
-"""
-
-# Prompts for chat.py
-
-CHAT_SYSTEM_PROMPT_BASE = """You are an expert Audio Processing Assistant powered by advanced audio tools.
-
-Your role is to:
-1. Answer questions about audio processing capabilities
-2. Provide guidance on how to use audio tools
-3. Demonstrate tool usage only when explicitly requested
-4. Explain audio concepts and best practices
-5. Help users understand what's possible with audio processing
-
-You have a conversational, helpful, and knowledgeable personality. You can discuss both technical and practical aspects of audio processing.
-
-IMPORTANT: Only call tools when the user explicitly asks for a demonstration. For general questions about capabilities, explain the tools without calling them."""
-
-CHAT_SYSTEM_PROMPT_TOOLS_HEADER = "\n\n**Available Audio Tools:**\n"
-
-CHAT_SYSTEM_PROMPT_GUIDELINES = """
-**Guidelines:**
-- Provide clear, helpful explanations about audio processing
-- Only demonstrate tools when explicitly asked to do so
-- Explain tool capabilities without necessarily calling them
-- Be encouraging about what's possible with audio processing
-- Keep responses informative but concise
-- Use emojis to make responses engaging (🎵 🔧 📊 ✨)
-
-**For audio file processing requests:**
-Suggest using the full audio processing workflow by providing audio file URLs and describing the desired outcome.
-"""
-
-# Prompts for validator.py
-
-VALIDATOR_SYSTEM_PROMPT = """You are an expert audio processing validator. Your job is to assess the results of audio processing workflows and provide intelligent feedback.
-
-Your role:
-1. Analyze processing results against the user's original request
-2. Evaluate the quality and completeness of the work done
-3. Identify critical issues, warnings, and successes
-4. Provide actionable recommendations
-5. Determine if reprocessing would be beneficial
-
-Assessment criteria:
-- Did the processing achieve the user's goals?
-- Are there any critical failures that prevent success?
-- What is the overall quality of the results?
-- Are there minor issues that could be improved?
-- Would reprocessing with different parameters help?
-
-Consider the context:
-- User's original request and intent
-- What processing steps were planned vs. completed
-- Any errors or issues encountered
-- The quality of transcripts and processing metadata
-- Whether processed files were successfully generated
-
-Be thorough but practical in your assessment. Focus on actionable insights that would help improve the audio processing results."""
-
-VALIDATOR_USER_PROMPT_TEMPLATE = """
-## Processing Assessment Request
-
-**User's Original Request:**
-{user_request}
-
-**Execution Plan ({plan_steps} steps planned):**
-{execution_plan}
-
-**Completed Steps ({completed_steps_count}):**
-{completed_steps}
-
-**Processing Results:**
-{processing_results}
-
-**Transcript Analysis:**
-{transcript_analysis}
-
-**Errors Encountered ({error_count}):**
-{errors}
-{reprocessing_note}
-## Assessment Task
-
-Please analyze this processing workflow and provide a comprehensive validation assessment. Consider:
-
-1. How well did the processing achieve the user's goals?
-2. What is the overall quality and success rate?
-3. Are there critical issues that prevent success?
-4. What warnings or minor issues should be noted?
-5. What specific recommendations would improve results?
-6. Would reprocessing with adjustments be beneficial?
-
-Provide honest, actionable feedback that would help improve the audio processing results.
-"""
-
-# Prompts for final_response.py
-
-FINAL_RESPONSE_SYSTEM_PROMPT = """You are an expert audio processing communication specialist. Your job is to create engaging, informative, and personalized final responses for users who have completed audio processing workflows.
-
-**Your Role:**
-- Craft compelling, user-friendly summaries of what was accomplished
-- Highlight key improvements and value delivered
-- Provide clear information about processed files and how to access them
-- Offer personalized recommendations based on the specific processing
-- Suggest appropriate next steps
-- Maintain an encouraging and professional tone
-
-**Response Principles:**
-1. **User-Centric**: Focus on what the user gained and achieved
-2. **Clear and Actionable**: Provide specific, actionable information
-3. **Celebratory**: Acknowledge accomplishments and improvements
-4. **Helpful**: Offer valuable insights and next steps
-5. **Professional**: Maintain expertise while being approachable
-6. **Specific**: Reference actual results and improvements made
-
-**Key Elements to Include:**
-- Engaging title that captures what was accomplished
-- Clear summary of processing results
-- Specific improvements and enhancements made
-- Quality assessment and success metrics
-- Download information for processed files
-- Personalized recommendations based on the processing
-- Encouraging next steps or call to action
-
-**Tone Guidelines:**
-- Professional but friendly
-- Confident in the results achieved
-- Encouraging about next steps
-- Specific about technical improvements
-- Celebratory of success, honest about limitations
-
-Be specific about the actual processing done and results achieved. Reference real file names, improvements made, and quality metrics when available."""
-
-FINAL_RESPONSE_USER_PROMPT_TEMPLATE = """
-**Create Final Response for Audio Processing Workflow**
-
-**User's Original Request:**
-{user_request}
-
-**Processing Type:** {processing_type}
-
-**Processing Results:**
-- Successfully processed: {processed_files_count} files
-- Completed steps: {completed_steps_count}
-- Errors encountered: {error_count}
-
-**Processed Files:**
-{processed_files}
-
-**Processing Context Analysis:**
-{processing_summary}
-
-**Execution Plan Summary:**
-{plan_steps} steps planned
-Key tools used: {tools_used}
-
-**Completed Steps (last 5):**
-{completed_steps}
-
-**Errors (if any):**
-{errors}
-
-**Processing Metadata:**
-{processing_metadata}
-
-**Task:**
-Create a comprehensive, engaging final response that:
-1. Celebrates what was accomplished
-2. Clearly explains the results and improvements
-3. Provides specific download information for processed files
-4. Offers personalized recommendations based on this specific processing
-5. Suggests appropriate next steps
-6. Maintains an encouraging and professional tone
-
-Focus on the value delivered to the user and make it clear how to access and use their processed audio files.
-"""
-
-# Prompts for router.py
-
-ROUTER_SYSTEM_PROMPT = """You are an intelligent routing agent for an audio processing system. Your job is to analyze user requests and determine the best processing path.
-
-**Processing Types Available:**
-
-1. **chat** - For general questions, help requests, or when no audio processing is needed
-   - User asking about capabilities, features, or how to use the system
-   - General conversation or questions
-   - No audio files present, or user just wants information
-
-2. **audio_processing** - For single or multiple audio file processing tasks
-   - Removing filler words, cutting audio, improving quality
-   - Normalizing volume, adjusting speed, adding effects
-   - Transcription and analysis tasks
-   - Any audio enhancement or modification
-
-3. **dialogue_generation** - For combining multiple audio files into conversations
-   - Creating interviews, podcasts, or conversations from separate files
-   - Merging voices or speakers into dialogue format
-   - Building composite audio experiences
-
-**Audio File Detection:**
-Extract any audio file URLs or paths from the user's message. Look for:
-- HTTP/HTTPS URLs ending in .mp3, .wav, .m4a, .flac, .aac, .ogg
-- Local file paths with audio extensions
-- References to audio files even if not explicitly formatted as URLs
-
-**Priority Assessment:**
-- **high**: Urgent processing needs, multiple complex steps, time-sensitive
-- **medium**: Standard processing requests, moderate complexity
-- **low**: Simple questions, basic single-step tasks
-
-**Your Analysis Should:**
-- Understand the user's true intent behind their request
-- Identify all audio files mentioned or linked
-- Choose the most appropriate processing type
-- Assess the complexity and urgency
-- Provide clear reasoning for your decision
-
-Be intelligent about context - a user saying "help me clean up this audio" with a file link clearly needs audio_processing, not chat."""
-
-ROUTER_USER_PROMPT_TEMPLATE = """
-Please analyze this user request and determine the appropriate routing:
-
-**User Request:**
-{user_content}
-
-**Analysis Task:**
-1. What is the user's primary intent?
-2. Are there any audio files mentioned or linked?
-3. What type of processing would best serve their needs?
-4. How complex/urgent is this request?
-5. What's the reasoning for your routing decision?
-
-Provide a structured analysis with your routing decision.
-"""
-
-# Prompts for audio_processor.py
-
-LLM_PROCESSING_DECISION_PROMPT_TEMPLATE = """
-You are an intelligent audio processing engine. Decide whether to execute this processing step:
-
-**Step to Consider:**
-- Tool: {tool_name}
-- Description: {description}
-- Planned Parameters: {planned_parameters}
-
-**Context:**
-- User Request: {user_request}
-- Completed Steps: {completed_steps_count}
-- Errors So Far: {error_count}
-- Available Tools: {available_tools}
-- Current File URLs: {current_file_urls}
-
-**Recent Activity:**
-{recent_activity}
-
-Should this step be executed? Respond with:
-1. "EXECUTE" or "SKIP"
-2. If EXECUTE, provide any parameter modifications in JSON format (or "NO_CHANGES")
-
-Example response:
-EXECUTE
-{{"audio_file": "updated_url.mp3", "target_level": -6}}
-
-Or:
-SKIP - This step is redundant given previous processing
-"""
-
-LLM_PROCESSING_SUMMARY_PROMPT_TEMPLATE = """
-Create a concise processing summary for the user:
-
-**User's Request:** {user_request}
-
-**Results:**
-- Completed Steps: {completed_steps_count}
-- Errors: {error_count}
-- Files Processed: {processed_files_count}
-
-**Step Details:**
-{step_details}
-
-**Processed Files:**
-{processed_files}
-
-**Errors:**
-{errors}
-
-Create a brief, encouraging summary focusing on what was accomplished and next steps.
-"""
src/nodes/router.py
DELETED

@@ -1,167 +0,0 @@
- """
- Intelligent LLM-powered router node for determining processing type and extracting context.
- """
-
- import re
- from typing import Dict, Any, List
- from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
- from langchain_openai import ChatOpenAI
- from langchain_core.output_parsers import PydanticOutputParser
- from pydantic import BaseModel, Field
-
- from .prompts import ROUTER_SYSTEM_PROMPT, ROUTER_USER_PROMPT_TEMPLATE
-
-
- class RouterDecision(BaseModel):
-     """Structured output for router decisions."""
-
-     processing_type: str = Field(description="Type of processing needed: 'chat', 'audio_processing', or 'dialogue_generation'")
-     user_request: str = Field(description="Clean, parsed version of the user's request")
-     audio_files: List[str] = Field(description="List of audio file URLs/paths found in the message")
-     reasoning: str = Field(description="Brief explanation of why this processing type was chosen")
-     priority_level: str = Field(description="Priority level: 'low', 'medium', or 'high'")
-
-
- async def router_node(state: Dict[str, Any]) -> Dict[str, Any]:
-     """
-     Intelligently route the conversation using LLM analysis.
-
-     The LLM analyzes user input to determine:
-     - Processing type needed
-     - Audio files to extract
-     - User intent and priority
-     """
-
-     # Get the latest user message
-     latest_message = None
-     for msg in reversed(state.get("messages", [])):
-         if isinstance(msg, HumanMessage):
-             latest_message = msg
-             break
-
-     if not latest_message:
-         return create_default_routing()
-
-     # Use LLM to make routing decision
-     router_decision = await analyze_user_request_with_llm(latest_message.content)
-
-     # Create status message
-     status_message = f"🎯 **Routing Analysis**: {router_decision.reasoning}\n**Processing Type**: {router_decision.processing_type}\n**Priority**: {router_decision.priority_level}"
-
-     messages = state.get("messages", [])
-     messages.append(AIMessage(content=status_message))
-
-     return {
-         "processing_type": router_decision.processing_type,
-         "user_request": router_decision.user_request,
-         "audio_files": router_decision.audio_files,
-         "messages": messages,
-         "errors": [],
-         "needs_reprocessing": False,
-         "completed_steps": [],
-         "scripts": {},
-         "processed_files": {},
-         "processing_metadata": {
-             "router_reasoning": router_decision.reasoning,
-             "priority_level": router_decision.priority_level
-         }
-     }
-
-
- async def analyze_user_request_with_llm(user_content: str) -> RouterDecision:
-     """Use LLM to intelligently analyze user request and make routing decisions."""
-
-     system_message = create_router_system_message()
-     user_message = create_router_user_message(user_content)
-
-     # Set up LLM with structured output
-     llm = ChatOpenAI(model="gpt-4o", temperature=0.1)
-     parser = PydanticOutputParser(pydantic_object=RouterDecision)
-
-     prompt_messages = [
-         system_message,
-         HumanMessage(content=user_message),
-         HumanMessage(content=parser.get_format_instructions())
-     ]
-
-     try:
-         response = await llm.ainvoke(prompt_messages)
-         router_decision = parser.parse(response.content)
-         return router_decision
-     except Exception as e:
-         # Fallback to simple analysis
-         return create_fallback_routing(user_content)
-
-
- def create_router_system_message() -> SystemMessage:
-     """Create system message for LLM routing analysis."""
-     return SystemMessage(content=ROUTER_SYSTEM_PROMPT)
-
-
- def create_router_user_message(user_content: str) -> str:
-     """Create user message for routing analysis."""
-     return ROUTER_USER_PROMPT_TEMPLATE.format(user_content=user_content)
-
-
- def create_fallback_routing(user_content: str) -> RouterDecision:
-     """Create fallback routing if LLM analysis fails."""
-
-     content_lower = user_content.lower()
-
-     # Simple pattern matching for fallback
-     audio_patterns = ['.mp3', '.wav', '.m4a', '.flac', '.aac', '.ogg', 'http']
-     dialogue_keywords = ['dialogue', 'conversation', 'combine', 'merge', 'interview']
-
-     has_audio = any(pattern in user_content for pattern in audio_patterns)
-     is_dialogue = any(keyword in content_lower for keyword in dialogue_keywords)
-
-     if has_audio and is_dialogue:
-         processing_type = "dialogue_generation"
-     elif has_audio:
-         processing_type = "audio_processing"
-     else:
-         processing_type = "chat"
-
-     # Extract audio files with simple regex
-     audio_files = extract_audio_files_simple(user_content)
-
-     return RouterDecision(
-         processing_type=processing_type,
-         user_request=user_content,
-         audio_files=audio_files,
-         reasoning=f"Fallback analysis: detected {processing_type} based on content patterns",
-         priority_level="medium"
-     )
-
-
- def extract_audio_files_simple(content: str) -> List[str]:
-     """Simple regex-based audio file extraction for fallback."""
-
-     # Look for URLs (http/https)
-     url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+\.(mp3|wav|m4a|flac|aac|ogg)'
-     urls = re.findall(url_pattern, content, re.IGNORECASE)
-
-     # Look for file paths
-     path_pattern = r'[^\s<>"{}|\\^`\[\]]+\.(mp3|wav|m4a|flac|aac|ogg)'
-     paths = re.findall(path_pattern, content, re.IGNORECASE)
-
-     # Combine and deduplicate
-     audio_files = list(set(urls + [path for path in paths if not path.startswith('http')]))
-
-     return audio_files
-
-
- def create_default_routing() -> Dict[str, Any]:
-     """Create default routing when no user message found."""
-
-     return {
-         "processing_type": "chat",
-         "user_request": "",
-         "audio_files": [],
-         "errors": [],
-         "needs_reprocessing": False,
-         "completed_steps": [],
-         "scripts": {},
-         "processed_files": {},
-         "processing_metadata": {}
-     }
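
One caveat worth noting in the deleted fallback above: because the patterns contain a capturing group, re.findall returns only the captured group (the bare extension, e.g. 'mp3'), not the whole match, so extract_audio_files_simple would have collected extensions rather than URLs or paths. A short illustrative sketch of the non-capturing-group fix; it is not part of this commit:

import re

# (?:...) is non-capturing, so findall returns the whole match instead of the group
url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+\.(?:mp3|wav|m4a|flac|aac|ogg)'

text = "clean up https://example.com/raw.mp3 and notes.wav please"
print(re.findall(url_pattern, text, re.IGNORECASE))
# ['https://example.com/raw.mp3']
# With the original capturing pattern, the same call would return ['mp3'].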
src/nodes/script_generator.py
DELETED

@@ -1,412 +0,0 @@
- """
- Intelligent LLM-powered script generator for audio transcription and analysis.
- """
-
- from typing import Dict, Any, List
- from langchain_core.messages import AIMessage, SystemMessage, HumanMessage
- from langchain_openai import ChatOpenAI
- from langchain_core.output_parsers import PydanticOutputParser
- from pydantic import BaseModel, Field
- import json
-
- from .prompts import (
-     SCRIPT_GENERATOR_SYSTEM_PROMPT,
-     SCRIPT_GENERATOR_USER_PROMPT_TEMPLATE,
-     ANALYZE_TRANSCRIPT_PROMPT_TEMPLATE,
-     ANALYZE_TRANSCRIPTION_RESULTS_SYSTEM_PROMPT,
-     ANALYZE_TRANSCRIPTION_RESULTS_USER_PROMPT_TEMPLATE,
- )
-
-
- class TranscriptionPlan(BaseModel):
-     """Plan for transcribing audio files."""
-
-     tools_to_use: List[str] = Field(description="List of tool names to use for transcription")
-     processing_order: List[str] = Field(description="Order to process audio files")
-     analysis_goals: List[str] = Field(description="What to analyze in the transcripts")
-     expected_challenges: List[str] = Field(description="Potential issues to watch for")
-     reasoning: str = Field(description="Reasoning for this transcription approach")
-
-
- class TranscriptionResults(BaseModel):
-     """Results of transcription analysis."""
-
-     success_files: List[str] = Field(description="Successfully transcribed files")
-     failed_files: List[str] = Field(description="Files that failed to transcribe")
-     insights: List[str] = Field(description="Key insights from the transcriptions")
-     quality_assessment: str = Field(description="Assessment of transcription quality")
-     recommendations: List[str] = Field(description="Recommendations for next steps")
-
-
- async def script_generator_node(state: Dict[str, Any], tools: list) -> Dict[str, Any]:
-     """
-     Intelligently generate transcripts using LLM-guided tool usage.
-
-     The LLM analyzes the audio files and user request to:
-     - Decide which transcription tools to use
-     - Determine the best processing approach
-     - Execute transcription with intelligent error handling
-     - Analyze results and extract insights
-     """
-
-     audio_files = state.get("audio_files", [])
-     user_request = state.get("user_request", "")
-
-     if not audio_files:
-         return create_no_files_response(state)
-
-     # Get available transcription tools
-     available_tools = get_transcription_tools(tools)
-     if not available_tools:
-         return create_no_tools_response(state)
-
-     # LLM creates transcription plan
-     transcription_plan = await create_transcription_plan_with_llm(
-         audio_files, user_request, available_tools
-     )
-
-     # Execute transcription based on LLM plan
-     scripts = {}
-     errors = []
-     completed_steps = state.get("completed_steps", [])
-
-     for audio_file in transcription_plan.processing_order:
-         if audio_file in audio_files:  # Ensure file is in our list
-             script_result = await execute_transcription_with_llm(
-                 audio_file, transcription_plan.tools_to_use, tools
-             )
-
-             if script_result["success"]:
-                 scripts[audio_file] = script_result["data"]
-                 completed_steps.append(f"✅ Transcribed: {audio_file.split('/')[-1]}")
-             else:
-                 errors.append(script_result["error"])
-                 completed_steps.append(f"❌ Failed: {audio_file.split('/')[-1]}")
-
-     # LLM analyzes results and provides insights
-     analysis_results = await analyze_transcription_results_with_llm(
-         scripts, errors, user_request, transcription_plan.analysis_goals
-     )
-
-     # Create comprehensive response
-     response_message = create_transcription_response(
-         scripts, errors, transcription_plan, analysis_results
-     )
-
-     messages = state.get("messages", [])
-     messages.append(AIMessage(content=response_message))
-
-     return {
-         "scripts": scripts,
-         "completed_steps": completed_steps,
-         "errors": errors,
-         "messages": messages,
-         "processing_metadata": {
-             "transcription_plan": transcription_plan.dict(),
-             "analysis_results": analysis_results.dict()
-         }
-     }
-
-
- async def create_transcription_plan_with_llm(
-     audio_files: List[str],
-     user_request: str,
-     available_tools: List[str]
- ) -> TranscriptionPlan:
-     """Use LLM to create intelligent transcription plan."""
-
-     system_message = create_transcription_planning_system_message()
-     user_message = create_transcription_planning_user_message(audio_files, user_request, available_tools)
-
-     llm = ChatOpenAI(model="gpt-4o", temperature=0.2)
-     parser = PydanticOutputParser(pydantic_object=TranscriptionPlan)
-
-     prompt_messages = [
-         system_message,
-         HumanMessage(content=user_message),
-         HumanMessage(content=parser.get_format_instructions())
-     ]
-
-     try:
-         response = await llm.ainvoke(prompt_messages)
-         plan = parser.parse(response.content)
-         return plan
-     except Exception as e:
-         # Fallback plan
-         return TranscriptionPlan(
-             tools_to_use=available_tools[:2],  # Use first 2 available tools
-             processing_order=audio_files,
-             analysis_goals=["Basic transcription", "Filler word detection"],
-             expected_challenges=["Audio quality issues", "Multiple speakers"],
-             reasoning="Fallback plan due to LLM planning failure"
-         )
-
-
- async def execute_transcription_with_llm(
-     audio_file: str,
-     tools_to_use: List[str],
-     available_tools: list
- ) -> Dict[str, Any]:
-     """Execute transcription for a single file using planned tools."""
-
-     # Find the actual tool objects
-     tool_objects = {}
-     for tool in available_tools:
-         if tool.name in tools_to_use:
-             tool_objects[tool.name] = tool
-
-     transcript_data = {
-         "transcript": "",
-         "timestamps": [],
-         "filler_words": [],
-         "quality_score": 0.0
-     }
-
-     try:
-         # Use update_transcription_info first if available
-         if "update_transcription_info" in tool_objects:
-             await tool_objects["update_transcription_info"].ainvoke({"audio_file": audio_file})
-
-         # Use transcribe_audio_sync for main transcription
-         if "transcribe_audio_sync" in tool_objects:
-             transcript_result = await tool_objects["transcribe_audio_sync"].ainvoke({"audio_file": audio_file})
-
-             # Process the transcript result
-             if hasattr(transcript_result, 'content'):
-                 transcript_content = transcript_result.content
-             else:
-                 transcript_content = str(transcript_result)
-
-             # Use LLM to analyze the transcript
-             analysis = await analyze_transcript_with_llm(transcript_content, audio_file)
-
-             transcript_data.update({
-                 "transcript": transcript_content,
-                 "timestamps": analysis.get("timestamps", []),
-                 "filler_words": analysis.get("filler_words", []),
-                 "quality_score": analysis.get("quality_score", 0.5),
-                 "insights": analysis.get("insights", [])
-             })
-
-             return {"success": True, "data": transcript_data}
-
-         else:
-             return {"success": False, "error": f"No suitable transcription tool found for {audio_file}"}
-
-     except Exception as e:
-         return {"success": False, "error": f"Transcription failed for {audio_file}: {str(e)}"}
-
-
- async def analyze_transcript_with_llm(transcript_content: str, audio_file: str) -> Dict[str, Any]:
-     """Use LLM to analyze transcript content and extract insights."""
-
-     llm = ChatOpenAI(model="gpt-4o", temperature=0.1)
-
-     analysis_prompt = ANALYZE_TRANSCRIPT_PROMPT_TEMPLATE.format(
-         audio_file=audio_file,
-         transcript_content=transcript_content
-     )
-
-     try:
-         response = await llm.ainvoke([SystemMessage(content=analysis_prompt)])
-         # Try to parse as JSON
-         analysis_data = json.loads(response.content)
-         return analysis_data
-     except Exception as e:
-         # Fallback to simple analysis
-         return {
-             "timestamps": create_simple_timestamps(transcript_content),
-             "filler_words": detect_simple_filler_words(transcript_content),
-             "quality_score": 0.7,
-             "insights": ["Basic transcript generated"],
-             "speaker_analysis": "Unable to analyze speakers",
-             "content_summary": transcript_content[:100] + "..." if len(transcript_content) > 100 else transcript_content
-         }
-
-
- async def analyze_transcription_results_with_llm(
-     scripts: Dict[str, Any],
-     errors: List[str],
-     user_request: str,
-     analysis_goals: List[str]
- ) -> TranscriptionResults:
-     """Use LLM to analyze overall transcription results."""
-
-     system_message = SystemMessage(content=ANALYZE_TRANSCRIPTION_RESULTS_SYSTEM_PROMPT)
-
-     script_details = json.dumps({
-         k: {
-             "length": len(v.get("transcript", "")),
-             "filler_count": len(v.get("filler_words", [])),
-             "quality": v.get("quality_score", 0)
-         } for k, v in scripts.items()
-     }, indent=2)
-
-     user_message_content = ANALYZE_TRANSCRIPTION_RESULTS_USER_PROMPT_TEMPLATE.format(
-         user_request=user_request,
-         analysis_goals=", ".join(analysis_goals),
-         success_count=len(scripts),
-         failure_count=len(errors),
-         errors=errors,
-         script_details=script_details
-     )
-
-     llm = ChatOpenAI(model="gpt-4o", temperature=0.2)
-     parser = PydanticOutputParser(pydantic_object=TranscriptionResults)
-
-     prompt_messages = [
-         system_message,
-         HumanMessage(content=user_message_content),
-         HumanMessage(content=parser.get_format_instructions())
-     ]
-
-     try:
-         response = await llm.ainvoke(prompt_messages)
-         results = parser.parse(response.content)
-         return results
-     except Exception as e:
-         # Fallback analysis
-         return TranscriptionResults(
-             success_files=list(scripts.keys()),
-             failed_files=[f"Error occurred: {str(e)}"],
-             insights=["Basic transcription completed"],
-             quality_assessment="Unable to assess quality automatically",
-             recommendations=["Proceed with standard audio processing"]
-         )
-
-
- def get_transcription_tools(tools: list) -> List[str]:
-     """Extract transcription tool names from available tools."""
-     transcription_tool_names = []
-
-     for tool in tools:
-         if any(keyword in tool.name.lower() for keyword in ['transcribe', 'transcript']):
-             transcription_tool_names.append(tool.name)
-
-     return transcription_tool_names
-
-
- def create_transcription_planning_system_message() -> SystemMessage:
-     """Create system message for transcription planning."""
-     return SystemMessage(content=SCRIPT_GENERATOR_SYSTEM_PROMPT)
-
-
- def create_transcription_planning_user_message(audio_files: List[str], user_request: str, available_tools: List[str]) -> str:
-     """Create user message for transcription planning."""
-     file_list = "\n".join([f"- {file}" for file in audio_files])
-     return SCRIPT_GENERATOR_USER_PROMPT_TEMPLATE.format(
-         file_count=len(audio_files),
-         file_list=file_list,
-         user_request=user_request,
-         available_tools=", ".join(available_tools)
-     )
-
-
- def create_simple_timestamps(transcript: str) -> List[Dict[str, Any]]:
-     """Create simple timestamp estimates for fallback."""
-     timestamps = []
-     lines = [line.strip() for line in transcript.split('\n') if line.strip()]
-
-     for i, line in enumerate(lines):
-         start_time = i * 3.0
-         end_time = start_time + 3.0
-         timestamps.append({
-             "start": start_time,
-             "end": end_time,
-             "text": line
-         })
-
-     return timestamps
-
-
- def detect_simple_filler_words(transcript: str) -> List[Dict[str, Any]]:
-     """Simple filler word detection for fallback."""
-     filler_words = ["um", "uh", "like", "you know", "so", "well", "actually"]
-     found_fillers = []
-     words = transcript.lower().split()
-
-     for i, word in enumerate(words):
-         clean_word = word.strip('.,!?;:"()[]{}')
-         if clean_word in filler_words:
-             found_fillers.append({
-                 "word": clean_word,
-                 "position": i,
-                 "context": " ".join(words[max(0, i-2):min(len(words), i+3)])
-             })
-
-     return found_fillers
-
-
- def create_transcription_response(
-     scripts: Dict[str, Any],
-     errors: List[str],
-     plan: TranscriptionPlan,
-     analysis: TranscriptionResults
- ) -> str:
-     """Create comprehensive transcription response message."""
-
-     if not scripts and errors:
-         return f"❌ **Transcription Failed**\n\n{chr(10).join(errors)}"
-
-     response = "🎙️ **Intelligent Transcription Complete**\n\n"
-
-     # Plan summary
-     response += f"**📋 Strategy Used:** {plan.reasoning}\n\n"
-
-     # Results summary
-     response += f"**📊 Results:**\n"
-     response += f"- ✅ Successfully transcribed: {len(scripts)} files\n"
-     response += f"- ❌ Failed: {len(errors)} files\n\n"
-
-     # File details
-     if scripts:
-         response += "**📝 Transcript Details:**\n"
-         for file_url, script_data in scripts.items():
-             filename = file_url.split('/')[-1] if '/' in file_url else file_url
-             transcript_len = len(script_data.get("transcript", ""))
-             filler_count = len(script_data.get("filler_words", []))
-             quality = script_data.get("quality_score", 0)
-
-             response += f"- **{filename}**: {transcript_len} chars, {filler_count} fillers, {quality:.1%} quality\n"
-         response += "\n"
-
-     # AI insights
-     if analysis.insights:
-         response += "**🤖 AI Insights:**\n"
-         for insight in analysis.insights[:3]:
-             response += f"- {insight}\n"
-         response += "\n"
-
-     # Next steps
-     if analysis.recommendations:
-         response += "**🎯 Recommendations:**\n"
-         for rec in analysis.recommendations[:2]:
-             response += f"- {rec}\n"
-         response += "\n"
-
-     response += "✅ **Ready for execution planning...**"
-     return response
-
-
- def create_no_files_response(state: Dict[str, Any]) -> Dict[str, Any]:
-     """Handle case when no audio files are provided."""
-     messages = state.get("messages", [])
-     messages.append(AIMessage(content="❌ **No Audio Files**: Please provide audio files to transcribe."))
-
-     return {
-         "scripts": {},
-         "errors": ["No audio files provided for transcription"],
-         "messages": messages
-     }
-
-
- def create_no_tools_response(state: Dict[str, Any]) -> Dict[str, Any]:
-     """Handle case when no transcription tools are available."""
-     messages = state.get("messages", [])
-     messages.append(AIMessage(content="❌ **Transcription Tools Unavailable**: Cannot proceed without transcription capabilities."))
-
-     return {
-         "scripts": {},
-         "errors": ["No transcription tools available"],
-         "messages": messages
-     }
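
One fragility in the deleted analyze_transcript_with_llm above: it calls json.loads(response.content) directly, which drops to the simple fallback whenever the model wraps its JSON in a Markdown code fence. A small sketch of a more forgiving parse; the helper name loads_lenient is made up for illustration:

import json
import re


def loads_lenient(raw: str) -> dict:
    """Parse LLM output as JSON, tolerating a surrounding ```json ... ``` fence."""
    text = raw.strip()
    fenced = re.match(r"^```(?:json)?\s*(.*?)\s*```$", text, re.DOTALL)
    if fenced:
        text = fenced.group(1)  # keep only the fenced payload
    return json.loads(text)


print(loads_lenient('```json\n{"quality_score": 0.9}\n```'))
# {'quality_score': 0.9}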
src/nodes/validator.py
CHANGED

@@ -1,272 +1,30 @@
- """
- Validator node for intelligent LLM-powered validation of processing results.
- """
-
- from typing import Dict, Any, List
- from langchain_core.messages import AIMessage, SystemMessage, HumanMessage
- from langchain_openai import ChatOpenAI
- from langchain_core.output_parsers import PydanticOutputParser
- from pydantic import BaseModel, Field
- import datetime
- from .prompts import VALIDATOR_SYSTEM_PROMPT, VALIDATOR_USER_PROMPT_TEMPLATE
-
-
- class ValidationResults(BaseModel):
-     """Structured output for validation results."""
-
-     overall_status: str = Field(description="Overall status: 'success', 'partial_success', or 'failed'")
-     completion_rate: float = Field(description="Fraction of planned steps that completed")
-     quality_score: float = Field(description="Quality score for the processing results")
-     critical_errors: List[str] = Field(description="List of critical errors")
-     warnings: List[str] = Field(description="List of warnings or minor issues")
-     recommendations: List[str] = Field(description="Specific recommendations for improvement")
-     needs_reprocessing: bool = Field(description="Whether reprocessing is recommended")
-     success_indicators: List[str] = Field(description="What went well in the processing")
-
-
- async def validator_node(state: Dict[str, Any]) -> Dict[str, Any]:
-     """
-     Intelligently validate processing results using LLM assessment.
-     """
-
-     processed_files = state.get("processed_files", {})
-     errors = state.get("errors", [])
-     completed_steps = state.get("completed_steps", [])
-     execution_plan = state.get("execution_plan", [])
-     user_request = state.get("user_request", "")
-     scripts = state.get("scripts", {})
-     current_needs_reprocessing = state.get("needs_reprocessing", False)
-
-     # Use LLM to perform intelligent validation
-     validation_results = await perform_llm_validation(
-         user_request=user_request,
-         processed_files=processed_files,
-         errors=errors,
-         completed_steps=completed_steps,
-         execution_plan=execution_plan,
-         scripts=scripts,
-         current_needs_reprocessing=current_needs_reprocessing
-     )
-
-     # Create validation summary
-     validation_summary = create_validation_summary(validation_results)
-     messages = state.get("messages", [])
-     messages.append(AIMessage(content=validation_summary))
-
-     return {
-         "needs_reprocessing": validation_results.needs_reprocessing,
-         "processing_metadata": {
-             "validation_results": validation_results.model_dump(),
-             "validation_timestamp": get_current_timestamp()
-         },
-         "messages": messages
-     }
-
-
- async def perform_llm_validation(
-     user_request: str,
-     processed_files: Dict[str, str],
-     errors: List[str],
-     completed_steps: List[str],
-     execution_plan: List[Dict[str, Any]],
-     scripts: Dict[str, Any],
-     current_needs_reprocessing: bool
- ) -> ValidationResults:
-     """Use LLM to intelligently validate processing results."""
-
-     # Create system message for validation
-     system_message = create_validation_system_message()
-
-     # Create user message with processing context
-     user_message = create_validation_context_message(
-         user_request, processed_files, errors, completed_steps,
-         execution_plan, scripts, current_needs_reprocessing
-     )
-
-     # Set up LLM with structured output
-     llm = ChatOpenAI(model="gpt-4o", temperature=0.1)
-     parser = PydanticOutputParser(pydantic_object=ValidationResults)
-
-     prompt_messages = [
-         system_message,
-         HumanMessage(content=user_message),
-         HumanMessage(content=parser.get_format_instructions())
-     ]
-
-     # Get LLM assessment
-     try:
-         response = await llm.ainvoke(prompt_messages)
-         validation_results = parser.parse(response.content)
-         return validation_results
-     except Exception as e:
-         # Fallback validation if LLM fails
-         return create_fallback_validation(processed_files, errors, completed_steps, execution_plan)
-
-
- def create_validation_system_message() -> SystemMessage:
-     """Create system message for LLM validation."""
-     return SystemMessage(content=VALIDATOR_SYSTEM_PROMPT)
-
-
- def create_validation_context_message(
-     user_request: str,
-     processed_files: Dict[str, str],
-     errors: List[str],
-     completed_steps: List[str],
-     execution_plan: List[Dict[str, Any]],
-     scripts: Dict[str, Any],
-     current_needs_reprocessing: bool
- ) -> str:
-     """Create context message with all processing information."""
-
-     plan_str = "\n".join([
-         f"{i}. {step.get('tool', 'unknown')}: {step.get('description', 'No description')}"
-         for i, step in enumerate(execution_plan, 1)
-     ])
-
-     completed_steps_str = "\n".join(completed_steps) if completed_steps else "None"
-
-     if processed_files:
-         processing_results_str = ""
-         for filename, processed in processed_files.items():
-             processed_name = processed.split('/')[-1] if '/' in processed else processed
-             processing_results_str += f" • {filename} → {processed_name}\n"
-     else:
-         processing_results_str = "No files were successfully processed\n"
-
-     if scripts:
-         transcript_analysis_str = ""
-         for file_url, script_data in scripts.items():
-             filename = file_url.split('/')[-1] if '/' in file_url else file_url
-             transcript = script_data.get("transcript", "")
-             filler_count = len(script_data.get("filler_words", []))
-             transcript_analysis_str += f"- {filename}: {len(transcript)} chars, {filler_count} filler words detected\n"
-     else:
-         transcript_analysis_str = "No transcript data available.\n"
-
-     errors_str = "\n".join([f"- {error}" for error in errors]) if errors else "None"
-
-     reprocessing_note_str = "\n**Note:** This is already a reprocessing attempt.\n" if current_needs_reprocessing else ""
-
-     return VALIDATOR_USER_PROMPT_TEMPLATE.format(
-         user_request=user_request,
-         plan_steps=len(execution_plan),
-         execution_plan=plan_str,
-         completed_steps_count=len(completed_steps),
-         completed_steps=completed_steps_str,
-         processing_results=processing_results_str,
-         transcript_analysis=transcript_analysis_str,
-         error_count=len(errors),
-         errors=errors_str,
-         reprocessing_note=reprocessing_note_str
-     )
-
-
- def create_fallback_validation(
-     processed_files: Dict[str, str],
-     errors: List[str],
-     completed_steps: List[str],
-     execution_plan: List[Dict[str, Any]]
- ) -> ValidationResults:
-     """Create fallback validation if LLM assessment fails."""
-
-     total_steps = len(execution_plan) if execution_plan else 1
-     successful_steps = len([step for step in completed_steps if step.startswith("✅")])
-     completion_rate = successful_steps / total_steps if total_steps > 0 else 0
-
-     has_processed_files = len(processed_files) > 0
-     has_critical_errors = any("failed" in error.lower() or "error" in error.lower() for error in errors)
-
-     if has_processed_files and completion_rate >= 0.7:
-         overall_status = "success"
-         quality_score = 0.8
-     elif has_processed_files and completion_rate >= 0.3:
-         overall_status = "partial_success"
-         quality_score = 0.5
-     else:
-         overall_status = "failed"
-         quality_score = 0.2
-
-     return ValidationResults(
-         overall_status=overall_status,
-         completion_rate=completion_rate,
-         quality_score=quality_score,
-         critical_errors=errors if has_critical_errors else [],
-         warnings=errors if not has_critical_errors else [],
-         recommendations=[
-             "Check processing logs for detailed error information",
-             "Verify audio file formats and accessibility",
-             "Consider simplifying the processing request"
-         ],
-         needs_reprocessing=has_critical_errors and completion_rate > 0.1 and completion_rate < 0.8,
-         success_indicators=["Some processing steps completed"] if completed_steps else []
-     )
-
-
- def create_validation_summary(validation_results: ValidationResults) -> str:
-     """Create a human-readable validation summary message."""
-
-     # Status emoji mapping
-     status_emoji = {
-         "success": "✅",
-         "partial_success": "⚠️",
-         "failed": "❌"
-     }
-
-     emoji = status_emoji.get(validation_results.overall_status, "❓")
-
-     summary = f"{emoji} **Intelligent Validation Results**\n\n"
-
-     # Overall assessment
-     summary += f"**Overall Status**: {validation_results.overall_status.replace('_', ' ').title()}\n"
-     summary += f"**Completion Rate**: {validation_results.completion_rate:.1%}\n"
-     summary += f"**Quality Score**: {validation_results.quality_score:.1%}\n\n"
-
-     # Success indicators
-     if validation_results.success_indicators:
-         summary += "**✨ What Went Well:**\n"
-         for indicator in validation_results.success_indicators:
-             summary += f"- {indicator}\n"
-         summary += "\n"
-
-     # Critical errors
-     if validation_results.critical_errors:
-         summary += f"**🚨 Critical Issues ({len(validation_results.critical_errors)}):**\n"
-         for error in validation_results.critical_errors[:3]:
-             summary += f"- {error}\n"
-         if len(validation_results.critical_errors) > 3:
-             summary += f"- ... and {len(validation_results.critical_errors) - 3} more\n"
-         summary += "\n"
-
-     # Warnings
-     if validation_results.warnings:
-         summary += f"**⚠️ Warnings ({len(validation_results.warnings)}):**\n"
-         for warning in validation_results.warnings[:2]:
-             summary += f"- {warning}\n"
-         if len(validation_results.warnings) > 2:
-             summary += f"- ... and {len(validation_results.warnings) - 2} more\n"
-         summary += "\n"
-
-     # Recommendations
-     if validation_results.recommendations:
-         summary += "**🎯 Recommendations:**\n"
-         for rec in validation_results.recommendations[:4]:
-             summary += f"- {rec}\n"
-         if len(validation_results.recommendations) > 4:
-             summary += f"- ... and {len(validation_results.recommendations) - 4} more\n"
-         summary += "\n"
-
-     # Reprocessing decision
-     if validation_results.needs_reprocessing:
-         summary += "🔄 **Reprocessing Recommended**: The LLM assessment suggests reprocessing could improve results."
-     else:
-         if validation_results.quality_score >= 0.8:
-             summary += "🎉 **Processing Complete**: High-quality results achieved!"
-         else:
-             summary += "⏹️ **Processing Complete**: Reprocessing not recommended based on current assessment."
-
-     return summary
-
-
- def get_current_timestamp() -> str:
-     """Get current timestamp for metadata."""
-     return datetime.datetime.now().isoformat()
+ from langchain_openai import ChatOpenAI
+ from langchain_core.prompts import ChatPromptTemplate
+ from src.state import AgentState
+ from operator import itemgetter
+ from langchain_core.runnables import RunnableParallel
+
+ def validator_node(state: AgentState) -> AgentState:
+     llm = ChatOpenAI(model="gpt-4.1")
+     llm = llm.with_structured_output(AgentState)
+
+     prompt = ChatPromptTemplate.from_messages([
+         ("system", "You are a validator that checks the steps taken and the output for anything wrong. Give feedback to the flow."),
+         ("user", "Current state: {state}")
+     ])
+
+     chain = (
+         RunnableParallel({
+             "state": itemgetter("state")
+         })
+         | prompt
+         | llm
+     )
+
+     return chain.invoke({"state": state})
+
+ def validator_node_router(state: AgentState) -> str:
+     if state.validator_feedback == "":
+         return "chat"
+     else:
+         return "planner"
src/state.py
CHANGED

@@ -1,46 +1,11 @@
- """
- Graph state definition for the audio processing agent.
- """
-
- from typing import Annotated, Any, Dict, List, Optional
- from typing_extensions import TypedDict
-
- from langchain_core.messages import BaseMessage
- from langgraph.graph.message import add_messages
-
-
- class AgentState(TypedDict):
-     """State carried through the audio processing graph."""
-
-     # Conversation messages
-     messages: Annotated[List[BaseMessage], add_messages]
-
-     # Audio files provided by user
-     audio_files: List[str]  # URLs or paths to audio files
-
-     # User's processing request
-     user_request: str
-
-     # Processing type determined by router
-     processing_type: str  # "chat", "audio_processing", "dialogue_generation"
-
-     # Generated scripts with timestamps
-     scripts: Dict[str, Any]  # {file_url: {transcript: str, timestamps: List}}
-
-     # Execution plan created by planner
-     execution_plan: List[Dict[str, Any]]  # List of tool calls with parameters
-
-     # Processing results
-     processed_files: Dict[str, str]  # {original_url: processed_url}
-
-     # Processing steps completed
-     completed_steps: List[str]
-
-     # Final output
-     final_audio_url: Optional[str]
-     final_response: str
-
-     # Error handling
-     errors: List[str]
-     needs_reprocessing: bool
-
-     # Metadata
-     processing_metadata: Dict[str, Any]
+ from pydantic import BaseModel, Field
+
+ class AgentState(BaseModel):
+     steps_details: list[str] = Field(description="The steps that have been completed.", default=[])
+     user_input: str = Field(description="The user's input.", default="")
+     plan: str = Field(description="The plan for the user.", default="")
+     final_response: str = Field(description="The final response to the user.", default="")
+     requires_processing: bool = Field(description="Whether the response requires detailed audio processing.", default=False)
+     validator_feedback: str = Field(description="The feedback from the validator. Indicates steps must be taken again.", default="")
+     input_audio_files: list[str] = Field(description="The input audio files.", default=[])
+     output_audio_files: list[str] = Field(description="The output audio files.", default=[])
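
A quick usage sketch for the new Pydantic state: every field declares a default, so a node can build a fresh state from just the user input and return updated copies rather than mutating in place. model_copy is standard Pydantic v2; the field values below are illustrative only:

from src.state import AgentState

# Fresh state: unset fields fall back to their declared defaults
state = AgentState(user_input="Remove the filler words from intro.mp3",
                   input_audio_files=["intro.mp3"])

# Nodes can return an updated copy of the state
state = state.model_copy(update={
    "plan": "1. transcribe 2. cut fillers 3. re-export",
    "requires_processing": True,
})

print(state.validator_feedback == "")  # True -> the router would send us to "chat"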