YigitSekerci committed
Commit 757decb · 1 Parent(s): cc75613

simplify agent
agent_graph.png ADDED
flow.svg DELETED
poetry.lock CHANGED
@@ -45,6 +45,22 @@ doc = ["Sphinx (>=8.2,<9.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)",
 test = ["anyio[trio]", "blockbuster (>=1.5.23)", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1) ; python_version >= \"3.10\"", "uvloop (>=0.21) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\" and python_version < \"3.14\""]
 trio = ["trio (>=0.26.1)"]

+[[package]]
+name = "asttokens"
+version = "3.0.0"
+description = "Annotate AST trees with source code positions"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2"},
+    {file = "asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7"},
+]
+
+[package.extras]
+astroid = ["astroid (>=2,<4)"]
+test = ["astroid (>=2,<4)", "pytest", "pytest-cov", "pytest-xdist"]
+
 [[package]]
 name = "audioop-lts"
 version = "0.2.1"
@@ -306,12 +322,24 @@ description = "Cross-platform colored terminal text."
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
 groups = ["main"]
-markers = "platform_system == \"Windows\""
+markers = "platform_system == \"Windows\" or sys_platform == \"win32\""
 files = [
     {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
     {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
 ]

+[[package]]
+name = "decorator"
+version = "5.2.1"
+description = "Decorators for Humans"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a"},
+    {file = "decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360"},
+]
+
 [[package]]
 name = "distro"
 version = "1.9.0"
@@ -338,6 +366,21 @@ files = [
 [package.dependencies]
 python-dotenv = "*"

+[[package]]
+name = "executing"
+version = "2.2.0"
+description = "Get the currently executing AST node of a frame, and other information"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa"},
+    {file = "executing-2.2.0.tar.gz", hash = "sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755"},
+]
+
+[package.extras]
+tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich ; python_version >= \"3.11\""]
+
 [[package]]
 name = "fastapi"
 version = "0.115.12"
@@ -729,6 +772,73 @@ files = [
 [package.extras]
 all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"]

+[[package]]
+name = "ipython"
+version = "9.3.0"
+description = "IPython: Productive Interactive Computing"
+optional = false
+python-versions = ">=3.11"
+groups = ["main"]
+files = [
+    {file = "ipython-9.3.0-py3-none-any.whl", hash = "sha256:1a0b6dd9221a1f5dddf725b57ac0cb6fddc7b5f470576231ae9162b9b3455a04"},
+    {file = "ipython-9.3.0.tar.gz", hash = "sha256:79eb896f9f23f50ad16c3bc205f686f6e030ad246cc309c6279a242b14afe9d8"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "sys_platform == \"win32\""}
+decorator = "*"
+ipython-pygments-lexers = "*"
+jedi = ">=0.16"
+matplotlib-inline = "*"
+pexpect = {version = ">4.3", markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""}
+prompt_toolkit = ">=3.0.41,<3.1.0"
+pygments = ">=2.4.0"
+stack_data = "*"
+traitlets = ">=5.13.0"
+
+[package.extras]
+all = ["ipython[doc,matplotlib,test,test-extra]"]
+black = ["black"]
+doc = ["docrepr", "exceptiongroup", "intersphinx_registry", "ipykernel", "ipython[test]", "matplotlib", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "sphinx_toml (==0.0.4)", "typing_extensions"]
+matplotlib = ["matplotlib"]
+test = ["packaging", "pytest", "pytest-asyncio (<0.22)", "testpath"]
+test-extra = ["curio", "ipykernel", "ipython[test]", "jupyter_ai", "matplotlib (!=3.2.0)", "nbclient", "nbformat", "numpy (>=1.23)", "pandas", "trio"]
+
+[[package]]
+name = "ipython-pygments-lexers"
+version = "1.1.1"
+description = "Defines a variety of Pygments lexers for highlighting IPython code."
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "ipython_pygments_lexers-1.1.1-py3-none-any.whl", hash = "sha256:a9462224a505ade19a605f71f8fa63c2048833ce50abc86768a0d81d876dc81c"},
+    {file = "ipython_pygments_lexers-1.1.1.tar.gz", hash = "sha256:09c0138009e56b6854f9535736f4171d855c8c08a563a0dcd8022f78355c7e81"},
+]
+
+[package.dependencies]
+pygments = "*"
+
+[[package]]
+name = "jedi"
+version = "0.19.2"
+description = "An autocompletion tool for Python that can be used for text editors."
+optional = false
+python-versions = ">=3.6"
+groups = ["main"]
+files = [
+    {file = "jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9"},
+    {file = "jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0"},
+]
+
+[package.dependencies]
+parso = ">=0.8.4,<0.9.0"
+
+[package.extras]
+docs = ["Jinja2 (==2.11.3)", "MarkupSafe (==1.1.1)", "Pygments (==2.8.1)", "alabaster (==0.7.12)", "babel (==2.9.1)", "chardet (==4.0.0)", "commonmark (==0.8.1)", "docutils (==0.17.1)", "future (==0.18.2)", "idna (==2.10)", "imagesize (==1.2.0)", "mock (==1.0.1)", "packaging (==20.9)", "pyparsing (==2.4.7)", "pytz (==2021.1)", "readthedocs-sphinx-ext (==2.1.4)", "recommonmark (==0.5.0)", "requests (==2.25.1)", "six (==1.15.0)", "snowballstemmer (==2.1.0)", "sphinx (==1.8.5)", "sphinx-rtd-theme (==0.4.3)", "sphinxcontrib-serializinghtml (==1.1.4)", "sphinxcontrib-websupport (==1.2.4)", "urllib3 (==1.26.4)"]
+qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"]
+testing = ["Django", "attrs", "colorama", "docopt", "pytest (<9.0.0)"]
+
 [[package]]
 name = "jinja2"
 version = "3.1.6"
@@ -1162,6 +1272,21 @@ files = [
     {file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"},
 ]

+[[package]]
+name = "matplotlib-inline"
+version = "0.1.7"
+description = "Inline Matplotlib backend for Jupyter"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca"},
+    {file = "matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90"},
+]
+
+[package.dependencies]
+traitlets = "*"
+
 [[package]]
 name = "mcp"
 version = "1.9.3"
@@ -1523,6 +1648,38 @@ sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-d
 test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"]
 xml = ["lxml (>=4.9.2)"]

+[[package]]
+name = "parso"
+version = "0.8.4"
+description = "A Python Parser"
+optional = false
+python-versions = ">=3.6"
+groups = ["main"]
+files = [
+    {file = "parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18"},
+    {file = "parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d"},
+]
+
+[package.extras]
+qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"]
+testing = ["docopt", "pytest"]
+
+[[package]]
+name = "pexpect"
+version = "4.9.0"
+description = "Pexpect allows easy control of interactive console applications."
+optional = false
+python-versions = "*"
+groups = ["main"]
+markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""
+files = [
+    {file = "pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523"},
+    {file = "pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f"},
+]
+
+[package.dependencies]
+ptyprocess = ">=0.5"
+
 [[package]]
 name = "pillow"
 version = "11.2.1"
@@ -1623,6 +1780,49 @@ tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "ole
 typing = ["typing-extensions ; python_version < \"3.10\""]
 xmp = ["defusedxml"]

+[[package]]
+name = "prompt-toolkit"
+version = "3.0.51"
+description = "Library for building powerful interactive command lines in Python"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "prompt_toolkit-3.0.51-py3-none-any.whl", hash = "sha256:52742911fde84e2d423e2f9a4cf1de7d7ac4e51958f648d9540e0fb8db077b07"},
+    {file = "prompt_toolkit-3.0.51.tar.gz", hash = "sha256:931a162e3b27fc90c86f1b48bb1fb2c528c2761475e57c9c06de13311c7b54ed"},
+]
+
+[package.dependencies]
+wcwidth = "*"
+
+[[package]]
+name = "ptyprocess"
+version = "0.7.0"
+description = "Run a subprocess in a pseudo terminal"
+optional = false
+python-versions = "*"
+groups = ["main"]
+markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""
+files = [
+    {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"},
+    {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"},
+]
+
+[[package]]
+name = "pure-eval"
+version = "0.2.3"
+description = "Safely evaluate AST nodes without side effects"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0"},
+    {file = "pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42"},
+]
+
+[package.extras]
+tests = ["pytest"]
+
 [[package]]
 name = "pycparser"
 version = "2.22"
@@ -1813,7 +2013,6 @@ description = "Pygments is a syntax highlighting package written in Python."
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
-markers = "sys_platform != \"emscripten\""
 files = [
     {file = "pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c"},
     {file = "pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f"},
@@ -2317,6 +2516,26 @@ examples = ["aiosqlite (>=0.21.0)", "fastapi (>=0.115.12)", "sqlalchemy[asyncio,
 granian = ["granian (>=2.3.1)"]
 uvicorn = ["uvicorn (>=0.34.0)"]

+[[package]]
+name = "stack-data"
+version = "0.6.3"
+description = "Extract data from python stack frames and tracebacks for informative displays"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695"},
+    {file = "stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9"},
+]
+
+[package.dependencies]
+asttokens = ">=2.1.0"
+executing = ">=1.2.0"
+pure-eval = "*"
+
+[package.extras]
+tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"]
+
 [[package]]
 name = "starlette"
 version = "0.46.2"
@@ -2433,6 +2652,22 @@ notebook = ["ipywidgets (>=6)"]
 slack = ["slack-sdk"]
 telegram = ["requests"]

+[[package]]
+name = "traitlets"
+version = "5.14.3"
+description = "Traitlets Python configuration system"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f"},
+    {file = "traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7"},
+]
+
+[package.extras]
+docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"]
+test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<8.2)", "pytest-mock", "pytest-mypy-testing"]
+
 [[package]]
 name = "typer"
 version = "0.16.0"
@@ -2529,6 +2764,18 @@ h11 = ">=0.8"
 [package.extras]
 standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.13)", "websockets (>=10.4)"]

+[[package]]
+name = "wcwidth"
+version = "0.2.13"
+description = "Measures the displayed width of unicode strings in a terminal"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"},
+    {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"},
+]
+
 [[package]]
 name = "websockets"
 version = "15.0.1"
@@ -2857,4 +3104,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.13,<4.0"
-content-hash = "b0e69a9374ac8a038b59c00da37a793818e65a8e0b3601442de9a9758bea100b"
+content-hash = "1c1c843aa68874643d9202518e0a9f2b71885314c4007df9916b81ffb66a7d0d"
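Note: every package added above (asttokens, decorator, executing, ipython-pygments-lexers, jedi, matplotlib-inline, parso, pexpect, prompt-toolkit, ptyprocess, pure-eval, stack-data, traitlets, wcwidth) is part of the transitive dependency closure of the new top-level ipython requirement declared in pyproject.toml below; the widened colorama marker and the dropped pygments marker fall out of the same resolution, since ipython requires both unconditionally on their respective platforms.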
pyproject.toml CHANGED
@@ -13,7 +13,8 @@ dependencies = [
     "langchain-openai (>=0.3.21,<0.4.0)",
     "langchain-mcp-adapters (>=0.1.7,<0.2.0)",
     "dotenv (>=0.9.9,<0.10.0)",
-    "langchain (>=0.3.25,<0.4.0)"
+    "langchain (>=0.3.25,<0.4.0)",
+    "ipython (>=9.3.0,<10.0.0)"
 ]

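The only dependency change is the new ipython requirement. Together with agent_graph.png being added (and the hand-drawn flow.svg removed), this suggests the workflow diagram is now rendered programmatically from the compiled graph. A minimal sketch of how such a PNG can be produced, assuming the standard LangGraph drawing API and the AudioAgent from src/agent.py below (this exact snippet is not part of the commit):

# Sketch: render the compiled LangGraph workflow to agent_graph.png.
import asyncio
from IPython.display import Image, display
from src.agent import AudioAgent


async def render() -> None:
    agent = AudioAgent()
    await agent.initialize()
    # get_graph() returns a drawable langchain_core Graph; draw_mermaid_png()
    # rasterizes it to PNG bytes.
    png_bytes = agent.graph.get_graph().draw_mermaid_png()
    with open("agent_graph.png", "wb") as f:
        f.write(png_bytes)
    display(Image(png_bytes))  # inline preview when run under IPython/Jupyter

asyncio.run(render())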
src/agent.py CHANGED
@@ -1,77 +1,16 @@
 import asyncio
-from typing import Dict, Any, TypedDict, Annotated, List
 from dotenv import load_dotenv

-from langchain_core.messages import BaseMessage, AIMessage
 from langchain_mcp_adapters.client import MultiServerMCPClient
-from langgraph.graph import StateGraph, END
-from langgraph.graph.message import add_messages
-from langgraph.checkpoint.memory import MemorySaver
-
-from .nodes import (
-    router_node,
-    script_generator_node,
-    planner_node,
-    audio_processor_node,
-    validator_node,
-    final_response_node
-)
-from .nodes.chat import chat_node
-
-
-class AudioProcessingState(TypedDict):
-    """State schema for the audio processing graph."""
-
-    # Chat history
-    messages: Annotated[List[BaseMessage], add_messages]
-
-    # Audio files provided by user
-    audio_files: List[str]
-
-    # User's processing request
-    user_request: str
-
-    # Processing type determined by router
-    processing_type: str
-
-    # Generated scripts with timestamps
-    scripts: Dict[str, Any]
-
-    # Execution plan created by planner
-    execution_plan: List[Dict[str, Any]]
-
-    # Processing results
-    processed_files: Dict[str, str]
-
-    # Processing steps completed
-    completed_steps: List[str]
-
-    # Final output
-    final_audio_url: str
-    final_response: str
-
-    # Error handling
-    errors: List[str]
-    needs_reprocessing: bool
-
-    # Metadata
-    processing_metadata: Dict[str, Any]
+from langgraph.graph import StateGraph, END, START
+
+from .state import AgentState
+from .nodes.chat import chat_node, chat_node_router
+from .nodes.planner import planner_node
+from .nodes.processor import processor_node
+from .nodes.validator import validator_node, validator_node_router

 class AudioAgent:
-    """
-    Advanced LangGraph-based audio processing agent with custom nodes.
-
-    Handles audio file processing through a sophisticated workflow:
-    1. Router - Determines processing type
-    2. Chat or Audio Processing Pipeline
-    3. Script Generation - Creates timestamped transcripts
-    4. Planning - Creates execution plan
-    5. Processing - Executes audio tools
-    6. Validation - Checks results and determines reprocessing
-    7. Final Response - Formats output for user
-    """
-
     def __init__(
         self,
         model_name: str = "gpt-4o",
@@ -80,229 +19,72 @@ class AudioAgent:
         load_dotenv()
         self.model_name = model_name
         self.server_url = server_url
+        self.graph = None

-        # SSE client for audio tools
         self._client = MultiServerMCPClient({
             "audio-tools": {"url": self.server_url, "transport": "sse"}
         })

-        self._graph = None
-        self._tools = []
-
     @property
     def is_initialized(self) -> bool:
-        return self._graph is not None
-
-    async def initialize(self) -> None:
-        """Initialize the LangGraph workflow with audio tools."""
-        if self.is_initialized:
-            return
-
-        # Get tools from MCP server
-        self._tools = await self._client.get_tools()
-        if not self._tools:
-            raise RuntimeError("No tools available from MCP server")
-
-        # Build the graph
-        self._graph = self._build_graph()
+        return self.graph is not None

-    def _build_graph(self) -> StateGraph:
+    async def _build_graph(self) -> None:
         """Build the LangGraph workflow."""

-        # Create the state graph
-        workflow = StateGraph(AudioProcessingState)
-
-        # Add nodes
-        workflow.add_node("router", self._router_async)
-        workflow.add_node("chat", self._chat_with_tools)
-        workflow.add_node("script_generator", self._script_generator_with_tools)
-        workflow.add_node("planner", self._planner_async)
-        workflow.add_node("audio_processor", self._audio_processor_with_tools)
-        workflow.add_node("validator", self._validator_async)
-        workflow.add_node("response_formatter", self._final_response_async)
-
-        # Set entry point
-        workflow.set_entry_point("router")
-
-        # Add conditional edges based on processing type
-        workflow.add_conditional_edges(
-            "router",
-            self._route_processing_type,
+        _graph = StateGraph(AgentState)
+
+        _graph.add_node("chat", chat_node)
+        _graph.add_conditional_edges(
+            "chat",
+            chat_node_router,
             {
-                "chat": "chat",
-                "audio_processing": "script_generator",
-                "dialogue_generation": "script_generator"
+                "planner": "planner",
+                "end": END
             }
         )
-
-        # Chat flow
-        workflow.add_edge("chat", "response_formatter")
-
-        # Audio processing flow
-        workflow.add_edge("script_generator", "planner")
-        workflow.add_edge("planner", "audio_processor")
-        workflow.add_edge("audio_processor", "validator")
-
-        # Validation flow with conditional reprocessing
-        workflow.add_conditional_edges(
+
+        _graph.add_node("planner", planner_node)
+        _graph.add_edge("planner", "audio_processor")
+
+        _graph.add_node("audio_processor", processor_node)
+        _graph.add_edge("audio_processor", "validator")
+
+        _graph.add_node("validator", validator_node)
+        _graph.add_conditional_edges(
             "validator",
-            self._check_reprocessing_need,
+            validator_node_router,
             {
-                "reprocess": "planner",  # Go back to planning
-                "complete": "response_formatter"
+                "chat": "chat",
+                "planner": "planner"
             }
         )
-
-        # Final response leads to end
-        workflow.add_edge("response_formatter", END)
-
-        # Compile with memory for conversation history
-        memory = MemorySaver()
-        return workflow.compile(checkpointer=memory)

-    async def _chat_with_tools(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Chat node with tools access."""
-        return await chat_node(state, self._tools)
+        _graph.add_edge(START, "chat")
+        _graph.add_edge("chat", END)
+        self.graph = _graph.compile()

-    async def _script_generator_with_tools(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Script generator node with tools access."""
-        return await script_generator_node(state, self._tools)
-
-    async def _audio_processor_with_tools(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Audio processor node with tools access."""
-        return await audio_processor_node(state, self._tools)
-
-    async def _validator_async(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Async validator node wrapper."""
-        return await validator_node(state)
-
-    async def _router_async(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Async router node wrapper."""
-        return await router_node(state)
-
-    async def _planner_async(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Async planner node wrapper."""
-        return await planner_node(state)
-
-    async def _final_response_async(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Async final response node wrapper."""
-        return await final_response_node(state)
-
-    def _route_processing_type(self, state: Dict[str, Any]) -> str:
-        """Route based on processing type."""
-        return state.get("processing_type", "chat")
-
-    def _check_reprocessing_need(self, state: Dict[str, Any]) -> str:
-        """Check if reprocessing is needed."""
-        if state.get("needs_reprocessing", False):
-            return "reprocess"
-        return "complete"
-
-    def process_user_input(self, user_input: str) -> Dict[str, Any]:
-        """Process user input and create initial state."""
-        from langchain_core.messages import HumanMessage
-
-        return {
-            "messages": [HumanMessage(content=user_input)],
-            "audio_files": [],
-            "user_request": "",
-            "processing_type": "",
-            "scripts": {},
-            "execution_plan": [],
-            "processed_files": {},
-            "completed_steps": [],
-            "final_audio_url": "",
-            "final_response": "",
-            "errors": [],
-            "needs_reprocessing": False,
-            "processing_metadata": {}
-        }
-
-    async def chat(self, prompt: str) -> Dict[str, Any]:
-        """
-        One-shot chat: returns the full processing result.
-        """
-        if not self.is_initialized:
-            await self.initialize()
-
-        config = {"configurable": {"thread_id": "audio_agent_session"}}
-        initial_state = self.process_user_input(prompt)
-
-        result = await self._graph.ainvoke(initial_state, config)
-        return result
-
-    async def stream_chat(self, prompt: str):
-        """
-        Streaming chat: yields intermediate results as processing continues.
-        """
-        if not self.is_initialized:
-            await self.initialize()
+    async def initialize(self) -> None:
+        """Initialize the LangGraph workflow with audio tools."""
+        if self.is_initialized:
+            return

-        config = {"configurable": {"thread_id": "audio_agent_session"}}
-        initial_state = self.process_user_input(prompt)
-
-        # Special handling for chat-only requests to enable streaming
-        processing_type = None
-
-        # First, run the router to determine processing type
-        router_result = await self._graph.ainvoke(initial_state, config)
-        processing_type = router_result.get("processing_type", "")
-
-        if processing_type == "chat":
-            # For chat requests, use direct streaming from the chat node
-            from .nodes.chat import stream_chat_response
-            messages = initial_state.get("messages", [])
-
-            accumulated_content = ""
-            async for chunk in stream_chat_response(messages, self._tools):
-                accumulated_content += chunk
-                yield chunk, "chat"
-
-            # Update the state with the final response
-            final_state = router_result.copy()
-            final_state["messages"].append(AIMessage(content=accumulated_content))
-            final_state["final_response"] = accumulated_content
-
-        else:
-            # For audio processing, use the normal graph streaming
-            async for chunk in self._graph.astream(initial_state, config):
-                # Extract the node name and content
-                for node_name, node_output in chunk.items():
-                    if node_name == "__end__":
-                        continue
-
-                    # Get the latest message if available
-                    messages = node_output.get("messages", [])
-                    if messages and hasattr(messages[-1], 'content'):
-                        content = messages[-1].content
-                        if content:
-                            yield content, node_name
-
-                    # Also yield final audio URL if available
-                    final_audio_url = node_output.get("final_audio_url", "")
-                    if final_audio_url:
-                        yield f"\n🎵 **Audio Ready**: [{final_audio_url}]({final_audio_url})", node_name
+        self.tools = await self._client.get_tools()
+        if not self.tools:
+            raise RuntimeError("No tools available from MCP server")
+
+        await self._build_graph()


 async def main():
     """Test the agent with various scenarios."""
     agent = AudioAgent()
-
-    # Test 1: Chat about capabilities
-    print("=== Test 1: Chat Query ===")
-    result = await agent.chat("What audio tools are available?")
-    print("Final Response:", result.get("final_response", ""))
-    print()
-
-    # Test 2: Audio processing request
-    print("=== Test 2: Audio Processing ===")
-    audio_request = "Process this audio file https://example.com/audio.mp3 - remove filler words and normalize volume"
-
-    print("Streaming response:")
-    async for content, node in agent.stream_chat(audio_request):
-        print(f"[{node}] {content[:100]}..." if len(content) > 100 else f"[{node}] {content}")
-    print()
+    await agent.initialize()
+
+    res = agent.graph.invoke({
+        "user_input": "I want to edit my audio file",
+    })
+
+    print(res)


 if __name__ == "__main__":
     asyncio.run(main())
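The new code imports AgentState from src/state.py, which is not included in this commit. Judging only from how it is used here (passed to StateGraph(...), returned via llm.with_structured_output(AgentState) in the chat node, read as state.requires_processing, and seeded with a user_input key in main()), it is presumably a Pydantic model along these lines. This is a hypothetical sketch; only user_input and requires_processing are confirmed by the diff, and any other fields are guesses:

# Hypothetical reconstruction of src/state.py (not part of this commit).
from pydantic import BaseModel


class AgentState(BaseModel):
    user_input: str = ""               # raw user request, seeded in main()
    requires_processing: bool = False  # set by chat_node, read by chat_node_router
    response: str = ""                 # illustrative only: reply text for the "end" branch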
src/nodes/__init__.py DELETED
@@ -1,19 +0,0 @@
-"""
-Audio processing graph nodes.
-"""
-
-from .router import router_node
-from .script_generator import script_generator_node
-from .planner import planner_node
-from .audio_processor import audio_processor_node
-from .validator import validator_node
-from .final_response import final_response_node
-
-__all__ = [
-    "router_node",
-    "script_generator_node",
-    "planner_node",
-    "audio_processor_node",
-    "validator_node",
-    "final_response_node"
-]
src/nodes/audio_processor.py DELETED
@@ -1,257 +0,0 @@
-"""
-Intelligent LLM-powered audio processor for executing planned processing steps.
-"""
-
-from typing import Dict, Any, List
-from langchain_core.messages import AIMessage, SystemMessage
-from langchain_openai import ChatOpenAI
-import json
-import re
-
-from .prompts import (
-    LLM_PROCESSING_DECISION_PROMPT_TEMPLATE,
-    LLM_PROCESSING_SUMMARY_PROMPT_TEMPLATE,
-)
-
-
-async def audio_processor_node(state: Dict[str, Any], tools: list) -> Dict[str, Any]:
-    """
-    Execute audio processing plan with intelligent LLM-guided decisions.
-    """
-
-    execution_plan = state.get("execution_plan", [])
-    user_request = state.get("user_request", "")
-    processed_files = state.get("processed_files", {})
-    completed_steps = state.get("completed_steps", [])
-    errors = state.get("errors", [])
-
-    if not execution_plan:
-        return create_no_plan_response(state)
-
-    # Create tool lookup
-    tool_lookup = {tool.name: tool for tool in tools}
-
-    # Track current file URLs through processing
-    current_file_urls = {}
-
-    # Execute plan with LLM guidance
-    for i, step in enumerate(execution_plan):
-        # Get LLM decision for this step
-        should_execute, adapted_params = await get_llm_processing_decision(
-            step, user_request, current_file_urls, completed_steps, errors, list(tool_lookup.keys())
-        )
-
-        if not should_execute:
-            completed_steps.append(f"⏭️ Skipped: {step.get('description', 'unknown')}")
-            continue
-
-        # Execute the step
-        step_result = await execute_processing_step(
-            step, adapted_params, tool_lookup, current_file_urls
-        )
-
-        if step_result["success"]:
-            if step_result["new_file_url"]:
-                original_file = step_result["original_file"]
-                current_file_urls[original_file] = step_result["new_file_url"]
-                processed_files[original_file] = step_result["new_file_url"]
-            completed_steps.append(f"✅ {step_result['description']}")
-        else:
-            errors.append(step_result["error"])
-            completed_steps.append(f"❌ Failed: {step.get('description', 'unknown')}")
-
-    # Create processing summary
-    processing_summary = await create_llm_processing_summary(
-        user_request, completed_steps, errors, processed_files
-    )
-
-    messages = state.get("messages", [])
-    messages.append(AIMessage(content=processing_summary))
-
-    return {
-        "processed_files": processed_files,
-        "completed_steps": completed_steps,
-        "errors": errors,
-        "needs_reprocessing": len(errors) > 0 and len(completed_steps) > 0,
-        "final_audio_url": get_primary_output_file(processed_files),
-        "messages": messages
-    }
-
-
-async def get_llm_processing_decision(
-    step: Dict[str, Any],
-    user_request: str,
-    current_file_urls: Dict[str, str],
-    completed_steps: List[str],
-    errors: List[str],
-    available_tools: List[str]
-) -> tuple:
-    """Use LLM to decide whether to execute step and with what parameters."""
-
-    llm = ChatOpenAI(model="gpt-4o", temperature=0.2)
-
-    prompt = LLM_PROCESSING_DECISION_PROMPT_TEMPLATE.format(
-        tool_name=step.get('tool', 'unknown'),
-        description=step.get('description', 'No description'),
-        planned_parameters=json.dumps(step.get('params', {}), indent=2),
-        user_request=user_request,
-        completed_steps_count=len(completed_steps),
-        error_count=len(errors),
-        available_tools=', '.join(available_tools),
-        current_file_urls=json.dumps(current_file_urls, indent=2),
-        recent_activity="\n".join(completed_steps[-3:]) if completed_steps else "No steps completed yet"
-    )
-
-    try:
-        response = await llm.ainvoke([SystemMessage(content=prompt)])
-        content = response.content.strip()
-
-        if content.startswith("SKIP"):
-            return False, {}
-        elif content.startswith("EXECUTE"):
-            lines = content.split('\n')
-            if len(lines) > 1 and lines[1].strip() != "NO_CHANGES":
-                try:
-                    adapted_params = json.loads(lines[1])
-                    return True, adapted_params
-                except json.JSONDecodeError:
-                    return True, {}
-            return True, {}
-        else:
-            return True, {}  # Default to execute if unclear
-
-    except Exception as e:
-        return True, {}  # Default to execute on error
-
-
-async def execute_processing_step(
-    step: Dict[str, Any],
-    adapted_params: Dict[str, Any],
-    tool_lookup: Dict[str, Any],
-    current_file_urls: Dict[str, str]
-) -> Dict[str, Any]:
-    """Execute a processing step with the given parameters."""
-
-    tool_name = step.get("tool", "")
-    params = step.get("params", {}).copy()
-    params.update(adapted_params)  # Apply LLM adaptations
-
-    if tool_name not in tool_lookup:
-        return {
-            "success": False,
-            "error": f"Tool '{tool_name}' not available",
-            "description": f"Failed to find tool {tool_name}",
-            "original_file": params.get("audio_file", ""),
-            "new_file_url": None
-        }
-
-    try:
-        # Update file URL if this file has been processed before
-        original_file = params.get("audio_file", "")
-        if original_file in current_file_urls:
-            params["audio_file"] = current_file_urls[original_file]
-
-        # Execute the tool
-        tool = tool_lookup[tool_name]
-        result = await tool.ainvoke(params)
-
-        # Extract new file URL from result
-        new_file_url = extract_file_url_from_result(result, params["audio_file"])
-
-        return {
-            "success": True,
-            "description": f"{tool_name}: {step.get('description', '')}",
-            "original_file": original_file,
-            "new_file_url": new_file_url if new_file_url != params["audio_file"] else None,
-            "result": result
-        }
-
-    except Exception as e:
-        return {
-            "success": False,
-            "error": f"{tool_name} failed: {str(e)}",
-            "description": f"Failed {tool_name}",
-            "original_file": params.get("audio_file", ""),
-            "new_file_url": None
-        }
-
-
-async def create_llm_processing_summary(
-    user_request: str,
-    completed_steps: List[str],
-    errors: List[str],
-    processed_files: Dict[str, str]
-) -> str:
-    """Create LLM-generated processing summary."""
-
-    llm = ChatOpenAI(model="gpt-4o", temperature=0.1)
-
-    prompt = LLM_PROCESSING_SUMMARY_PROMPT_TEMPLATE.format(
-        user_request=user_request,
-        completed_steps_count=len(completed_steps),
-        error_count=len(errors),
-        processed_files_count=len(processed_files),
-        step_details="\n".join(completed_steps[-5:]) if completed_steps else "No steps completed",
-        processed_files=json.dumps(processed_files, indent=2) if processed_files else "No files processed",
-        errors="\n".join(errors) if errors else "No errors"
-    )
-
-    try:
-        response = await llm.ainvoke([SystemMessage(content=prompt)])
-        return f"🎛️ **Processing Summary**\n\n{response.content}"
-    except Exception as e:
-        # Fallback summary
-        if processed_files:
-            return f"🎛️ **Processing Complete**\n\nSuccessfully processed {len(processed_files)} file(s) with {len(completed_steps)} steps completed."
-        else:
-            return f"⚠️ **Processing Issues**\n\nEncountered {len(errors)} error(s) during processing. Please check the issues above."
-
-
-def extract_file_url_from_result(result, original_file: str) -> str:
-    """Extract the new file URL from tool result."""
-
-    if hasattr(result, 'artifact') and result.artifact:
-        if hasattr(result.artifact, 'url'):
-            return result.artifact.url
-        elif hasattr(result.artifact, 'path'):
-            return result.artifact.path
-
-    if hasattr(result, 'content'):
-        content = result.content
-        # Look for URLs in the content
-        url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+\.(mp3|wav|m4a|flac|aac|ogg)'
-        urls = re.findall(url_pattern, content, re.IGNORECASE)
-        if urls:
-            return urls[0]
-
-    return original_file
-
-
-def get_primary_output_file(processed_files: Dict[str, str]) -> str:
-    """Get the primary output file URL."""
-
-    if not processed_files:
-        return ""
-
-    # If there's a combined file, prioritize that
-    for original, processed in processed_files.items():
-        if "combined" in processed.lower():
-            return processed
-
-    # Otherwise return the first processed file
-    return list(processed_files.values())[0]
-
-
-def create_no_plan_response(state: Dict[str, Any]) -> Dict[str, Any]:
-    """Handle case when no execution plan is available."""
-
-    messages = state.get("messages", [])
-    messages.append(AIMessage(content="❌ **No Execution Plan**: Cannot process audio without a plan."))
-
-    return {
-        "processed_files": {},
-        "completed_steps": [],
-        "errors": ["No execution plan available"],
-        "messages": messages,
-        "needs_reprocessing": False
-    }
src/nodes/chat.py CHANGED
@@ -1,181 +1,34 @@
-"""
-Chat node for handling general questions and conversations using streaming LLM.
-"""
-
-from typing import Dict, Any, List, AsyncGenerator
-from langchain_core.messages import AIMessage, SystemMessage
 from langchain_openai import ChatOpenAI
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.runnables import RunnableParallel
+from src.state import AgentState
+from operator import itemgetter

-from .prompts import (
-    CHAT_SYSTEM_PROMPT_BASE,
-    CHAT_SYSTEM_PROMPT_TOOLS_HEADER,
-    CHAT_SYSTEM_PROMPT_GUIDELINES,
-)
-
-# Export the streaming function for direct use
-__all__ = ["chat_node", "stream_chat_response"]
-
-
-async def chat_node(state: Dict[str, Any], tools: List = None) -> Dict[str, Any]:
+def chat_node(state: AgentState) -> AgentState:
     """
-    Handle general chat messages and questions using streaming LLM with tool awareness.
-    Returns the complete response after streaming is done.
+    Handle general questions and conversations with a structured-output LLM.
     """
-
-    messages = state.get("messages", [])
-
-    if not messages:
-        return {
-            "messages": messages,
-            "final_response": "No messages to process."
-        }
-
-    # Generate streaming response and collect it
-    response_content = ""
-    async for chunk in stream_chat_response(messages, tools):
-        response_content += chunk
-
-    # Add AI response to messages
-    messages.append(AIMessage(content=response_content))
-
-    return {
-        "messages": messages,
-        "final_response": response_content
-    }

+    llm = ChatOpenAI(model="gpt-4.1")
+    llm = llm.with_structured_output(AgentState)

-async def stream_chat_response(messages: List, tools: List = None) -> AsyncGenerator[str, None]:
-    """
-    Stream chat response chunks as they're generated by the LLM.
-    This is the core streaming function that yields content incrementally.
-    """
-
-    # Create system message with tool information
-    system_message = create_system_message_with_tools(tools or [])
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", "You are a helpful assistant that can answer questions and help with tasks."),
+        ("user", "Current state: {state}")
+    ])

-    # Prepare messages for LLM
-    llm_messages = [system_message] + messages
-
-    # Initialize LLM
-    llm = ChatOpenAI(model="gpt-4o", temperature=0.7)
-
-    response_content = ""
-    full_response = None
-
-    # Stream the main response
-    if tools:
-        llm_with_tools = llm.bind_tools(tools)
-
-        async for chunk in llm_with_tools.astream(llm_messages):
-            if chunk.content:
-                response_content += chunk.content
-                yield chunk.content  # Yield each chunk as it comes
-
-            # Keep track of the complete response for tool calls
-            full_response = chunk
-    else:
-        async for chunk in llm.astream(llm_messages):
-            if chunk.content:
-                response_content += chunk.content
-                yield chunk.content  # Yield each chunk as it comes
-
-            full_response = chunk
-
-    # Handle tool calls if any (after main streaming is complete)
-    if full_response and hasattr(full_response, 'tool_calls') and full_response.tool_calls:
-        tool_results_content = await handle_tool_calls(full_response, tools)
-        if tool_results_content:
-            yield tool_results_content
-
-
-def create_system_message_with_tools(tools: List) -> SystemMessage:
-    """Create a comprehensive system message that includes tool information."""
-
-    # Basic system prompt
-    system_content = CHAT_SYSTEM_PROMPT_BASE
-
-    # Add tool descriptions if available
-    if tools:
-        system_content += CHAT_SYSTEM_PROMPT_TOOLS_HEADER
-
-        for tool in tools:
-            tool_name = getattr(tool, 'name', 'Unknown Tool')
-            tool_description = getattr(tool, 'description', 'No description available')
-
-            # Get tool parameters
-            tool_args = getattr(tool, 'args_schema', None)
-            if tool_args and hasattr(tool_args, 'schema'):
-                schema = tool_args.schema()
-                properties = schema.get('properties', {})
-
-                system_content += f"\n**{tool_name}**:\n"
-                system_content += f"- Description: {tool_description}\n"
-
-                if properties:
-                    system_content += "- Parameters:\n"
-                    for param_name, param_info in properties.items():
-                        param_type = param_info.get('type', 'unknown')
-                        param_desc = param_info.get('description', 'No description')
-                        system_content += f"  • {param_name} ({param_type}): {param_desc}\n"
-
-                system_content += "\n"
-
-    system_content += CHAT_SYSTEM_PROMPT_GUIDELINES
-
-    return SystemMessage(content=system_content)
-
+    chain = (
+        RunnableParallel({
+            "state": itemgetter("state")
+        })
+        | prompt
+        | llm
+    )

-async def handle_tool_calls(response, tools: List) -> str:
-    """Handle tool calls made by the LLM during chat."""
-
-    tool_lookup = {tool.name: tool for tool in tools}
-    tool_results = []
-
-    for tool_call in response.tool_calls:
-        tool_name = tool_call["name"]
-        tool_args = tool_call["args"]
-
-        if tool_name in tool_lookup:
-            try:
-                tool = tool_lookup[tool_name]
-                result = await tool.ainvoke(tool_args)
-
-                # Format the tool result for display
-                tool_result_text = format_tool_result(tool_name, tool_args, result)
-                tool_results.append(tool_result_text)
-
-            except Exception as e:
-                error_msg = f"❌ Tool '{tool_name}' failed: {str(e)}"
-                tool_results.append(error_msg)
-        else:
-            error_msg = f"❌ Tool '{tool_name}' not available"
-            tool_results.append(error_msg)
-
-    return "\n\n" + "\n\n".join(tool_results) if tool_results else ""
-
-
-def format_tool_result(tool_name: str, tool_args: Dict[str, Any], result) -> str:
-    """Format tool execution results for display in chat."""
-
-    formatted_result = f"\n\n🔧 **Tool Demo: {tool_name}**\n"
-
-    # Show parameters used
-    if tool_args:
-        formatted_result += "**Parameters used:**\n"
-        for key, value in tool_args.items():
-            formatted_result += f"- {key}: {value}\n"
-
-    # Show result
-    formatted_result += "\n**Result:**\n"
-
-    if hasattr(result, 'content'):
-        formatted_result += f"{result.content}"
-    elif hasattr(result, 'artifact'):
-        if hasattr(result.artifact, 'url'):
-            formatted_result += f"🎵 Audio processed: {result.artifact.url}"
-        else:
-            formatted_result += f"{result.artifact}"
-    else:
-        formatted_result += f"{str(result)}"
-
-    return formatted_result
+    return chain.invoke({"state": state})
+
+def chat_node_router(state: AgentState) -> str:
+    if state.requires_processing:
+        return "planner"
+    else:
+        return "end"

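Since chat_node now returns a structured AgentState rather than streaming chunks, it can be exercised in isolation. A minimal smoke test, assuming an OPENAI_API_KEY in the environment and the AgentState model sketched earlier (this snippet is hypothetical and not part of the commit):

# Hypothetical smoke test for the rewritten chat node.
from src.state import AgentState
from src.nodes.chat import chat_node, chat_node_router

state = AgentState(user_input="Remove background noise from my recording")
new_state = chat_node(state)        # LLM returns a validated AgentState
print(chat_node_router(new_state))  # "planner" when processing is needed, else "end"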
src/nodes/final_response.py DELETED
@@ -1,299 +0,0 @@
-"""
-Intelligent LLM-powered final response formatter for comprehensive user communication.
-"""
-
-from typing import Dict, Any, List
-from langchain_core.messages import AIMessage, SystemMessage, HumanMessage
-from langchain_openai import ChatOpenAI
-from langchain_core.output_parsers import PydanticOutputParser
-from pydantic import BaseModel, Field
-import json
-
-from .prompts import (
-    FINAL_RESPONSE_SYSTEM_PROMPT,
-    FINAL_RESPONSE_USER_PROMPT_TEMPLATE,
-)
-
-
-class FinalResponse(BaseModel):
-    """Structured final response from LLM analysis."""
-
-    response_title: str = Field(description="Engaging title for the response")
-    main_message: str = Field(description="Primary message about what was accomplished")
-    processed_files_summary: List[str] = Field(description="Summary of each processed file with download info")
-    key_improvements: List[str] = Field(description="Key improvements and enhancements made")
-    quality_assessment: str = Field(description="Assessment of final quality and success")
-    user_recommendations: List[str] = Field(description="Personalized recommendations for the user")
-    next_steps: str = Field(description="Suggested next steps or call to action")
-    technical_summary: str = Field(description="Brief technical summary of what was done")
-
-
-async def final_response_node(state: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Generate intelligent, personalized final response using LLM analysis.
-
-    The LLM creates a comprehensive response that:
-    - Summarizes what was accomplished
-    - Highlights key improvements and results
-    - Provides download links for processed files
-    - Offers personalized recommendations
-    - Suggests appropriate next steps
-    """
-
-    processing_type = state.get("processing_type", "")
-    processed_files = state.get("processed_files", {})
-    scripts = state.get("scripts", {})
-    errors = state.get("errors", [])
-    processing_metadata = state.get("processing_metadata", {})
-    user_request = state.get("user_request", "")
-    completed_steps = state.get("completed_steps", [])
-    execution_plan = state.get("execution_plan", [])
-
-    # For chat responses, use existing final_response
-    if processing_type == "chat":
-        final_response = state.get("final_response", "")
-        if not final_response:
-            # Generate a chat response if none exists
-            final_response = await create_chat_final_response(user_request, processing_metadata)
-    else:
-        # Generate intelligent audio processing response
-        llm_response = await create_intelligent_final_response_with_llm(
-            user_request, processing_type, processed_files, scripts,
-            errors, processing_metadata, completed_steps, execution_plan
-        )
-
-        final_response = format_llm_response(llm_response, processed_files)
-
-    # Add final response to messages if not already present
-    messages = state.get("messages", [])
-    if not any(msg.content == final_response for msg in messages if hasattr(msg, 'content')):
-        messages.append(AIMessage(content=final_response))
-
-    # Set final audio URL if available
-    final_audio_url = get_final_audio_url(processed_files, processing_type)
-
-    return {
-        "final_response": final_response,
-        "final_audio_url": final_audio_url,
-        "messages": messages
-    }
-
-
-async def create_intelligent_final_response_with_llm(
-    user_request: str,
-    processing_type: str,
-    processed_files: Dict[str, str],
-    scripts: Dict[str, Any],
-    errors: List[str],
-    processing_metadata: Dict[str, Any],
-    completed_steps: List[str],
-    execution_plan: List[Dict[str, Any]]
-) -> FinalResponse:
-    """Use LLM to create intelligent, personalized final response."""
-
-    system_message = create_final_response_system_message()
-    user_message = create_final_response_user_message(
-        user_request, processing_type, processed_files, scripts,
-        errors, processing_metadata, completed_steps, execution_plan
-    )
-
-    llm = ChatOpenAI(model="gpt-4o", temperature=0.3)
-    parser = PydanticOutputParser(pydantic_object=FinalResponse)
-
-    prompt_messages = [
-        SystemMessage(content=system_message.content),
-        HumanMessage(content=user_message),
-        HumanMessage(content=parser.get_format_instructions())
-    ]
-
-    try:
-        response = await llm.ainvoke(prompt_messages)
-        final_response = parser.parse(response.content)
-        return final_response
-    except Exception as e:
-        # Fallback response
-        return create_fallback_final_response(user_request, processed_files, errors)
-
-
-def create_final_response_system_message() -> SystemMessage:
-    """Create system message for final response generation."""
-    return SystemMessage(content=FINAL_RESPONSE_SYSTEM_PROMPT)
-
-
-def create_final_response_user_message(
-    user_request: str,
-    processing_type: str,
-    processed_files: Dict[str, str],
-    scripts: Dict[str, Any],
-    errors: List[str],
-    processing_metadata: Dict[str, Any],
-    completed_steps: List[str],
-    execution_plan: List[Dict[str, Any]]
-) -> str:
-    """Create user message for final response generation."""
-
-    # Analyze processing context
-    processing_summary = analyze_processing_context(
-        processed_files, scripts, processing_metadata, completed_steps
-    )
-
-    return FINAL_RESPONSE_USER_PROMPT_TEMPLATE.format(
-        user_request=user_request,
-        processing_type=processing_type,
-        processed_files_count=len(processed_files),
-        completed_steps_count=len(completed_steps),
-        error_count=len(errors),
-        processed_files=json.dumps(processed_files, indent=2) if processed_files else "No files processed",
-        processing_summary=processing_summary,
-        plan_steps=len(execution_plan),
-        tools_used=list(set([step.get('tool', 'unknown') for step in execution_plan])),
-        completed_steps="\n".join(completed_steps[-5:]) if completed_steps else "No steps completed",
-        errors="\n".join(errors) if errors else "No errors encountered",
-        processing_metadata=json.dumps(processing_metadata, indent=2) if processing_metadata else "No additional metadata"
-    )
-
-
-def analyze_processing_context(
-    processed_files: Dict[str, str],
-    scripts: Dict[str, Any],
-    processing_metadata: Dict[str, Any],
-    completed_steps: List[str]
-) -> str:
-    """Analyze processing context to inform final response."""
-
-    analysis = "**Processing Analysis:**\n"
-
-    # File analysis
-    analysis += f"- Files processed: {len(processed_files)}\n"
-    if processed_files:
-        for original, processed in processed_files.items():
-            original_name = original.split('/')[-1] if '/' in original else original
-            processed_name = processed.split('/')[-1] if '/' in processed else processed
-            analysis += f"  • {original_name} → {processed_name}\n"
-
-    # Script analysis
-    if scripts:
-        total_transcript_length = sum(len(script.get("transcript", "")) for script in scripts.values())
-        total_filler_words = sum(len(script.get("filler_words", [])) for script in scripts.values())
-        analysis += f"- Total transcript length: {total_transcript_length} characters\n"
-        analysis += f"- Filler words detected: {total_filler_words}\n"
-
-    # Quality assessment
-    quality_score = processing_metadata.get("quality_score", 0)
-    if quality_score > 0:
-        analysis += f"- Estimated quality score: {quality_score:.1%}\n"
-
-    # Processing insights
-    final_analysis = processing_metadata.get("final_analysis", {})
-    if final_analysis:
-        analysis += f"- AI assessment: {final_analysis.get('success_assessment', 'N/A')}\n"
-        quality_improvements = final_analysis.get("quality_improvements", [])
-        if quality_improvements:
-            analysis += f"- Key improvements: {', '.join(quality_improvements[:3])}\n"
-
-    # Step analysis
-    successful_steps = len([step for step in completed_steps if step.startswith("✅")])
-    analysis += f"- Successful steps: {successful_steps}/{len(completed_steps)}\n"
-
-    return analysis
-
-
-def create_fallback_final_response(
-    user_request: str,
-    processed_files: Dict[str, str],
-    errors: List[str]
-) -> FinalResponse:
-    """Create fallback response if LLM generation fails."""
-
-    if processed_files:
-        return FinalResponse(
-            response_title="Audio Processing Complete",
-            main_message=f"Successfully processed {len(processed_files)} audio file(s) according to your request.",
-            processed_files_summary=[f"{original.split('/')[-1]}: [Download]({processed})" for original, processed in processed_files.items()],
-            key_improvements=["Audio processing completed", "Files enhanced and optimized"],
-            quality_assessment="Processing completed successfully",
-            user_recommendations=["Download your processed files", "Review the results"],
-            next_steps="Your enhanced audio files are ready for download. Let me know if you need any adjustments!",
-            technical_summary=f"Applied audio processing workflow to {len(processed_files)} file(s)"
-        )
-    else:
-        return FinalResponse(
-            response_title="Processing Attempt Complete",
-            main_message="Audio processing encountered some challenges.",
-            processed_files_summary=[],
-            key_improvements=[],
-            quality_assessment="Processing was not successful",
-            user_recommendations=["Check your audio file URLs", "Try a simpler processing request"],
-            next_steps="Please check the errors above and try again with valid audio files.",
-            technical_summary=f"Processing attempted but encountered {len(errors)} error(s)"
-        )
-
-
-def format_llm_response(llm_response: FinalResponse, processed_files: Dict[str, str]) -> str:
-    """Format the LLM response into final markdown response."""
-
-    response = f"🎵 **{llm_response.response_title}**\n\n"
-
-    # Main message
-    response += f"{llm_response.main_message}\n\n"
-
-    # Processed files with actual download links
-    if processed_files:
-        response += "**🎵 Your Processed Audio Files:**\n"
-        for original, processed in processed_files.items():
244
- filename = original.split('/')[-1] if '/' in original else original
245
- response += f"- **{filename}**: [Download]({processed})\n"
246
- response += "\n"
247
-
248
- # Key improvements
249
- if llm_response.key_improvements:
250
- response += "**✨ Key Improvements:**\n"
251
- for improvement in llm_response.key_improvements:
252
- response += f"- {improvement}\n"
253
- response += "\n"
254
-
255
- # Quality assessment
256
- response += f"**🎯 Quality Assessment:** {llm_response.quality_assessment}\n\n"
257
-
258
- # Recommendations
259
- if llm_response.user_recommendations:
260
- response += "**💡 Recommendations:**\n"
261
- for rec in llm_response.user_recommendations:
262
- response += f"- {rec}\n"
263
- response += "\n"
264
-
265
- # Technical summary
266
- if llm_response.technical_summary:
267
- response += f"**🔧 Technical Summary:** {llm_response.technical_summary}\n\n"
268
-
269
- # Next steps
270
- response += f"**🚀 Next Steps:** {llm_response.next_steps}"
271
-
272
- return response
273
-
274
-
275
- async def create_chat_final_response(user_request: str, processing_metadata: Dict[str, Any]) -> str:
276
- """Create final response for chat interactions."""
277
-
278
- # For chat, create a simple acknowledgment
279
- return f"I've provided information about our audio processing capabilities. Is there anything specific you'd like to know more about or any audio files you'd like me to help process?"
280
-
281
-
282
- def get_final_audio_url(processed_files: Dict[str, str], processing_type: str) -> str:
283
- """Get the final audio URL to return to the user."""
284
-
285
- if not processed_files:
286
- return ""
287
-
288
- # For dialogue generation, look for combined file
289
- if processing_type == "dialogue_generation":
290
- for original, processed in processed_files.items():
291
- if "combined" in processed or "dialogue" in processed:
292
- return processed
293
-
294
- # For single file processing, return the processed file
295
- if len(processed_files) == 1:
296
- return list(processed_files.values())[0]
297
-
298
- # For multiple files, return the first one
299
- return list(processed_files.values())[0] if processed_files else ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/nodes/planner.py CHANGED
@@ -1,311 +1,24 @@
- """
- Intelligent LLM-powered planner for creating optimal audio processing execution plans.
- """
-
- from typing import Dict, Any, List
- from langchain_core.messages import AIMessage, SystemMessage, HumanMessage
  from langchain_openai import ChatOpenAI
- from langchain_core.output_parsers import PydanticOutputParser
- from pydantic import BaseModel, Field
- import json
- from .prompts import PLANNER_SYSTEM_PROMPT, PLANNER_USER_PROMPT_TEMPLATE
-
-
- class ExecutionStep(BaseModel):
-     """Single step in the execution plan."""
-
-     step_id: str = Field(description="Unique identifier for this step")
-     tool_name: str = Field(description="Name of the tool to use")
-     parameters: Dict[str, Any] = Field(description="Parameters for the tool")
-     description: str = Field(description="Human-readable description of what this step does")
-     reasoning: str = Field(description="Why this step is needed")
-     priority: str = Field(description="Priority level: high, medium, low")
-
-
- class ExecutionPlan(BaseModel):
-     """Complete execution plan for audio processing."""
-
-     steps: List[ExecutionStep] = Field(description="Ordered list of execution steps")
-     strategy: str = Field(description="Overall strategy and approach")
-     expected_outcomes: List[str] = Field(description="What outcomes to expect from this plan")
-     estimated_duration: str = Field(description="Estimated time to complete")
-     risks_and_mitigations: List[str] = Field(description="Potential issues and how to handle them")
-
-
- async def planner_node(state: Dict[str, Any]) -> Dict[str, Any]:
-     """
-     Create intelligent execution plan using LLM analysis.
-
-     The LLM analyzes:
-     - User request and intent
-     - Available audio files and their characteristics
-     - Transcript insights and quality
-     - Available tools and capabilities
-     - Processing type and requirements
-     """
-
-     user_request = state.get("user_request", "")
-     audio_files = state.get("audio_files", [])
-     scripts = state.get("scripts", {})
-     processing_type = state.get("processing_type", "")
-     processing_metadata = state.get("processing_metadata", {})
-
-     if not audio_files:
-         return create_no_files_plan_response(state)
-
-     # Use LLM to create intelligent execution plan
-     execution_plan = await create_execution_plan_with_llm(
-         user_request, audio_files, scripts, processing_type, processing_metadata
-     )
-
-     # Convert to the format expected by audio processor
-     formatted_plan = convert_plan_to_execution_format(execution_plan)
-
-     # Create plan summary message
-     plan_summary = create_plan_summary_message(execution_plan, formatted_plan)
-     messages = state.get("messages", [])
-     messages.append(AIMessage(content=plan_summary))
-
-     return {
-         "execution_plan": formatted_plan,
-         "messages": messages,
-         "processing_metadata": {
-             **processing_metadata,
-             "execution_strategy": execution_plan.strategy,
-             "expected_outcomes": execution_plan.expected_outcomes,
-             "plan_metadata": execution_plan.dict()
-         }
-     }
-
-
- async def create_execution_plan_with_llm(
-     user_request: str,
-     audio_files: List[str],
-     scripts: Dict[str, Any],
-     processing_type: str,
-     processing_metadata: Dict[str, Any]
- ) -> ExecutionPlan:
-     """Use LLM to create intelligent execution plan."""
-
-     system_message = create_planning_system_message()
-     user_message_content = create_planning_user_message(
-         user_request, audio_files, scripts, processing_type, processing_metadata
-     )
-
-     llm = ChatOpenAI(model="gpt-4o", temperature=0.2)
-     parser = PydanticOutputParser(pydantic_object=ExecutionPlan)
-
-     prompt_messages = [
-         system_message,
-         HumanMessage(content=user_message_content),
-         HumanMessage(content=parser.get_format_instructions())
-     ]
-
-     try:
-         response = await llm.ainvoke(prompt_messages)
-         plan = parser.parse(response.content)
-         return plan
-     except Exception as e:
-         # Fallback to simple plan
-         return create_fallback_execution_plan(user_request, audio_files, processing_type)
-
-
- def create_planning_system_message() -> SystemMessage:
-     """Create system message for execution planning."""
-     return SystemMessage(content=PLANNER_SYSTEM_PROMPT)
-
-
- def create_planning_user_message(
-     user_request: str,
-     audio_files: List[str],
-     scripts: Dict[str, Any],
-     processing_type: str,
-     processing_metadata: Dict[str, Any]
- ) -> str:
-     """Create user message for execution planning."""
-
-     # Analyze transcript data
-     transcript_summary = analyze_transcript_data(scripts)
-     file_list = "\n".join([f"- {file.split('/')[-1]}" for file in audio_files])
-     processing_context = json.dumps(processing_metadata, indent=2) if processing_metadata else "No additional context"
-
-     return PLANNER_USER_PROMPT_TEMPLATE.format(
-         user_request=user_request,
-         processing_type=processing_type,
-         file_count=len(audio_files),
-         file_list=file_list,
-         transcript_summary=transcript_summary,
-         processing_context=processing_context,
-     )
-
-
- def analyze_transcript_data(scripts: Dict[str, Any]) -> str:
-     """Analyze transcript data to inform planning decisions."""
-
-     if not scripts:
-         return "No transcript data available"
-
-     summary = ""
-     total_filler_words = 0
-     quality_scores = []
-     insights = []
-
-     for file_url, script_data in scripts.items():
-         filename = file_url.split('/')[-1] if '/' in file_url else file_url
-         transcript = script_data.get("transcript", "")
-         filler_words = script_data.get("filler_words", [])
-         quality_score = script_data.get("quality_score", 0)
-         file_insights = script_data.get("insights", [])
-
-         total_filler_words += len(filler_words)
-         if quality_score > 0:
-             quality_scores.append(quality_score)
-         insights.extend(file_insights)
-
-         summary += f"\n- **{filename}**: {len(transcript)} chars, {len(filler_words)} fillers"
-         if quality_score > 0:
-             summary += f", {quality_score:.1%} quality"
-
-     # Overall analysis
-     avg_quality = sum(quality_scores) / len(quality_scores) if quality_scores else 0
-     summary += f"\n\n**Overall Analysis:**"
-     summary += f"\n- Total filler words across all files: {total_filler_words}"
-     summary += f"\n- Average transcript quality: {avg_quality:.1%}" if avg_quality > 0 else ""
-
-     if insights:
-         summary += f"\n- Key insights: {', '.join(insights[:3])}"
-
-     # Planning recommendations
-     if total_filler_words > 10:
-         summary += f"\n- **Recommendation**: High filler word count suggests need for silence trimming and cutting"
-     if avg_quality < 0.7:
-         summary += f"\n- **Recommendation**: Lower quality transcript suggests audio may need normalization"
-
-     return summary
-
-
- def convert_plan_to_execution_format(execution_plan: ExecutionPlan) -> List[Dict[str, Any]]:
-     """Convert LLM execution plan to format expected by audio processor."""
-
-     formatted_steps = []
-
-     for step in execution_plan.steps:
-         formatted_step = {
-             "step": step.step_id,
-             "tool": step.tool_name,
-             "params": step.parameters,
-             "description": step.description,
-             "reasoning": step.reasoning,
-             "priority": step.priority
-         }
-         formatted_steps.append(formatted_step)
-
-     return formatted_steps
-
-
- def create_fallback_execution_plan(
-     user_request: str,
-     audio_files: List[str],
-     processing_type: str
- ) -> ExecutionPlan:
-     """Create fallback execution plan if LLM planning fails."""
-
-     steps = []
-
-     for i, audio_file in enumerate(audio_files):
-         # Basic processing steps
-         steps.extend([
-             ExecutionStep(
-                 step_id=f"update_info_{i}",
-                 tool_name="update_audio_info",
-                 parameters={"audio_file": audio_file},
-                 description=f"Update audio information",
-                 reasoning="Essential for proper file handling",
-                 priority="high"
-             ),
-             ExecutionStep(
-                 step_id=f"trim_silence_{i}",
-                 tool_name="apply_silence_trimming",
-                 parameters={"audio_file": audio_file, "threshold_db": -40},
-                 description="Remove silence and quiet sections",
-                 reasoning="Improves audio quality and reduces file size",
-                 priority="medium"
-             ),
-             ExecutionStep(
-                 step_id=f"normalize_{i}",
-                 tool_name="apply_normalization",
-                 parameters={"audio_file": audio_file, "target_level": -3},
-                 description="Normalize audio levels",
-                 reasoning="Ensures consistent volume levels",
-                 priority="medium"
-             )
-         ])
-
-     return ExecutionPlan(
-         steps=steps,
-         strategy="Fallback plan: basic audio enhancement with silence removal and normalization",
-         expected_outcomes=["Cleaner audio", "Consistent levels", "Reduced file size"],
-         estimated_duration="2-5 minutes",
-         risks_and_mitigations=["Minimal risk with basic processing steps"]
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.runnables import RunnableParallel
+ from src.state import AgentState
+ from operator import itemgetter
+
+ def planner_node(state: AgentState) -> AgentState:
+     llm = ChatOpenAI(model="gpt-4.1")
+     llm = llm.with_structured_output(AgentState)
+
+     prompt = ChatPromptTemplate.from_messages([
+         ("system", "You are a planner that identifies what the user wants to do and how it can be achieved. Generate a comprehensive plan for the user."),
+         ("user", "{state}")
+     ])
+
+     chain = (
+         RunnableParallel({
+             "state": itemgetter("state")
+         })
+         | prompt
+         | llm
      )
  
-
- def create_plan_summary_message(execution_plan: ExecutionPlan, formatted_plan: List[Dict[str, Any]]) -> str:
-     """Create comprehensive plan summary message."""
-
-     summary = "🎯 **Intelligent Execution Plan Created**\n\n"
-
-     # Strategy
-     summary += f"**📋 Strategy:** {execution_plan.strategy}\n\n"
-
-     # Plan overview
-     summary += f"**📊 Plan Overview:**\n"
-     summary += f"- Total steps: {len(execution_plan.steps)}\n"
-     summary += f"- Estimated duration: {execution_plan.estimated_duration}\n"
-
-     # Priority breakdown
-     high_priority = len([s for s in execution_plan.steps if s.priority == "high"])
-     medium_priority = len([s for s in execution_plan.steps if s.priority == "medium"])
-     low_priority = len([s for s in execution_plan.steps if s.priority == "low"])
-
-     summary += f"- Priority breakdown: {high_priority} high, {medium_priority} medium, {low_priority} low\n\n"
-
-     # Key steps
-     summary += "**🔧 Key Processing Steps:**\n"
-     for i, step in enumerate(execution_plan.steps[:5], 1):  # Show first 5 steps
-         summary += f"{i}. **{step.tool_name}**: {step.description}\n"
-
-     if len(execution_plan.steps) > 5:
-         summary += f"... and {len(execution_plan.steps) - 5} more steps\n"
-
-     summary += "\n"
-
-     # Expected outcomes
-     if execution_plan.expected_outcomes:
-         summary += "**🎯 Expected Outcomes:**\n"
-         for outcome in execution_plan.expected_outcomes[:3]:
-             summary += f"- {outcome}\n"
-         summary += "\n"
-
-     # Risks and mitigations
-     if execution_plan.risks_and_mitigations:
-         summary += "**⚠️ Risk Management:**\n"
-         for risk in execution_plan.risks_and_mitigations[:2]:
-             summary += f"- {risk}\n"
-         summary += "\n"
-
-     summary += "✅ **Ready to execute intelligent plan...**"
-     return summary
-
-
- def create_no_files_plan_response(state: Dict[str, Any]) -> Dict[str, Any]:
-     """Handle case when no audio files are available for planning."""
-
-     messages = state.get("messages", [])
-     messages.append(AIMessage(content="❌ **No Planning Possible**: No audio files available to process."))
-
-     return {
-         "execution_plan": [],
-         "messages": messages,
-         "errors": ["No audio files available for execution planning"]
-     }
+     return chain.invoke({"state": state})
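
Note: `llm.with_structured_output(AgentState)` only works if `AgentState` in `src/state.py` is a schema LangChain can emit as structured output, typically a Pydantic model. That file is not part of this diff, so the following is a minimal sketch of what it could look like; the field names are illustrative assumptions, not the repo's actual definition.

    # Hypothetical sketch of src/state.py -- the real fields are not shown in this commit.
    from pydantic import BaseModel

    class AgentState(BaseModel):
        user_request: str = ""    # what the user asked for
        plan: str = ""            # filled in by planner_node
        final_response: str = ""  # filled in by processor_node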
src/nodes/processor.py ADDED
@@ -0,0 +1,24 @@
+ from langchain_openai import ChatOpenAI
+ from langchain_core.prompts import ChatPromptTemplate
+ from src.state import AgentState
+ from operator import itemgetter
+ from langchain_core.runnables import RunnableParallel
+
+ def processor_node(state: AgentState) -> AgentState:
+     llm = ChatOpenAI(model="gpt-4.1")
+     llm = llm.with_structured_output(AgentState)
+
+     prompt = ChatPromptTemplate.from_messages([
+         ("system", "You are a processor that carries out the plan and generates a final response for the user."),
+         ("user", "Current state: {state}")
+     ])
+
+     chain = (
+         RunnableParallel({
+             "state": itemgetter("state")
+         })
+         | prompt
+         | llm
+     )
+
+     return chain.invoke({"state": state})
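
With the router, script generator, and validator nodes removed, the agent presumably reduces to these two nodes in sequence (the added agent_graph.png suggests a linear flow). The graph-construction code is not shown in this section, so the wiring below is a sketch under that assumption, not the commit's actual implementation.

    # Hypothetical LangGraph wiring -- not part of this commit's diff.
    from langgraph.graph import StateGraph, START, END
    from src.state import AgentState
    from src.nodes.planner import planner_node
    from src.nodes.processor import processor_node

    builder = StateGraph(AgentState)
    builder.add_node("planner", planner_node)
    builder.add_node("processor", processor_node)
    builder.add_edge(START, "planner")
    builder.add_edge("planner", "processor")
    builder.add_edge("processor", END)
    agent = builder.compile()

    # Example invocation; the input field name follows the sketched AgentState above.
    result = agent.invoke({"user_request": "Clean up my podcast audio"})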
src/nodes/prompts.py DELETED
@@ -1,440 +0,0 @@
- PLANNER_SYSTEM_PROMPT = """You are an expert audio processing strategist and execution planner. Your job is to create optimal, step-by-step execution plans for audio processing tasks.
-
- **Available Audio Processing Tools:**
-
- 1. **Information & Metadata Tools:**
- - update_audio_info: Updates general audio file information
- - update_duration_info: Updates audio duration and timing information
- - update_transcription_info: Updates transcription-related metadata
-
- 2. **Core Processing Tools:**
- - process_cut_audio: Cuts/trims audio to specific time ranges (params: audio_file, _start_time, _end_time)
- - apply_normalization: Normalizes audio levels (params: audio_file, target_level)
- - apply_volume_adjustment: Adjusts volume by gain amount (params: audio_file, gain_db)
- - apply_speed_adjustment: Changes playback speed (params: audio_file, speed_factor)
- - apply_fades: Adds fade in/out effects (params: audio_file, fade_in_ms, fade_out_ms)
- - apply_reverse: Reverses audio playback (params: audio_file)
- - apply_silence_trimming: Removes silence/quiet sections (params: audio_file, threshold_db)
-
- **Planning Principles:**
-
- 1. **Context-Aware**: Consider the user's specific goals, not just keywords
- 2. **Quality-First**: Prioritize steps that will most improve the final result
- 3. **Efficient**: Order steps logically to minimize processing time and quality loss
- 4. **Robust**: Include metadata updates and error-handling steps
- 5. **Adaptive**: Tailor approach based on transcript insights and file characteristics
-
- **Step Ordering Best Practices:**
- - Start with metadata updates (audio_info, duration_info)
- - Apply destructive edits first (cutting, trimming)
- - Then apply enhancement (normalization, volume, speed)
- - Finish with aesthetic touches (fades, effects)
-
- **User Intent Analysis:**
- - "Clean up" / "improve" = silence trimming + normalization + possible filler removal
- - "Remove filler words" = intelligent cutting based on transcript analysis
- - "Cut" / "trim" = precise time-based cutting
- - "Louder" / "quieter" = volume adjustment
- - "Faster" / "slower" = speed adjustment
- - "Professional" = normalization + fades + silence trimming
-
- Be intelligent about combining the user's explicit requests with transcript insights to create a comprehensive plan that achieves their goals."""
-
-
- PLANNER_USER_PROMPT_TEMPLATE = """
- **Planning Request for Audio Processing**
-
- **User's Original Request:**
- {user_request}
-
- **Processing Type:** {processing_type}
-
- **Audio Files to Process:** {file_count} files
- {file_list}
-
- **Transcript Analysis:**
- {transcript_summary}
-
- **Processing Context:**
- {processing_context}
-
- **Planning Requirements:**
-
- 1. **Analyze the user's true intent** - what do they actually want to achieve?
- 2. **Consider transcript insights** - filler words, quality issues, content characteristics
- 3. **Create step-by-step execution plan** - specific tools with exact parameters
- 4. **Optimize for quality and efficiency** - best order for operations
- 5. **Include appropriate metadata steps** - ensure proper file handling
- 6. **Plan for potential issues** - what could go wrong and how to handle it
-
- **Key Questions to Address:**
- - What's the primary goal of this processing?
- - Which transcript insights should influence the plan?
- - What's the optimal order of operations?
- - What parameters will achieve the best results?
- - How can we ensure high-quality output?
-
- Create a comprehensive execution plan that intelligently combines the user's requests with the insights from the transcript analysis.
- """
-
- # Prompts for script_generator.py
-
- SCRIPT_GENERATOR_SYSTEM_PROMPT = """You are an expert audio transcription strategist. Your job is to create optimal plans for transcribing audio files based on user needs and available tools.
-
- **Available Tool Types:**
- - transcribe_audio_sync: Main transcription tool for converting audio to text
- - update_transcription_info: Updates transcription metadata and info
-
- **Planning Considerations:**
- - Order files by complexity/priority
- - Choose appropriate tools based on file characteristics
- - Consider user's specific goals (filler removal, cutting, quality improvement)
- - Anticipate potential challenges (multiple speakers, background noise, etc.)
- - Plan analysis goals that align with user intent
-
- **Your planning should be:**
- - Strategic: Consider the best order and approach
- - Practical: Use available tools effectively
- - Goal-oriented: Focus on what the user actually needs
- - Robust: Anticipate and prepare for common issues
-
- Be intelligent about the user's intent - if they want to remove filler words, prioritize filler detection. If they want to cut audio, focus on timestamp accuracy."""
-
- SCRIPT_GENERATOR_USER_PROMPT_TEMPLATE = """
- **Audio Files to Process:** {file_count} files
- {file_list}
-
- **User's Request:** {user_request}
-
- **Available Tools:** {available_tools}
-
- Create an optimal transcription plan that:
- 1. Determines the best order to process these files
- 2. Selects appropriate tools for the task
- 3. Defines analysis goals that align with the user's needs
- 4. Anticipates potential challenges
- 5. Provides clear reasoning for the approach
-
- Consider the user's intent and optimize for their specific goals.
- """
-
- ANALYZE_TRANSCRIPT_PROMPT_TEMPLATE = """
- Analyze this audio transcript and provide structured insights:
-
- **Audio File:** {audio_file}
- **Transcript:** {transcript_content}
-
- Please provide analysis in JSON format with these fields:
- - "timestamps": Array of objects with start/end times and text segments (estimate based on content)
- - "filler_words": Array of detected filler words with positions and context
- - "quality_score": Float 0-1 indicating transcript quality
- - "insights": Array of key insights about the content
- - "speaker_analysis": Information about speakers if detectable
- - "content_summary": Brief summary of what the audio contains
-
- Focus on practical insights that would help with audio processing decisions.
- """
-
- ANALYZE_TRANSCRIPTION_RESULTS_SYSTEM_PROMPT = """You are an expert audio transcription analyst. Analyze the transcription results and provide insights about success, quality, and recommendations for next steps."""
-
- ANALYZE_TRANSCRIPTION_RESULTS_USER_PROMPT_TEMPLATE = """
- **User's Original Request:** {user_request}
-
- **Analysis Goals:** {analysis_goals}
-
- **Transcription Results:**
- - Successfully transcribed: {success_count} files
- - Failed transcriptions: {failure_count} files
- - Errors: {errors}
-
- **Script Details:**
- {script_details}
-
- Provide analysis of the transcription quality, success rate, and specific recommendations for audio processing based on these results.
- """
-
- # Prompts for chat.py
-
- CHAT_SYSTEM_PROMPT_BASE = """You are an expert Audio Processing Assistant powered by advanced audio tools.
-
- Your role is to:
- 1. Answer questions about audio processing capabilities
- 2. Provide guidance on how to use audio tools
- 3. Demonstrate tool usage only when explicitly requested
- 4. Explain audio concepts and best practices
- 5. Help users understand what's possible with audio processing
-
- You have a conversational, helpful, and knowledgeable personality. You can discuss both technical and practical aspects of audio processing.
-
- IMPORTANT: Only call tools when the user explicitly asks for a demonstration. For general questions about capabilities, explain the tools without calling them."""
-
- CHAT_SYSTEM_PROMPT_TOOLS_HEADER = "\n\n**Available Audio Tools:**\n"
-
- CHAT_SYSTEM_PROMPT_GUIDELINES = """
- **Guidelines:**
- - Provide clear, helpful explanations about audio processing
- - Only demonstrate tools when explicitly asked to do so
- - Explain tool capabilities without necessarily calling them
- - Be encouraging about what's possible with audio processing
- - Keep responses informative but concise
- - Use emojis to make responses engaging (🎵 🔧 📊 ✨)
-
- **For audio file processing requests:**
- Suggest using the full audio processing workflow by providing audio file URLs and describing the desired outcome.
- """
-
- # Prompts for validator.py
-
- VALIDATOR_SYSTEM_PROMPT = """You are an expert audio processing validator. Your job is to assess the results of audio processing workflows and provide intelligent feedback.
-
- Your role:
- 1. Analyze processing results against the user's original request
- 2. Evaluate the quality and completeness of the work done
- 3. Identify critical issues, warnings, and successes
- 4. Provide actionable recommendations
- 5. Determine if reprocessing would be beneficial
-
- Assessment criteria:
- - Did the processing achieve the user's goals?
- - Are there any critical failures that prevent success?
- - What is the overall quality of the results?
- - Are there minor issues that could be improved?
- - Would reprocessing with different parameters help?
-
- Consider the context:
- - User's original request and intent
- - What processing steps were planned vs. completed
- - Any errors or issues encountered
- - The quality of transcripts and processing metadata
- - Whether processed files were successfully generated
-
- Be thorough but practical in your assessment. Focus on actionable insights that would help improve the audio processing results."""
-
- VALIDATOR_USER_PROMPT_TEMPLATE = """
- ## Processing Assessment Request
-
- **User's Original Request:**
- {user_request}
-
- **Execution Plan ({plan_steps} steps planned):**
- {execution_plan}
-
- **Completed Steps ({completed_steps_count}):**
- {completed_steps}
-
- **Processing Results:**
- {processing_results}
-
- **Transcript Analysis:**
- {transcript_analysis}
-
- **Errors Encountered ({error_count}):**
- {errors}
- {reprocessing_note}
- ## Assessment Task
-
- Please analyze this processing workflow and provide a comprehensive validation assessment. Consider:
-
- 1. How well did the processing achieve the user's goals?
- 2. What is the overall quality and success rate?
- 3. Are there critical issues that prevent success?
- 4. What warnings or minor issues should be noted?
- 5. What specific recommendations would improve results?
- 6. Would reprocessing with adjustments be beneficial?
-
- Provide honest, actionable feedback that would help improve the audio processing results.
- """
-
- # Prompts for final_response.py
-
- FINAL_RESPONSE_SYSTEM_PROMPT = """You are an expert audio processing communication specialist. Your job is to create engaging, informative, and personalized final responses for users who have completed audio processing workflows.
-
- **Your Role:**
- - Craft compelling, user-friendly summaries of what was accomplished
- - Highlight key improvements and value delivered
- - Provide clear information about processed files and how to access them
- - Offer personalized recommendations based on the specific processing
- - Suggest appropriate next steps
- - Maintain an encouraging and professional tone
-
- **Response Principles:**
- 1. **User-Centric**: Focus on what the user gained and achieved
- 2. **Clear and Actionable**: Provide specific, actionable information
- 3. **Celebratory**: Acknowledge accomplishments and improvements
- 4. **Helpful**: Offer valuable insights and next steps
- 5. **Professional**: Maintain expertise while being approachable
- 6. **Specific**: Reference actual results and improvements made
-
- **Key Elements to Include:**
- - Engaging title that captures what was accomplished
- - Clear summary of processing results
- - Specific improvements and enhancements made
- - Quality assessment and success metrics
- - Download information for processed files
- - Personalized recommendations based on the processing
- - Encouraging next steps or call to action
-
- **Tone Guidelines:**
- - Professional but friendly
- - Confident in the results achieved
- - Encouraging about next steps
- - Specific about technical improvements
- - Celebratory of success, honest about limitations
-
- Be specific about the actual processing done and results achieved. Reference real file names, improvements made, and quality metrics when available."""
-
- FINAL_RESPONSE_USER_PROMPT_TEMPLATE = """
- **Create Final Response for Audio Processing Workflow**
-
- **User's Original Request:**
- {user_request}
-
- **Processing Type:** {processing_type}
-
- **Processing Results:**
- - Successfully processed: {processed_files_count} files
- - Completed steps: {completed_steps_count}
- - Errors encountered: {error_count}
-
- **Processed Files:**
- {processed_files}
-
- **Processing Context Analysis:**
- {processing_summary}
-
- **Execution Plan Summary:**
- {plan_steps} steps planned
- Key tools used: {tools_used}
-
- **Completed Steps (last 5):**
- {completed_steps}
-
- **Errors (if any):**
- {errors}
-
- **Processing Metadata:**
- {processing_metadata}
-
- **Task:**
- Create a comprehensive, engaging final response that:
- 1. Celebrates what was accomplished
- 2. Clearly explains the results and improvements
- 3. Provides specific download information for processed files
- 4. Offers personalized recommendations based on this specific processing
- 5. Suggests appropriate next steps
- 6. Maintains an encouraging and professional tone
-
- Focus on the value delivered to the user and make it clear how to access and use their processed audio files.
- """
-
- # Prompts for router.py
-
- ROUTER_SYSTEM_PROMPT = """You are an intelligent routing agent for an audio processing system. Your job is to analyze user requests and determine the best processing path.
-
- **Processing Types Available:**
-
- 1. **chat** - For general questions, help requests, or when no audio processing is needed
- - User asking about capabilities, features, or how to use the system
- - General conversation or questions
- - No audio files present, or user just wants information
-
- 2. **audio_processing** - For single or multiple audio file processing tasks
- - Removing filler words, cutting audio, improving quality
- - Normalizing volume, adjusting speed, adding effects
- - Transcription and analysis tasks
- - Any audio enhancement or modification
-
- 3. **dialogue_generation** - For combining multiple audio files into conversations
- - Creating interviews, podcasts, or conversations from separate files
- - Merging voices or speakers into dialogue format
- - Building composite audio experiences
-
- **Audio File Detection:**
- Extract any audio file URLs or paths from the user's message. Look for:
- - HTTP/HTTPS URLs ending in .mp3, .wav, .m4a, .flac, .aac, .ogg
- - Local file paths with audio extensions
- - References to audio files even if not explicitly formatted as URLs
-
- **Priority Assessment:**
- - **high**: Urgent processing needs, multiple complex steps, time-sensitive
- - **medium**: Standard processing requests, moderate complexity
- - **low**: Simple questions, basic single-step tasks
-
- **Your Analysis Should:**
- - Understand the user's true intent behind their request
- - Identify all audio files mentioned or linked
- - Choose the most appropriate processing type
- - Assess the complexity and urgency
- - Provide clear reasoning for your decision
-
- Be intelligent about context - a user saying "help me clean up this audio" with a file link clearly needs audio_processing, not chat."""
-
- ROUTER_USER_PROMPT_TEMPLATE = """
- Please analyze this user request and determine the appropriate routing:
-
- **User Request:**
- {user_content}
-
- **Analysis Task:**
- 1. What is the user's primary intent?
- 2. Are there any audio files mentioned or linked?
- 3. What type of processing would best serve their needs?
- 4. How complex/urgent is this request?
- 5. What's the reasoning for your routing decision?
-
- Provide a structured analysis with your routing decision.
- """
-
- # Prompts for audio_processor.py
-
- LLM_PROCESSING_DECISION_PROMPT_TEMPLATE = """
- You are an intelligent audio processing engine. Decide whether to execute this processing step:
-
- **Step to Consider:**
- - Tool: {tool_name}
- - Description: {description}
- - Planned Parameters: {planned_parameters}
-
- **Context:**
- - User Request: {user_request}
- - Completed Steps: {completed_steps_count}
- - Errors So Far: {error_count}
- - Available Tools: {available_tools}
- - Current File URLs: {current_file_urls}
-
- **Recent Activity:**
- {recent_activity}
-
- Should this step be executed? Respond with:
- 1. "EXECUTE" or "SKIP"
- 2. If EXECUTE, provide any parameter modifications in JSON format (or "NO_CHANGES")
-
- Example response:
- EXECUTE
- {{"audio_file": "updated_url.mp3", "target_level": -6}}
-
- Or:
- SKIP - This step is redundant given previous processing
- """
-
- LLM_PROCESSING_SUMMARY_PROMPT_TEMPLATE = """
- Create a concise processing summary for the user:
-
- **User's Request:** {user_request}
-
- **Results:**
- - Completed Steps: {completed_steps_count}
- - Errors: {error_count}
- - Files Processed: {processed_files_count}
-
- **Step Details:**
- {step_details}
-
- **Processed Files:**
- {processed_files}
-
- **Errors:**
- {errors}
-
- Create a brief, encouraging summary focusing on what was accomplished and next steps.
- """

src/nodes/router.py DELETED
@@ -1,167 +0,0 @@
- """
- Intelligent LLM-powered router node for determining processing type and extracting context.
- """
-
- import re
- from typing import Dict, Any, List
- from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
- from langchain_openai import ChatOpenAI
- from langchain_core.output_parsers import PydanticOutputParser
- from pydantic import BaseModel, Field
-
- from .prompts import ROUTER_SYSTEM_PROMPT, ROUTER_USER_PROMPT_TEMPLATE
-
-
- class RouterDecision(BaseModel):
-     """Structured output for router decisions."""
-
-     processing_type: str = Field(description="Type of processing needed: 'chat', 'audio_processing', or 'dialogue_generation'")
-     user_request: str = Field(description="Clean, parsed version of the user's request")
-     audio_files: List[str] = Field(description="List of audio file URLs/paths found in the message")
-     reasoning: str = Field(description="Brief explanation of why this processing type was chosen")
-     priority_level: str = Field(description="Priority level: 'low', 'medium', or 'high'")
-
-
- async def router_node(state: Dict[str, Any]) -> Dict[str, Any]:
-     """
-     Intelligently route the conversation using LLM analysis.
-
-     The LLM analyzes user input to determine:
-     - Processing type needed
-     - Audio files to extract
-     - User intent and priority
-     """
-
-     # Get the latest user message
-     latest_message = None
-     for msg in reversed(state.get("messages", [])):
-         if isinstance(msg, HumanMessage):
-             latest_message = msg
-             break
-
-     if not latest_message:
-         return create_default_routing()
-
-     # Use LLM to make routing decision
-     router_decision = await analyze_user_request_with_llm(latest_message.content)
-
-     # Create status message
-     status_message = f"🎯 **Routing Analysis**: {router_decision.reasoning}\n**Processing Type**: {router_decision.processing_type}\n**Priority**: {router_decision.priority_level}"
-
-     messages = state.get("messages", [])
-     messages.append(AIMessage(content=status_message))
-
-     return {
-         "processing_type": router_decision.processing_type,
-         "user_request": router_decision.user_request,
-         "audio_files": router_decision.audio_files,
-         "messages": messages,
-         "errors": [],
-         "needs_reprocessing": False,
-         "completed_steps": [],
-         "scripts": {},
-         "processed_files": {},
-         "processing_metadata": {
-             "router_reasoning": router_decision.reasoning,
-             "priority_level": router_decision.priority_level
-         }
-     }
-
-
- async def analyze_user_request_with_llm(user_content: str) -> RouterDecision:
-     """Use LLM to intelligently analyze user request and make routing decisions."""
-
-     system_message = create_router_system_message()
-     user_message = create_router_user_message(user_content)
-
-     # Set up LLM with structured output
-     llm = ChatOpenAI(model="gpt-4o", temperature=0.1)
-     parser = PydanticOutputParser(pydantic_object=RouterDecision)
-
-     prompt_messages = [
-         system_message,
-         HumanMessage(content=user_message),
-         HumanMessage(content=parser.get_format_instructions())
-     ]
-
-     try:
-         response = await llm.ainvoke(prompt_messages)
-         router_decision = parser.parse(response.content)
-         return router_decision
-     except Exception as e:
-         # Fallback to simple analysis
-         return create_fallback_routing(user_content)
-
-
- def create_router_system_message() -> SystemMessage:
-     """Create system message for LLM routing analysis."""
-     return SystemMessage(content=ROUTER_SYSTEM_PROMPT)
-
-
- def create_router_user_message(user_content: str) -> str:
-     """Create user message for routing analysis."""
-     return ROUTER_USER_PROMPT_TEMPLATE.format(user_content=user_content)
-
-
- def create_fallback_routing(user_content: str) -> RouterDecision:
-     """Create fallback routing if LLM analysis fails."""
-
-     content_lower = user_content.lower()
-
-     # Simple pattern matching for fallback
-     audio_patterns = ['.mp3', '.wav', '.m4a', '.flac', '.aac', '.ogg', 'http']
-     dialogue_keywords = ['dialogue', 'conversation', 'combine', 'merge', 'interview']
-
-     has_audio = any(pattern in user_content for pattern in audio_patterns)
-     is_dialogue = any(keyword in content_lower for keyword in dialogue_keywords)
-
-     if has_audio and is_dialogue:
-         processing_type = "dialogue_generation"
-     elif has_audio:
-         processing_type = "audio_processing"
-     else:
-         processing_type = "chat"
-
-     # Extract audio files with simple regex
-     audio_files = extract_audio_files_simple(user_content)
-
-     return RouterDecision(
-         processing_type=processing_type,
-         user_request=user_content,
-         audio_files=audio_files,
-         reasoning=f"Fallback analysis: detected {processing_type} based on content patterns",
-         priority_level="medium"
-     )
-
-
- def extract_audio_files_simple(content: str) -> List[str]:
-     """Simple regex-based audio file extraction for fallback."""
-
-     # Look for URLs (http/https)
-     url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+\.(mp3|wav|m4a|flac|aac|ogg)'
-     urls = re.findall(url_pattern, content, re.IGNORECASE)
-
-     # Look for file paths
-     path_pattern = r'[^\s<>"{}|\\^`\[\]]+\.(mp3|wav|m4a|flac|aac|ogg)'
-     paths = re.findall(path_pattern, content, re.IGNORECASE)
-
-     # Combine and deduplicate
-     audio_files = list(set(urls + [path for path in paths if not path.startswith('http')]))
-
-     return audio_files
-
-
- def create_default_routing() -> Dict[str, Any]:
-     """Create default routing when no user message found."""
-
-     return {
-         "processing_type": "chat",
-         "user_request": "",
-         "audio_files": [],
-         "errors": [],
-         "needs_reprocessing": False,
-         "completed_steps": [],
-         "scripts": {},
-         "processed_files": {},
-         "processing_metadata": {}
-     }

src/nodes/script_generator.py DELETED
@@ -1,412 +0,0 @@
- """
- Intelligent LLM-powered script generator for audio transcription and analysis.
- """
-
- from typing import Dict, Any, List
- from langchain_core.messages import AIMessage, SystemMessage, HumanMessage
- from langchain_openai import ChatOpenAI
- from langchain_core.output_parsers import PydanticOutputParser
- from pydantic import BaseModel, Field
- import json
-
- from .prompts import (
-     SCRIPT_GENERATOR_SYSTEM_PROMPT,
-     SCRIPT_GENERATOR_USER_PROMPT_TEMPLATE,
-     ANALYZE_TRANSCRIPT_PROMPT_TEMPLATE,
-     ANALYZE_TRANSCRIPTION_RESULTS_SYSTEM_PROMPT,
-     ANALYZE_TRANSCRIPTION_RESULTS_USER_PROMPT_TEMPLATE,
- )
-
-
- class TranscriptionPlan(BaseModel):
-     """Plan for transcribing audio files."""
-
-     tools_to_use: List[str] = Field(description="List of tool names to use for transcription")
-     processing_order: List[str] = Field(description="Order to process audio files")
-     analysis_goals: List[str] = Field(description="What to analyze in the transcripts")
-     expected_challenges: List[str] = Field(description="Potential issues to watch for")
-     reasoning: str = Field(description="Reasoning for this transcription approach")
-
-
- class TranscriptionResults(BaseModel):
-     """Results of transcription analysis."""
-
-     success_files: List[str] = Field(description="Successfully transcribed files")
-     failed_files: List[str] = Field(description="Files that failed to transcribe")
-     insights: List[str] = Field(description="Key insights from the transcriptions")
-     quality_assessment: str = Field(description="Assessment of transcription quality")
-     recommendations: List[str] = Field(description="Recommendations for next steps")
-
-
- async def script_generator_node(state: Dict[str, Any], tools: list) -> Dict[str, Any]:
-     """
-     Intelligently generate transcripts using LLM-guided tool usage.
-
-     The LLM analyzes the audio files and user request to:
-     - Decide which transcription tools to use
-     - Determine the best processing approach
-     - Execute transcription with intelligent error handling
-     - Analyze results and extract insights
-     """
-
-     audio_files = state.get("audio_files", [])
-     user_request = state.get("user_request", "")
-
-     if not audio_files:
-         return create_no_files_response(state)
-
-     # Get available transcription tools
-     available_tools = get_transcription_tools(tools)
-     if not available_tools:
-         return create_no_tools_response(state)
-
-     # LLM creates transcription plan
-     transcription_plan = await create_transcription_plan_with_llm(
-         audio_files, user_request, available_tools
-     )
-
-     # Execute transcription based on LLM plan
-     scripts = {}
-     errors = []
-     completed_steps = state.get("completed_steps", [])
-
-     for audio_file in transcription_plan.processing_order:
-         if audio_file in audio_files:  # Ensure file is in our list
-             script_result = await execute_transcription_with_llm(
-                 audio_file, transcription_plan.tools_to_use, tools
-             )
-
-             if script_result["success"]:
-                 scripts[audio_file] = script_result["data"]
-                 completed_steps.append(f"✅ Transcribed: {audio_file.split('/')[-1]}")
-             else:
-                 errors.append(script_result["error"])
-                 completed_steps.append(f"❌ Failed: {audio_file.split('/')[-1]}")
-
-     # LLM analyzes results and provides insights
-     analysis_results = await analyze_transcription_results_with_llm(
-         scripts, errors, user_request, transcription_plan.analysis_goals
-     )
-
-     # Create comprehensive response
-     response_message = create_transcription_response(
-         scripts, errors, transcription_plan, analysis_results
-     )
-
-     messages = state.get("messages", [])
-     messages.append(AIMessage(content=response_message))
-
-     return {
-         "scripts": scripts,
-         "completed_steps": completed_steps,
-         "errors": errors,
-         "messages": messages,
-         "processing_metadata": {
-             "transcription_plan": transcription_plan.dict(),
-             "analysis_results": analysis_results.dict()
-         }
-     }
-
-
- async def create_transcription_plan_with_llm(
-     audio_files: List[str],
-     user_request: str,
-     available_tools: List[str]
- ) -> TranscriptionPlan:
-     """Use LLM to create intelligent transcription plan."""
-
-     system_message = create_transcription_planning_system_message()
-     user_message = create_transcription_planning_user_message(audio_files, user_request, available_tools)
-
-     llm = ChatOpenAI(model="gpt-4o", temperature=0.2)
-     parser = PydanticOutputParser(pydantic_object=TranscriptionPlan)
-
-     prompt_messages = [
-         system_message,
-         HumanMessage(content=user_message),
-         HumanMessage(content=parser.get_format_instructions())
-     ]
-
-     try:
-         response = await llm.ainvoke(prompt_messages)
-         plan = parser.parse(response.content)
-         return plan
-     except Exception as e:
-         # Fallback plan
-         return TranscriptionPlan(
-             tools_to_use=available_tools[:2],  # Use first 2 available tools
-             processing_order=audio_files,
-             analysis_goals=["Basic transcription", "Filler word detection"],
-             expected_challenges=["Audio quality issues", "Multiple speakers"],
-             reasoning="Fallback plan due to LLM planning failure"
-         )
-
-
- async def execute_transcription_with_llm(
-     audio_file: str,
-     tools_to_use: List[str],
-     available_tools: list
- ) -> Dict[str, Any]:
-     """Execute transcription for a single file using planned tools."""
-
-     # Find the actual tool objects
-     tool_objects = {}
-     for tool in available_tools:
-         if tool.name in tools_to_use:
-             tool_objects[tool.name] = tool
-
-     transcript_data = {
-         "transcript": "",
-         "timestamps": [],
-         "filler_words": [],
-         "quality_score": 0.0
-     }
-
-     try:
-         # Use update_transcription_info first if available
-         if "update_transcription_info" in tool_objects:
-             await tool_objects["update_transcription_info"].ainvoke({"audio_file": audio_file})
-
-         # Use transcribe_audio_sync for main transcription
-         if "transcribe_audio_sync" in tool_objects:
-             transcript_result = await tool_objects["transcribe_audio_sync"].ainvoke({"audio_file": audio_file})
-
-             # Process the transcript result
-             if hasattr(transcript_result, 'content'):
-                 transcript_content = transcript_result.content
-             else:
-                 transcript_content = str(transcript_result)
-
-             # Use LLM to analyze the transcript
-             analysis = await analyze_transcript_with_llm(transcript_content, audio_file)
-
-             transcript_data.update({
-                 "transcript": transcript_content,
-                 "timestamps": analysis.get("timestamps", []),
-                 "filler_words": analysis.get("filler_words", []),
-                 "quality_score": analysis.get("quality_score", 0.5),
-                 "insights": analysis.get("insights", [])
-             })
-
-             return {"success": True, "data": transcript_data}
-
-         else:
-             return {"success": False, "error": f"No suitable transcription tool found for {audio_file}"}
-
-     except Exception as e:
-         return {"success": False, "error": f"Transcription failed for {audio_file}: {str(e)}"}
-
-
- async def analyze_transcript_with_llm(transcript_content: str, audio_file: str) -> Dict[str, Any]:
-     """Use LLM to analyze transcript content and extract insights."""
-
-     llm = ChatOpenAI(model="gpt-4o", temperature=0.1)
-
-     analysis_prompt = ANALYZE_TRANSCRIPT_PROMPT_TEMPLATE.format(
-         audio_file=audio_file,
-         transcript_content=transcript_content
-     )
-
-     try:
-         response = await llm.ainvoke([SystemMessage(content=analysis_prompt)])
-         # Try to parse as JSON
-         analysis_data = json.loads(response.content)
-         return analysis_data
-     except Exception as e:
-         # Fallback to simple analysis
-         return {
-             "timestamps": create_simple_timestamps(transcript_content),
-             "filler_words": detect_simple_filler_words(transcript_content),
-             "quality_score": 0.7,
-             "insights": ["Basic transcript generated"],
-             "speaker_analysis": "Unable to analyze speakers",
-             "content_summary": transcript_content[:100] + "..." if len(transcript_content) > 100 else transcript_content
-         }
-
-
- async def analyze_transcription_results_with_llm(
-     scripts: Dict[str, Any],
-     errors: List[str],
-     user_request: str,
-     analysis_goals: List[str]
- ) -> TranscriptionResults:
-     """Use LLM to analyze overall transcription results."""
-
-     system_message = SystemMessage(content=ANALYZE_TRANSCRIPTION_RESULTS_SYSTEM_PROMPT)
-
-     script_details = json.dumps({
-         k: {
-             "length": len(v.get("transcript", "")),
-             "filler_count": len(v.get("filler_words", [])),
-             "quality": v.get("quality_score", 0)
-         } for k, v in scripts.items()
-     }, indent=2)
-
-     user_message_content = ANALYZE_TRANSCRIPTION_RESULTS_USER_PROMPT_TEMPLATE.format(
-         user_request=user_request,
-         analysis_goals=", ".join(analysis_goals),
-         success_count=len(scripts),
-         failure_count=len(errors),
-         errors=errors,
-         script_details=script_details
-     )
-
-     llm = ChatOpenAI(model="gpt-4o", temperature=0.2)
-     parser = PydanticOutputParser(pydantic_object=TranscriptionResults)
-
-     prompt_messages = [
-         system_message,
-         HumanMessage(content=user_message_content),
-         HumanMessage(content=parser.get_format_instructions())
-     ]
-
-     try:
-         response = await llm.ainvoke(prompt_messages)
-         results = parser.parse(response.content)
-         return results
-     except Exception as e:
-         # Fallback analysis
-         return TranscriptionResults(
-             success_files=list(scripts.keys()),
-             failed_files=[f"Error occurred: {str(e)}"],
-             insights=["Basic transcription completed"],
-             quality_assessment="Unable to assess quality automatically",
-             recommendations=["Proceed with standard audio processing"]
-         )
-
-
- def get_transcription_tools(tools: list) -> List[str]:
-     """Extract transcription tool names from available tools."""
-     transcription_tool_names = []
-
-     for tool in tools:
-         if any(keyword in tool.name.lower() for keyword in ['transcribe', 'transcript']):
-             transcription_tool_names.append(tool.name)
-
-     return transcription_tool_names
-
-
- def create_transcription_planning_system_message() -> SystemMessage:
-     """Create system message for transcription planning."""
-     return SystemMessage(content=SCRIPT_GENERATOR_SYSTEM_PROMPT)
-
-
- def create_transcription_planning_user_message(audio_files: List[str], user_request: str, available_tools: List[str]) -> str:
-     """Create user message for transcription planning."""
-     file_list = "\n".join([f"- {file}" for file in audio_files])
-     return SCRIPT_GENERATOR_USER_PROMPT_TEMPLATE.format(
-         file_count=len(audio_files),
-         file_list=file_list,
-         user_request=user_request,
-         available_tools=", ".join(available_tools)
-     )
-
-
- def create_simple_timestamps(transcript: str) -> List[Dict[str, Any]]:
-     """Create simple timestamp estimates for fallback."""
-     timestamps = []
-     lines = [line.strip() for line in transcript.split('\n') if line.strip()]
-
-     for i, line in enumerate(lines):
-         start_time = i * 3.0
-         end_time = start_time + 3.0
-         timestamps.append({
-             "start": start_time,
-             "end": end_time,
-             "text": line
-         })
-
-     return timestamps
-
-
- def detect_simple_filler_words(transcript: str) -> List[Dict[str, Any]]:
-     """Simple filler word detection for fallback."""
-     filler_words = ["um", "uh", "like", "you know", "so", "well", "actually"]
-     found_fillers = []
-     words = transcript.lower().split()
-
-     for i, word in enumerate(words):
-         clean_word = word.strip('.,!?;:"()[]{}')
-         if clean_word in filler_words:
-             found_fillers.append({
-                 "word": clean_word,
-                 "position": i,
-                 "context": " ".join(words[max(0, i-2):min(len(words), i+3)])
-             })
-
-     return found_fillers
-
-
- def create_transcription_response(
-     scripts: Dict[str, Any],
-     errors: List[str],
-     plan: TranscriptionPlan,
-     analysis: TranscriptionResults
- ) -> str:
-     """Create comprehensive transcription response message."""
-
-     if not scripts and errors:
-         return f"❌ **Transcription Failed**\n\n{chr(10).join(errors)}"
-
-     response = "🎙️ **Intelligent Transcription Complete**\n\n"
-
-     # Plan summary
-     response += f"**📋 Strategy Used:** {plan.reasoning}\n\n"
-
-     # Results summary
-     response += f"**📊 Results:**\n"
-     response += f"- ✅ Successfully transcribed: {len(scripts)} files\n"
-     response += f"- ❌ Failed: {len(errors)} files\n\n"
-
-     # File details
-     if scripts:
-         response += "**📝 Transcript Details:**\n"
-         for file_url, script_data in scripts.items():
-             filename = file_url.split('/')[-1] if '/' in file_url else file_url
-             transcript_len = len(script_data.get("transcript", ""))
-             filler_count = len(script_data.get("filler_words", []))
-             quality = script_data.get("quality_score", 0)
-
-             response += f"- **{filename}**: {transcript_len} chars, {filler_count} fillers, {quality:.1%} quality\n"
-         response += "\n"
-
-     # AI insights
-     if analysis.insights:
-         response += "**🤖 AI Insights:**\n"
-         for insight in analysis.insights[:3]:
-             response += f"- {insight}\n"
-         response += "\n"
-
-     # Next steps
-     if analysis.recommendations:
382
- response += "**🎯 Recommendations:**\n"
383
- for rec in analysis.recommendations[:2]:
384
- response += f"- {rec}\n"
385
- response += "\n"
386
-
387
- response += "✅ **Ready for execution planning...**"
388
- return response
389
-
390
-
391
- def create_no_files_response(state: Dict[str, Any]) -> Dict[str, Any]:
392
- """Handle case when no audio files are provided."""
393
- messages = state.get("messages", [])
394
- messages.append(AIMessage(content="❌ **No Audio Files**: Please provide audio files to transcribe."))
395
-
396
- return {
397
- "scripts": {},
398
- "errors": ["No audio files provided for transcription"],
399
- "messages": messages
400
- }
401
-
402
-
403
- def create_no_tools_response(state: Dict[str, Any]) -> Dict[str, Any]:
404
- """Handle case when no transcription tools are available."""
405
- messages = state.get("messages", [])
406
- messages.append(AIMessage(content="❌ **Transcription Tools Unavailable**: Cannot proceed without transcription capabilities."))
407
-
408
- return {
409
- "scripts": {},
410
- "errors": ["No transcription tools available"],
411
- "messages": messages
412
- }
 
src/nodes/validator.py CHANGED
@@ -1,272 +1,30 @@
-"""
-Validator node for intelligent LLM-powered validation of processing results.
-"""
-
-from typing import Dict, Any, List
-from langchain_core.messages import AIMessage, SystemMessage, HumanMessage
 from langchain_openai import ChatOpenAI
-from langchain_core.output_parsers import PydanticOutputParser
-from pydantic import BaseModel, Field
-import datetime
-from .prompts import VALIDATOR_SYSTEM_PROMPT, VALIDATOR_USER_PROMPT_TEMPLATE
-
-class ValidationResults(BaseModel):
-    """Structured validation results from LLM assessment."""
-
-    overall_status: str = Field(description="Overall status: 'success', 'partial_success', or 'failed'")
-    completion_rate: float = Field(description="Estimated completion rate as a decimal (0.0 to 1.0)")
-    quality_score: float = Field(description="Quality assessment score (0.0 to 1.0)")
-    critical_errors: List[str] = Field(description="List of critical errors that need addressing")
-    warnings: List[str] = Field(description="List of warnings or minor issues")
-    recommendations: List[str] = Field(description="Specific recommendations for improvement")
-    needs_reprocessing: bool = Field(description="Whether reprocessing is recommended")
-    success_indicators: List[str] = Field(description="What went well in the processing")
-
-
-async def validator_node(state: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Intelligently validate processing results using LLM assessment.
-    """
-
-    processed_files = state.get("processed_files", {})
-    errors = state.get("errors", [])
-    completed_steps = state.get("completed_steps", [])
-    execution_plan = state.get("execution_plan", [])
-    user_request = state.get("user_request", "")
-    scripts = state.get("scripts", {})
-    current_needs_reprocessing = state.get("needs_reprocessing", False)
-
-    # Use LLM to perform intelligent validation
-    validation_results = await perform_llm_validation(
-        user_request=user_request,
-        processed_files=processed_files,
-        errors=errors,
-        completed_steps=completed_steps,
-        execution_plan=execution_plan,
-        scripts=scripts,
-        current_needs_reprocessing=current_needs_reprocessing
-    )
-
-    # Create validation summary
-    validation_summary = create_validation_summary(validation_results)
-    messages = state.get("messages", [])
-    messages.append(AIMessage(content=validation_summary))
-
-    return {
-        "needs_reprocessing": validation_results.needs_reprocessing,
-        "processing_metadata": {
-            "validation_results": validation_results.model_dump(),
-            "validation_timestamp": get_current_timestamp()
-        },
-        "messages": messages
-    }
-
-
-async def perform_llm_validation(
-    user_request: str,
-    processed_files: Dict[str, str],
-    errors: List[str],
-    completed_steps: List[str],
-    execution_plan: List[Dict[str, Any]],
-    scripts: Dict[str, Any],
-    current_needs_reprocessing: bool
-) -> ValidationResults:
-    """Use LLM to intelligently validate processing results."""
-
-    # Create system message for validation
-    system_message = create_validation_system_message()
-
-    # Create user message with processing context
-    user_message = create_validation_context_message(
-        user_request, processed_files, errors, completed_steps,
-        execution_plan, scripts, current_needs_reprocessing
-    )
-
-    # Set up LLM with structured output
-    llm = ChatOpenAI(model="gpt-4o", temperature=0.1)
-    parser = PydanticOutputParser(pydantic_object=ValidationResults)
-
-    prompt_messages = [
-        system_message,
-        HumanMessage(content=user_message),
-        HumanMessage(content=parser.get_format_instructions())
-    ]
-
-    # Get LLM assessment
-    try:
-        response = await llm.ainvoke(prompt_messages)
-        validation_results = parser.parse(response.content)
-        return validation_results
-    except Exception as e:
-        # Fallback validation if LLM fails
-        return create_fallback_validation(processed_files, errors, completed_steps, execution_plan)
-
-
-def create_validation_system_message() -> SystemMessage:
-    """Create system message for LLM validation."""
-    return SystemMessage(content=VALIDATOR_SYSTEM_PROMPT)
-
-
-def create_validation_context_message(
-    user_request: str,
-    processed_files: Dict[str, str],
-    errors: List[str],
-    completed_steps: List[str],
-    execution_plan: List[Dict[str, Any]],
-    scripts: Dict[str, Any],
-    current_needs_reprocessing: bool
-) -> str:
-    """Create context message with all processing information."""
-
-    plan_str = "\n".join([
-        f"{i}. {step.get('tool', 'unknown')}: {step.get('description', 'No description')}"
-        for i, step in enumerate(execution_plan, 1)
+from langchain_core.prompts import ChatPromptTemplate
+from src.state import AgentState
+from operator import itemgetter
+from langchain_core.runnables import RunnableParallel
+
+def validator_node(state: AgentState) -> AgentState:
+    llm = ChatOpenAI(model="gpt-4.1")
+    llm = llm.with_structured_output(AgentState)
+
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", "You are validator that checks the steps taken and output if something is wrong. Give feedback to flow."),
+        ("user", "Current state: {state}")
     ])
 
-    completed_steps_str = "\n".join([f"- {step}" for step in completed_steps])
-
-    if processed_files:
-        processing_results_str = f"Successfully processed {len(processed_files)} file(s)\n"
-        for original, processed in processed_files.items():
-            filename = original.split('/')[-1] if '/' in original else original
-            processed_name = processed.split('/')[-1] if '/' in processed else processed
-            processing_results_str += f" • {filename} → {processed_name}\n"
-    else:
-        processing_results_str = "No files were successfully processed\n"
-
-    if scripts:
-        transcript_analysis_str = ""
-        for file_url, script_data in scripts.items():
-            filename = file_url.split('/')[-1] if '/' in file_url else file_url
-            transcript = script_data.get("transcript", "")
-            filler_count = len(script_data.get("filler_words", []))
-            transcript_analysis_str += f"- {filename}: {len(transcript)} chars, {filler_count} filler words detected\n"
-    else:
-        transcript_analysis_str = "No transcript data available.\n"
-
-    errors_str = "\n".join([f"- {error}" for error in errors]) if errors else "None"
-
-    reprocessing_note_str = "\n**Note:** This is already a reprocessing attempt.\n" if current_needs_reprocessing else ""
-
-    return VALIDATOR_USER_PROMPT_TEMPLATE.format(
-        user_request=user_request,
-        plan_steps=len(execution_plan),
-        execution_plan=plan_str,
-        completed_steps_count=len(completed_steps),
-        completed_steps=completed_steps_str,
-        processing_results=processing_results_str,
-        transcript_analysis=transcript_analysis_str,
-        error_count=len(errors),
-        errors=errors_str,
-        reprocessing_note=reprocessing_note_str
-    )
-
-
-def create_fallback_validation(
-    processed_files: Dict[str, str],
-    errors: List[str],
-    completed_steps: List[str],
-    execution_plan: List[Dict[str, Any]]
-) -> ValidationResults:
-    """Create fallback validation if LLM assessment fails."""
-
-    total_steps = len(execution_plan) if execution_plan else 1
-    successful_steps = len([step for step in completed_steps if step.startswith("✅")])
-    completion_rate = successful_steps / total_steps if total_steps > 0 else 0
-
-    has_processed_files = len(processed_files) > 0
-    has_critical_errors = any("failed" in error.lower() or "error" in error.lower() for error in errors)
-
-    if has_processed_files and completion_rate >= 0.7:
-        overall_status = "success"
-        quality_score = 0.8
-    elif has_processed_files and completion_rate >= 0.3:
-        overall_status = "partial_success"
-        quality_score = 0.5
-    else:
-        overall_status = "failed"
-        quality_score = 0.2
-
-    return ValidationResults(
-        overall_status=overall_status,
-        completion_rate=completion_rate,
-        quality_score=quality_score,
-        critical_errors=errors if has_critical_errors else [],
-        warnings=errors if not has_critical_errors else [],
-        recommendations=[
-            "Check processing logs for detailed error information",
-            "Verify audio file formats and accessibility",
-            "Consider simplifying the processing request"
-        ],
-        needs_reprocessing=has_critical_errors and completion_rate > 0.1 and completion_rate < 0.8,
-        success_indicators=["Some processing steps completed"] if completed_steps else []
+    chain = (
+        RunnableParallel({
+            "state": itemgetter("state")
+        })
+        | prompt
+        | llm
     )
 
-
-def create_validation_summary(validation_results: ValidationResults) -> str:
-    """Create a human-readable summary of validation results."""
-
-    # Status emoji mapping
-    status_emoji = {
-        "success": "✅",
-        "partial_success": "⚠️",
-        "failed": "❌"
-    }
-
-    emoji = status_emoji.get(validation_results.overall_status, "❓")
-
-    summary = f"{emoji} **Intelligent Validation Results**\n\n"
-
-    # Overall assessment
-    summary += f"**Overall Status**: {validation_results.overall_status.replace('_', ' ').title()}\n"
-    summary += f"**Completion Rate**: {validation_results.completion_rate:.1%}\n"
-    summary += f"**Quality Score**: {validation_results.quality_score:.1%}\n\n"
-
-    # Success indicators
-    if validation_results.success_indicators:
-        summary += "**✨ What Went Well:**\n"
-        for indicator in validation_results.success_indicators:
-            summary += f"- {indicator}\n"
-        summary += "\n"
-
-    # Critical errors
-    if validation_results.critical_errors:
-        summary += f"**🚨 Critical Issues ({len(validation_results.critical_errors)}):**\n"
-        for error in validation_results.critical_errors[:3]:
-            summary += f"- {error}\n"
-        if len(validation_results.critical_errors) > 3:
-            summary += f"- ... and {len(validation_results.critical_errors) - 3} more\n"
-        summary += "\n"
-
-    # Warnings
-    if validation_results.warnings:
-        summary += f"**⚠️ Warnings ({len(validation_results.warnings)}):**\n"
-        for warning in validation_results.warnings[:2]:
-            summary += f"- {warning}\n"
-        if len(validation_results.warnings) > 2:
-            summary += f"- ... and {len(validation_results.warnings) - 2} more\n"
-        summary += "\n"
-
-    # Recommendations
-    if validation_results.recommendations:
-        summary += "**🎯 Recommendations:**\n"
-        for rec in validation_results.recommendations[:4]:
-            summary += f"- {rec}\n"
-        if len(validation_results.recommendations) > 4:
-            summary += f"- ... and {len(validation_results.recommendations) - 4} more\n"
-        summary += "\n"
-
-    # Reprocessing decision
-    if validation_results.needs_reprocessing:
-        summary += "🔄 **Reprocessing Recommended**: The LLM assessment suggests reprocessing could improve results."
+    return chain.invoke({"state": state})
+
+def validator_node_router(state: AgentState) -> str:
+    if state.validator_feedback == "":
+        return "chat"
     else:
-        if validation_results.overall_status == "success":
-            summary += "🎉 **Processing Complete**: High-quality results achieved!"
-        else:
-            summary += "⏹️ **Processing Complete**: Reprocessing not recommended based on current assessment."
-
-    return summary
-
-def get_current_timestamp() -> str:
-    """Get current timestamp for metadata."""
-    return datetime.datetime.now().isoformat()
+        return "planner"
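
Note: for orientation, here is a minimal sketch (not part of this commit) of how the simplified validator_node and validator_node_router could be wired into a LangGraph graph. The "planner" and "chat" node names are inferred from the router's return values; planner_node and chat_node below are hypothetical stubs standing in for the repo's real nodes.

from langgraph.graph import StateGraph, END

from src.state import AgentState
from src.nodes.validator import validator_node, validator_node_router


def planner_node(state: AgentState) -> AgentState:
    # Hypothetical stub; the real planner lives elsewhere in src/nodes.
    return state


def chat_node(state: AgentState) -> AgentState:
    # Hypothetical stub; the real chat node lives elsewhere in src/nodes.
    return state


builder = StateGraph(AgentState)
builder.add_node("planner", planner_node)
builder.add_node("validator", validator_node)
builder.add_node("chat", chat_node)
builder.set_entry_point("planner")
builder.add_edge("planner", "validator")
# Empty validator_feedback means the run passed; otherwise loop back and replan.
builder.add_conditional_edges("validator", validator_node_router, {"chat": "chat", "planner": "planner"})
builder.add_edge("chat", END)
graph = builder.compile()

Binding the LLM with with_structured_output(AgentState) makes the validator emit a full state object rather than free text, which is what lets its output merge straight back into the graph state.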
 
src/state.py CHANGED
@@ -1,46 +1,11 @@
-"""
-Graph state definition for the audio processing agent.
-"""
+from pydantic import BaseModel, Field
 
-from typing import List, Dict, Any, Optional, Annotated
-from langchain_core.messages import BaseMessage
-from langgraph.graph.message import add_messages
-
-
-class AudioProcessingState:
-    """State schema for the audio processing graph."""
-
-    # Chat history
-    messages: Annotated[List[BaseMessage], add_messages]
-
-    # Audio files provided by user
-    audio_files: List[str]  # URLs or paths to audio files
-
-    # User's processing request
-    user_request: str
-
-    # Processing type determined by router
-    processing_type: str  # "chat", "audio_processing", "dialogue_generation"
-
-    # Generated scripts with timestamps
-    scripts: Dict[str, Any]  # {file_url: {transcript: str, timestamps: List}}
-
-    # Execution plan created by planner
-    execution_plan: List[Dict[str, Any]]  # List of tool calls with parameters
-
-    # Processing results
-    processed_files: Dict[str, str]  # {original_url: processed_url}
-
-    # Processing steps completed
-    completed_steps: List[str]
-
-    # Final output
-    final_audio_url: Optional[str]
-    final_response: str
-
-    # Error handling
-    errors: List[str]
-    needs_reprocessing: bool
-
-    # Metadata
-    processing_metadata: Dict[str, Any]
+class AgentState(BaseModel):
+    steps_details: list[str] = Field(description="The steps that have been completed.", default=[])
+    user_input: str = Field(description="The user's input.", default="")
+    plan: str = Field(description="The plan for the user.", default="")
+    final_response: str = Field(description="The final response to the user.", default="")
+    requires_processing: bool = Field(description="Whether the response requires detailed audio processing.", default=False)
+    validator_feedback: str = Field(description="The feedback from the validator. Indicates steps must be taken again.", default="")
+    input_audio_files: list[str] = Field(description="The input audio files.", default=[])
+    output_audio_files: list[str] = Field(description="The output audio files.", default=[])
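
A small usage sketch (again an assumption, not from the commit): every field has a default, so a state can be built from the user's input alone, and the empty-string default on validator_feedback is what routes a validated run to "chat".

from src.state import AgentState

state = AgentState(
    user_input="Remove the filler words from intro.wav",  # example values
    input_audio_files=["intro.wav"],
)
assert state.validator_feedback == ""  # fresh state -> validator_node_router returns "chat"

state.validator_feedback = "Step 2 produced no output file; replan."
# validator_node_router(state) would now return "planner" for another pass.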