YigitSekerci committed
Commit 757decb · 1 Parent(s): cc75613

simplify agent
agent_graph.png ADDED
flow.svg DELETED
poetry.lock CHANGED
@@ -45,6 +45,22 @@ doc = ["Sphinx (>=8.2,<9.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)",
 test = ["anyio[trio]", "blockbuster (>=1.5.23)", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1) ; python_version >= \"3.10\"", "uvloop (>=0.21) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\" and python_version < \"3.14\""]
 trio = ["trio (>=0.26.1)"]

+[[package]]
+name = "asttokens"
+version = "3.0.0"
+description = "Annotate AST trees with source code positions"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2"},
+    {file = "asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7"},
+]
+
+[package.extras]
+astroid = ["astroid (>=2,<4)"]
+test = ["astroid (>=2,<4)", "pytest", "pytest-cov", "pytest-xdist"]
+
 [[package]]
 name = "audioop-lts"
 version = "0.2.1"
@@ -306,12 +322,24 @@ description = "Cross-platform colored terminal text."
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
 groups = ["main"]
-markers = "platform_system == \"Windows\""
+markers = "platform_system == \"Windows\" or sys_platform == \"win32\""
 files = [
     {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
     {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
 ]

+[[package]]
+name = "decorator"
+version = "5.2.1"
+description = "Decorators for Humans"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a"},
+    {file = "decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360"},
+]
+
 [[package]]
 name = "distro"
 version = "1.9.0"
@@ -338,6 +366,21 @@ files = [
 [package.dependencies]
 python-dotenv = "*"

+[[package]]
+name = "executing"
+version = "2.2.0"
+description = "Get the currently executing AST node of a frame, and other information"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa"},
+    {file = "executing-2.2.0.tar.gz", hash = "sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755"},
+]
+
+[package.extras]
+tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich ; python_version >= \"3.11\""]
+
 [[package]]
 name = "fastapi"
 version = "0.115.12"
@@ -729,6 +772,73 @@ files = [
 [package.extras]
 all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"]

+[[package]]
+name = "ipython"
+version = "9.3.0"
+description = "IPython: Productive Interactive Computing"
+optional = false
+python-versions = ">=3.11"
+groups = ["main"]
+files = [
+    {file = "ipython-9.3.0-py3-none-any.whl", hash = "sha256:1a0b6dd9221a1f5dddf725b57ac0cb6fddc7b5f470576231ae9162b9b3455a04"},
+    {file = "ipython-9.3.0.tar.gz", hash = "sha256:79eb896f9f23f50ad16c3bc205f686f6e030ad246cc309c6279a242b14afe9d8"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "sys_platform == \"win32\""}
+decorator = "*"
+ipython-pygments-lexers = "*"
+jedi = ">=0.16"
+matplotlib-inline = "*"
+pexpect = {version = ">4.3", markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""}
+prompt_toolkit = ">=3.0.41,<3.1.0"
+pygments = ">=2.4.0"
+stack_data = "*"
+traitlets = ">=5.13.0"
+
+[package.extras]
+all = ["ipython[doc,matplotlib,test,test-extra]"]
+black = ["black"]
+doc = ["docrepr", "exceptiongroup", "intersphinx_registry", "ipykernel", "ipython[test]", "matplotlib", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "sphinx_toml (==0.0.4)", "typing_extensions"]
+matplotlib = ["matplotlib"]
+test = ["packaging", "pytest", "pytest-asyncio (<0.22)", "testpath"]
+test-extra = ["curio", "ipykernel", "ipython[test]", "jupyter_ai", "matplotlib (!=3.2.0)", "nbclient", "nbformat", "numpy (>=1.23)", "pandas", "trio"]
+
+[[package]]
+name = "ipython-pygments-lexers"
+version = "1.1.1"
+description = "Defines a variety of Pygments lexers for highlighting IPython code."
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "ipython_pygments_lexers-1.1.1-py3-none-any.whl", hash = "sha256:a9462224a505ade19a605f71f8fa63c2048833ce50abc86768a0d81d876dc81c"},
+    {file = "ipython_pygments_lexers-1.1.1.tar.gz", hash = "sha256:09c0138009e56b6854f9535736f4171d855c8c08a563a0dcd8022f78355c7e81"},
+]
+
+[package.dependencies]
+pygments = "*"
+
+[[package]]
+name = "jedi"
+version = "0.19.2"
+description = "An autocompletion tool for Python that can be used for text editors."
+optional = false
+python-versions = ">=3.6"
+groups = ["main"]
+files = [
+    {file = "jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9"},
+    {file = "jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0"},
+]
+
+[package.dependencies]
+parso = ">=0.8.4,<0.9.0"
+
+[package.extras]
+docs = ["Jinja2 (==2.11.3)", "MarkupSafe (==1.1.1)", "Pygments (==2.8.1)", "alabaster (==0.7.12)", "babel (==2.9.1)", "chardet (==4.0.0)", "commonmark (==0.8.1)", "docutils (==0.17.1)", "future (==0.18.2)", "idna (==2.10)", "imagesize (==1.2.0)", "mock (==1.0.1)", "packaging (==20.9)", "pyparsing (==2.4.7)", "pytz (==2021.1)", "readthedocs-sphinx-ext (==2.1.4)", "recommonmark (==0.5.0)", "requests (==2.25.1)", "six (==1.15.0)", "snowballstemmer (==2.1.0)", "sphinx (==1.8.5)", "sphinx-rtd-theme (==0.4.3)", "sphinxcontrib-serializinghtml (==1.1.4)", "sphinxcontrib-websupport (==1.2.4)", "urllib3 (==1.26.4)"]
+qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"]
+testing = ["Django", "attrs", "colorama", "docopt", "pytest (<9.0.0)"]
+
 [[package]]
 name = "jinja2"
 version = "3.1.6"
@@ -1162,6 +1272,21 @@ files = [
     {file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"},
 ]

+[[package]]
+name = "matplotlib-inline"
+version = "0.1.7"
+description = "Inline Matplotlib backend for Jupyter"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca"},
+    {file = "matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90"},
+]
+
+[package.dependencies]
+traitlets = "*"
+
 [[package]]
 name = "mcp"
 version = "1.9.3"
@@ -1523,6 +1648,38 @@ sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-d
 test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"]
 xml = ["lxml (>=4.9.2)"]

+[[package]]
+name = "parso"
+version = "0.8.4"
+description = "A Python Parser"
+optional = false
+python-versions = ">=3.6"
+groups = ["main"]
+files = [
+    {file = "parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18"},
+    {file = "parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d"},
+]
+
+[package.extras]
+qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"]
+testing = ["docopt", "pytest"]
+
+[[package]]
+name = "pexpect"
+version = "4.9.0"
+description = "Pexpect allows easy control of interactive console applications."
+optional = false
+python-versions = "*"
+groups = ["main"]
+markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""
+files = [
+    {file = "pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523"},
+    {file = "pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f"},
+]
+
+[package.dependencies]
+ptyprocess = ">=0.5"
+
 [[package]]
 name = "pillow"
 version = "11.2.1"
@@ -1623,6 +1780,49 @@ tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "ole
 typing = ["typing-extensions ; python_version < \"3.10\""]
 xmp = ["defusedxml"]

+[[package]]
+name = "prompt-toolkit"
+version = "3.0.51"
+description = "Library for building powerful interactive command lines in Python"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "prompt_toolkit-3.0.51-py3-none-any.whl", hash = "sha256:52742911fde84e2d423e2f9a4cf1de7d7ac4e51958f648d9540e0fb8db077b07"},
+    {file = "prompt_toolkit-3.0.51.tar.gz", hash = "sha256:931a162e3b27fc90c86f1b48bb1fb2c528c2761475e57c9c06de13311c7b54ed"},
+]
+
+[package.dependencies]
+wcwidth = "*"
+
+[[package]]
+name = "ptyprocess"
+version = "0.7.0"
+description = "Run a subprocess in a pseudo terminal"
+optional = false
+python-versions = "*"
+groups = ["main"]
+markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""
+files = [
+    {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"},
+    {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"},
+]
+
+[[package]]
+name = "pure-eval"
+version = "0.2.3"
+description = "Safely evaluate AST nodes without side effects"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0"},
+    {file = "pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42"},
+]
+
+[package.extras]
+tests = ["pytest"]
+
 [[package]]
 name = "pycparser"
 version = "2.22"
@@ -1813,7 +2013,6 @@ description = "Pygments is a syntax highlighting package written in Python."
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
-markers = "sys_platform != \"emscripten\""
 files = [
     {file = "pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c"},
     {file = "pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f"},
@@ -2317,6 +2516,26 @@ examples = ["aiosqlite (>=0.21.0)", "fastapi (>=0.115.12)", "sqlalchemy[asyncio,
 granian = ["granian (>=2.3.1)"]
 uvicorn = ["uvicorn (>=0.34.0)"]

+[[package]]
+name = "stack-data"
+version = "0.6.3"
+description = "Extract data from python stack frames and tracebacks for informative displays"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695"},
+    {file = "stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9"},
+]
+
+[package.dependencies]
+asttokens = ">=2.1.0"
+executing = ">=1.2.0"
+pure-eval = "*"
+
+[package.extras]
+tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"]
+
 [[package]]
 name = "starlette"
 version = "0.46.2"
@@ -2433,6 +2652,22 @@ notebook = ["ipywidgets (>=6)"]
 slack = ["slack-sdk"]
 telegram = ["requests"]

+[[package]]
+name = "traitlets"
+version = "5.14.3"
+description = "Traitlets Python configuration system"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f"},
+    {file = "traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7"},
+]
+
+[package.extras]
+docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"]
+test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<8.2)", "pytest-mock", "pytest-mypy-testing"]
+
 [[package]]
 name = "typer"
 version = "0.16.0"
@@ -2529,6 +2764,18 @@ h11 = ">=0.8"
 [package.extras]
 standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.13)", "websockets (>=10.4)"]

+[[package]]
+name = "wcwidth"
+version = "0.2.13"
+description = "Measures the displayed width of unicode strings in a terminal"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"},
+    {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"},
+]
+
 [[package]]
 name = "websockets"
 version = "15.0.1"
@@ -2857,4 +3104,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.13,<4.0"
-content-hash = "b0e69a9374ac8a038b59c00da37a793818e65a8e0b3601442de9a9758bea100b"
+content-hash = "1c1c843aa68874643d9202518e0a9f2b71885314c4007df9916b81ffb66a7d0d"
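Note: every package added above (asttokens, decorator, executing, ipython-pygments-lexers, jedi, matplotlib-inline, parso, pexpect, prompt-toolkit, ptyprocess, pure-eval, stack-data, traitlets, wcwidth) is part of the transitive dependency closure of the new top-level ipython requirement declared in pyproject.toml below; the widened colorama marker and the dropped pygments marker fall out of the same resolution, since ipython requires both unconditionally on their respective platforms.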
pyproject.toml CHANGED
@@ -13,7 +13,8 @@ dependencies = [
     "langchain-openai (>=0.3.21,<0.4.0)",
     "langchain-mcp-adapters (>=0.1.7,<0.2.0)",
     "dotenv (>=0.9.9,<0.10.0)",
-    "langchain (>=0.3.25,<0.4.0)"
+    "langchain (>=0.3.25,<0.4.0)",
+    "ipython (>=9.3.0,<10.0.0)"
 ]

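The only dependency change is the new ipython requirement. Together with agent_graph.png being added (and the hand-drawn flow.svg removed), this suggests the workflow diagram is now rendered programmatically from the compiled graph. A minimal sketch of how such a PNG can be produced, assuming the standard LangGraph drawing API and the AudioAgent from src/agent.py below (this exact snippet is not part of the commit):

# Sketch: render the compiled LangGraph workflow to agent_graph.png.
import asyncio
from IPython.display import Image, display
from src.agent import AudioAgent


async def render() -> None:
    agent = AudioAgent()
    await agent.initialize()
    # get_graph() returns a drawable langchain_core Graph; draw_mermaid_png()
    # rasterizes it to PNG bytes.
    png_bytes = agent.graph.get_graph().draw_mermaid_png()
    with open("agent_graph.png", "wb") as f:
        f.write(png_bytes)
    display(Image(png_bytes))  # inline preview when run under IPython/Jupyter

asyncio.run(render())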
src/agent.py CHANGED
@@ -1,77 +1,16 @@
 import asyncio
-from typing import Dict, Any, TypedDict, Annotated, List
 from dotenv import load_dotenv

-from langchain_core.messages import BaseMessage, AIMessage
 from langchain_mcp_adapters.client import MultiServerMCPClient
-from langgraph.graph import StateGraph, END
-from langgraph.graph.message import add_messages
-from langgraph.checkpoint.memory import MemorySaver
-
-from .nodes import (
-    router_node,
-    script_generator_node,
-    planner_node,
-    audio_processor_node,
-    validator_node,
-    final_response_node
-)
-from .nodes.chat import chat_node
-
-
-class AudioProcessingState(TypedDict):
-    """State schema for the audio processing graph."""
-
-    # Chat history
-    messages: Annotated[List[BaseMessage], add_messages]
-
-    # Audio files provided by user
-    audio_files: List[str]
-
-    # User's processing request
-    user_request: str
-
-    # Processing type determined by router
-    processing_type: str
-
-    # Generated scripts with timestamps
-    scripts: Dict[str, Any]
-
-    # Execution plan created by planner
-    execution_plan: List[Dict[str, Any]]
-
-    # Processing results
-    processed_files: Dict[str, str]
-
-    # Processing steps completed
-    completed_steps: List[str]
-
-    # Final output
-    final_audio_url: str
-    final_response: str
-
-    # Error handling
-    errors: List[str]
-    needs_reprocessing: bool
-
-    # Metadata
-    processing_metadata: Dict[str, Any]
+from langgraph.graph import StateGraph, END, START
+
+from .state import AgentState
+from .nodes.chat import chat_node, chat_node_router
+from .nodes.planner import planner_node
+from .nodes.processor import processor_node
+from .nodes.validator import validator_node, validator_node_router

 class AudioAgent:
-    """
-    Advanced LangGraph-based audio processing agent with custom nodes.
-
-    Handles audio file processing through a sophisticated workflow:
-    1. Router - Determines processing type
-    2. Chat or Audio Processing Pipeline
-    3. Script Generation - Creates timestamped transcripts
-    4. Planning - Creates execution plan
-    5. Processing - Executes audio tools
-    6. Validation - Checks results and determines reprocessing
-    7. Final Response - Formats output for user
-    """
-
     def __init__(
         self,
         model_name: str = "gpt-4o",
@@ -80,229 +19,72 @@ class AudioAgent:
         load_dotenv()
         self.model_name = model_name
         self.server_url = server_url
+        self.graph = None

-        # SSE client for audio tools
         self._client = MultiServerMCPClient({
             "audio-tools": {"url": self.server_url, "transport": "sse"}
         })

-        self._graph = None
-        self._tools = []
-
     @property
     def is_initialized(self) -> bool:
-        return self._graph is not None
-
-    async def initialize(self) -> None:
-        """Initialize the LangGraph workflow with audio tools."""
-        if self.is_initialized:
-            return
-
-        # Get tools from MCP server
-        self._tools = await self._client.get_tools()
-        if not self._tools:
-            raise RuntimeError("No tools available from MCP server")
-
-        # Build the graph
-        self._graph = self._build_graph()
+        return self.graph is not None

-    def _build_graph(self) -> StateGraph:
+    async def _build_graph(self) -> None:
         """Build the LangGraph workflow."""

-        # Create the state graph
-        workflow = StateGraph(AudioProcessingState)
-
-        # Add nodes
-        workflow.add_node("router", self._router_async)
-        workflow.add_node("chat", self._chat_with_tools)
-        workflow.add_node("script_generator", self._script_generator_with_tools)
-        workflow.add_node("planner", self._planner_async)
-        workflow.add_node("audio_processor", self._audio_processor_with_tools)
-        workflow.add_node("validator", self._validator_async)
-        workflow.add_node("response_formatter", self._final_response_async)
-
-        # Set entry point
-        workflow.set_entry_point("router")
-
-        # Add conditional edges based on processing type
-        workflow.add_conditional_edges(
-            "router",
-            self._route_processing_type,
+        _graph = StateGraph(AgentState)
+
+        _graph.add_node("chat", chat_node)
+        _graph.add_conditional_edges(
+            "chat",
+            chat_node_router,
             {
-                "chat": "chat",
-                "audio_processing": "script_generator",
-                "dialogue_generation": "script_generator"
+                "planner": "planner",
+                "end": END
             }
         )
-
-        # Chat flow
-        workflow.add_edge("chat", "response_formatter")
-
-        # Audio processing flow
-        workflow.add_edge("script_generator", "planner")
-        workflow.add_edge("planner", "audio_processor")
-        workflow.add_edge("audio_processor", "validator")
-
-        # Validation flow with conditional reprocessing
-        workflow.add_conditional_edges(
+
+        _graph.add_node("planner", planner_node)
+        _graph.add_edge("planner", "audio_processor")
+
+        _graph.add_node("audio_processor", processor_node)
+        _graph.add_edge("audio_processor", "validator")
+
+        _graph.add_node("validator", validator_node)
+        _graph.add_conditional_edges(
             "validator",
-            self._check_reprocessing_need,
+            validator_node_router,
             {
-                "reprocess": "planner",  # Go back to planning
-                "complete": "response_formatter"
+                "chat": "chat",
+                "planner": "planner"
             }
         )
-
-        # Final response leads to end
-        workflow.add_edge("response_formatter", END)
-
-        # Compile with memory for conversation history
-        memory = MemorySaver()
-        return workflow.compile(checkpointer=memory)

-    async def _chat_with_tools(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Chat node with tools access."""
-        return await chat_node(state, self._tools)
+        _graph.add_edge(START, "chat")
+        _graph.add_edge("chat", END)
+        self.graph = _graph.compile()

-    async def _script_generator_with_tools(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Script generator node with tools access."""
-        return await script_generator_node(state, self._tools)
-
-    async def _audio_processor_with_tools(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Audio processor node with tools access."""
-        return await audio_processor_node(state, self._tools)
-
-    async def _validator_async(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Async validator node wrapper."""
-        return await validator_node(state)
-
-    async def _router_async(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Async router node wrapper."""
-        return await router_node(state)
-
-    async def _planner_async(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Async planner node wrapper."""
-        return await planner_node(state)
-
-    async def _final_response_async(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Async final response node wrapper."""
-        return await final_response_node(state)
-
-    def _route_processing_type(self, state: Dict[str, Any]) -> str:
-        """Route based on processing type."""
-        return state.get("processing_type", "chat")
-
-    def _check_reprocessing_need(self, state: Dict[str, Any]) -> str:
-        """Check if reprocessing is needed."""
-        if state.get("needs_reprocessing", False):
-            return "reprocess"
-        return "complete"
-
-    def process_user_input(self, user_input: str) -> Dict[str, Any]:
-        """Process user input and create initial state."""
-        from langchain_core.messages import HumanMessage
-
-        return {
-            "messages": [HumanMessage(content=user_input)],
-            "audio_files": [],
-            "user_request": "",
-            "processing_type": "",
-            "scripts": {},
-            "execution_plan": [],
-            "processed_files": {},
-            "completed_steps": [],
-            "final_audio_url": "",
-            "final_response": "",
-            "errors": [],
-            "needs_reprocessing": False,
-            "processing_metadata": {}
-        }
-
-    async def chat(self, prompt: str) -> Dict[str, Any]:
-        """
-        One-shot chat: returns the full processing result.
-        """
-        if not self.is_initialized:
-            await self.initialize()
-
-        config = {"configurable": {"thread_id": "audio_agent_session"}}
-        initial_state = self.process_user_input(prompt)
-
-        result = await self._graph.ainvoke(initial_state, config)
-        return result
-
-    async def stream_chat(self, prompt: str):
-        """
-        Streaming chat: yields intermediate results as processing continues.
-        """
-        if not self.is_initialized:
-            await self.initialize()
+    async def initialize(self) -> None:
+        """Initialize the LangGraph workflow with audio tools."""
+        if self.is_initialized:
+            return

-        config = {"configurable": {"thread_id": "audio_agent_session"}}
-        initial_state = self.process_user_input(prompt)
-
-        # Special handling for chat-only requests to enable streaming
-        processing_type = None
-
-        # First, run the router to determine processing type
-        router_result = await self._graph.ainvoke(initial_state, config)
-        processing_type = router_result.get("processing_type", "")
-
-        if processing_type == "chat":
-            # For chat requests, use direct streaming from the chat node
-            from .nodes.chat import stream_chat_response
-            messages = initial_state.get("messages", [])
-
-            accumulated_content = ""
-            async for chunk in stream_chat_response(messages, self._tools):
-                accumulated_content += chunk
-                yield chunk, "chat"
-
-            # Update the state with the final response
-            final_state = router_result.copy()
-            final_state["messages"].append(AIMessage(content=accumulated_content))
-            final_state["final_response"] = accumulated_content
-
-        else:
-            # For audio processing, use the normal graph streaming
-            async for chunk in self._graph.astream(initial_state, config):
-                # Extract the node name and content
-                for node_name, node_output in chunk.items():
-                    if node_name == "__end__":
-                        continue
-
-                    # Get the latest message if available
-                    messages = node_output.get("messages", [])
-                    if messages and hasattr(messages[-1], 'content'):
-                        content = messages[-1].content
-                        if content:
-                            yield content, node_name
-
-                    # Also yield final audio URL if available
-                    final_audio_url = node_output.get("final_audio_url", "")
-                    if final_audio_url:
-                        yield f"\n🎵 **Audio Ready**: [{final_audio_url}]({final_audio_url})", node_name
+        self.tools = await self._client.get_tools()
+        if not self.tools:
+            raise RuntimeError("No tools available from MCP server")
+
+        await self._build_graph()


 async def main():
     """Test the agent with various scenarios."""
     agent = AudioAgent()
-
-    # Test 1: Chat about capabilities
-    print("=== Test 1: Chat Query ===")
-    result = await agent.chat("What audio tools are available?")
-    print("Final Response:", result.get("final_response", ""))
-    print()
-
-    # Test 2: Audio processing request
-    print("=== Test 2: Audio Processing ===")
-    audio_request = "Process this audio file https://example.com/audio.mp3 - remove filler words and normalize volume"
-
-    print("Streaming response:")
-    async for content, node in agent.stream_chat(audio_request):
-        print(f"[{node}] {content[:100]}..." if len(content) > 100 else f"[{node}] {content}")
-    print()
+    await agent.initialize()
+
+    res = agent.graph.invoke({
+        "user_input": "I want to edit my audio file",
+    })
+
+    print(res)


 if __name__ == "__main__":
     asyncio.run(main())
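The new code imports AgentState from src/state.py, which is not included in this commit. Judging only from how it is used here (passed to StateGraph(...), returned via llm.with_structured_output(AgentState) in the chat node, read as state.requires_processing, and seeded with a user_input key in main()), it is presumably a Pydantic model along these lines. This is a hypothetical sketch; only user_input and requires_processing are confirmed by the diff, and any other fields are guesses:

# Hypothetical reconstruction of src/state.py (not part of this commit).
from pydantic import BaseModel


class AgentState(BaseModel):
    user_input: str = ""               # raw user request, seeded in main()
    requires_processing: bool = False  # set by chat_node, read by chat_node_router
    response: str = ""                 # illustrative only: reply text for the "end" branch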
src/nodes/__init__.py DELETED
@@ -1,19 +0,0 @@
-"""
-Audio processing graph nodes.
-"""
-
-from .router import router_node
-from .script_generator import script_generator_node
-from .planner import planner_node
-from .audio_processor import audio_processor_node
-from .validator import validator_node
-from .final_response import final_response_node
-
-__all__ = [
-    "router_node",
-    "script_generator_node",
-    "planner_node",
-    "audio_processor_node",
-    "validator_node",
-    "final_response_node"
-]
src/nodes/audio_processor.py DELETED
@@ -1,257 +0,0 @@
-"""
-Intelligent LLM-powered audio processor for executing planned processing steps.
-"""
-
-from typing import Dict, Any, List
-from langchain_core.messages import AIMessage, SystemMessage
-from langchain_openai import ChatOpenAI
-import json
-import re
-
-from .prompts import (
-    LLM_PROCESSING_DECISION_PROMPT_TEMPLATE,
-    LLM_PROCESSING_SUMMARY_PROMPT_TEMPLATE,
-)
-
-
-async def audio_processor_node(state: Dict[str, Any], tools: list) -> Dict[str, Any]:
-    """
-    Execute audio processing plan with intelligent LLM-guided decisions.
-    """
-
-    execution_plan = state.get("execution_plan", [])
-    user_request = state.get("user_request", "")
-    processed_files = state.get("processed_files", {})
-    completed_steps = state.get("completed_steps", [])
-    errors = state.get("errors", [])
-
-    if not execution_plan:
-        return create_no_plan_response(state)
-
-    # Create tool lookup
-    tool_lookup = {tool.name: tool for tool in tools}
-
-    # Track current file URLs through processing
-    current_file_urls = {}
-
-    # Execute plan with LLM guidance
-    for i, step in enumerate(execution_plan):
-        # Get LLM decision for this step
-        should_execute, adapted_params = await get_llm_processing_decision(
-            step, user_request, current_file_urls, completed_steps, errors, list(tool_lookup.keys())
-        )
-
-        if not should_execute:
-            completed_steps.append(f"⏭️ Skipped: {step.get('description', 'unknown')}")
-            continue
-
-        # Execute the step
-        step_result = await execute_processing_step(
-            step, adapted_params, tool_lookup, current_file_urls
-        )
-
-        if step_result["success"]:
-            if step_result["new_file_url"]:
-                original_file = step_result["original_file"]
-                current_file_urls[original_file] = step_result["new_file_url"]
-                processed_files[original_file] = step_result["new_file_url"]
-            completed_steps.append(f"✅ {step_result['description']}")
-        else:
-            errors.append(step_result["error"])
-            completed_steps.append(f"❌ Failed: {step.get('description', 'unknown')}")
-
-    # Create processing summary
-    processing_summary = await create_llm_processing_summary(
-        user_request, completed_steps, errors, processed_files
-    )
-
-    messages = state.get("messages", [])
-    messages.append(AIMessage(content=processing_summary))
-
-    return {
-        "processed_files": processed_files,
-        "completed_steps": completed_steps,
-        "errors": errors,
-        "needs_reprocessing": len(errors) > 0 and len(completed_steps) > 0,
-        "final_audio_url": get_primary_output_file(processed_files),
-        "messages": messages
-    }
-
-
-async def get_llm_processing_decision(
-    step: Dict[str, Any],
-    user_request: str,
-    current_file_urls: Dict[str, str],
-    completed_steps: List[str],
-    errors: List[str],
-    available_tools: List[str]
-) -> tuple:
-    """Use LLM to decide whether to execute step and with what parameters."""
-
-    llm = ChatOpenAI(model="gpt-4o", temperature=0.2)
-
-    prompt = LLM_PROCESSING_DECISION_PROMPT_TEMPLATE.format(
-        tool_name=step.get('tool', 'unknown'),
-        description=step.get('description', 'No description'),
-        planned_parameters=json.dumps(step.get('params', {}), indent=2),
-        user_request=user_request,
-        completed_steps_count=len(completed_steps),
-        error_count=len(errors),
-        available_tools=', '.join(available_tools),
-        current_file_urls=json.dumps(current_file_urls, indent=2),
-        recent_activity="\n".join(completed_steps[-3:]) if completed_steps else "No steps completed yet"
-    )
-
-    try:
-        response = await llm.ainvoke([SystemMessage(content=prompt)])
-        content = response.content.strip()
-
-        if content.startswith("SKIP"):
-            return False, {}
-        elif content.startswith("EXECUTE"):
-            lines = content.split('\n')
-            if len(lines) > 1 and lines[1].strip() != "NO_CHANGES":
-                try:
-                    adapted_params = json.loads(lines[1])
-                    return True, adapted_params
-                except json.JSONDecodeError:
-                    return True, {}
-            return True, {}
-        else:
-            return True, {}  # Default to execute if unclear
-
-    except Exception as e:
-        return True, {}  # Default to execute on error
-
-
-async def execute_processing_step(
-    step: Dict[str, Any],
-    adapted_params: Dict[str, Any],
-    tool_lookup: Dict[str, Any],
-    current_file_urls: Dict[str, str]
-) -> Dict[str, Any]:
-    """Execute a processing step with the given parameters."""
-
-    tool_name = step.get("tool", "")
-    params = step.get("params", {}).copy()
-    params.update(adapted_params)  # Apply LLM adaptations
-
-    if tool_name not in tool_lookup:
-        return {
-            "success": False,
-            "error": f"Tool '{tool_name}' not available",
-            "description": f"Failed to find tool {tool_name}",
-            "original_file": params.get("audio_file", ""),
-            "new_file_url": None
-        }
-
-    try:
-        # Update file URL if this file has been processed before
-        original_file = params.get("audio_file", "")
-        if original_file in current_file_urls:
-            params["audio_file"] = current_file_urls[original_file]
-
-        # Execute the tool
-        tool = tool_lookup[tool_name]
-        result = await tool.ainvoke(params)
-
-        # Extract new file URL from result
-        new_file_url = extract_file_url_from_result(result, params["audio_file"])
-
-        return {
-            "success": True,
-            "description": f"{tool_name}: {step.get('description', '')}",
-            "original_file": original_file,
-            "new_file_url": new_file_url if new_file_url != params["audio_file"] else None,
-            "result": result
-        }
-
-    except Exception as e:
-        return {
-            "success": False,
-            "error": f"{tool_name} failed: {str(e)}",
-            "description": f"Failed {tool_name}",
-            "original_file": params.get("audio_file", ""),
-            "new_file_url": None
-        }
-
-
-async def create_llm_processing_summary(
-    user_request: str,
-    completed_steps: List[str],
-    errors: List[str],
-    processed_files: Dict[str, str]
-) -> str:
-    """Create LLM-generated processing summary."""
-
-    llm = ChatOpenAI(model="gpt-4o", temperature=0.1)
-
-    prompt = LLM_PROCESSING_SUMMARY_PROMPT_TEMPLATE.format(
-        user_request=user_request,
-        completed_steps_count=len(completed_steps),
-        error_count=len(errors),
-        processed_files_count=len(processed_files),
-        step_details="\n".join(completed_steps[-5:]) if completed_steps else "No steps completed",
-        processed_files=json.dumps(processed_files, indent=2) if processed_files else "No files processed",
-        errors="\n".join(errors) if errors else "No errors"
-    )
-
-    try:
-        response = await llm.ainvoke([SystemMessage(content=prompt)])
-        return f"🎛️ **Processing Summary**\n\n{response.content}"
-    except Exception as e:
-        # Fallback summary
-        if processed_files:
-            return f"🎛️ **Processing Complete**\n\nSuccessfully processed {len(processed_files)} file(s) with {len(completed_steps)} steps completed."
-        else:
-            return f"⚠️ **Processing Issues**\n\nEncountered {len(errors)} error(s) during processing. Please check the issues above."
-
-
-def extract_file_url_from_result(result, original_file: str) -> str:
-    """Extract the new file URL from tool result."""
-
-    if hasattr(result, 'artifact') and result.artifact:
-        if hasattr(result.artifact, 'url'):
-            return result.artifact.url
-        elif hasattr(result.artifact, 'path'):
-            return result.artifact.path
-
-    if hasattr(result, 'content'):
-        content = result.content
-        # Look for URLs in the content
-        url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+\.(mp3|wav|m4a|flac|aac|ogg)'
-        urls = re.findall(url_pattern, content, re.IGNORECASE)
-        if urls:
-            return urls[0]
-
-    return original_file
-
-
-def get_primary_output_file(processed_files: Dict[str, str]) -> str:
-    """Get the primary output file URL."""
-
-    if not processed_files:
-        return ""
-
-    # If there's a combined file, prioritize that
-    for original, processed in processed_files.items():
-        if "combined" in processed.lower():
-            return processed
-
-    # Otherwise return the first processed file
-    return list(processed_files.values())[0]
-
-
-def create_no_plan_response(state: Dict[str, Any]) -> Dict[str, Any]:
-    """Handle case when no execution plan is available."""
-
-    messages = state.get("messages", [])
-    messages.append(AIMessage(content="❌ **No Execution Plan**: Cannot process audio without a plan."))
-
-    return {
-        "processed_files": {},
-        "completed_steps": [],
-        "errors": ["No execution plan available"],
-        "messages": messages,
-        "needs_reprocessing": False
-    }
src/nodes/chat.py CHANGED
@@ -1,181 +1,34 @@
-"""
-Chat node for handling general questions and conversations using streaming LLM.
-"""
-
-from typing import Dict, Any, List, AsyncGenerator
-from langchain_core.messages import AIMessage, SystemMessage
 from langchain_openai import ChatOpenAI
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.runnables import RunnableParallel
+from src.state import AgentState
+from operator import itemgetter

-from .prompts import (
-    CHAT_SYSTEM_PROMPT_BASE,
-    CHAT_SYSTEM_PROMPT_TOOLS_HEADER,
-    CHAT_SYSTEM_PROMPT_GUIDELINES,
-)
-
-# Export the streaming function for direct use
-__all__ = ["chat_node", "stream_chat_response"]
-
-
-async def chat_node(state: Dict[str, Any], tools: List = None) -> Dict[str, Any]:
+def chat_node(state: AgentState) -> AgentState:
     """
-    Handle general chat messages and questions using streaming LLM with tool awareness.
-    Returns the complete response after streaming is done.
+    Handle general questions and conversations with a structured-output LLM.
     """
-
-    messages = state.get("messages", [])
-
-    if not messages:
-        return {
-            "messages": messages,
-            "final_response": "No messages to process."
-        }
-
-    # Generate streaming response and collect it
-    response_content = ""
-    async for chunk in stream_chat_response(messages, tools):
-        response_content += chunk
-
-    # Add AI response to messages
-    messages.append(AIMessage(content=response_content))
-
-    return {
-        "messages": messages,
-        "final_response": response_content
-    }

+    llm = ChatOpenAI(model="gpt-4.1")
+    llm = llm.with_structured_output(AgentState)

-async def stream_chat_response(messages: List, tools: List = None) -> AsyncGenerator[str, None]:
-    """
-    Stream chat response chunks as they're generated by the LLM.
-    This is the core streaming function that yields content incrementally.
-    """
-
-    # Create system message with tool information
-    system_message = create_system_message_with_tools(tools or [])
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", "You are a helpful assistant that can answer questions and help with tasks."),
+        ("user", "Current state: {state}")
+    ])

-    # Prepare messages for LLM
-    llm_messages = [system_message] + messages
-
-    # Initialize LLM
-    llm = ChatOpenAI(model="gpt-4o", temperature=0.7)
-
-    response_content = ""
-    full_response = None
-
-    # Stream the main response
-    if tools:
-        llm_with_tools = llm.bind_tools(tools)
-
-        async for chunk in llm_with_tools.astream(llm_messages):
-            if chunk.content:
-                response_content += chunk.content
-                yield chunk.content  # Yield each chunk as it comes
-
-            # Keep track of the complete response for tool calls
-            full_response = chunk
-    else:
-        async for chunk in llm.astream(llm_messages):
-            if chunk.content:
-                response_content += chunk.content
-                yield chunk.content  # Yield each chunk as it comes
-
-            full_response = chunk
-
-    # Handle tool calls if any (after main streaming is complete)
-    if full_response and hasattr(full_response, 'tool_calls') and full_response.tool_calls:
-        tool_results_content = await handle_tool_calls(full_response, tools)
-        if tool_results_content:
-            yield tool_results_content
-
-
-def create_system_message_with_tools(tools: List) -> SystemMessage:
-    """Create a comprehensive system message that includes tool information."""
-
-    # Basic system prompt
-    system_content = CHAT_SYSTEM_PROMPT_BASE
-
-    # Add tool descriptions if available
-    if tools:
-        system_content += CHAT_SYSTEM_PROMPT_TOOLS_HEADER
-
-        for tool in tools:
-            tool_name = getattr(tool, 'name', 'Unknown Tool')
-            tool_description = getattr(tool, 'description', 'No description available')
-
-            # Get tool parameters
-            tool_args = getattr(tool, 'args_schema', None)
-            if tool_args and hasattr(tool_args, 'schema'):
-                schema = tool_args.schema()
-                properties = schema.get('properties', {})
-
-                system_content += f"\n**{tool_name}**:\n"
-                system_content += f"- Description: {tool_description}\n"
-
-                if properties:
-                    system_content += "- Parameters:\n"
-                    for param_name, param_info in properties.items():
-                        param_type = param_info.get('type', 'unknown')
-                        param_desc = param_info.get('description', 'No description')
-                        system_content += f"  • {param_name} ({param_type}): {param_desc}\n"
-
-                system_content += "\n"
-
-    system_content += CHAT_SYSTEM_PROMPT_GUIDELINES
-
-    return SystemMessage(content=system_content)
-
+    chain = (
+        RunnableParallel({
+            "state": itemgetter("state")
+        })
+        | prompt
+        | llm
+    )

-async def handle_tool_calls(response, tools: List) -> str:
-    """Handle tool calls made by the LLM during chat."""
-
-    tool_lookup = {tool.name: tool for tool in tools}
-    tool_results = []
-
-    for tool_call in response.tool_calls:
-        tool_name = tool_call["name"]
-        tool_args = tool_call["args"]
-
-        if tool_name in tool_lookup:
-            try:
-                tool = tool_lookup[tool_name]
-                result = await tool.ainvoke(tool_args)
-
-                # Format the tool result for display
-                tool_result_text = format_tool_result(tool_name, tool_args, result)
-                tool_results.append(tool_result_text)
-
-            except Exception as e:
-                error_msg = f"❌ Tool '{tool_name}' failed: {str(e)}"
-                tool_results.append(error_msg)
-        else:
-            error_msg = f"❌ Tool '{tool_name}' not available"
-            tool_results.append(error_msg)
-
-    return "\n\n" + "\n\n".join(tool_results) if tool_results else ""
-
-
-def format_tool_result(tool_name: str, tool_args: Dict[str, Any], result) -> str:
-    """Format tool execution results for display in chat."""
-
-    formatted_result = f"\n\n🔧 **Tool Demo: {tool_name}**\n"
-
-    # Show parameters used
-    if tool_args:
-        formatted_result += "**Parameters used:**\n"
-        for key, value in tool_args.items():
-            formatted_result += f"- {key}: {value}\n"
-
-    # Show result
-    formatted_result += "\n**Result:**\n"
-
-    if hasattr(result, 'content'):
-        formatted_result += f"{result.content}"
-    elif hasattr(result, 'artifact'):
-        if hasattr(result.artifact, 'url'):
-            formatted_result += f"🎵 Audio processed: {result.artifact.url}"
-        else:
-            formatted_result += f"{result.artifact}"
-    else:
-        formatted_result += f"{str(result)}"
-
-    return formatted_result
+    return chain.invoke({"state": state})
+
+def chat_node_router(state: AgentState) -> str:
+    if state.requires_processing:
+        return "planner"
+    else:
+        return "end"

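Since chat_node now returns a structured AgentState rather than streaming chunks, it can be exercised in isolation. A minimal smoke test, assuming an OPENAI_API_KEY in the environment and the AgentState model sketched earlier (this snippet is hypothetical and not part of the commit):

# Hypothetical smoke test for the rewritten chat node.
from src.state import AgentState
from src.nodes.chat import chat_node, chat_node_router

state = AgentState(user_input="Remove background noise from my recording")
new_state = chat_node(state)        # LLM returns a validated AgentState
print(chat_node_router(new_state))  # "planner" when processing is needed, else "end"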
src/nodes/final_response.py DELETED
@@ -1,299 +0,0 @@
-"""
-Intelligent LLM-powered final response formatter for comprehensive user communication.
-"""
-
-from typing import Dict, Any, List
-from langchain_core.messages import AIMessage, SystemMessage, HumanMessage
-from langchain_openai import ChatOpenAI
-from langchain_core.output_parsers import PydanticOutputParser
-from pydantic import BaseModel, Field
-import json
-
-from .prompts import (
-    FINAL_RESPONSE_SYSTEM_PROMPT,
-    FINAL_RESPONSE_USER_PROMPT_TEMPLATE,
-)
-
-
-class FinalResponse(BaseModel):
-    """Structured final response from LLM analysis."""
-
-    response_title: str = Field(description="Engaging title for the response")
-    main_message: str = Field(description="Primary message about what was accomplished")
-    processed_files_summary: List[str] = Field(description="Summary of each processed file with download info")
-    key_improvements: List[str] = Field(description="Key improvements and enhancements made")
-    quality_assessment: str = Field(description="Assessment of final quality and success")
-    user_recommendations: List[str] = Field(description="Personalized recommendations for the user")
-    next_steps: str = Field(description="Suggested next steps or call to action")
-    technical_summary: str = Field(description="Brief technical summary of what was done")
-
-
-async def final_response_node(state: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Generate intelligent, personalized final response using LLM analysis.
-
-    The LLM creates a comprehensive response that:
-    - Summarizes what was accomplished
-    - Highlights key improvements and results
-    - Provides download links for processed files
-    - Offers personalized recommendations
-    - Suggests appropriate next steps
-    """
-
-    processing_type = state.get("processing_type", "")
-    processed_files = state.get("processed_files", {})
-    scripts = state.get("scripts", {})
-    errors = state.get("errors", [])
-    processing_metadata = state.get("processing_metadata", {})
-    user_request = state.get("user_request", "")
-    completed_steps = state.get("completed_steps", [])
-    execution_plan = state.get("execution_plan", [])
-
-    # For chat responses, use existing final_response
-    if processing_type == "chat":
-        final_response = state.get("final_response", "")
-        if not final_response:
-            # Generate a chat response if none exists
-            final_response = await create_chat_final_response(user_request, processing_metadata)
-    else:
-        # Generate intelligent audio processing response
-        llm_response = await create_intelligent_final_response_with_llm(
-            user_request, processing_type, processed_files, scripts,
-            errors, processing_metadata, completed_steps, execution_plan
-        )
-
-        final_response = format_llm_response(llm_response, processed_files)
-
-    # Add final response to messages if not already present
-    messages = state.get("messages", [])
-    if not any(msg.content == final_response for msg in messages if hasattr(msg, 'content')):
-        messages.append(AIMessage(content=final_response))
-
-    # Set final audio URL if available
-    final_audio_url = get_final_audio_url(processed_files, processing_type)
-
-    return {
-        "final_response": final_response,
-        "final_audio_url": final_audio_url,
-        "messages": messages
-    }
-
-
-async def create_intelligent_final_response_with_llm(
-    user_request: str,
-    processing_type: str,
-    processed_files: Dict[str, str],
-    scripts: Dict[str, Any],
-    errors: List[str],
-    processing_metadata: Dict[str, Any],
-    completed_steps: List[str],
-    execution_plan: List[Dict[str, Any]]
-) -> FinalResponse:
-    """Use LLM to create intelligent, personalized final response."""
-
-    system_message = create_final_response_system_message()
-    user_message = create_final_response_user_message(
-        user_request, processing_type, processed_files, scripts,
-        errors, processing_metadata, completed_steps, execution_plan
-    )
-
-    llm = ChatOpenAI(model="gpt-4o", temperature=0.3)
-    parser = PydanticOutputParser(pydantic_object=FinalResponse)
-
-    prompt_messages = [
-        SystemMessage(content=system_message.content),
-        HumanMessage(content=user_message),
-        HumanMessage(content=parser.get_format_instructions())
-    ]
-
-    try:
-        response = await llm.ainvoke(prompt_messages)
-        final_response = parser.parse(response.content)
-        return final_response
-    except Exception as e:
-        # Fallback response
-        return create_fallback_final_response(user_request, processed_files, errors)
-
-
-def create_final_response_system_message() -> SystemMessage:
-    """Create system message for final response generation."""
-    return SystemMessage(content=FINAL_RESPONSE_SYSTEM_PROMPT)
-
-
-def create_final_response_user_message(
-    user_request: str,
-    processing_type: str,
-    processed_files: Dict[str, str],
-    scripts: Dict[str, Any],
-    errors: List[str],
-    processing_metadata: Dict[str, Any],
-    completed_steps: List[str],
-    execution_plan: List[Dict[str, Any]]
-) -> str:
-    """Create user message for final response generation."""
-
-    # Analyze processing context
-    processing_summary = analyze_processing_context(
-        processed_files, scripts, processing_metadata, completed_steps
-    )
-
-    return FINAL_RESPONSE_USER_PROMPT_TEMPLATE.format(
-        user_request=user_request,
-        processing_type=processing_type,
-        processed_files_count=len(processed_files),
-        completed_steps_count=len(completed_steps),
-        error_count=len(errors),
-        processed_files=json.dumps(processed_files, indent=2) if processed_files else "No files processed",
-        processing_summary=processing_summary,
-        plan_steps=len(execution_plan),
-        tools_used=list(set([step.get('tool', 'unknown') for step in execution_plan])),
-        completed_steps="\n".join(completed_steps[-5:]) if completed_steps else "No steps completed",
-        errors="\n".join(errors) if errors else "No errors encountered",
-        processing_metadata=json.dumps(processing_metadata, indent=2) if processing_metadata else "No additional metadata"
-    )
-
-
-def analyze_processing_context(
-    processed_files: Dict[str, str],
-    scripts: Dict[str, Any],
-    processing_metadata: Dict[str, Any],
-    completed_steps: List[str]
-) -> str:
-    """Analyze processing context to inform final response."""
-
-    analysis = "**Processing Analysis:**\n"
-
-    # File analysis
-    analysis += f"- Files processed: {len(processed_files)}\n"
-    if processed_files:
-        for original, processed in processed_files.items():
-            original_name = original.split('/')[-1] if '/' in original else original
-            processed_name = processed.split('/')[-1] if '/' in processed else processed
-            analysis += f"  • {original_name} → {processed_name}\n"
-
-    # Script analysis
-    if scripts:
-        total_transcript_length = sum(len(script.get("transcript", "")) for script in scripts.values())
-        total_filler_words = sum(len(script.get("filler_words", [])) for script in scripts.values())
-        analysis += f"- Total transcript length: {total_transcript_length} characters\n"
-        analysis += f"- Filler words detected: {total_filler_words}\n"
-
-    # Quality assessment
-    quality_score = processing_metadata.get("quality_score", 0)
-    if quality_score > 0:
-        analysis += f"- Estimated quality score: {quality_score:.1%}\n"
-
-    # Processing insights
-    final_analysis = processing_metadata.get("final_analysis", {})
-    if final_analysis:
-        analysis += f"- AI assessment: {final_analysis.get('success_assessment', 'N/A')}\n"
-        quality_improvements = final_analysis.get("quality_improvements", [])
-        if quality_improvements:
-            analysis += f"- Key improvements: {', '.join(quality_improvements[:3])}\n"
-
-    # Step analysis
-    successful_steps = len([step for step in completed_steps if step.startswith("✅")])
-    analysis += f"- Successful steps: {successful_steps}/{len(completed_steps)}\n"
-
-    return analysis
-
-
-def create_fallback_final_response(
-    user_request: str,
-    processed_files: Dict[str, str],
-    errors: List[str]
-) -> FinalResponse:
-    """Create fallback response if LLM generation fails."""
-
-    if processed_files:
-        return FinalResponse(
-            response_title="Audio Processing Complete",
-            main_message=f"Successfully processed {len(processed_files)} audio file(s) according to your request.",
-            processed_files_summary=[f"{original.split('/')[-1]}: [Download]({processed})" for original, processed in processed_files.items()],
-            key_improvements=["Audio processing completed", "Files enhanced and optimized"],
-            quality_assessment="Processing completed successfully",
-            user_recommendations=["Download your processed files", "Review the results"],
-            next_steps="Your enhanced audio files are ready for download. Let me know if you need any adjustments!",
-            technical_summary=f"Applied audio processing workflow to {len(processed_files)} file(s)"
-        )
-    else:
-        return FinalResponse(
-            response_title="Processing Attempt Complete",
-            main_message="Audio processing encountered some challenges.",
-            processed_files_summary=[],
-            key_improvements=[],
-            quality_assessment="Processing was not successful",
-            user_recommendations=["Check your audio file URLs", "Try a simpler processing request"],
-            next_steps="Please check the errors above and try again with valid audio files.",
-            technical_summary=f"Processing attempted but encountered {len(errors)} error(s)"
-        )
-
-
-def format_llm_response(llm_response: FinalResponse, processed_files: Dict[str, str]) -> str:
-    """Format the LLM response into final markdown response."""
-
-    response = f"🎵 **{llm_response.response_title}**\n\n"
-
-    # Main message
-    response += f"{llm_response.main_message}\n\n"
-
-    # Processed files with actual download links
-    if processed_files:
-        response += "**🎵 Your Processed Audio Files:**\n"
-        for original, processed in processed_files.items():
244
- filename = original.split('/')[-1] if '/' in original else original
245
- response += f"- **{filename}**: [Download]({processed})\n"
246
- response += "\n"
247
-
248
- # Key improvements
249
- if llm_response.key_improvements:
250
- response += "**✨ Key Improvements:**\n"
251
- for improvement in llm_response.key_improvements:
252
- response += f"- {improvement}\n"
253
- response += "\n"
254
-
255
- # Quality assessment
256
- response += f"**🎯 Quality Assessment:** {llm_response.quality_assessment}\n\n"
257
-
258
- # Recommendations
259
- if llm_response.user_recommendations:
260
- response += "**💡 Recommendations:**\n"
261
- for rec in llm_response.user_recommendations:
262
- response += f"- {rec}\n"
263
- response += "\n"
264
-
265
- # Technical summary
266
- if llm_response.technical_summary:
267
- response += f"**🔧 Technical Summary:** {llm_response.technical_summary}\n\n"
268
-
269
- # Next steps
270
- response += f"**🚀 Next Steps:** {llm_response.next_steps}"
271
-
272
- return response
273
-
274
-
275
- async def create_chat_final_response(user_request: str, processing_metadata: Dict[str, Any]) -> str:
276
- """Create final response for chat interactions."""
277
-
278
- # For chat, create a simple acknowledgment
279
- return f"I've provided information about our audio processing capabilities. Is there anything specific you'd like to know more about or any audio files you'd like me to help process?"
280
-
281
-
282
- def get_final_audio_url(processed_files: Dict[str, str], processing_type: str) -> str:
283
- """Get the final audio URL to return to the user."""
284
-
285
- if not processed_files:
286
- return ""
287
-
288
- # For dialogue generation, look for combined file
289
- if processing_type == "dialogue_generation":
290
- for original, processed in processed_files.items():
291
- if "combined" in processed or "dialogue" in processed:
292
- return processed
293
-
294
- # For single file processing, return the processed file
295
- if len(processed_files) == 1:
296
- return list(processed_files.values())[0]
297
-
298
- # For multiple files, return the first one
299
- return list(processed_files.values())[0] if processed_files else ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/nodes/planner.py CHANGED
@@ -1,311 +1,24 @@
- """
- Intelligent LLM-powered planner for creating optimal audio processing execution plans.
- """
-
- from typing import Dict, Any, List
- from langchain_core.messages import AIMessage, SystemMessage, HumanMessage
  from langchain_openai import ChatOpenAI
- from langchain_core.output_parsers import PydanticOutputParser
- from pydantic import BaseModel, Field
- import json
- from .prompts import PLANNER_SYSTEM_PROMPT, PLANNER_USER_PROMPT_TEMPLATE
-
-
- class ExecutionStep(BaseModel):
-     """Single step in the execution plan."""
-
-     step_id: str = Field(description="Unique identifier for this step")
-     tool_name: str = Field(description="Name of the tool to use")
-     parameters: Dict[str, Any] = Field(description="Parameters for the tool")
-     description: str = Field(description="Human-readable description of what this step does")
-     reasoning: str = Field(description="Why this step is needed")
-     priority: str = Field(description="Priority level: high, medium, low")
-
-
- class ExecutionPlan(BaseModel):
-     """Complete execution plan for audio processing."""
-
-     steps: List[ExecutionStep] = Field(description="Ordered list of execution steps")
-     strategy: str = Field(description="Overall strategy and approach")
-     expected_outcomes: List[str] = Field(description="What outcomes to expect from this plan")
-     estimated_duration: str = Field(description="Estimated time to complete")
-     risks_and_mitigations: List[str] = Field(description="Potential issues and how to handle them")
-
-
- async def planner_node(state: Dict[str, Any]) -> Dict[str, Any]:
-     """
-     Create intelligent execution plan using LLM analysis.
-
-     The LLM analyzes:
-     - User request and intent
-     - Available audio files and their characteristics
-     - Transcript insights and quality
-     - Available tools and capabilities
-     - Processing type and requirements
-     """
-
-     user_request = state.get("user_request", "")
-     audio_files = state.get("audio_files", [])
-     scripts = state.get("scripts", {})
-     processing_type = state.get("processing_type", "")
-     processing_metadata = state.get("processing_metadata", {})
-
-     if not audio_files:
-         return create_no_files_plan_response(state)
-
-     # Use LLM to create intelligent execution plan
-     execution_plan = await create_execution_plan_with_llm(
-         user_request, audio_files, scripts, processing_type, processing_metadata
-     )
-
-     # Convert to the format expected by audio processor
-     formatted_plan = convert_plan_to_execution_format(execution_plan)
-
-     # Create plan summary message
-     plan_summary = create_plan_summary_message(execution_plan, formatted_plan)
-     messages = state.get("messages", [])
-     messages.append(AIMessage(content=plan_summary))
-
-     return {
-         "execution_plan": formatted_plan,
-         "messages": messages,
-         "processing_metadata": {
-             **processing_metadata,
-             "execution_strategy": execution_plan.strategy,
-             "expected_outcomes": execution_plan.expected_outcomes,
-             "plan_metadata": execution_plan.dict()
-         }
-     }
-
-
- async def create_execution_plan_with_llm(
-     user_request: str,
-     audio_files: List[str],
-     scripts: Dict[str, Any],
-     processing_type: str,
-     processing_metadata: Dict[str, Any]
- ) -> ExecutionPlan:
-     """Use LLM to create intelligent execution plan."""
-
-     system_message = create_planning_system_message()
-     user_message_content = create_planning_user_message(
-         user_request, audio_files, scripts, processing_type, processing_metadata
-     )
-
-     llm = ChatOpenAI(model="gpt-4o", temperature=0.2)
-     parser = PydanticOutputParser(pydantic_object=ExecutionPlan)
-
-     prompt_messages = [
-         system_message,
-         HumanMessage(content=user_message_content),
-         HumanMessage(content=parser.get_format_instructions())
-     ]
-
-     try:
-         response = await llm.ainvoke(prompt_messages)
-         plan = parser.parse(response.content)
-         return plan
-     except Exception as e:
-         # Fallback to simple plan
-         return create_fallback_execution_plan(user_request, audio_files, processing_type)
-
-
- def create_planning_system_message() -> SystemMessage:
-     """Create system message for execution planning."""
-     return SystemMessage(content=PLANNER_SYSTEM_PROMPT)
-
-
- def create_planning_user_message(
-     user_request: str,
-     audio_files: List[str],
-     scripts: Dict[str, Any],
-     processing_type: str,
-     processing_metadata: Dict[str, Any]
- ) -> str:
-     """Create user message for execution planning."""
-
-     # Analyze transcript data
-     transcript_summary = analyze_transcript_data(scripts)
-     file_list = "\n".join([f"- {file.split('/')[-1]}" for file in audio_files])
-     processing_context = json.dumps(processing_metadata, indent=2) if processing_metadata else "No additional context"
-
-     return PLANNER_USER_PROMPT_TEMPLATE.format(
-         user_request=user_request,
-         processing_type=processing_type,
-         file_count=len(audio_files),
-         file_list=file_list,
-         transcript_summary=transcript_summary,
-         processing_context=processing_context,
-     )
-
-
- def analyze_transcript_data(scripts: Dict[str, Any]) -> str:
-     """Analyze transcript data to inform planning decisions."""
-
-     if not scripts:
-         return "No transcript data available"
-
-     summary = ""
-     total_filler_words = 0
-     quality_scores = []
-     insights = []
-
-     for file_url, script_data in scripts.items():
-         filename = file_url.split('/')[-1] if '/' in file_url else file_url
-         transcript = script_data.get("transcript", "")
-         filler_words = script_data.get("filler_words", [])
-         quality_score = script_data.get("quality_score", 0)
-         file_insights = script_data.get("insights", [])
-
-         total_filler_words += len(filler_words)
-         if quality_score > 0:
-             quality_scores.append(quality_score)
-         insights.extend(file_insights)
-
-         summary += f"\n- **{filename}**: {len(transcript)} chars, {len(filler_words)} fillers"
-         if quality_score > 0:
-             summary += f", {quality_score:.1%} quality"
-
-     # Overall analysis
-     avg_quality = sum(quality_scores) / len(quality_scores) if quality_scores else 0
-     summary += f"\n\n**Overall Analysis:**"
-     summary += f"\n- Total filler words across all files: {total_filler_words}"
-     summary += f"\n- Average transcript quality: {avg_quality:.1%}" if avg_quality > 0 else ""
-
-     if insights:
-         summary += f"\n- Key insights: {', '.join(insights[:3])}"
-
-     # Planning recommendations
-     if total_filler_words > 10:
-         summary += f"\n- **Recommendation**: High filler word count suggests need for silence trimming and cutting"
-     if avg_quality < 0.7:
-         summary += f"\n- **Recommendation**: Lower quality transcript suggests audio may need normalization"
-
-     return summary
-
-
- def convert_plan_to_execution_format(execution_plan: ExecutionPlan) -> List[Dict[str, Any]]:
-     """Convert LLM execution plan to format expected by audio processor."""
-
-     formatted_steps = []
-
-     for step in execution_plan.steps:
-         formatted_step = {
-             "step": step.step_id,
-             "tool": step.tool_name,
-             "params": step.parameters,
-             "description": step.description,
-             "reasoning": step.reasoning,
-             "priority": step.priority
-         }
-         formatted_steps.append(formatted_step)
-
-     return formatted_steps
-
-
- def create_fallback_execution_plan(
-     user_request: str,
-     audio_files: List[str],
-     processing_type: str
- ) -> ExecutionPlan:
-     """Create fallback execution plan if LLM planning fails."""
-
-     steps = []
-
-     for i, audio_file in enumerate(audio_files):
-         # Basic processing steps
-         steps.extend([
-             ExecutionStep(
-                 step_id=f"update_info_{i}",
-                 tool_name="update_audio_info",
-                 parameters={"audio_file": audio_file},
-                 description=f"Update audio information",
-                 reasoning="Essential for proper file handling",
-                 priority="high"
-             ),
-             ExecutionStep(
-                 step_id=f"trim_silence_{i}",
-                 tool_name="apply_silence_trimming",
-                 parameters={"audio_file": audio_file, "threshold_db": -40},
-                 description="Remove silence and quiet sections",
-                 reasoning="Improves audio quality and reduces file size",
-                 priority="medium"
-             ),
-             ExecutionStep(
-                 step_id=f"normalize_{i}",
-                 tool_name="apply_normalization",
-                 parameters={"audio_file": audio_file, "target_level": -3},
-                 description="Normalize audio levels",
-                 reasoning="Ensures consistent volume levels",
-                 priority="medium"
-             )
-         ])
-
-     return ExecutionPlan(
-         steps=steps,
-         strategy="Fallback plan: basic audio enhancement with silence removal and normalization",
-         expected_outcomes=["Cleaner audio", "Consistent levels", "Reduced file size"],
-         estimated_duration="2-5 minutes",
-         risks_and_mitigations=["Minimal risk with basic processing steps"]
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.runnables import RunnableParallel
+ from src.state import AgentState
+ from operator import itemgetter
+
+ def planner_node(state: AgentState) -> AgentState:
+     llm = ChatOpenAI(model="gpt-4.1")
+     llm = llm.with_structured_output(AgentState)
+
+     prompt = ChatPromptTemplate.from_messages([
+         ("system", "You are a planner that identifies what the user wants to do and how it can be achieved. Generate a comprehensive plan for the user."),
+         ("user", "{state}")
+     ])
+
+     chain = (
+         RunnableParallel({
+             "state": itemgetter("state")
+         })
+         | prompt
+         | llm
      )
  
-
- def create_plan_summary_message(execution_plan: ExecutionPlan, formatted_plan: List[Dict[str, Any]]) -> str:
-     """Create comprehensive plan summary message."""
-
-     summary = "🎯 **Intelligent Execution Plan Created**\n\n"
-
-     # Strategy
-     summary += f"**📋 Strategy:** {execution_plan.strategy}\n\n"
-
-     # Plan overview
-     summary += f"**📊 Plan Overview:**\n"
-     summary += f"- Total steps: {len(execution_plan.steps)}\n"
-     summary += f"- Estimated duration: {execution_plan.estimated_duration}\n"
-
-     # Priority breakdown
-     high_priority = len([s for s in execution_plan.steps if s.priority == "high"])
-     medium_priority = len([s for s in execution_plan.steps if s.priority == "medium"])
-     low_priority = len([s for s in execution_plan.steps if s.priority == "low"])
-
-     summary += f"- Priority breakdown: {high_priority} high, {medium_priority} medium, {low_priority} low\n\n"
-
-     # Key steps
-     summary += "**🔧 Key Processing Steps:**\n"
-     for i, step in enumerate(execution_plan.steps[:5], 1):  # Show first 5 steps
-         summary += f"{i}. **{step.tool_name}**: {step.description}\n"
-
-     if len(execution_plan.steps) > 5:
-         summary += f"... and {len(execution_plan.steps) - 5} more steps\n"
-
-     summary += "\n"
-
-     # Expected outcomes
-     if execution_plan.expected_outcomes:
-         summary += "**🎯 Expected Outcomes:**\n"
-         for outcome in execution_plan.expected_outcomes[:3]:
-             summary += f"- {outcome}\n"
-         summary += "\n"
-
-     # Risks and mitigations
-     if execution_plan.risks_and_mitigations:
-         summary += "**⚠️ Risk Management:**\n"
-         for risk in execution_plan.risks_and_mitigations[:2]:
-             summary += f"- {risk}\n"
-         summary += "\n"
-
-     summary += "✅ **Ready to execute intelligent plan...**"
-     return summary
-
-
- def create_no_files_plan_response(state: Dict[str, Any]) -> Dict[str, Any]:
-     """Handle case when no audio files are available for planning."""
-
-     messages = state.get("messages", [])
-     messages.append(AIMessage(content="❌ **No Planning Possible**: No audio files available to process."))
-
-     return {
-         "execution_plan": [],
-         "messages": messages,
-         "errors": ["No audio files available for execution planning"]
-     }
+     return chain.invoke({"state": state})
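
Note: `llm.with_structured_output(AgentState)` only works if `AgentState` in `src/state.py` is a schema LangChain can emit as structured output, typically a Pydantic model. That file is not part of this diff, so the following is a minimal sketch of what it could look like; the field names are illustrative assumptions, not the repo's actual definition.

    # Hypothetical sketch of src/state.py -- the real fields are not shown in this commit.
    from pydantic import BaseModel

    class AgentState(BaseModel):
        user_request: str = ""    # what the user asked for
        plan: str = ""            # filled in by planner_node
        final_response: str = ""  # filled in by processor_node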
src/nodes/processor.py ADDED
@@ -0,0 +1,24 @@
+ from langchain_openai import ChatOpenAI
+ from langchain_core.prompts import ChatPromptTemplate
+ from src.state import AgentState
+ from operator import itemgetter
+ from langchain_core.runnables import RunnableParallel
+
+ def processor_node(state: AgentState) -> AgentState:
+     llm = ChatOpenAI(model="gpt-4.1")
+     llm = llm.with_structured_output(AgentState)
+
+     prompt = ChatPromptTemplate.from_messages([
+         ("system", "You are a processor that carries out the plan and generates a final response for the user."),
+         ("user", "Current state: {state}")
+     ])
+
+     chain = (
+         RunnableParallel({
+             "state": itemgetter("state")
+         })
+         | prompt
+         | llm
+     )
+
+     return chain.invoke({"state": state})
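
With the router, script generator, and validator nodes removed, the agent presumably reduces to these two nodes in sequence (the added agent_graph.png suggests a linear flow). The graph-construction code is not shown in this section, so the wiring below is a sketch under that assumption, not the commit's actual implementation.

    # Hypothetical LangGraph wiring -- not part of this commit's diff.
    from langgraph.graph import StateGraph, START, END
    from src.state import AgentState
    from src.nodes.planner import planner_node
    from src.nodes.processor import processor_node

    builder = StateGraph(AgentState)
    builder.add_node("planner", planner_node)
    builder.add_node("processor", processor_node)
    builder.add_edge(START, "planner")
    builder.add_edge("planner", "processor")
    builder.add_edge("processor", END)
    agent = builder.compile()

    # Example invocation; the input field name follows the sketched AgentState above.
    result = agent.invoke({"user_request": "Clean up my podcast audio"})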
src/nodes/prompts.py DELETED
@@ -1,440 +0,0 @@
- PLANNER_SYSTEM_PROMPT = """You are an expert audio processing strategist and execution planner. Your job is to create optimal, step-by-step execution plans for audio processing tasks.
-
- **Available Audio Processing Tools:**
-
- 1. **Information & Metadata Tools:**
- - update_audio_info: Updates general audio file information
- - update_duration_info: Updates audio duration and timing information
- - update_transcription_info: Updates transcription-related metadata
-
- 2. **Core Processing Tools:**
- - process_cut_audio: Cuts/trims audio to specific time ranges (params: audio_file, _start_time, _end_time)
- - apply_normalization: Normalizes audio levels (params: audio_file, target_level)
- - apply_volume_adjustment: Adjusts volume by gain amount (params: audio_file, gain_db)
- - apply_speed_adjustment: Changes playback speed (params: audio_file, speed_factor)
- - apply_fades: Adds fade in/out effects (params: audio_file, fade_in_ms, fade_out_ms)
- - apply_reverse: Reverses audio playback (params: audio_file)
- - apply_silence_trimming: Removes silence/quiet sections (params: audio_file, threshold_db)
-
- **Planning Principles:**
-
- 1. **Context-Aware**: Consider the user's specific goals, not just keywords
- 2. **Quality-First**: Prioritize steps that will most improve the final result
- 3. **Efficient**: Order steps logically to minimize processing time and quality loss
- 4. **Robust**: Include metadata updates and error-handling steps
- 5. **Adaptive**: Tailor approach based on transcript insights and file characteristics
-
- **Step Ordering Best Practices:**
- - Start with metadata updates (audio_info, duration_info)
- - Apply destructive edits first (cutting, trimming)
- - Then apply enhancement (normalization, volume, speed)
- - Finish with aesthetic touches (fades, effects)
-
- **User Intent Analysis:**
- - "Clean up" / "improve" = silence trimming + normalization + possible filler removal
- - "Remove filler words" = intelligent cutting based on transcript analysis
- - "Cut" / "trim" = precise time-based cutting
- - "Louder" / "quieter" = volume adjustment
- - "Faster" / "slower" = speed adjustment
- - "Professional" = normalization + fades + silence trimming
-
- Be intelligent about combining the user's explicit requests with transcript insights to create a comprehensive plan that achieves their goals."""
-
-
- PLANNER_USER_PROMPT_TEMPLATE = """
- **Planning Request for Audio Processing**
-
- **User's Original Request:**
- {user_request}
-
- **Processing Type:** {processing_type}
-
- **Audio Files to Process:** {file_count} files
- {file_list}
-
- **Transcript Analysis:**
- {transcript_summary}
-
- **Processing Context:**
- {processing_context}
-
- **Planning Requirements:**
-
- 1. **Analyze the user's true intent** - what do they actually want to achieve?
- 2. **Consider transcript insights** - filler words, quality issues, content characteristics
- 3. **Create step-by-step execution plan** - specific tools with exact parameters
- 4. **Optimize for quality and efficiency** - best order for operations
- 5. **Include appropriate metadata steps** - ensure proper file handling
- 6. **Plan for potential issues** - what could go wrong and how to handle it
-
- **Key Questions to Address:**
- - What's the primary goal of this processing?
- - Which transcript insights should influence the plan?
- - What's the optimal order of operations?
- - What parameters will achieve the best results?
- - How can we ensure high-quality output?
-
- Create a comprehensive execution plan that intelligently combines the user's requests with the insights from the transcript analysis.
- """
-
- # Prompts for script_generator.py
-
- SCRIPT_GENERATOR_SYSTEM_PROMPT = """You are an expert audio transcription strategist. Your job is to create optimal plans for transcribing audio files based on user needs and available tools.
-
- **Available Tool Types:**
- - transcribe_audio_sync: Main transcription tool for converting audio to text
- - update_transcription_info: Updates transcription metadata and info
-
- **Planning Considerations:**
- - Order files by complexity/priority
- - Choose appropriate tools based on file characteristics
- - Consider user's specific goals (filler removal, cutting, quality improvement)
- - Anticipate potential challenges (multiple speakers, background noise, etc.)
- - Plan analysis goals that align with user intent
-
- **Your planning should be:**
- - Strategic: Consider the best order and approach
- - Practical: Use available tools effectively
- - Goal-oriented: Focus on what the user actually needs
- - Robust: Anticipate and prepare for common issues
-
- Be intelligent about the user's intent - if they want to remove filler words, prioritize filler detection. If they want to cut audio, focus on timestamp accuracy."""
-
- SCRIPT_GENERATOR_USER_PROMPT_TEMPLATE = """
- **Audio Files to Process:** {file_count} files
- {file_list}
-
- **User's Request:** {user_request}
-
- **Available Tools:** {available_tools}
-
- Create an optimal transcription plan that:
- 1. Determines the best order to process these files
- 2. Selects appropriate tools for the task
- 3. Defines analysis goals that align with the user's needs
- 4. Anticipates potential challenges
- 5. Provides clear reasoning for the approach
-
- Consider the user's intent and optimize for their specific goals.
- """
-
- ANALYZE_TRANSCRIPT_PROMPT_TEMPLATE = """
- Analyze this audio transcript and provide structured insights:
-
- **Audio File:** {audio_file}
- **Transcript:** {transcript_content}
-
- Please provide analysis in JSON format with these fields:
- - "timestamps": Array of objects with start/end times and text segments (estimate based on content)
- - "filler_words": Array of detected filler words with positions and context
- - "quality_score": Float 0-1 indicating transcript quality
- - "insights": Array of key insights about the content
- - "speaker_analysis": Information about speakers if detectable
- - "content_summary": Brief summary of what the audio contains
-
- Focus on practical insights that would help with audio processing decisions.
- """
-
- ANALYZE_TRANSCRIPTION_RESULTS_SYSTEM_PROMPT = """You are an expert audio transcription analyst. Analyze the transcription results and provide insights about success, quality, and recommendations for next steps."""
-
- ANALYZE_TRANSCRIPTION_RESULTS_USER_PROMPT_TEMPLATE = """
- **User's Original Request:** {user_request}
-
- **Analysis Goals:** {analysis_goals}
-
- **Transcription Results:**
- - Successfully transcribed: {success_count} files
- - Failed transcriptions: {failure_count} files
- - Errors: {errors}
-
- **Script Details:**
- {script_details}
-
- Provide analysis of the transcription quality, success rate, and specific recommendations for audio processing based on these results.
- """
-
- # Prompts for chat.py
-
- CHAT_SYSTEM_PROMPT_BASE = """You are an expert Audio Processing Assistant powered by advanced audio tools.
-
- Your role is to:
- 1. Answer questions about audio processing capabilities
- 2. Provide guidance on how to use audio tools
- 3. Demonstrate tool usage only when explicitly requested
- 4. Explain audio concepts and best practices
- 5. Help users understand what's possible with audio processing
-
- You have a conversational, helpful, and knowledgeable personality. You can discuss both technical and practical aspects of audio processing.
-
- IMPORTANT: Only call tools when the user explicitly asks for a demonstration. For general questions about capabilities, explain the tools without calling them."""
-
- CHAT_SYSTEM_PROMPT_TOOLS_HEADER = "\n\n**Available Audio Tools:**\n"
-
- CHAT_SYSTEM_PROMPT_GUIDELINES = """
- **Guidelines:**
- - Provide clear, helpful explanations about audio processing
- - Only demonstrate tools when explicitly asked to do so
- - Explain tool capabilities without necessarily calling them
- - Be encouraging about what's possible with audio processing
- - Keep responses informative but concise
- - Use emojis to make responses engaging (🎵 🔧 📊 ✨)
-
- **For audio file processing requests:**
- Suggest using the full audio processing workflow by providing audio file URLs and describing the desired outcome.
- """
-
- # Prompts for validator.py
-
- VALIDATOR_SYSTEM_PROMPT = """You are an expert audio processing validator. Your job is to assess the results of audio processing workflows and provide intelligent feedback.
-
- Your role:
- 1. Analyze processing results against the user's original request
- 2. Evaluate the quality and completeness of the work done
- 3. Identify critical issues, warnings, and successes
- 4. Provide actionable recommendations
- 5. Determine if reprocessing would be beneficial
-
- Assessment criteria:
- - Did the processing achieve the user's goals?
- - Are there any critical failures that prevent success?
- - What is the overall quality of the results?
- - Are there minor issues that could be improved?
- - Would reprocessing with different parameters help?
-
- Consider the context:
- - User's original request and intent
- - What processing steps were planned vs. completed
- - Any errors or issues encountered
- - The quality of transcripts and processing metadata
- - Whether processed files were successfully generated
-
- Be thorough but practical in your assessment. Focus on actionable insights that would help improve the audio processing results."""
-
- VALIDATOR_USER_PROMPT_TEMPLATE = """
- ## Processing Assessment Request
-
- **User's Original Request:**
- {user_request}
-
- **Execution Plan ({plan_steps} steps planned):**
- {execution_plan}
-
- **Completed Steps ({completed_steps_count}):**
- {completed_steps}
-
- **Processing Results:**
- {processing_results}
-
- **Transcript Analysis:**
- {transcript_analysis}
-
- **Errors Encountered ({error_count}):**
- {errors}
- {reprocessing_note}
- ## Assessment Task
-
- Please analyze this processing workflow and provide a comprehensive validation assessment. Consider:
-
- 1. How well did the processing achieve the user's goals?
- 2. What is the overall quality and success rate?
- 3. Are there critical issues that prevent success?
- 4. What warnings or minor issues should be noted?
- 5. What specific recommendations would improve results?
- 6. Would reprocessing with adjustments be beneficial?
-
- Provide honest, actionable feedback that would help improve the audio processing results.
- """
-
- # Prompts for final_response.py
-
- FINAL_RESPONSE_SYSTEM_PROMPT = """You are an expert audio processing communication specialist. Your job is to create engaging, informative, and personalized final responses for users who have completed audio processing workflows.
-
- **Your Role:**
- - Craft compelling, user-friendly summaries of what was accomplished
- - Highlight key improvements and value delivered
- - Provide clear information about processed files and how to access them
- - Offer personalized recommendations based on the specific processing
- - Suggest appropriate next steps
- - Maintain an encouraging and professional tone
-
- **Response Principles:**
- 1. **User-Centric**: Focus on what the user gained and achieved
- 2. **Clear and Actionable**: Provide specific, actionable information
- 3. **Celebratory**: Acknowledge accomplishments and improvements
- 4. **Helpful**: Offer valuable insights and next steps
- 5. **Professional**: Maintain expertise while being approachable
- 6. **Specific**: Reference actual results and improvements made
-
- **Key Elements to Include:**
- - Engaging title that captures what was accomplished
- - Clear summary of processing results
- - Specific improvements and enhancements made
- - Quality assessment and success metrics
- - Download information for processed files
- - Personalized recommendations based on the processing
- - Encouraging next steps or call to action
-
- **Tone Guidelines:**
- - Professional but friendly
- - Confident in the results achieved
- - Encouraging about next steps
- - Specific about technical improvements
- - Celebratory of success, honest about limitations
-
- Be specific about the actual processing done and results achieved. Reference real file names, improvements made, and quality metrics when available."""
-
- FINAL_RESPONSE_USER_PROMPT_TEMPLATE = """
- **Create Final Response for Audio Processing Workflow**
-
- **User's Original Request:**
- {user_request}
-
- **Processing Type:** {processing_type}
-
- **Processing Results:**
- - Successfully processed: {processed_files_count} files
- - Completed steps: {completed_steps_count}
- - Errors encountered: {error_count}
-
- **Processed Files:**
- {processed_files}
-
- **Processing Context Analysis:**
- {processing_summary}
-
- **Execution Plan Summary:**
- {plan_steps} steps planned
- Key tools used: {tools_used}
-
- **Completed Steps (last 5):**
- {completed_steps}
-
- **Errors (if any):**
- {errors}
-
- **Processing Metadata:**
- {processing_metadata}
-
- **Task:**
- Create a comprehensive, engaging final response that:
- 1. Celebrates what was accomplished
- 2. Clearly explains the results and improvements
- 3. Provides specific download information for processed files
- 4. Offers personalized recommendations based on this specific processing
- 5. Suggests appropriate next steps
- 6. Maintains an encouraging and professional tone
-
- Focus on the value delivered to the user and make it clear how to access and use their processed audio files.
- """
-
- # Prompts for router.py
-
- ROUTER_SYSTEM_PROMPT = """You are an intelligent routing agent for an audio processing system. Your job is to analyze user requests and determine the best processing path.
-
- **Processing Types Available:**
-
- 1. **chat** - For general questions, help requests, or when no audio processing is needed
- - User asking about capabilities, features, or how to use the system
- - General conversation or questions
- - No audio files present, or user just wants information
-
- 2. **audio_processing** - For single or multiple audio file processing tasks
- - Removing filler words, cutting audio, improving quality
- - Normalizing volume, adjusting speed, adding effects
- - Transcription and analysis tasks
- - Any audio enhancement or modification
-
- 3. **dialogue_generation** - For combining multiple audio files into conversations
- - Creating interviews, podcasts, or conversations from separate files
- - Merging voices or speakers into dialogue format
- - Building composite audio experiences
-
- **Audio File Detection:**
- Extract any audio file URLs or paths from the user's message. Look for:
- - HTTP/HTTPS URLs ending in .mp3, .wav, .m4a, .flac, .aac, .ogg
- - Local file paths with audio extensions
- - References to audio files even if not explicitly formatted as URLs
-
- **Priority Assessment:**
- - **high**: Urgent processing needs, multiple complex steps, time-sensitive
- - **medium**: Standard processing requests, moderate complexity
- - **low**: Simple questions, basic single-step tasks
-
- **Your Analysis Should:**
- - Understand the user's true intent behind their request
- - Identify all audio files mentioned or linked
- - Choose the most appropriate processing type
- - Assess the complexity and urgency
- - Provide clear reasoning for your decision
-
- Be intelligent about context - a user saying "help me clean up this audio" with a file link clearly needs audio_processing, not chat."""
-
- ROUTER_USER_PROMPT_TEMPLATE = """
- Please analyze this user request and determine the appropriate routing:
-
- **User Request:**
- {user_content}
-
- **Analysis Task:**
- 1. What is the user's primary intent?
- 2. Are there any audio files mentioned or linked?
- 3. What type of processing would best serve their needs?
- 4. How complex/urgent is this request?
- 5. What's the reasoning for your routing decision?
-
- Provide a structured analysis with your routing decision.
- """
-
- # Prompts for audio_processor.py
-
- LLM_PROCESSING_DECISION_PROMPT_TEMPLATE = """
- You are an intelligent audio processing engine. Decide whether to execute this processing step:
-
- **Step to Consider:**
- - Tool: {tool_name}
- - Description: {description}
- - Planned Parameters: {planned_parameters}
-
- **Context:**
- - User Request: {user_request}
- - Completed Steps: {completed_steps_count}
- - Errors So Far: {error_count}
- - Available Tools: {available_tools}
- - Current File URLs: {current_file_urls}
-
- **Recent Activity:**
- {recent_activity}
-
- Should this step be executed? Respond with:
- 1. "EXECUTE" or "SKIP"
- 2. If EXECUTE, provide any parameter modifications in JSON format (or "NO_CHANGES")
-
- Example response:
- EXECUTE
- {{"audio_file": "updated_url.mp3", "target_level": -6}}
-
- Or:
- SKIP - This step is redundant given previous processing
- """
-
- LLM_PROCESSING_SUMMARY_PROMPT_TEMPLATE = """
- Create a concise processing summary for the user:
-
- **User's Request:** {user_request}
-
- **Results:**
- - Completed Steps: {completed_steps_count}
- - Errors: {error_count}
- - Files Processed: {processed_files_count}
-
- **Step Details:**
- {step_details}
-
- **Processed Files:**
- {processed_files}
-
- **Errors:**
- {errors}
-
- Create a brief, encouraging summary focusing on what was accomplished and next steps.
- """

src/nodes/router.py DELETED
@@ -1,167 +0,0 @@
- """
- Intelligent LLM-powered router node for determining processing type and extracting context.
- """
-
- import re
- from typing import Dict, Any, List
- from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
- from langchain_openai import ChatOpenAI
- from langchain_core.output_parsers import PydanticOutputParser
- from pydantic import BaseModel, Field
-
- from .prompts import ROUTER_SYSTEM_PROMPT, ROUTER_USER_PROMPT_TEMPLATE
-
-
- class RouterDecision(BaseModel):
-     """Structured output for router decisions."""
-
-     processing_type: str = Field(description="Type of processing needed: 'chat', 'audio_processing', or 'dialogue_generation'")
-     user_request: str = Field(description="Clean, parsed version of the user's request")
-     audio_files: List[str] = Field(description="List of audio file URLs/paths found in the message")
-     reasoning: str = Field(description="Brief explanation of why this processing type was chosen")
-     priority_level: str = Field(description="Priority level: 'low', 'medium', or 'high'")
-
-
- async def router_node(state: Dict[str, Any]) -> Dict[str, Any]:
-     """
-     Intelligently route the conversation using LLM analysis.
-
-     The LLM analyzes user input to determine:
-     - Processing type needed
-     - Audio files to extract
-     - User intent and priority
-     """
-
-     # Get the latest user message
-     latest_message = None
-     for msg in reversed(state.get("messages", [])):
-         if isinstance(msg, HumanMessage):
-             latest_message = msg
-             break
-
-     if not latest_message:
-         return create_default_routing()
-
-     # Use LLM to make routing decision
-     router_decision = await analyze_user_request_with_llm(latest_message.content)
-
-     # Create status message
-     status_message = f"🎯 **Routing Analysis**: {router_decision.reasoning}\n**Processing Type**: {router_decision.processing_type}\n**Priority**: {router_decision.priority_level}"
-
-     messages = state.get("messages", [])
-     messages.append(AIMessage(content=status_message))
-
-     return {
-         "processing_type": router_decision.processing_type,
-         "user_request": router_decision.user_request,
-         "audio_files": router_decision.audio_files,
-         "messages": messages,
-         "errors": [],
-         "needs_reprocessing": False,
-         "completed_steps": [],
-         "scripts": {},
-         "processed_files": {},
-         "processing_metadata": {
-             "router_reasoning": router_decision.reasoning,
-             "priority_level": router_decision.priority_level
-         }
-     }
-
-
- async def analyze_user_request_with_llm(user_content: str) -> RouterDecision:
-     """Use LLM to intelligently analyze user request and make routing decisions."""
-
-     system_message = create_router_system_message()
-     user_message = create_router_user_message(user_content)
-
-     # Set up LLM with structured output
-     llm = ChatOpenAI(model="gpt-4o", temperature=0.1)
-     parser = PydanticOutputParser(pydantic_object=RouterDecision)
-
-     prompt_messages = [
-         system_message,
-         HumanMessage(content=user_message),
-         HumanMessage(content=parser.get_format_instructions())
-     ]
-
-     try:
-         response = await llm.ainvoke(prompt_messages)
-         router_decision = parser.parse(response.content)
-         return router_decision
-     except Exception as e:
-         # Fallback to simple analysis
-         return create_fallback_routing(user_content)
-
-
- def create_router_system_message() -> SystemMessage:
-     """Create system message for LLM routing analysis."""
-     return SystemMessage(content=ROUTER_SYSTEM_PROMPT)
-
-
- def create_router_user_message(user_content: str) -> str:
-     """Create user message for routing analysis."""
-     return ROUTER_USER_PROMPT_TEMPLATE.format(user_content=user_content)
-
-
- def create_fallback_routing(user_content: str) -> RouterDecision:
-     """Create fallback routing if LLM analysis fails."""
-
-     content_lower = user_content.lower()
-
-     # Simple pattern matching for fallback
-     audio_patterns = ['.mp3', '.wav', '.m4a', '.flac', '.aac', '.ogg', 'http']
-     dialogue_keywords = ['dialogue', 'conversation', 'combine', 'merge', 'interview']
-
-     has_audio = any(pattern in user_content for pattern in audio_patterns)
-     is_dialogue = any(keyword in content_lower for keyword in dialogue_keywords)
-
-     if has_audio and is_dialogue:
-         processing_type = "dialogue_generation"
-     elif has_audio:
-         processing_type = "audio_processing"
-     else:
-         processing_type = "chat"
-
-     # Extract audio files with simple regex
-     audio_files = extract_audio_files_simple(user_content)
-
-     return RouterDecision(
-         processing_type=processing_type,
-         user_request=user_content,
-         audio_files=audio_files,
-         reasoning=f"Fallback analysis: detected {processing_type} based on content patterns",
-         priority_level="medium"
-     )
-
-
- def extract_audio_files_simple(content: str) -> List[str]:
-     """Simple regex-based audio file extraction for fallback."""
-
-     # Look for URLs (http/https)
-     url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+\.(mp3|wav|m4a|flac|aac|ogg)'
-     urls = re.findall(url_pattern, content, re.IGNORECASE)
-
-     # Look for file paths
-     path_pattern = r'[^\s<>"{}|\\^`\[\]]+\.(mp3|wav|m4a|flac|aac|ogg)'
-     paths = re.findall(path_pattern, content, re.IGNORECASE)
-
-     # Combine and deduplicate
-     audio_files = list(set(urls + [path for path in paths if not path.startswith('http')]))
-
-     return audio_files
-
-
- def create_default_routing() -> Dict[str, Any]:
-     """Create default routing when no user message found."""
-
-     return {
-         "processing_type": "chat",
-         "user_request": "",
-         "audio_files": [],
-         "errors": [],
-         "needs_reprocessing": False,
-         "completed_steps": [],
-         "scripts": {},
-         "processed_files": {},
-         "processing_metadata": {}
-     }

src/nodes/script_generator.py DELETED
@@ -1,412 +0,0 @@
- """
- Intelligent LLM-powered script generator for audio transcription and analysis.
- """
-
- from typing import Dict, Any, List
- from langchain_core.messages import AIMessage, SystemMessage, HumanMessage
- from langchain_openai import ChatOpenAI
- from langchain_core.output_parsers import PydanticOutputParser
- from pydantic import BaseModel, Field
- import json
-
- from .prompts import (
-     SCRIPT_GENERATOR_SYSTEM_PROMPT,
-     SCRIPT_GENERATOR_USER_PROMPT_TEMPLATE,
-     ANALYZE_TRANSCRIPT_PROMPT_TEMPLATE,
-     ANALYZE_TRANSCRIPTION_RESULTS_SYSTEM_PROMPT,
-     ANALYZE_TRANSCRIPTION_RESULTS_USER_PROMPT_TEMPLATE,
- )
-
-
- class TranscriptionPlan(BaseModel):
-     """Plan for transcribing audio files."""
-
-     tools_to_use: List[str] = Field(description="List of tool names to use for transcription")
-     processing_order: List[str] = Field(description="Order to process audio files")
-     analysis_goals: List[str] = Field(description="What to analyze in the transcripts")
-     expected_challenges: List[str] = Field(description="Potential issues to watch for")
-     reasoning: str = Field(description="Reasoning for this transcription approach")
-
-
- class TranscriptionResults(BaseModel):
-     """Results of transcription analysis."""
-
-     success_files: List[str] = Field(description="Successfully transcribed files")
-     failed_files: List[str] = Field(description="Files that failed to transcribe")
-     insights: List[str] = Field(description="Key insights from the transcriptions")
-     quality_assessment: str = Field(description="Assessment of transcription quality")
-     recommendations: List[str] = Field(description="Recommendations for next steps")
-
-
- async def script_generator_node(state: Dict[str, Any], tools: list) -> Dict[str, Any]:
-     """
-     Intelligently generate transcripts using LLM-guided tool usage.
-
-     The LLM analyzes the audio files and user request to:
-     - Decide which transcription tools to use
-     - Determine the best processing approach
-     - Execute transcription with intelligent error handling
-     - Analyze results and extract insights
-     """
-
-     audio_files = state.get("audio_files", [])
-     user_request = state.get("user_request", "")
-
-     if not audio_files:
-         return create_no_files_response(state)
-
-     # Get available transcription tools
-     available_tools = get_transcription_tools(tools)
-     if not available_tools:
-         return create_no_tools_response(state)
-
-     # LLM creates transcription plan
-     transcription_plan = await create_transcription_plan_with_llm(
-         audio_files, user_request, available_tools
-     )
-
-     # Execute transcription based on LLM plan
-     scripts = {}
-     errors = []
-     completed_steps = state.get("completed_steps", [])
-
-     for audio_file in transcription_plan.processing_order:
-         if audio_file in audio_files:  # Ensure file is in our list
-             script_result = await execute_transcription_with_llm(
-                 audio_file, transcription_plan.tools_to_use, tools
-             )
-
-             if script_result["success"]:
-                 scripts[audio_file] = script_result["data"]
-                 completed_steps.append(f"✅ Transcribed: {audio_file.split('/')[-1]}")
-             else:
-                 errors.append(script_result["error"])
-                 completed_steps.append(f"❌ Failed: {audio_file.split('/')[-1]}")
-
-     # LLM analyzes results and provides insights
-     analysis_results = await analyze_transcription_results_with_llm(
-         scripts, errors, user_request, transcription_plan.analysis_goals
-     )
-
-     # Create comprehensive response
-     response_message = create_transcription_response(
-         scripts, errors, transcription_plan, analysis_results
-     )
-
-     messages = state.get("messages", [])
-     messages.append(AIMessage(content=response_message))
-
-     return {
-         "scripts": scripts,
-         "completed_steps": completed_steps,
-         "errors": errors,
-         "messages": messages,
-         "processing_metadata": {
-             "transcription_plan": transcription_plan.dict(),
-             "analysis_results": analysis_results.dict()
-         }
-     }
-
-
- async def create_transcription_plan_with_llm(
-     audio_files: List[str],
-     user_request: str,
-     available_tools: List[str]
- ) -> TranscriptionPlan:
-     """Use LLM to create intelligent transcription plan."""
-
-     system_message = create_transcription_planning_system_message()
-     user_message = create_transcription_planning_user_message(audio_files, user_request, available_tools)
-
-     llm = ChatOpenAI(model="gpt-4o", temperature=0.2)
-     parser = PydanticOutputParser(pydantic_object=TranscriptionPlan)
-
-     prompt_messages = [
-         system_message,
-         HumanMessage(content=user_message),
-         HumanMessage(content=parser.get_format_instructions())
-     ]
-
-     try:
-         response = await llm.ainvoke(prompt_messages)
-         plan = parser.parse(response.content)
-         return plan
-     except Exception as e:
-         # Fallback plan
-         return TranscriptionPlan(
-             tools_to_use=available_tools[:2],  # Use first 2 available tools
-             processing_order=audio_files,
-             analysis_goals=["Basic transcription", "Filler word detection"],
-             expected_challenges=["Audio quality issues", "Multiple speakers"],
-             reasoning="Fallback plan due to LLM planning failure"
-         )
-
-
- async def execute_transcription_with_llm(
-     audio_file: str,
-     tools_to_use: List[str],
-     available_tools: list
- ) -> Dict[str, Any]:
-     """Execute transcription for a single file using planned tools."""
-
-     # Find the actual tool objects
-     tool_objects = {}
-     for tool in available_tools:
-         if tool.name in tools_to_use:
-             tool_objects[tool.name] = tool
-
-     transcript_data = {
-         "transcript": "",
-         "timestamps": [],
-         "filler_words": [],
-         "quality_score": 0.0
-     }
-
-     try:
-         # Use update_transcription_info first if available
-         if "update_transcription_info" in tool_objects:
-             await tool_objects["update_transcription_info"].ainvoke({"audio_file": audio_file})
-
-         # Use transcribe_audio_sync for main transcription
-         if "transcribe_audio_sync" in tool_objects:
-             transcript_result = await tool_objects["transcribe_audio_sync"].ainvoke({"audio_file": audio_file})
-
-             # Process the transcript result
-             if hasattr(transcript_result, 'content'):
-                 transcript_content = transcript_result.content
-             else:
-                 transcript_content = str(transcript_result)
-
-             # Use LLM to analyze the transcript
-             analysis = await analyze_transcript_with_llm(transcript_content, audio_file)
-
-             transcript_data.update({
-                 "transcript": transcript_content,
-                 "timestamps": analysis.get("timestamps", []),
-                 "filler_words": analysis.get("filler_words", []),
-                 "quality_score": analysis.get("quality_score", 0.5),
-                 "insights": analysis.get("insights", [])
-             })
-
-             return {"success": True, "data": transcript_data}
-
-         else:
-             return {"success": False, "error": f"No suitable transcription tool found for {audio_file}"}
-
-     except Exception as e:
-         return {"success": False, "error": f"Transcription failed for {audio_file}: {str(e)}"}
-
-
- async def analyze_transcript_with_llm(transcript_content: str, audio_file: str) -> Dict[str, Any]:
-     """Use LLM to analyze transcript content and extract insights."""
-
-     llm = ChatOpenAI(model="gpt-4o", temperature=0.1)
-
-     analysis_prompt = ANALYZE_TRANSCRIPT_PROMPT_TEMPLATE.format(
-         audio_file=audio_file,
-         transcript_content=transcript_content
-     )
-
-     try:
-         response = await llm.ainvoke([SystemMessage(content=analysis_prompt)])
-         # Try to parse as JSON
-         analysis_data = json.loads(response.content)
-         return analysis_data
-     except Exception as e:
-         # Fallback to simple analysis
-         return {
-             "timestamps": create_simple_timestamps(transcript_content),
-             "filler_words": detect_simple_filler_words(transcript_content),
-             "quality_score": 0.7,
-             "insights": ["Basic transcript generated"],
-             "speaker_analysis": "Unable to analyze speakers",
-             "content_summary": transcript_content[:100] + "..." if len(transcript_content) > 100 else transcript_content
-         }
-
-
- async def analyze_transcription_results_with_llm(
-     scripts: Dict[str, Any],
-     errors: List[str],
-     user_request: str,
-     analysis_goals: List[str]
- ) -> TranscriptionResults:
-     """Use LLM to analyze overall transcription results."""
-
-     system_message = SystemMessage(content=ANALYZE_TRANSCRIPTION_RESULTS_SYSTEM_PROMPT)
-
-     script_details = json.dumps({
-         k: {
-             "length": len(v.get("transcript", "")),
-             "filler_count": len(v.get("filler_words", [])),
-             "quality": v.get("quality_score", 0)
-         } for k, v in scripts.items()
-     }, indent=2)
-
-     user_message_content = ANALYZE_TRANSCRIPTION_RESULTS_USER_PROMPT_TEMPLATE.format(
-         user_request=user_request,
-         analysis_goals=", ".join(analysis_goals),
-         success_count=len(scripts),
-         failure_count=len(errors),
-         errors=errors,
-         script_details=script_details
-     )
-
-     llm = ChatOpenAI(model="gpt-4o", temperature=0.2)
-     parser = PydanticOutputParser(pydantic_object=TranscriptionResults)
-
-     prompt_messages = [
-         system_message,
-         HumanMessage(content=user_message_content),
-         HumanMessage(content=parser.get_format_instructions())
-     ]
-
-     try:
-         response = await llm.ainvoke(prompt_messages)
-         results = parser.parse(response.content)
-         return results
-     except Exception as e:
-         # Fallback analysis
-         return TranscriptionResults(
-             success_files=list(scripts.keys()),
-             failed_files=[f"Error occurred: {str(e)}"],
-             insights=["Basic transcription completed"],
-             quality_assessment="Unable to assess quality automatically",
-             recommendations=["Proceed with standard audio processing"]
-         )
-
-
- def get_transcription_tools(tools: list) -> List[str]:
-     """Extract transcription tool names from available tools."""
-     transcription_tool_names = []
-
-     for tool in tools:
-         if any(keyword in tool.name.lower() for keyword in ['transcribe', 'transcript']):
-             transcription_tool_names.append(tool.name)
-
-     return transcription_tool_names
-
-
- def create_transcription_planning_system_message() -> SystemMessage:
-     """Create system message for transcription planning."""
-     return SystemMessage(content=SCRIPT_GENERATOR_SYSTEM_PROMPT)
-
-
- def create_transcription_planning_user_message(audio_files: List[str], user_request: str, available_tools: List[str]) -> str:
-     """Create user message for transcription planning."""
-     file_list = "\n".join([f"- {file}" for file in audio_files])
-     return SCRIPT_GENERATOR_USER_PROMPT_TEMPLATE.format(
-         file_count=len(audio_files),
-         file_list=file_list,
-         user_request=user_request,
-         available_tools=", ".join(available_tools)
-     )
-
-
- def create_simple_timestamps(transcript: str) -> List[Dict[str, Any]]:
-     """Create simple timestamp estimates for fallback."""
-     timestamps = []
-     lines = [line.strip() for line in transcript.split('\n') if line.strip()]
-
-     for i, line in enumerate(lines):
-         start_time = i * 3.0
-         end_time = start_time + 3.0
-         timestamps.append({
-             "start": start_time,
-             "end": end_time,
-             "text": line
-         })
-
-     return timestamps
-
-
- def detect_simple_filler_words(transcript: str) -> List[Dict[str, Any]]:
-     """Simple filler word detection for fallback."""
-     filler_words = ["um", "uh", "like", "you know", "so", "well", "actually"]
-     found_fillers = []
-     words = transcript.lower().split()
-
-     for i, word in enumerate(words):
-         clean_word = word.strip('.,!?;:"()[]{}')
-         if clean_word in filler_words:
-             found_fillers.append({
-                 "word": clean_word,
-                 "position": i,
-                 "context": " ".join(words[max(0, i-2):min(len(words), i+3)])
-             })
-
-     return found_fillers
-
-
- def create_transcription_response(
-     scripts: Dict[str, Any],
-     errors: List[str],
-     plan: TranscriptionPlan,
-     analysis: TranscriptionResults
- ) -> str:
-     """Create comprehensive transcription response message."""
-
-     if not scripts and errors:
-         return f"❌ **Transcription Failed**\n\n{chr(10).join(errors)}"
-
-     response = "🎙️ **Intelligent Transcription Complete**\n\n"
-
-     # Plan summary
-     response += f"**📋 Strategy Used:** {plan.reasoning}\n\n"
-
-     # Results summary
-     response += f"**📊 Results:**\n"
-     response += f"- ✅ Successfully transcribed: {len(scripts)} files\n"
-     response += f"- ❌ Failed: {len(errors)} files\n\n"
-
-     # File details
-     if scripts:
-         response += "**📝 Transcript Details:**\n"
-         for file_url, script_data in scripts.items():
-             filename = file_url.split('/')[-1] if '/' in file_url else file_url
-             transcript_len = len(script_data.get("transcript", ""))
-             filler_count = len(script_data.get("filler_words", []))
-             quality = script_data.get("quality_score", 0)
-
-             response += f"- **{filename}**: {transcript_len} chars, {filler_count} fillers, {quality:.1%} quality\n"
-         response += "\n"
-
-     # AI insights
-     if analysis.insights:
-         response += "**🤖 AI Insights:**\n"
-         for insight in analysis.insights[:3]:
-             response += f"- {insight}\n"
-         response += "\n"
-
-     # Next steps
-     if analysis.recommendations:
382
- response += "**🎯 Recommendations:**\n"
383
- for rec in analysis.recommendations[:2]:
384
- response += f"- {rec}\n"
385
- response += "\n"
386
-
387
- response += "✅ **Ready for execution planning...**"
388
- return response
389
-
390
-
391
- def create_no_files_response(state: Dict[str, Any]) -> Dict[str, Any]:
392
- """Handle case when no audio files are provided."""
393
- messages = state.get("messages", [])
394
- messages.append(AIMessage(content="❌ **No Audio Files**: Please provide audio files to transcribe."))
395
-
396
- return {
397
- "scripts": {},
398
- "errors": ["No audio files provided for transcription"],
399
- "messages": messages
400
- }
401
-
402
-
403
- def create_no_tools_response(state: Dict[str, Any]) -> Dict[str, Any]:
404
- """Handle case when no transcription tools are available."""
405
- messages = state.get("messages", [])
406
- messages.append(AIMessage(content="❌ **Transcription Tools Unavailable**: Cannot proceed without transcription capabilities."))
407
-
408
- return {
409
- "scripts": {},
410
- "errors": ["No transcription tools available"],
411
- "messages": messages
412
- }
 
src/nodes/validator.py CHANGED
@@ -1,272 +1,30 @@
-"""
-Validator node for intelligent LLM-powered validation of processing results.
-"""
-
-from typing import Dict, Any, List
-from langchain_core.messages import AIMessage, SystemMessage, HumanMessage
 from langchain_openai import ChatOpenAI
-from langchain_core.output_parsers import PydanticOutputParser
-from pydantic import BaseModel, Field
-import datetime
-from .prompts import VALIDATOR_SYSTEM_PROMPT, VALIDATOR_USER_PROMPT_TEMPLATE
-
-class ValidationResults(BaseModel):
-    """Structured validation results from LLM assessment."""
-
-    overall_status: str = Field(description="Overall status: 'success', 'partial_success', or 'failed'")
-    completion_rate: float = Field(description="Estimated completion rate as a decimal (0.0 to 1.0)")
-    quality_score: float = Field(description="Quality assessment score (0.0 to 1.0)")
-    critical_errors: List[str] = Field(description="List of critical errors that need addressing")
-    warnings: List[str] = Field(description="List of warnings or minor issues")
-    recommendations: List[str] = Field(description="Specific recommendations for improvement")
-    needs_reprocessing: bool = Field(description="Whether reprocessing is recommended")
-    success_indicators: List[str] = Field(description="What went well in the processing")
-
-
-async def validator_node(state: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Intelligently validate processing results using LLM assessment.
-    """
-
-    processed_files = state.get("processed_files", {})
-    errors = state.get("errors", [])
-    completed_steps = state.get("completed_steps", [])
-    execution_plan = state.get("execution_plan", [])
-    user_request = state.get("user_request", "")
-    scripts = state.get("scripts", {})
-    current_needs_reprocessing = state.get("needs_reprocessing", False)
-
-    # Use LLM to perform intelligent validation
-    validation_results = await perform_llm_validation(
-        user_request=user_request,
-        processed_files=processed_files,
-        errors=errors,
-        completed_steps=completed_steps,
-        execution_plan=execution_plan,
-        scripts=scripts,
-        current_needs_reprocessing=current_needs_reprocessing
-    )
-
-    # Create validation summary
-    validation_summary = create_validation_summary(validation_results)
-    messages = state.get("messages", [])
-    messages.append(AIMessage(content=validation_summary))
-
-    return {
-        "needs_reprocessing": validation_results.needs_reprocessing,
-        "processing_metadata": {
-            "validation_results": validation_results.model_dump(),
-            "validation_timestamp": get_current_timestamp()
-        },
-        "messages": messages
-    }
-
-
-async def perform_llm_validation(
-    user_request: str,
-    processed_files: Dict[str, str],
-    errors: List[str],
-    completed_steps: List[str],
-    execution_plan: List[Dict[str, Any]],
-    scripts: Dict[str, Any],
-    current_needs_reprocessing: bool
-) -> ValidationResults:
-    """Use LLM to intelligently validate processing results."""
-
-    # Create system message for validation
-    system_message = create_validation_system_message()
-
-    # Create user message with processing context
-    user_message = create_validation_context_message(
-        user_request, processed_files, errors, completed_steps,
-        execution_plan, scripts, current_needs_reprocessing
-    )
-
-    # Set up LLM with structured output
-    llm = ChatOpenAI(model="gpt-4o", temperature=0.1)
-    parser = PydanticOutputParser(pydantic_object=ValidationResults)
-
-    prompt_messages = [
-        system_message,
-        HumanMessage(content=user_message),
-        HumanMessage(content=parser.get_format_instructions())
-    ]
-
-    # Get LLM assessment
-    try:
-        response = await llm.ainvoke(prompt_messages)
-        validation_results = parser.parse(response.content)
-        return validation_results
-    except Exception as e:
-        # Fallback validation if LLM fails
-        return create_fallback_validation(processed_files, errors, completed_steps, execution_plan)
-
-
-def create_validation_system_message() -> SystemMessage:
-    """Create system message for LLM validation."""
-    return SystemMessage(content=VALIDATOR_SYSTEM_PROMPT)
-
-
-def create_validation_context_message(
-    user_request: str,
-    processed_files: Dict[str, str],
-    errors: List[str],
-    completed_steps: List[str],
-    execution_plan: List[Dict[str, Any]],
-    scripts: Dict[str, Any],
-    current_needs_reprocessing: bool
-) -> str:
-    """Create context message with all processing information."""
-
-    plan_str = "\n".join([
-        f"{i}. {step.get('tool', 'unknown')}: {step.get('description', 'No description')}"
-        for i, step in enumerate(execution_plan, 1)
+from langchain_core.prompts import ChatPromptTemplate
+from src.state import AgentState
+from operator import itemgetter
+from langchain_core.runnables import RunnableParallel
+
+def validator_node(state: AgentState) -> AgentState:
+    llm = ChatOpenAI(model="gpt-4.1")
+    llm = llm.with_structured_output(AgentState)
+
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", "You are validator that checks the steps taken and output if something is wrong. Give feedback to flow."),
+        ("user", "Current state: {state}")
     ])
 
-    completed_steps_str = "\n".join([f"- {step}" for step in completed_steps])
-
-    if processed_files:
-        processing_results_str = f"Successfully processed {len(processed_files)} file(s)\n"
-        for original, processed in processed_files.items():
-            filename = original.split('/')[-1] if '/' in original else original
-            processed_name = processed.split('/')[-1] if '/' in processed else processed
-            processing_results_str += f" • {filename} → {processed_name}\n"
-    else:
-        processing_results_str = "No files were successfully processed\n"
-
-    if scripts:
-        transcript_analysis_str = ""
-        for file_url, script_data in scripts.items():
-            filename = file_url.split('/')[-1] if '/' in file_url else file_url
-            transcript = script_data.get("transcript", "")
-            filler_count = len(script_data.get("filler_words", []))
-            transcript_analysis_str += f"- {filename}: {len(transcript)} chars, {filler_count} filler words detected\n"
-    else:
-        transcript_analysis_str = "No transcript data available.\n"
-
-    errors_str = "\n".join([f"- {error}" for error in errors]) if errors else "None"
-
-    reprocessing_note_str = "\n**Note:** This is already a reprocessing attempt.\n" if current_needs_reprocessing else ""
-
-    return VALIDATOR_USER_PROMPT_TEMPLATE.format(
-        user_request=user_request,
-        plan_steps=len(execution_plan),
-        execution_plan=plan_str,
-        completed_steps_count=len(completed_steps),
-        completed_steps=completed_steps_str,
-        processing_results=processing_results_str,
-        transcript_analysis=transcript_analysis_str,
-        error_count=len(errors),
-        errors=errors_str,
-        reprocessing_note=reprocessing_note_str
-    )
-
-
-def create_fallback_validation(
-    processed_files: Dict[str, str],
-    errors: List[str],
-    completed_steps: List[str],
-    execution_plan: List[Dict[str, Any]]
-) -> ValidationResults:
-    """Create fallback validation if LLM assessment fails."""
-
-    total_steps = len(execution_plan) if execution_plan else 1
-    successful_steps = len([step for step in completed_steps if step.startswith("✅")])
-    completion_rate = successful_steps / total_steps if total_steps > 0 else 0
-
-    has_processed_files = len(processed_files) > 0
-    has_critical_errors = any("failed" in error.lower() or "error" in error.lower() for error in errors)
-
-    if has_processed_files and completion_rate >= 0.7:
-        overall_status = "success"
-        quality_score = 0.8
-    elif has_processed_files and completion_rate >= 0.3:
-        overall_status = "partial_success"
-        quality_score = 0.5
-    else:
-        overall_status = "failed"
-        quality_score = 0.2
-
-    return ValidationResults(
-        overall_status=overall_status,
-        completion_rate=completion_rate,
-        quality_score=quality_score,
-        critical_errors=errors if has_critical_errors else [],
-        warnings=errors if not has_critical_errors else [],
-        recommendations=[
-            "Check processing logs for detailed error information",
-            "Verify audio file formats and accessibility",
-            "Consider simplifying the processing request"
-        ],
-        needs_reprocessing=has_critical_errors and completion_rate > 0.1 and completion_rate < 0.8,
-        success_indicators=["Some processing steps completed"] if completed_steps else []
+    chain = (
+        RunnableParallel({
+            "state": itemgetter("state")
+        })
+        | prompt
+        | llm
     )
 
-
-def create_validation_summary(validation_results: ValidationResults) -> str:
-    """Create a human-readable summary of validation results."""
-
-    # Status emoji mapping
-    status_emoji = {
-        "success": "✅",
-        "partial_success": "⚠️",
-        "failed": "❌"
-    }
-
-    emoji = status_emoji.get(validation_results.overall_status, "❓")
-
-    summary = f"{emoji} **Intelligent Validation Results**\n\n"
-
-    # Overall assessment
-    summary += f"**Overall Status**: {validation_results.overall_status.replace('_', ' ').title()}\n"
-    summary += f"**Completion Rate**: {validation_results.completion_rate:.1%}\n"
-    summary += f"**Quality Score**: {validation_results.quality_score:.1%}\n\n"
-
-    # Success indicators
-    if validation_results.success_indicators:
-        summary += "**✨ What Went Well:**\n"
-        for indicator in validation_results.success_indicators:
-            summary += f"- {indicator}\n"
-        summary += "\n"
-
-    # Critical errors
-    if validation_results.critical_errors:
-        summary += f"**🚨 Critical Issues ({len(validation_results.critical_errors)}):**\n"
-        for error in validation_results.critical_errors[:3]:
-            summary += f"- {error}\n"
-        if len(validation_results.critical_errors) > 3:
-            summary += f"- ... and {len(validation_results.critical_errors) - 3} more\n"
-        summary += "\n"
-
-    # Warnings
-    if validation_results.warnings:
-        summary += f"**⚠️ Warnings ({len(validation_results.warnings)}):**\n"
-        for warning in validation_results.warnings[:2]:
-            summary += f"- {warning}\n"
-        if len(validation_results.warnings) > 2:
-            summary += f"- ... and {len(validation_results.warnings) - 2} more\n"
-        summary += "\n"
-
-    # Recommendations
-    if validation_results.recommendations:
-        summary += "**🎯 Recommendations:**\n"
-        for rec in validation_results.recommendations[:4]:
-            summary += f"- {rec}\n"
-        if len(validation_results.recommendations) > 4:
-            summary += f"- ... and {len(validation_results.recommendations) - 4} more\n"
-        summary += "\n"
-
-    # Reprocessing decision
-    if validation_results.needs_reprocessing:
-        summary += "🔄 **Reprocessing Recommended**: The LLM assessment suggests reprocessing could improve results."
+    return chain.invoke({"state": state})
+
+def validator_node_router(state: AgentState) -> str:
+    if state.validator_feedback == "":
+        return "chat"
     else:
-        if validation_results.overall_status == "success":
-            summary += "🎉 **Processing Complete**: High-quality results achieved!"
-        else:
-            summary += "⏹️ **Processing Complete**: Reprocessing not recommended based on current assessment."
-
-    return summary
-
-def get_current_timestamp() -> str:
-    """Get current timestamp for metadata."""
-    return datetime.datetime.now().isoformat()
+        return "planner"
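
Note: for orientation, here is a minimal sketch (not part of this commit) of how the simplified validator_node and validator_node_router could be wired into a LangGraph graph. The "planner" and "chat" node names are inferred from the router's return values; planner_node and chat_node below are hypothetical stubs standing in for the repo's real nodes.

from langgraph.graph import StateGraph, END

from src.state import AgentState
from src.nodes.validator import validator_node, validator_node_router


def planner_node(state: AgentState) -> AgentState:
    # Hypothetical stub; the real planner lives elsewhere in src/nodes.
    return state


def chat_node(state: AgentState) -> AgentState:
    # Hypothetical stub; the real chat node lives elsewhere in src/nodes.
    return state


builder = StateGraph(AgentState)
builder.add_node("planner", planner_node)
builder.add_node("validator", validator_node)
builder.add_node("chat", chat_node)
builder.set_entry_point("planner")
builder.add_edge("planner", "validator")
# Empty validator_feedback means the run passed; otherwise loop back and replan.
builder.add_conditional_edges("validator", validator_node_router, {"chat": "chat", "planner": "planner"})
builder.add_edge("chat", END)
graph = builder.compile()

Binding the LLM with with_structured_output(AgentState) makes the validator emit a full state object rather than free text, which is what lets its output merge straight back into the graph state.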
 
src/state.py CHANGED
@@ -1,46 +1,11 @@
-"""
-Graph state definition for the audio processing agent.
-"""
+from pydantic import BaseModel, Field
 
-from typing import List, Dict, Any, Optional, Annotated
-from langchain_core.messages import BaseMessage
-from langgraph.graph.message import add_messages
-
-
-class AudioProcessingState:
-    """State schema for the audio processing graph."""
-
-    # Chat history
-    messages: Annotated[List[BaseMessage], add_messages]
-
-    # Audio files provided by user
-    audio_files: List[str]  # URLs or paths to audio files
-
-    # User's processing request
-    user_request: str
-
-    # Processing type determined by router
-    processing_type: str  # "chat", "audio_processing", "dialogue_generation"
-
-    # Generated scripts with timestamps
-    scripts: Dict[str, Any]  # {file_url: {transcript: str, timestamps: List}}
-
-    # Execution plan created by planner
-    execution_plan: List[Dict[str, Any]]  # List of tool calls with parameters
-
-    # Processing results
-    processed_files: Dict[str, str]  # {original_url: processed_url}
-
-    # Processing steps completed
-    completed_steps: List[str]
-
-    # Final output
-    final_audio_url: Optional[str]
-    final_response: str
-
-    # Error handling
-    errors: List[str]
-    needs_reprocessing: bool
-
-    # Metadata
-    processing_metadata: Dict[str, Any]
+class AgentState(BaseModel):
+    steps_details: list[str] = Field(description="The steps that have been completed.", default=[])
+    user_input: str = Field(description="The user's input.", default="")
+    plan: str = Field(description="The plan for the user.", default="")
+    final_response: str = Field(description="The final response to the user.", default="")
+    requires_processing: bool = Field(description="Whether the response requires detailed audio processing.", default=False)
+    validator_feedback: str = Field(description="The feedback from the validator. Indicates steps must be taken again.", default="")
+    input_audio_files: list[str] = Field(description="The input audio files.", default=[])
+    output_audio_files: list[str] = Field(description="The output audio files.", default=[])
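
A small usage sketch (again an assumption, not from the commit): every field has a default, so a state can be built from the user's input alone, and the empty-string default on validator_feedback is what routes a validated run to "chat".

from src.state import AgentState

state = AgentState(
    user_input="Remove the filler words from intro.wav",  # example values
    input_audio_files=["intro.wav"],
)
assert state.validator_feedback == ""  # fresh state -> validator_node_router returns "chat"

state.validator_feedback = "Step 2 produced no output file; replan."
# validator_node_router(state) would now return "planner" for another pass.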