Kheem Dharmani commited on
Commit
32f259e
·
0 Parent(s):

Initial commit: Agentic Multiwriter

Browse files
Files changed (41) hide show
  1. .env.example +23 -0
  2. .gitignore +51 -0
  3. Dockerfile +15 -0
  4. README.md +93 -0
  5. app.py +176 -0
  6. docker-compose.yml +0 -0
  7. examples/example_runs.md +31 -0
  8. examples/sample_config_hybrid.yaml +5 -0
  9. examples/sample_config_local.yaml +5 -0
  10. pyproject.toml +24 -0
  11. requirements.txt +12 -0
  12. scripts/run_cli.sh +6 -0
  13. scripts/run_local.sh +6 -0
  14. src/agentic_multiwriter/__init__.py +10 -0
  15. src/agentic_multiwriter/agents/__init__.py +13 -0
  16. src/agentic_multiwriter/agents/aggregator.py +44 -0
  17. src/agentic_multiwriter/agents/critic.py +44 -0
  18. src/agentic_multiwriter/agents/formatter.py +48 -0
  19. src/agentic_multiwriter/agents/researcher.py +35 -0
  20. src/agentic_multiwriter/agents/writer.py +38 -0
  21. src/agentic_multiwriter/api/__init__.py +3 -0
  22. src/agentic_multiwriter/api/server.py +44 -0
  23. src/agentic_multiwriter/cli/__init__.py +3 -0
  24. src/agentic_multiwriter/cli/main.py +51 -0
  25. src/agentic_multiwriter/config.py +37 -0
  26. src/agentic_multiwriter/graph/__init__.py +3 -0
  27. src/agentic_multiwriter/graph/router.py +27 -0
  28. src/agentic_multiwriter/graph/workflow.py +36 -0
  29. src/agentic_multiwriter/models/__init__.py +13 -0
  30. src/agentic_multiwriter/models/llm_client.py +77 -0
  31. src/agentic_multiwriter/models/prompts.py +47 -0
  32. src/agentic_multiwriter/state.py +39 -0
  33. src/agentic_multiwriter/tools/__init__.py +5 -0
  34. src/agentic_multiwriter/tools/logging_utils.py +38 -0
  35. src/agentic_multiwriter/tools/parsing.py +34 -0
  36. src/agentic_multiwriter/tools/web_search.py +41 -0
  37. tests/__init__.py +3 -0
  38. tests/test_agents.py +18 -0
  39. tests/test_state.py +16 -0
  40. tests/test_tools.py +10 -0
  41. tests/test_workflow.py +23 -0
.env.example ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Agentic Multiwriter configuration
2
+
3
+ # LLM provider:
4
+ # - "ollama" → uses local Ollama (default, recommended to start)
5
+ # - "openai" → uses OpenAI Chat models (requires OPENAI_API_KEY)
6
+ AMW_LLM_PROVIDER=ollama
7
+
8
+ # LLM model name:
9
+ # - For Ollama: e.g. "llama3", "llama3.1", "qwen2.5"
10
+ # - For OpenAI: e.g. "gpt-4o-mini", "gpt-4.1"
11
+ AMW_LLM_MODEL=llama3
12
+
13
+ # Temperature:
14
+ # - 0.0–0.3: more deterministic
15
+ # - 0.4–0.7: more creative
16
+ AMW_TEMPERATURE=0.4
17
+
18
+ # Max number of search results to pull from DuckDuckGo.
19
+ AMW_MAX_SEARCH_RESULTS=8
20
+
21
+ # Required only if AMW_LLM_PROVIDER=openai
22
+ # (Leave blank in example file so you don't commit real keys.)
23
+ OPENAI_API_KEY=
.gitignore ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python bytecode / caches
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Virtual environments
7
+ .venv/
8
+ venv/
9
+ env/
10
+ ENV/
11
+ .venv*/
12
+ .env*/
13
+
14
+ # Build / packaging
15
+ build/
16
+ dist/
17
+ *.egg-info/
18
+ .eggs/
19
+
20
+ # Test / coverage
21
+ .pytest_cache/
22
+ .coverage
23
+ htmlcov/
24
+ .tox/
25
+ .mypy_cache/
26
+ .dmypy.json
27
+
28
+ # IDE / editor
29
+ .vscode/
30
+ .idea/
31
+ *.swp
32
+ *.swo
33
+
34
+ # OS-specific
35
+ .DS_Store
36
+ Thumbs.db
37
+
38
+ # Logs
39
+ logs/
40
+ *.log
41
+
42
+ # Environment and secrets
43
+ .env
44
+ .env.local
45
+ .env.*.local
46
+
47
+ # Uvicorn / FastAPI temp files (just in case)
48
+ *.pid
49
+
50
# Docker
# NOTE: duplicate "*.pid" entry removed — already ignored under the
# "Uvicorn / FastAPI temp files" section above.
Dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /app
4
+
5
+ ENV PYTHONUNBUFFERED=1
6
+
7
+ COPY pyproject.toml ./
8
+ RUN pip install --upgrade pip && \
9
+ pip install -e .
10
+
11
+ COPY src ./src
12
+
13
+ EXPOSE 8000
14
+
15
+ CMD ["uvicorn", "agentic_multiwriter.api.server:app", "--host", "0.0.0.0", "--port", "8000"]
README.md ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Agentic Multiwriter
2
+
3
+ A multi-agent **research & writing** system built with **LangGraph**, **LangChain**, and local/remote LLMs.
4
+
5
+ Pipeline:
6
+
7
+ 1. **Researcher** – web search on a topic
8
+ 2. **Aggregator** – cleans & deduplicates snippets
9
+ 3. **Writer** – drafts content based on research
10
+ 4. **Critic** – improves clarity & completeness
11
+ 5. **Formatter** – outputs a final, mode-specific format (blog / research_summary / linkedin_post)
12
+
13
+ Supports:
14
+
15
+ - Local LLM via **Ollama** (e.g. `llama3`)
16
+ - Optional OpenAI models (if configured)
17
+ - REST API with FastAPI
18
+ - CLI entrypoint
19
+
20
+ ---
21
+
22
+ ## Quickstart
23
+
24
+ ````bash
25
+ # From repo root
26
+ python -m venv .venv
27
+ source .venv/bin/activate
28
+
29
+ pip install -e .
30
+
31
+ # Run CLI
32
+ PYTHONPATH=src python -m agentic_multiwriter.cli.main \
33
+ --topic "Future of AI agents in healthcare" \
34
+ --mode blog
35
+
36
+ # Run API
37
+ PYTHONPATH=src uvicorn agentic_multiwriter.api.server:app --reload
````

Example request:

```bash
curl -X POST "http://127.0.0.1:8000/generate" \
  -H "Content-Type: application/json" \
  -d '{"topic": "Future of AI agents in healthcare", "mode": "blog"}'
```

## Configuration

Environment variables:

- `AMW_LLM_PROVIDER` – `"ollama"` or `"openai"` (default: `"ollama"`)
- `AMW_LLM_MODEL` – e.g. `"llama3"` (default) or `"gpt-4o-mini"`
- `AMW_TEMPERATURE` – float, default `0.4`
- `OPENAI_API_KEY` – required if using the `openai` provider

You can also see example configs in `examples/sample_config_local.yaml` and `examples/sample_config_hybrid.yaml`.

## Development

Run tests:

```bash
PYTHONPATH=src pytest -q
```

## License

MIT (or whatever you prefer).
70
+ ---
71
+
72
+ ## `docker-compose.yml`
73
+
74
+ ```yaml
75
+ version: "3.9"
76
+
77
+ services:
78
+ api:
79
+ build: .
80
+ container_name: agentic-multiwriter-api
81
+ environment:
82
+ - AMW_LLM_PROVIDER=${AMW_LLM_PROVIDER:-ollama}
83
+ - AMW_LLM_MODEL=${AMW_LLM_MODEL:-llama3}
84
+ - AMW_TEMPERATURE=${AMW_TEMPERATURE:-0.4}
85
+ - OPENAI_API_KEY=${OPENAI_API_KEY:-}
86
+ ports:
87
+ - "8000:8000"
88
+ command: ["uvicorn", "agentic_multiwriter.api.server:app", "--host", "0.0.0.0", "--port", "8000"]
89
+ working_dir: /app
90
+ volumes:
91
+ - ./src:/app/src
92
+ - ./examples:/app/examples
93
+ ````
app.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import textwrap
5
+ from typing import List
6
+
7
+ import gradio as gr
8
+
9
+ from agentic_multiwriter.state import AgentState, ResearchSnippet
10
+ from agentic_multiwriter.agents import (
11
+ researcher_node,
12
+ aggregator_node,
13
+ writer_node,
14
+ critic_node,
15
+ formatter_node,
16
+ )
17
+ from agentic_multiwriter.tools import get_logger
18
+
19
+ logger = get_logger()
20
+
21
+
22
+ def _format_sources(snippets: List[ResearchSnippet]) -> str:
23
+ if not snippets:
24
+ return "No web sources were retrieved."
25
+
26
+ lines = []
27
+ for s in snippets:
28
+ title = s["title"] or s["url"]
29
+ url = s["url"]
30
+ snippet = s["snippet"]
31
+ if url:
32
+ lines.append(f"- [{title}]({url})\n \n > {snippet}")
33
+ else:
34
+ lines.append(f"- {title}\n \n > {snippet}")
35
+ return "\n\n".join(lines)
36
+
37
+
38
def generate(topic: str, mode: str, progress=gr.Progress()):
    """Gradio callback to run the pipeline step-by-step with progress.

    Runs the five agent nodes in sequence (researcher -> aggregator ->
    writer -> critic -> formatter), threading a single AgentState dict
    through them, and returns a 5-tuple matching the UI output components:
    (final_output, initial_draft, revised_draft, sources_md, meta_text).
    """
    topic = topic.strip()
    if not topic:
        # Same tuple arity as the success path so Gradio can map each value
        # to its output component.
        return (
            "Please enter a topic.",
            "",
            "",
            "",
            "",
        )

    # Initial state
    state: AgentState = {
        "topic": topic,
        "mode": mode,
        "research_snippets": [],
        "outline": [],
        "draft": "",
        "revised_draft": "",
        "final_output": "",
        "meta": {},
    }

    # 1. Research
    progress(0.1, "Researching the web...")
    logger.info("UI: starting researcher_node")
    state = researcher_node(state)

    # 2. Aggregate
    progress(0.25, "Aggregating and cleaning snippets...")
    logger.info("UI: starting aggregator_node")
    state = aggregator_node(state)

    # 3. Write draft
    progress(0.5, "Writing first draft...")
    logger.info("UI: starting writer_node")
    state = writer_node(state)
    initial_draft = state.get("draft", "") or ""

    # 4. Critic / edit
    progress(0.7, "Reviewing and improving draft...")
    logger.info("UI: starting critic_node")
    state = critic_node(state)
    # Fall back to the writer's draft if the critic produced nothing.
    revised_draft = state.get("revised_draft", "") or initial_draft

    # 5. Format final output
    progress(0.9, f"Formatting final output as {mode}...")
    logger.info("UI: starting formatter_node")
    state = formatter_node(state)
    final_output = state.get("final_output", "") or revised_draft

    # 6. Prepare outline, meta, sources
    outline = state.get("outline", []) or []
    meta = state.get("meta", {}) or {}
    snippets = state.get("research_snippets", []) or []

    outline_text = "\n".join(f"- {item}" for item in outline)
    meta_text = json.dumps(meta, indent=2)
    sources_md = _format_sources(snippets)

    progress(1.0, "Done.")
    # NOTE(review): outline_text is computed but never returned — presumably
    # intended for a future UI tab; confirm or remove.
    return final_output, initial_draft, revised_draft, sources_md, meta_text
101
+
102
+
103
def build_interface() -> gr.Blocks:
    """Construct and return the Gradio Blocks UI wired to generate()."""
    with gr.Blocks(title="Agentic Multiwriter") as demo:
        gr.Markdown(
            """
            # 🧠 Agentic Multiwriter

            Multi-agent research & writing system built with **LangGraph**.

            1. Researches your topic on the web
            2. Aggregates and cleans snippets
            3. Writes a draft
            4. Critiques and improves it
            5. Formats it as a blog, research summary, or LinkedIn-style post
            """
        )

        with gr.Row():
            topic_input = gr.Textbox(
                label="Topic",
                placeholder="e.g. Future of agentic AI",
                lines=2,
            )
            mode_input = gr.Radio(
                choices=["blog", "research_summary", "linkedin_post"],
                value="blog",
                label="Output mode",
            )
        run_button = gr.Button("Generate", variant="primary")

        # One tab per pipeline artifact so each stage's output is inspectable.
        with gr.Tab("Final Output"):
            final_output_box = gr.Markdown(label="Final Output")

        with gr.Tab("Initial Draft (Writer)"):
            initial_draft_box = gr.Markdown(label="Initial Draft")

        with gr.Tab("Revised Draft (Critic)"):
            revised_draft_box = gr.Markdown(label="Revised Draft")

        with gr.Tab("Sources"):
            sources_box = gr.Markdown(label="Web Sources Used")

        with gr.Tab("Meta"):
            meta_box = gr.Textbox(
                label="Meta (timings, counts, etc.)", lines=10)

        # Output order must match generate()'s 5-tuple return order.
        run_button.click(
            fn=generate,
            inputs=[topic_input, mode_input],
            outputs=[
                final_output_box,
                initial_draft_box,
                revised_draft_box,
                sources_box,
                meta_box,
            ],
        )

        gr.Markdown(
            textwrap.dedent(
                """
                ---
                ⚠️ **Note**: First run may take longer while the model loads or if you are
                using a local model. For best performance on Hugging Face Spaces, set
                `AMW_LLM_PROVIDER=openai` and use a small hosted model (e.g. `gpt-4o-mini`).
                """
            )
        )

    return demo
172
+
173
+
174
if __name__ == "__main__":
    # Bind to all interfaces on the default Gradio port (7860) so the app is
    # reachable inside containers / Hugging Face Spaces.
    demo = build_interface()
    demo.launch(server_name="0.0.0.0", server_port=7860)
docker-compose.yml ADDED
File without changes
examples/example_runs.md ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Example Runs
2
+
3
+ ## Blog mode (local, Ollama llama3)
4
+
5
+ ```bash
6
+ PYTHONPATH=src python -m agentic_multiwriter.cli.main \
7
+ --topic "Future of AI agents in healthcare" \
8
+ --mode blog
```

Outputs (truncated):

- Outline with 4–6 headings
- Draft text ~800–1200 words
- Final formatted blog with title, intro, body, conclusion

## Research summary mode

```bash
PYTHONPATH=src python -m agentic_multiwriter.cli.main \
  --topic "Randomized controlled trials using AI in radiology" \
  --mode research_summary
```

Outputs:

- Bullet-pointed key findings
- Short narrative summary
- Suggestions for further reading
examples/sample_config_hybrid.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ llm_provider: "openai"
2
+ llm_model: "gpt-4o-mini"
3
+ temperature: 0.3
4
+ max_search_results: 10
5
+ mode: "research_summary"
examples/sample_config_local.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ llm_provider: "ollama"
2
+ llm_model: "llama3"
3
+ temperature: 0.4
4
+ max_search_results: 8
5
+ mode: "blog"
pyproject.toml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "agentic-multiwriter"
3
+ version = "0.1.0"
4
+ description = "Multi-agent research & writing system using LangGraph, LangChain, and LLMs."
5
+ authors = [{name = "Your Name"}]
6
+ requires-python = ">=3.10"
7
+
8
+ dependencies = [
9
+ "fastapi>=0.115.0",
10
+ "uvicorn[standard]>=0.30.0",
11
+ "pydantic>=2.8.0",
12
+ "langchain>=0.3.0",
13
+ "langchain-community>=0.3.0",
14
+ "langchain-ollama>=0.2.0",
15
+ "langchain-openai>=0.2.0",
16
+ "langgraph>=0.2.0",
17
+ "duckduckgo-search>=6.2.10",
18
+ "python-dotenv>=1.0.1",
19
+ "httpx>=0.27.0",
20
+ ]
21
+
22
+ [build-system]
23
+ requires = ["setuptools>=61.0"]
24
+ build-backend = "setuptools.build_meta"
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi>=0.115.0
2
+ uvicorn[standard]>=0.30.0
3
+ pydantic>=2.8.0
4
+ langchain>=0.3.0
5
+ langchain-community>=0.3.0
6
+ langchain-ollama>=0.2.0
7
+ langchain-openai>=0.2.0
8
+ langgraph>=0.2.0
9
+ ddgs>=1.8.0
10
+ python-dotenv>=1.0.1
11
+ httpx>=0.27.0
12
+ gradio>=5.0.0
scripts/run_cli.sh ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Run the Agentic Multiwriter CLI from the repo root, forwarding all args.
set -e

# Make the src/ layout importable without installing the package.
export PYTHONPATH=src:${PYTHONPATH}

python -m agentic_multiwriter.cli.main "$@"
scripts/run_local.sh ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Launch the FastAPI server in dev mode (auto-reload) from the repo root.
set -e

# Make the src/ layout importable without installing the package.
export PYTHONPATH=src:${PYTHONPATH}

uvicorn agentic_multiwriter.api.server:app --reload --host 0.0.0.0 --port 8000
src/agentic_multiwriter/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agentic Multiwriter package.
3
+
4
+ Multi-agent research & writing pipeline using LangGraph and LLMs.
5
+ """
6
+
7
+ from .config import Settings
8
+ from .graph.router import run_pipeline
9
+
10
+ __all__ = ["Settings", "run_pipeline"]
src/agentic_multiwriter/agents/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .researcher import researcher_node
2
+ from .aggregator import aggregator_node
3
+ from .writer import writer_node
4
+ from .critic import critic_node
5
+ from .formatter import formatter_node
6
+
7
+ __all__ = [
8
+ "researcher_node",
9
+ "aggregator_node",
10
+ "writer_node",
11
+ "critic_node",
12
+ "formatter_node",
13
+ ]
src/agentic_multiwriter/agents/aggregator.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import time
4
+ from typing import List
5
+
6
+ from ..state import AgentState, ResearchSnippet
7
+ from ..tools import normalize_snippets, get_logger, log_state_summary
8
+ from ..config import settings
9
+
10
+ logger = get_logger()
11
+
12
+
13
+ def _rank_snippets(snippets: List[ResearchSnippet]) -> List[ResearchSnippet]:
14
+ """
15
+ Very simple ranking: prefer snippets that mention the topic more often (if present),
16
+ otherwise keep original order.
17
+ """
18
+ return snippets
19
+
20
+
21
def aggregator_node(state: AgentState) -> AgentState:
    """Normalize, rank, and trim the research snippets held in *state*.

    Records timing and the kept-snippet count in state["meta"].
    """
    started = time.time()

    raw = state.get("research_snippets", []) or []
    logger.info("Aggregator: normalizing %d snippets...", len(raw))

    cleaned = _rank_snippets(normalize_snippets(raw))

    # Cap how many snippets reach the LLM context (never fewer than 3).
    keep = cleaned[: max(3, settings.max_search_results)]

    meta = state.get("meta", {}) or {}
    meta["aggregator_time_sec"] = round(time.time() - started, 3)
    meta["aggregated_snippets_count"] = len(keep)

    updated: AgentState = {**state, "research_snippets": keep, "meta": meta}
    log_state_summary(updated, prefix="Aggregator")
    return updated
src/agentic_multiwriter/agents/critic.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import time
4
+
5
+ from ..state import AgentState
6
+ from ..models import LLMClient, CRITIC_PROMPT_TEMPLATE
7
+ from ..tools import get_logger, log_state_summary
8
+
9
+ logger = get_logger()
10
+
11
+
12
def critic_node(state: AgentState) -> AgentState:
    """Ask the LLM to edit the current draft for clarity and grounding.

    Raises ValueError when state carries no draft. Stores the result in
    state["revised_draft"] and the elapsed time in state["meta"].
    """
    started = time.time()
    topic = state.get("topic", "")
    snippets = state.get("research_snippets", []) or []
    draft = state.get("draft", "")

    if not draft.strip():
        raise ValueError("Draft is empty in state for critic_node.")

    snippet_block = "\n\n".join(
        f"- {s['snippet']} (source: {s['title']} – {s['url']})" for s in snippets
    )

    client = LLMClient()

    logger.info("Critic: improving draft...")
    revised = client.generate(
        system_prompt="You are a thoughtful editor focused on clarity, coherence, and factual grounding.",
        user_prompt=CRITIC_PROMPT_TEMPLATE.format(
            topic=topic, snippets=snippet_block, draft=draft
        ),
    )

    meta = state.get("meta", {}) or {}
    meta["critic_time_sec"] = round(time.time() - started, 3)

    updated: AgentState = {**state, "revised_draft": revised, "meta": meta}
    log_state_summary(updated, prefix="Critic")
    return updated
src/agentic_multiwriter/agents/formatter.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import time
4
+
5
+ from ..state import AgentState
6
+ from ..models import LLMClient, FORMATTER_PROMPT_TEMPLATE
7
+ from ..tools import get_logger, log_state_summary
8
+
9
+ logger = get_logger()
10
+
11
+
12
def formatter_node(state: AgentState) -> AgentState:
    """Produce the final, mode-specific output plus a naive outline.

    Prefers the critic's revised draft, falling back to the writer's draft;
    raises ValueError when neither exists.
    """
    started = time.time()
    topic = state.get("topic", "")
    mode = state.get("mode", "blog")
    source_text = state.get("revised_draft") or state.get("draft") or ""

    if not source_text.strip():
        raise ValueError("No draft or revised draft available for formatter_node.")

    client = LLMClient()

    logger.info("Formatter: producing final output in mode '%s'...", mode)
    final_output = client.generate(
        system_prompt="You are an expert formatter who adapts content to the requested mode.",
        user_prompt=FORMATTER_PROMPT_TEMPLATE.format(
            mode=mode, topic=topic, draft=source_text
        ),
    )

    # Naive outline: markdown heading lines, plus lines ending with a colon.
    outline = [
        ln.strip().lstrip("#").strip()
        for ln in final_output.splitlines()
        if ln.strip().startswith("#") or ln.strip().endswith(":")
    ]

    meta = state.get("meta", {}) or {}
    meta["formatter_time_sec"] = round(time.time() - started, 3)

    updated: AgentState = {
        **state,
        "final_output": final_output,
        "outline": outline or state.get("outline", []) or [],
        "meta": meta,
    }
    log_state_summary(updated, prefix="Formatter")
    return updated
src/agentic_multiwriter/agents/researcher.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import time
4
+
5
+ from ..state import AgentState
6
+ from ..tools import web_search, log_state_summary, get_logger
7
+
8
+ logger = get_logger()
9
+
10
+
11
def researcher_node(state: AgentState) -> AgentState:
    """Run a web search for the state's topic and append the results.

    Raises ValueError when the state carries no topic. Appends the new
    snippets to any already present and records timing/count in meta.
    """
    started = time.time()
    topic = state.get("topic", "")
    if not topic:
        raise ValueError("Topic must be provided in state for researcher_node.")

    logger.info("Researcher: searching web for topic '%s'...", topic)

    results = web_search(f"key facts and recent information about {topic}")
    combined = (state.get("research_snippets", []) or []) + results

    meta = state.get("meta", {}) or {}
    meta["researcher_time_sec"] = round(time.time() - started, 3)
    meta["research_snippets_count"] = len(combined)

    updated: AgentState = {**state, "research_snippets": combined, "meta": meta}
    log_state_summary(updated, prefix="Researcher")
    return updated
src/agentic_multiwriter/agents/writer.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import time
4
+
5
+ from ..state import AgentState
6
+ from ..models import LLMClient, WRITER_PROMPT_TEMPLATE
7
+ from ..tools import get_logger, log_state_summary
8
+
9
+ logger = get_logger()
10
+
11
+
12
def writer_node(state: AgentState) -> AgentState:
    """Generate the first draft from the aggregated research snippets.

    Stores the draft in state["draft"] and the elapsed time in state["meta"].
    """
    started = time.time()
    topic = state.get("topic", "")
    snippets = state.get("research_snippets", []) or []

    snippet_block = "\n\n".join(
        f"- {s['snippet']} (source: {s['title']} – {s['url']})" for s in snippets
    )

    client = LLMClient()

    logger.info("Writer: generating first draft...")
    draft = client.generate(
        system_prompt="You are a precise and structured writer who follows instructions carefully.",
        user_prompt=WRITER_PROMPT_TEMPLATE.format(topic=topic, snippets=snippet_block),
    )

    meta = state.get("meta", {}) or {}
    meta["writer_time_sec"] = round(time.time() - started, 3)

    updated: AgentState = {**state, "draft": draft, "meta": meta}
    log_state_summary(updated, prefix="Writer")
    return updated
src/agentic_multiwriter/api/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .server import app
2
+
3
+ __all__ = ["app"]
src/agentic_multiwriter/api/server.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from fastapi import FastAPI
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+
6
+ from ..state import GenerateRequest, GenerateResponse
7
+ from ..graph.router import run_pipeline
8
+ from ..tools import get_logger
9
+
10
+ logger = get_logger()
11
+
12
# FastAPI application instance; served by uvicorn (see Dockerfile / scripts).
app = FastAPI(
    title="Agentic Multiwriter API",
    version="0.1.0",
    description="Multi-agent research & writing pipeline using LangGraph and LLMs.",
)

# Permissive CORS so browser-based frontends on any origin can call the API.
# NOTE(review): allow_origins=["*"] is fine for a demo; tighten for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
24
+
25
+
26
@app.get("/health")
def health() -> dict:
    """Liveness probe; always returns {"status": "ok"}."""
    return {"status": "ok"}
29
+
30
+
31
@app.post("/generate", response_model=GenerateResponse)
def generate(payload: GenerateRequest) -> GenerateResponse:
    """Run the full research-and-writing pipeline for the requested topic."""
    logger.info("API: /generate called with topic='%s', mode='%s'", payload.topic, payload.mode)
    result = run_pipeline(topic=payload.topic, mode=payload.mode)

    # Fall back to the request values / empty containers so the response
    # model always validates even if a key is missing from the final state.
    return GenerateResponse(
        topic=result.get("topic", payload.topic),
        mode=result.get("mode", payload.mode),
        outline=result.get("outline", []) or [],
        draft=result.get("draft", "") or "",
        revised_draft=result.get("revised_draft", "") or "",
        final_output=result.get("final_output", "") or "",
        meta=result.get("meta", {}) or {},
    )
src/agentic_multiwriter/cli/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """
2
+ CLI entrypoint package for Agentic Multiwriter.
3
+ """
src/agentic_multiwriter/cli/main.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import textwrap
5
+
6
+ from ..graph.router import run_pipeline
7
+ from ..tools import get_logger
8
+
9
+ logger = get_logger()
10
+
11
+
12
def parse_args() -> argparse.Namespace:
    """Parse CLI arguments: a required --topic and an optional --mode."""
    cli = argparse.ArgumentParser(
        description="Agentic Multiwriter - multi-agent research & writing pipeline."
    )
    cli.add_argument(
        "--topic",
        required=True,
        help="Topic to research and write about.",
    )
    cli.add_argument(
        "--mode",
        default="blog",
        choices=["blog", "research_summary", "linkedin_post"],
        help="Output mode (default: blog).",
    )
    return cli.parse_args()
28
+
29
+
30
def main() -> None:
    """CLI entrypoint: run the pipeline and print the final output and meta."""
    args = parse_args()

    logger.info("CLI: running pipeline for topic='%s', mode='%s'", args.topic, args.mode)
    result = run_pipeline(topic=args.topic, mode=args.mode)

    banner = "=" * 80
    print("\n" + banner)
    print(f"TOPIC: {args.topic}")
    print(f"MODE: {args.mode}")
    print(banner + "\n")
    print(textwrap.dedent(result.get("final_output", "") or "").strip())
    print("\n" + "-" * 80)
    print("Meta:", result.get("meta", {}) or {})
48
+
49
+
50
if __name__ == "__main__":
    # Allow `python -m agentic_multiwriter.cli.main ...` execution.
    main()
src/agentic_multiwriter/config.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
from dataclasses import dataclass, field
3
+
4
+
5
@dataclass
class Settings:
    """Global configuration for the agentic multiwriter system.

    Each field default is read from the environment *per instance* (via
    default_factory) rather than once at class-creation time. The original
    evaluated os.getenv() in the class body, freezing values at first
    import — env vars set afterwards (e.g. by python-dotenv) were ignored
    by any later Settings() instance.
    """

    # ollama | openai | hf_endpoint
    llm_provider: str = field(
        default_factory=lambda: os.getenv("AMW_LLM_PROVIDER", "ollama").lower()
    )
    llm_model: str = field(
        default_factory=lambda: os.getenv("AMW_LLM_MODEL", "llama3")
    )
    temperature: float = field(
        default_factory=lambda: float(os.getenv("AMW_TEMPERATURE", "0.4"))
    )
    max_search_results: int = field(
        default_factory=lambda: int(os.getenv("AMW_MAX_SEARCH_RESULTS", "8"))
    )

    def validate(self) -> None:
        """Raise ValueError if the provider is unknown or its API key is missing."""
        allowed = {"ollama", "openai", "hf_endpoint"}
        if self.llm_provider not in allowed:
            raise ValueError(
                f"Unsupported LLM provider '{self.llm_provider}'. "
                f"Use one of: {', '.join(sorted(allowed))}."
            )

        if self.llm_provider == "openai" and not os.getenv("OPENAI_API_KEY"):
            raise ValueError(
                "OPENAI_API_KEY is required when AMW_LLM_PROVIDER is 'openai'."
            )

        if self.llm_provider == "hf_endpoint" and not os.getenv(
            "HUGGINGFACEHUB_API_TOKEN"
        ):
            raise ValueError(
                "HUGGINGFACEHUB_API_TOKEN is required when "
                "AMW_LLM_PROVIDER is 'hf_endpoint'."
            )


# Module-level default settings shared across the package.
settings = Settings()
src/agentic_multiwriter/graph/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .workflow import build_workflow_app
2
+
3
+ __all__ = ["build_workflow_app"]
src/agentic_multiwriter/graph/router.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Dict, Any
4
+
5
+ from ..state import AgentState
6
+ from .workflow import build_workflow_app
7
+
8
+
9
+ def run_pipeline(topic: str, mode: str = "blog") -> AgentState:
10
+ """
11
+ High-level helper to run the full workflow from scratch.
12
+ """
13
+ initial_state: AgentState = {
14
+ "topic": topic,
15
+ "mode": mode,
16
+ "research_snippets": [],
17
+ "outline": [],
18
+ "draft": "",
19
+ "revised_draft": "",
20
+ "final_output": "",
21
+ "meta": {},
22
+ }
23
+
24
+ app = build_workflow_app()
25
+ final_state: Dict[str, Any] = app.invoke(initial_state) # type: ignore[assignment]
26
+ # LangGraph returns a dict compatible with AgentState
27
+ return final_state # type: ignore[return-value]
src/agentic_multiwriter/graph/workflow.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from langgraph.graph import StateGraph, END
4
+
5
+ from ..state import AgentState
6
+ from ..agents import (
7
+ researcher_node,
8
+ aggregator_node,
9
+ writer_node,
10
+ critic_node,
11
+ formatter_node,
12
+ )
13
+
14
+
15
+ def build_workflow_app() -> any:
16
+ """
17
+ Build and compile the LangGraph workflow for the multi-agent pipeline.
18
+ Flow: Researcher -> Aggregator -> Writer -> Critic -> Formatter -> END
19
+ """
20
+ graph = StateGraph(AgentState)
21
+
22
+ graph.add_node("researcher", researcher_node)
23
+ graph.add_node("aggregator", aggregator_node)
24
+ graph.add_node("writer", writer_node)
25
+ graph.add_node("critic", critic_node)
26
+ graph.add_node("formatter", formatter_node)
27
+
28
+ graph.set_entry_point("researcher")
29
+ graph.add_edge("researcher", "aggregator")
30
+ graph.add_edge("aggregator", "writer")
31
+ graph.add_edge("writer", "critic")
32
+ graph.add_edge("critic", "formatter")
33
+ graph.add_edge("formatter", END)
34
+
35
+ app = graph.compile()
36
+ return app
src/agentic_multiwriter/models/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .llm_client import LLMClient
2
+ from .prompts import (
3
+ WRITER_PROMPT_TEMPLATE,
4
+ CRITIC_PROMPT_TEMPLATE,
5
+ FORMATTER_PROMPT_TEMPLATE,
6
+ )
7
+
8
+ __all__ = [
9
+ "LLMClient",
10
+ "WRITER_PROMPT_TEMPLATE",
11
+ "CRITIC_PROMPT_TEMPLATE",
12
+ "FORMATTER_PROMPT_TEMPLATE",
13
+ ]
src/agentic_multiwriter/models/llm_client.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional
4
+
5
+ from langchain_ollama import ChatOllama
6
+ from langchain_openai import ChatOpenAI
7
+ from langchain_core.messages import SystemMessage, HumanMessage
8
+ from langchain_community.llms import HuggingFaceEndpoint
9
+
10
+ from ..config import settings
11
+ from ..tools import get_logger
12
+
13
+ logger = get_logger()
14
+
15
+
16
class LLMClient:
    """
    Wrapper to abstract over multiple LLM providers:
    - ollama: local Ollama server
    - openai: OpenAI Chat API
    - hf_endpoint: Hugging Face Inference Endpoint / hosted model
    """

    def __init__(
        self,
        provider: Optional[str] = None,
        model: Optional[str] = None,
        temperature: Optional[float] = None,
    ) -> None:
        self.provider = (provider or settings.llm_provider).lower()
        self.model = model or settings.llm_model
        self.temperature = temperature if temperature is not None else settings.temperature

        # Validate THIS instance's provider. The original called
        # settings.validate(), which checks the *global* settings — so a
        # provider passed explicitly to this constructor escaped validation.
        self._validate()
        self._init_client()
        logger.info(
            "LLMClient initialized with provider='%s', model='%s', temperature=%.2f",
            self.provider,
            self.model,
            self.temperature,
        )

    def _validate(self) -> None:
        """Raise ValueError if the provider is unknown or its API key is missing."""
        import os  # local import: keeps this module's top-level imports unchanged

        allowed = {"ollama", "openai", "hf_endpoint"}
        if self.provider not in allowed:
            raise ValueError(
                f"Unsupported LLM provider '{self.provider}'. "
                f"Use one of: {', '.join(sorted(allowed))}."
            )
        if self.provider == "openai" and not os.getenv("OPENAI_API_KEY"):
            raise ValueError(
                "OPENAI_API_KEY is required when the 'openai' provider is used."
            )
        if self.provider == "hf_endpoint" and not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
            raise ValueError(
                "HUGGINGFACEHUB_API_TOKEN is required when the "
                "'hf_endpoint' provider is used."
            )

    def _init_client(self) -> None:
        """Instantiate the provider-specific LangChain client."""
        if self.provider == "ollama":
            self._client = ChatOllama(
                model=self.model, temperature=self.temperature)
        elif self.provider == "openai":
            self._client = ChatOpenAI(
                model=self.model, temperature=self.temperature)
        elif self.provider == "hf_endpoint":
            # Uses Hugging Face hosted model via Inference API.
            # Model id should be a HF repo id, e.g. "HuggingFaceH4/zephyr-7b-beta"
            self._client = HuggingFaceEndpoint(
                repo_id=self.model,
                temperature=self.temperature,
            )
        else:
            raise ValueError(f"Unsupported provider: {self.provider}")

    def generate(self, system_prompt: str, user_prompt: str) -> str:
        """Return the model's completion for a system+user prompt pair."""
        if self.provider in {"ollama", "openai"}:
            messages = [
                SystemMessage(content=system_prompt),
                HumanMessage(content=user_prompt),
            ]
            response = self._client.invoke(messages)
            return response.content or ""

        if self.provider == "hf_endpoint":
            # HuggingFaceEndpoint is a text-completion-style model,
            # so we concatenate system + user into a single prompt.
            prompt = f"{system_prompt.strip()}\n\n{user_prompt.strip()}"
            response = self._client.invoke(prompt)
            return response or ""

        raise ValueError(
            f"Unsupported provider at generate(): {self.provider}")
src/agentic_multiwriter/models/prompts.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt for the drafting step: produce a grounded first draft from the
# research snippets only. Placeholders: {topic}, {snippets}.
WRITER_PROMPT_TEMPLATE = """You are a careful, insightful writer.
Write a structured first draft about the topic below using ONLY the research snippets provided.
Do not invent facts. If something is unclear from the snippets, either omit it or clearly mark it as uncertainty.

Topic:
{topic}

Research snippets:
{snippets}

Write a coherent, well-organized draft in clear, natural language.
"""

# Prompt for the revision step: edit the draft against the same snippets.
# Placeholders: {topic}, {snippets}, {draft}.
CRITIC_PROMPT_TEMPLATE = """You are an editor reviewing the following draft.
Your goals:
- Improve clarity and flow.
- Remove repetition.
- Highlight or remove any statements that are not clearly supported by the snippets.
- Preserve factual accuracy.

Topic:
{topic}

Research snippets:
{snippets}

Original draft:
{draft}

Now return the improved draft.
"""

# Prompt for the formatting step: render the revised draft in one of the
# supported output modes. Placeholders: {mode}, {topic}, {draft}.
FORMATTER_PROMPT_TEMPLATE = """You are formatting the final output based on the requested mode.

Mode: {mode}
Topic: {topic}

Draft to format:
{draft}

Rules:
- For mode "blog": add a title, short intro hook, subheadings, and a concise conclusion.
- For mode "research_summary": provide bullet points for key findings, then a short narrative summary.
- For mode "linkedin_post": keep it within ~150–220 words, conversational but professional, and end with a question or call to action.

Return ONLY the formatted text.
"""
src/agentic_multiwriter/state.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import TypedDict, List, Dict, Any
4
+ from pydantic import BaseModel, Field
5
+
6
+
7
class ResearchSnippet(TypedDict):
    """A single normalized web-search result used as grounding material."""

    title: str  # page title (may be empty if the result had none)
    url: str  # source URL
    snippet: str  # short excerpt of the page body
11
+
12
+
13
class AgentState(TypedDict, total=False):
    """Mutable state shared between workflow nodes (all keys optional)."""

    topic: str  # subject to research and write about
    mode: str  # output mode: 'blog', 'research_summary', or 'linkedin_post'
    research_snippets: List[ResearchSnippet]  # gathered search results
    outline: List[str]  # outline entries for the piece
    draft: str  # first draft text
    revised_draft: str  # draft after the critic/editing pass
    final_output: str  # formatted text for the requested mode
    meta: Dict[str, Any]  # free-form diagnostics / bookkeeping
22
+
23
+
24
class GenerateRequest(BaseModel):
    """Request payload for the generation API."""

    topic: str = Field(..., description="Topic to research and write about.")
    # Defaults to "blog" when the caller does not specify a mode.
    mode: str = Field(
        "blog",
        description="Output mode: 'blog', 'research_summary', or 'linkedin_post'.",
    )
30
+
31
+
32
class GenerateResponse(BaseModel):
    """Response payload returned by the generation API."""

    topic: str  # echoed request topic
    mode: str  # echoed request mode
    outline: List[str]  # outline entries from the workflow state
    draft: str  # first draft text
    revised_draft: str  # draft after the critic/editing pass
    final_output: str  # formatted text for the requested mode
    meta: Dict[str, Any] = Field(default_factory=dict)  # run diagnostics
src/agentic_multiwriter/tools/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from .web_search import web_search
2
+ from .parsing import normalize_snippets
3
+ from .logging_utils import get_logger, log_state_summary
4
+
5
+ __all__ = ["web_search", "normalize_snippets", "get_logger", "log_state_summary"]
src/agentic_multiwriter/tools/logging_utils.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Any, Dict
5
+
6
+ from ..state import AgentState
7
+
8
+ _LOGGER_NAME = "agentic_multiwriter"
9
+
10
+
11
def get_logger() -> logging.Logger:
    """Return the shared package logger, attaching a console handler once.

    The handler is only added on first call so repeated calls do not
    duplicate log output.
    """
    logger = logging.getLogger(_LOGGER_NAME)
    if logger.handlers:
        return logger

    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter(
            "[%(asctime)s] %(levelname)s - %(message)s", "%Y-%m-%d %H:%M:%S"
        )
    )
    logger.addHandler(stream_handler)
    logger.setLevel(logging.INFO)
    return logger
22
+
23
+
24
def log_state_summary(state: AgentState, prefix: str = "") -> None:
    """Log a one-line overview of the workflow state, plus meta if present.

    Args:
        state: Current workflow state; missing keys default to empty values.
        prefix: Optional label prepended to each log line.
    """
    logger = get_logger()
    label = f"{prefix} " if prefix else ""
    snippet_count = len(state.get("research_snippets", []) or [])
    logger.info(
        "%sState summary - topic='%s', mode='%s', research_snippets=%d",
        label,
        state.get("topic", ""),
        state.get("mode", ""),
        snippet_count,
    )
    meta: Dict[str, Any] = state.get("meta", {}) or {}
    if meta:
        logger.info("%sMeta: %s", prefix, meta)
src/agentic_multiwriter/tools/parsing.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import List, Dict
4
+
5
+ from ..state import ResearchSnippet
6
+
7
+
8
def normalize_snippets(snippets: List[ResearchSnippet]) -> List[ResearchSnippet]:
    """
    Deduplicate snippets by URL+snippet text and trim length.

    Entries are compared on their *stripped* URL and snippet text, so results
    differing only in surrounding whitespace collapse into one. Snippet text
    longer than 600 characters is truncated with a trailing ellipsis to keep
    prompt size bounded.

    Args:
        snippets: Raw search results (title/url/snippet dicts).

    Returns:
        A new list of cleaned snippets, in first-seen order.
    """
    seen: set[tuple[str, str]] = set()
    normalized: List[ResearchSnippet] = []

    for s in snippets:
        title = s["title"].strip()
        url = s["url"].strip()
        snippet_text = s["snippet"].strip()

        # Bug fix: key on the cleaned values. Previously the key used the raw
        # url/snippet, so two entries differing only in whitespace survived
        # dedup and produced identical normalized snippets twice.
        key = (url, snippet_text)
        if key in seen:
            continue
        seen.add(key)

        # Hard limit length for prompts
        if len(snippet_text) > 600:
            snippet_text = snippet_text[:600] + "..."

        normalized.append(
            {
                "title": title,
                "url": url,
                "snippet": snippet_text,
            }
        )

    return normalized
src/agentic_multiwriter/tools/web_search.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import List
4
+
5
+ # Prefer the new ddgs package, fall back to duckduckgo_search if needed
6
+ try:
7
+ from ddgs import DDGS # new package name
8
+ except ImportError:
9
+ from duckduckgo_search import DDGS # old package name
10
+
11
+ from ..state import ResearchSnippet
12
+ from ..config import settings
13
+
14
+
15
def web_search(query: str, max_results: int | None = None) -> List[ResearchSnippet]:
    """
    Perform a web search and return normalized snippets.
    Uses DuckDuckGo via ddgs (preferred) or duckduckgo_search.

    Args:
        query: Search query string.
        max_results: Result cap; defaults to ``settings.max_search_results``.
    """
    result_limit = max_results or settings.max_search_results
    collected: List[ResearchSnippet] = []

    # region/safesearch can matter for results; 'wt-wt' = worldwide, 'off' = no filtering
    with DDGS() as client:
        for hit in client.text(query, max_results=result_limit):
            # ddgs and duckduckgo_search use slightly different result keys.
            body = hit.get("body") or hit.get("snippet") or ""
            if not body.strip():
                # Skip results that carry no usable text.
                continue
            collected.append(
                {
                    "title": (hit.get("title") or "").strip(),
                    "url": (hit.get("href") or hit.get("url") or "").strip(),
                    "snippet": body.strip(),
                }
            )
    return collected
tests/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """
2
+ Test package for agentic_multiwriter.
3
+ """
tests/test_agents.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agentic_multiwriter.state import AgentState
2
+ from agentic_multiwriter.agents import aggregator_node
3
+
4
+
5
def test_aggregator_node_handles_empty():
    """The aggregator keeps a list-valued research_snippets key on empty input."""
    empty_state: AgentState = {
        "topic": "Test",
        "mode": "blog",
        "research_snippets": [],
        "outline": [],
        "draft": "",
        "revised_draft": "",
        "final_output": "",
        "meta": {},
    }
    result = aggregator_node(empty_state)
    assert "research_snippets" in result
    assert isinstance(result["research_snippets"], list)
tests/test_state.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agentic_multiwriter.state import AgentState, ResearchSnippet
2
+
3
+
4
def test_agent_state_basic():
    """AgentState is usable as a plain dict with all workflow keys set."""
    state: AgentState = dict(
        topic="Test",
        mode="blog",
        research_snippets=[],
        outline=[],
        draft="",
        revised_draft="",
        final_output="",
        meta={},
    )
    assert state["topic"] == "Test"
    assert state["mode"] == "blog"
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from agentic_multiwriter.tools import normalize_snippets
2
+
3
+
4
def test_normalize_snippets_deduplication():
    """Two results sharing url and snippet text collapse into one entry."""
    duplicated = [
        {"title": "A", "url": "http://example.com", "snippet": "Same text"},
        {"title": "B", "url": "http://example.com", "snippet": "Same text"},
    ]
    assert len(normalize_snippets(duplicated)) == 1
tests/test_workflow.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agentic_multiwriter.graph.workflow import build_workflow_app
2
+ from agentic_multiwriter.state import AgentState
3
+
4
+
5
def test_build_workflow_app():
    """The workflow app can be constructed and exposes an invoke entry point."""
    app = build_workflow_app()
    assert app is not None

    # Minimal state the graph *could* be fed; nodes depend on external
    # services, so this remains a construction-only smoke test.
    smoke_state: AgentState = {
        "topic": "Test topic",
        "mode": "blog",
        "research_snippets": [],
        "outline": [],
        "draft": "",
        "revised_draft": "",
        "final_output": "",
        "meta": {},
    }

    # We don't run the whole graph here; just confirm 'invoke' exists.
    assert hasattr(app, "invoke")