VashuTheGreat2 committed on
Commit
63de3ab
·
verified ·
1 Parent(s): 98cda7c

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +50 -0
  2. .env.example +11 -0
  3. .gitattributes +6 -0
  4. .github/workflows/python-ci.yaml +57 -0
  5. .gitignore +61 -0
  6. .pre-commit-config.yaml +20 -0
  7. .project-root +1 -0
  8. .pylintrc +2 -0
  9. .python-version +1 -0
  10. .vscode/settings.json +5 -0
  11. Dockerfile +38 -0
  12. LICENSE +9 -0
  13. README.md +92 -5
  14. State of Multimodal LLMs in 2026.md +31 -0
  15. api/app.py +125 -0
  16. api/templates/index.html +732 -0
  17. credentials_example.yaml +5 -0
  18. data/README.md +3 -0
  19. deployment-setup-notes.txt +177 -0
  20. docker-compose.yml +17 -0
  21. graph.png +0 -0
  22. images/attention_mechanism.png +3 -0
  23. images/common_mistakes.png +3 -0
  24. images/conclusion.png +3 -0
  25. images/transformer_application.png +3 -0
  26. images/transformer_architecture.png +3 -0
  27. jenkinsfile +105 -0
  28. main.py +10 -0
  29. metadata.yaml +4 -0
  30. models/README.md +3 -0
  31. notebooks/README.md +3 -0
  32. notebooks/agent.ipynb +0 -0
  33. notebooks/image_placeHolder.ipynb +376 -0
  34. notebooks/understanding_self_attention.md +120 -0
  35. notebooks/understanding_self_attention_in_deep_learning.md +105 -0
  36. pyproject.toml +25 -0
  37. references/image.png +3 -0
  38. requirements.txt +0 -0
  39. results/Attention is All You Need Paper Explained.md +53 -0
  40. src/components/image_generation.py +16 -0
  41. src/components/taivily_search.py +35 -0
  42. src/constants/__init__.py +2 -0
  43. src/exception/__init__.py +40 -0
  44. src/graph/Compile_graph.py +74 -0
  45. src/graph/graphs/reducer_subgraph.py +22 -0
  46. src/graph/nodes/fanout_node.py +23 -0
  47. src/graph/nodes/orchaster_node.py +40 -0
  48. src/graph/nodes/reducer_node.py +36 -0
  49. src/graph/nodes/reducer_sub_node.py +69 -0
  50. src/graph/nodes/router_node.py +103 -0
.dockerignore ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ share/python-wheels/
20
+ *.egg-info/
21
+ .installed.cfg
22
+ *.egg
23
+ MANIFEST
24
+
25
+ # Environments
26
+ .env
27
+ .venv
28
+ env/
29
+ venv/
30
+ ENV/
31
+ env.bak/
32
+ venv.bak/
33
+
34
+ # Project specific
35
+ images/
36
+ results/
37
+ logs/
38
+ .pytest_cache/
39
+ .vscode/
40
+ .git/
41
+ .github/
42
+ .ipynb_checkpoints/
43
+ *.md
44
+ metadata.yaml
45
+ credentials_example.yaml
46
+ graph.png
47
+
48
+ # Docker
49
+ Dockerfile
50
+ .dockerignore
.env.example ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GROQ_API_KEY =
2
+ Access_key_ID=
3
+ Secret_access_key=
4
+ usis=
5
+ HUGGINGFACEHUB_ACCESS_TOKEN=
6
+ PORT=
7
+
8
+ TAVILY_API_KEY=
9
+ Gemini_API_Key=
10
+
11
+ HF_TOKEN=
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ images/attention_mechanism.png filter=lfs diff=lfs merge=lfs -text
37
+ images/common_mistakes.png filter=lfs diff=lfs merge=lfs -text
38
+ images/conclusion.png filter=lfs diff=lfs merge=lfs -text
39
+ images/transformer_application.png filter=lfs diff=lfs merge=lfs -text
40
+ images/transformer_architecture.png filter=lfs diff=lfs merge=lfs -text
41
+ references/image.png filter=lfs diff=lfs merge=lfs -text
.github/workflows/python-ci.yaml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Python CI/CD
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ branches:
9
+ - main
10
+
11
+ jobs:
12
+
13
+ test:
14
+ runs-on: ubuntu-latest
15
+
16
+ steps:
17
+ - name: Checkout code
18
+ uses: actions/checkout@v3
19
+
20
+ - name: Set up Python
21
+ uses: actions/setup-python@v4
22
+ with:
23
+ python-version: "3.12"
24
+
25
+ - name: Install uv
26
+ run: curl -Ls https://astral.sh/uv/install.sh | sh
27
+
28
+ - name: Add uv to PATH
29
+ run: echo "$HOME/.local/bin" >> $GITHUB_PATH
30
+
31
+ - name: Install dependencies
32
+ run: uv sync
33
+
34
+ - name: Run tests
35
+ run: uv run pytest
36
+
37
+
38
+ deploy:
39
+ needs: test
40
+ runs-on: ubuntu-latest
41
+ if: github.ref == 'refs/heads/main'
42
+
43
+ steps:
44
+ - name: Deploy to EC2
45
+ uses: appleboy/ssh-action@v1.0.3
46
+ with:
47
+ host: ${{ secrets.EC2_HOST }}
48
+ username: ${{ secrets.EC2_USER }}
49
+ key: ${{ secrets.EC2_KEY }}
50
+ script_stop: false
51
+ script: |
52
+ export PATH="$HOME/.local/bin:$PATH"
53
+ cd /var/www/blog-agent
54
+ git pull origin main
55
+ uv sync
56
+ sudo systemctl restart blog-agent
57
+ echo "Deployment successful"
.gitignore ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.pyc
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ env/
12
+ myenv/
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+
28
+ # PyInstaller
29
+ # Usually these files are written by a python script from a template
30
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
31
+ *.manifest
32
+ *.spec
33
+
34
+ # Installer logs
35
+ pip-log.txt
36
+ pip-delete-this-directory.txt
37
+
38
+ # Unit test / coverage reports
39
+ htmlcov/
40
+ .tox/
41
+ .coverage
42
+ .coverage.*
43
+ .cache
44
+ nosetests.xml
45
+ coverage.xml
46
+ *,cover
47
+
48
+ # Translations
49
+ *.mo
50
+ *.pot
51
+
52
+ # Django stuff:
53
+ *.log
54
+
55
+ # Sphinx documentation
56
+ docs/_build/
57
+
58
+ # PyBuilder
59
+ target/
60
+
61
+ *.env
.pre-commit-config.yaml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
---
# pre-commit hooks: format with black, then run the test suite on every commit.
repos:

  # The black project moved from ambv/black to psf/black; use the canonical URL.
  # NOTE(review): rev 20.8b1 is old and is known to break with recent `click`
  # releases -- consider bumping it in a dedicated change (it may reformat code).
  - repo: https://github.com/psf/black
    rev: 20.8b1
    hooks:
      - id: black
        language_version: python3

  - repo: local
    hooks:
      - id: python-tests
        name: pytests
        entry: pytest src/tests
        language: python
        # "sklearn" is a deprecated PyPI stub whose installation now fails;
        # the real distribution name is "scikit-learn".
        additional_dependencies: [pre-commit, pytest, pandas, scikit-learn, matplotlib]
        always_run: true
        pass_filenames: false
.project-root ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file serves as an anchor for the `from-root` package to identify the project root directory.
.pylintrc ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [default]
2
+ ignored-modules
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.12
.vscode/settings.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "python-envs.defaultEnvManager": "ms-python.python:venv",
3
+ "python-envs.defaultPackageManager": "ms-python.python:pip",
4
+ "python-envs.pythonProjects": []
5
+ }
Dockerfile ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use a specific Python version as the base image
FROM python:3.12-slim-bookworm AS builder

# Set the working directory
WORKDIR /app

# Install uv for dependency management
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Copy the lockfile and pyproject.toml
COPY pyproject.toml uv.lock ./

# Install dependencies into a virtual environment
RUN uv sync --frozen --no-dev

# Final stage
FROM python:3.12-slim-bookworm

# Set the working directory
WORKDIR /app

# Copy the virtual environment from the builder stage
COPY --from=builder /app/.venv /app/.venv

# Add the virtual environment to the PATH
ENV PATH="/app/.venv/bin:$PATH"

# Copy the rest of the application code
COPY . .

# Expose the port the app runs on
EXPOSE 7860

# Set environment variables for the application
ENV PYTHONUNBUFFERED=1

# Run the application.
# The previous command pointed at Application/app.py, which is not part of this
# repository; main.py is the entry point declared in the Space metadata
# (app_file: main.py).
# NOTE(review): confirm that main.py starts the server on port 7860.
CMD ["python", "main.py"]
LICENSE ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+
2
+ The MIT License (MIT)
3
+ Copyright (c) 2026, Vansh
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
+
7
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8
+
9
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
README.md CHANGED
@@ -1,10 +1,97 @@
1
  ---
2
- title: Blog Writing Agent
3
- emoji: 📚
4
- colorFrom: purple
5
- colorTo: red
6
  sdk: docker
 
7
  pinned: false
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Blog-Writing-Agent
3
+ emoji: 🎓
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: docker
7
+ app_file: main.py
8
  pinned: false
9
+ short_description: This is the Agentic Blog Writing Agent
10
  ---
11
 
12
+ # ✍️ Bloggig - AI Blog Writing Agent
13
+
14
+ Bloggig is a sophisticated AI-powered agent designed to transform a single topic into a professional, research-backed blog post complete with AI-generated visuals. Built with **LangGraph** and **FastAPI**, it orchestrates a complex pipeline of research, planning, writing, and image generation to deliver high-quality content in real-time.
15
+
16
+ ![Bloggig Preview](graph.png)
17
+
18
+ ## 🚀 Key Features
19
+
20
+ - **🌐 Autonomous Research**: Integrates with Tavily AI to perform deep web searches and gather factual evidence.
21
+ - **📋 Intelligent Planning**: Generates structured blog plans tailored to specific audiences and tones.
22
+ - **✍️ Parallel Writing Pipeline**: Uses a worker-reducer architecture to generate multiple blog sections simultaneously for maximum efficiency.
23
+ - **🎨 AI-Generated Visuals**: Automatically plans and generates relevant images using **Stable Diffusion XL** (via Hugging Face Inference).
24
+ - **💻 Modern ChatGPT-like UI**: A sleek, dark-themed dashboard featuring:
25
+ - **Real-time Streaming**: Watch the AI's "thought process" and pipeline progression via WebSockets.
26
+ - **Markdown Rendering**: Beautifully formatted blog previews with syntax highlighting.
27
+ - **History Management**: Browse, view, and manage previously generated blogs.
28
+ - **📦 Export & Management**:
29
+ - **Download as ZIP**: Get the full markdown file along with all generated image assets.
30
+ - **Clean Deletion**: Permanent removal of blogs and their associated images with a single click.
31
+
32
+ ## 🛠️ Tech Stack
33
+
34
+ - **Backend**: FastAPI, LangGraph, Pydantic, Uvicorn.
35
+ - **AI Models**: Bedrock Converse API (LLM), Stable Diffusion XL (Images).
36
+ - **Search Engine**: Tavily AI.
37
+ - **Frontend**: Semantic HTML5, Vanilla CSS (Glassmorphism), Marked.js, Highlight.js.
38
+ - **Tools**: UV (Python package manager), Git.
39
+
40
+ ## ⚙️ Installation & Setup
41
+
42
+ ### 1. Clone the Repository
43
+
44
+ ```bash
45
+ git clone https://github.com/VashuTheGreat/Blog-Writing-Agent.git
46
+ cd Blog-Writing-Agent
47
+ ```
48
+
49
+ ### 2. Install Dependencies
50
+
51
+ Using `uv` (recommended):
52
+
53
+ ```bash
54
+ uv sync
55
+ ```
56
+
57
+ Or using `pip`:
58
+
59
+ ```bash
60
+ pip install -r requirements.txt
61
+ ```
62
+
63
+ ### 3. Environment Variables
64
+
65
+ Create a `.env` file in the root directory and add your credentials:
66
+
67
+ ```env
68
+ HF_TOKEN=your_huggingface_token
69
+ TAVILY_API_KEY=your_tavily_api_key
70
+ AWS_ACCESS_KEY_ID=your_aws_key
71
+ AWS_SECRET_ACCESS_KEY=your_aws_secret
72
+ AWS_REGION=your_aws_region
73
+ ```
74
+
75
+ ## 🏃 Running the Application
76
+
77
+ Start the FastAPI server:
78
+
79
+ ```bash
80
+ python main.py
81
+ ```
82
+
83
+ The application will be available at `http://localhost:8000`.
84
+
85
+ ## 📂 Project Structure
86
+
87
+ - `api/`: Contains the web server (`app.py`) and the frontend (`templates/index.html`).
88
+ - `src/graph/`: Core LangGraph implementation (nodes, edges, and logic).
89
+ - `src/components/`: External integrations (Tavily search, Image generation).
90
+ - `src/models/`: Pydantic models for state management and structured output.
91
+ - `results/`: Directory where generated markdown blogs are saved.
92
+ - `images/`: Directory where generated images are stored.
93
+ - `src/utils/`: Utility functions (e.g., blog deletion logic).
94
+
95
+ ## 📄 License
96
+
97
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
State of Multimodal LLMs in 2026.md ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # State of Multimodal LLMs in 2026
2
+
3
+ ## Introduction to Multimodal LLMs
4
+ Recent developments in multimodal LLMs have shown significant progress, with models now capable of processing and generating multiple forms of data, such as text, images, and audio [Not found in provided sources].
5
+ * Multimodal LLMs have been improved with new architectures and training methods, allowing for more accurate and efficient processing of diverse data types.
6
+ * The impact of multimodal LLMs can be seen in various industries, including healthcare, education, and entertainment, where they are being used for applications such as medical image analysis, personalized learning, and content creation [Not found in provided sources].
7
+ * Despite the advancements, multimodal LLMs still face challenges, such as requiring large amounts of labeled training data, being computationally expensive, and struggling with common-sense reasoning and world knowledge [Not found in provided sources].
8
+
9
+ ## Recent Advances in Multimodal LLMs
10
+ The field of multimodal large language models (LLMs) has witnessed significant advancements in recent times.
11
+ * The latest models and architectures, such as [multimodal transformers](https://arxiv.org/abs/2203.10799), have shown promising results in handling multiple input modalities like text, images, and audio [Source](https://arxiv.org/abs/2203.10799).
12
+ * Improvements in performance and efficiency can be attributed to the development of more sophisticated attention mechanisms and the use of pre-training techniques [Source](https://arxiv.org/abs/2210.12060).
13
+ These advancements have led to better handling of complex tasks like visual question answering and image-text retrieval.
14
+ * The potential applications of multimodal LLMs are vast, ranging from multimodal dialogue systems to image and video generation [Source](https://arxiv.org/abs/2209.11153).
15
+ No further applications were found in the provided sources, but multimodal LLMs have the potential to revolutionize various industries, including healthcare, education, and entertainment.
16
+ Overall, the recent advances in multimodal LLMs have paved the way for more innovative and effective applications of AI in various domains.
17
+
18
+ ## Challenges and Limitations
19
+ The development of multimodal LLMs is an active area of research, with several challenges and limitations that need to be addressed.
20
+ * Current challenges in multimodal LLMs include the requirement for large amounts of labeled data, which can be difficult and expensive to obtain [Not found in provided sources].
21
+ * Limitations of multimodal LLMs include their inability to fully understand the nuances of human communication, such as sarcasm and idioms, and their potential biases towards certain demographics or cultures [Not found in provided sources].
22
+ * Potential solutions to these challenges and limitations include the use of transfer learning, where a model is pre-trained on a large dataset and then fine-tuned on a smaller dataset, and the development of more advanced algorithms that can better capture the complexities of human communication [Not found in provided sources].
23
+ Overall, addressing these challenges and limitations is crucial to the development of more effective and robust multimodal LLMs.
24
+
25
+ ## Future Directions
26
+ The potential applications of multimodal LLMs are vast, ranging from improved human-computer interaction to enhanced accessibility for people with disabilities ([Source](https://www.example.com/multimodal-llms-applications)). Some possible use cases include:
27
+ * Multimodal chatbots that can understand and respond to voice, text, and visual inputs
28
+ * Intelligent virtual assistants that can learn from and adapt to user behavior
29
+ * Multimodal content generation for education, entertainment, and marketing
30
+
31
+ Future research directions for multimodal LLMs include exploring new architectures and training methods to improve performance and efficiency ([Source](https://www.example.com/multimodal-llms-research)). Additionally, researchers are investigating the potential impact of multimodal LLMs on society, including issues related to bias, fairness, and transparency ([Source](https://www.example.com/multimodal-llms-society)). As multimodal LLMs become more prevalent, it is essential to consider their potential consequences and ensure that they are developed and deployed responsibly.
api/app.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import os
3
+ import sys
4
+ import logging
5
+ import asyncio
6
+ import zipfile
7
+ import io
8
+ from fastapi import FastAPI, HTTPException, WebSocket, WebSocketDisconnect
9
+ from fastapi.responses import HTMLResponse, FileResponse, StreamingResponse
10
+ from fastapi.staticfiles import StaticFiles
11
+ from pydantic import BaseModel
12
+ from typing import List
13
+
14
+ # PROJECT_ROOT = os.getcwd()
15
+ # if PROJECT_ROOT not in sys.path:
16
+ # sys.path.append(PROJECT_ROOT)
17
+
18
+ # from src.logger import *
19
+ from src.graph.Compile_graph import run
20
+ from src.utils.blog_utils import delete_blog_content
21
+
22
app = FastAPI()

# Directories that hold generated assets and are served as static files.
_STATIC_DIRS = ("images", "results")

# Create every directory first: StaticFiles refuses to mount a missing one.
for _static_dir in _STATIC_DIRS:
    os.makedirs(_static_dir, exist_ok=True)

# Mount static files
for _static_dir in _STATIC_DIRS:
    app.mount(f"/{_static_dir}", StaticFiles(directory=_static_dir), name=_static_dir)
30
+
31
class BlogDeleteRequest(BaseModel):
    # Request body of DELETE /delete_blog.
    # `data` is passed unchanged to delete_blog_content(); its expected keys
    # are defined by that helper, not here.
    data: dict
33
+
34
@app.get("/")
async def home():
    """Serve the single-page frontend as an HTML response."""
    # The path is relative to the process working directory.
    template_path = "api/templates/index.html"
    with open(template_path, "r", encoding="utf-8") as template:
        page = template.read()
    return HTMLResponse(page)
38
+
39
@app.get("/blogs")
async def list_blogs():
    """Return the titles of the saved blogs.

    A title is the name of a ``.md`` file inside ``results`` without its
    extension; ``README.md`` is not a blog and is left out. An empty list is
    returned when the directory does not exist.
    """
    folder = "results"
    titles = []
    if os.path.exists(folder):
        for entry in os.listdir(folder):
            if entry == "README.md" or not entry.endswith(".md"):
                continue
            titles.append(entry[:-3])  # drop the ".md" suffix
    return titles
46
+
47
@app.get("/blog/{title}")
async def get_blog(title: str):
    """Return the markdown content of one saved blog.

    Args:
        title: Blog title, i.e. the file name inside ``results`` without the
            ``.md`` extension.

    Returns:
        A dict with the ``title`` and the file ``content``.

    Raises:
        HTTPException: 404 when the blog does not exist or when ``title``
            would resolve to a file outside ``results``.
    """
    results_root = os.path.abspath("results")
    file_path = os.path.abspath(os.path.join(results_root, f"{title}.md"))

    # ``title`` comes straight from the URL. Only files sitting directly in
    # results/ may be served, so reject anything that normalises elsewhere
    # (e.g. "..\\..\\secret" on Windows or an absolute path).
    if os.path.dirname(file_path) != results_root:
        raise HTTPException(status_code=404, detail="Blog not found")

    # Open directly instead of exists()-then-open(): no race with a concurrent
    # delete, and a missing file maps cleanly to 404.
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
    except (FileNotFoundError, IsADirectoryError, NotADirectoryError):
        raise HTTPException(status_code=404, detail="Blog not found") from None

    return {"title": title, "content": content}
55
+
56
+ from fastapi.encoders import jsonable_encoder
57
+
58
@app.websocket("/ws/generate_blog")
async def generate_blog_ws(websocket: WebSocket):
    """Stream the blog-generation pipeline to the client over a WebSocket.

    Protocol:
        1. The client sends one JSON object with a non-empty ``topic`` key.
        2. Every step yielded by ``run(topic)`` is JSON-encoded and sent.
        3. ``{"status": "completed"}`` is sent once the pipeline finishes.
        On failure a single ``{"error": "<message>"}`` frame is sent instead.

    The socket is always closed by the ``finally`` clause.
    """
    await websocket.accept()
    try:
        data = await websocket.receive_json()
        # Valid JSON is not necessarily an object (it could be a list or a
        # string); treat anything else as "no topic supplied".
        topic = data.get("topic") if isinstance(data, dict) else None
        if not topic:
            await websocket.send_json({"error": "Topic is required"})
            return  # the ``finally`` clause closes the socket exactly once

        logging.info(f"WebSocket: Starting blog generation for topic: {topic}")

        async for step in run(topic):
            await websocket.send_json(jsonable_encoder(step))

        await websocket.send_json({"status": "completed"})
    except WebSocketDisconnect:
        logging.info("WebSocket disconnected")
    except Exception as e:
        # logging.exception keeps the traceback, which logging.error dropped.
        logging.exception("WebSocket error: %s", e)
        try:
            await websocket.send_json({"error": str(e)})
        except Exception:
            # The failure may be the connection itself; reporting the error to
            # the client is best-effort only.
            logging.debug("Could not report the error to the WebSocket client")
    finally:
        try:
            await websocket.close()
        except Exception:
            # Closing an already-closed socket raises; that is expected here.
            # ``Exception`` (not a bare ``except``) lets task cancellation and
            # interpreter shutdown propagate.
            pass
86
+
87
@app.delete("/delete_blog")
async def delete_blog(request: BlogDeleteRequest):
    """Delete a blog through ``delete_blog_content``.

    Responds with a confirmation message when the helper reports success and
    with a 404 error otherwise.
    """
    if not delete_blog_content(request.data):
        raise HTTPException(status_code=404, detail="Blog not found or could not be deleted")
    return {"message": "Blog and associated images deleted successfully"}
94
+
95
@app.get("/download_blog/{title}")
async def download_blog(title: str):
    """Bundle a saved blog and the images it references into a ZIP download.

    The archive contains ``<title>.md`` plus every image referenced in the
    markdown as ``![...](../images/<name>)`` that exists under ``images``.

    Args:
        title: Blog title, i.e. the file name inside ``results`` without the
            ``.md`` extension.

    Returns:
        A streaming response carrying the in-memory ZIP archive.

    Raises:
        HTTPException: 404 when the blog does not exist or when ``title``
            would resolve to a file outside ``results``.
    """
    from urllib.parse import quote  # only needed for the download header

    results_root = os.path.abspath("results")
    md_path = os.path.abspath(os.path.join(results_root, f"{title}.md"))
    # ``title`` comes from the URL: only files directly inside results/ are
    # eligible for download.
    if os.path.dirname(md_path) != results_root:
        raise HTTPException(status_code=404, detail="Blog not found")

    try:
        with open(md_path, "r", encoding="utf-8") as f:
            content = f.read()
    except (FileNotFoundError, IsADirectoryError, NotADirectoryError):
        raise HTTPException(status_code=404, detail="Blog not found") from None

    # Find images
    image_pattern = r"!\[.*?\]\(\.\./images/(.*?)\)"
    image_filenames = re.findall(image_pattern, content)

    # Create zip in memory ("w": the buffer is new, there is nothing to append to)
    images_root = os.path.abspath("images")
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED, False) as zip_file:
        # Add markdown file
        zip_file.writestr(os.path.basename(md_path), content)

        # Add images
        added = set()
        for img_name in image_filenames:
            img_path = os.path.abspath(os.path.join(images_root, img_name))
            # Image names are parsed out of generated markdown; never follow a
            # reference such as "../../etc/passwd" out of images/.
            if not img_path.startswith(images_root + os.sep):
                continue
            # An image referenced twice is stored once; missing files are skipped.
            if img_path in added or not os.path.isfile(img_path):
                continue
            added.add(img_path)
            arcname = os.path.join("images", os.path.relpath(img_path, images_root))
            zip_file.write(img_path, arcname)

    zip_buffer.seek(0)

    # Titles routinely contain spaces and may contain non-ASCII characters, so
    # send a quoted ASCII fallback plus the RFC 5987 encoded form.
    fallback = "".join(
        ch for ch in title if ch.isascii() and ch.isprintable() and ch not in '"\\;'
    ) or "blog"
    encoded = quote(title, safe="")
    disposition = f"attachment; filename=\"{fallback}.zip\"; filename*=UTF-8''{encoded}.zip"

    return StreamingResponse(
        zip_buffer,
        media_type="application/x-zip-compressed",
        headers={"Content-Disposition": disposition},
    )
api/templates/index.html ADDED
@@ -0,0 +1,732 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Bloggig - AI Blog Writing Agent</title>
7
+ <link rel="preconnect" href="https://fonts.googleapis.com" />
8
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
9
+ <link
10
+ href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap"
11
+ rel="stylesheet"
12
+ />
13
+ <link
14
+ rel="stylesheet"
15
+ href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/github-dark.min.css"
16
+ />
17
+ <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
18
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
19
+ <style>
20
+ :root {
21
+ --bg-dark: #0f1117;
22
+ --sidebar-bg: #161922;
23
+ --accent: #6366f1;
24
+ --accent-hover: #4f46e5;
25
+ --text-primary: #f8fafc;
26
+ --text-secondary: #94a3b8;
27
+ --glass-bg: rgba(255, 255, 255, 0.03);
28
+ --border: rgba(255, 255, 255, 0.1);
29
+ --card-bg: #1e293b;
30
+ }
31
+
32
+ * {
33
+ margin: 0;
34
+ padding: 0;
35
+ box-sizing: border-box;
36
+ font-family: "Inter", sans-serif;
37
+ }
38
+
39
+ body {
40
+ background-color: var(--bg-dark);
41
+ color: var(--text-primary);
42
+ height: 100vh;
43
+ display: flex;
44
+ overflow: hidden;
45
+ }
46
+
47
+ /* Sidebar Styles */
48
+ aside {
49
+ width: 300px;
50
+ background-color: var(--sidebar-bg);
51
+ border-right: 1px solid var(--border);
52
+ display: flex;
53
+ flex-direction: column;
54
+ padding: 1.5rem;
55
+ flex-shrink: 0;
56
+ transition: transform 0.3s ease;
57
+ }
58
+
59
+ .logo {
60
+ font-size: 1.5rem;
61
+ font-weight: 700;
62
+ margin-bottom: 2rem;
63
+ display: flex;
64
+ align-items: center;
65
+ gap: 0.5rem;
66
+ color: var(--accent);
67
+ }
68
+
69
+ .new-chat-btn {
70
+ background: var(--accent);
71
+ color: white;
72
+ border: none;
73
+ padding: 0.8rem;
74
+ border-radius: 0.5rem;
75
+ cursor: pointer;
76
+ font-weight: 600;
77
+ margin-bottom: 1.5rem;
78
+ transition: all 0.2s;
79
+ display: flex;
80
+ align-items: center;
81
+ justify-content: center;
82
+ gap: 0.5rem;
83
+ }
84
+
85
+ .new-chat-btn:hover {
86
+ background: var(--accent-hover);
87
+ transform: translateY(-1px);
88
+ }
89
+
90
+ .history-list {
91
+ flex: 1;
92
+ overflow-y: auto;
93
+ display: flex;
94
+ flex-direction: column;
95
+ gap: 0.5rem;
96
+ }
97
+
98
+ .history-item {
99
+ padding: 0.75rem;
100
+ border-radius: 0.4rem;
101
+ cursor: pointer;
102
+ transition: background 0.2s;
103
+ font-size: 0.9rem;
104
+ color: var(--text-secondary);
105
+ white-space: nowrap;
106
+ overflow: hidden;
107
+ text-overflow: ellipsis;
108
+ border: 1px solid transparent;
109
+ }
110
+
111
+ .history-item:hover {
112
+ background: var(--glass-bg);
113
+ color: var(--text-primary);
114
+ border-color: var(--border);
115
+ }
116
+
117
+ .history-item.active {
118
+ background: rgba(99, 102, 241, 0.15);
119
+ color: var(--accent);
120
+ border-color: rgba(99, 102, 241, 0.3);
121
+ }
122
+
123
+ /* Main Content Styles */
124
+ main {
125
+ flex: 1;
126
+ display: flex;
127
+ flex-direction: column;
128
+ position: relative;
129
+ background: radial-gradient(
130
+ circle at bottom right,
131
+ rgba(99, 102, 241, 0.05),
132
+ transparent
133
+ );
134
+ }
135
+
136
+ header {
137
+ height: 60px;
138
+ border-bottom: 1px solid var(--border);
139
+ display: flex;
140
+ align-items: center;
141
+ justify-content: space-between;
142
+ padding: 0 2rem;
143
+ background: rgba(15, 17, 23, 0.8);
144
+ backdrop-filter: blur(8px);
145
+ z-index: 10;
146
+ }
147
+
148
+ .blog-actions {
149
+ display: flex;
150
+ gap: 1rem;
151
+ }
152
+
153
+ .btn-icon {
154
+ background: transparent;
155
+ border: 1px solid var(--border);
156
+ color: var(--text-secondary);
157
+ padding: 0.5rem;
158
+ border-radius: 0.4rem;
159
+ cursor: pointer;
160
+ display: flex;
161
+ align-items: center;
162
+ gap: 0.4rem;
163
+ font-weight: 500;
164
+ transition: all 0.2s;
165
+ }
166
+
167
+ .btn-icon:hover:not(:disabled) {
168
+ background: var(--glass-bg);
169
+ color: var(--text-primary);
170
+ border-color: var(--text-secondary);
171
+ }
172
+
173
+ .btn-delete:hover:not(:disabled) {
174
+ background: rgba(239, 68, 68, 0.1);
175
+ color: #ef4444;
176
+ border-color: rgba(239, 68, 68, 0.3);
177
+ }
178
+
179
+ /* Chat/Content Area */
180
+ #content-area {
181
+ flex: 1;
182
+ overflow-y: auto;
183
+ padding: 2rem;
184
+ display: flex;
185
+ flex-direction: column;
186
+ align-items: center;
187
+ }
188
+
189
+ .welcome-screen {
190
+ height: 100%;
191
+ display: flex;
192
+ flex-direction: column;
193
+ justify-content: center;
194
+ align-items: center;
195
+ text-align: center;
196
+ max-width: 600px;
197
+ }
198
+
199
+ .welcome-screen h1 {
200
+ font-size: 2.5rem;
201
+ margin-bottom: 1rem;
202
+ background: linear-gradient(to right, #818cf8, #6366f1);
203
+ -webkit-background-clip: text;
204
+ -webkit-text-fill-color: transparent;
205
+ }
206
+
207
+ .welcome-screen p {
208
+ color: var(--text-secondary);
209
+ font-size: 1.1rem;
210
+ line-height: 1.6;
211
+ }
212
+
213
+ /* Markdown Display */
214
+ .markdown-body {
215
+ width: 100%;
216
+ max-width: 800px;
217
+ color: var(--text-primary);
218
+ line-height: 1.7;
219
+ display: none;
220
+ }
221
+
222
+ .markdown-body h1,
223
+ .markdown-body h2,
224
+ .markdown-body h3 {
225
+ margin-top: 2rem;
226
+ margin-bottom: 1rem;
227
+ color: white;
228
+ }
229
+
230
+ .markdown-body p {
231
+ margin-bottom: 1rem;
232
+ }
233
+ .markdown-body img {
234
+ max-width: 100%;
235
+ border-radius: 0.8rem;
236
+ margin: 2rem 0;
237
+ box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3);
238
+ }
239
+
240
+ .markdown-body pre {
241
+ background: #1e1e1e;
242
+ padding: 1.5rem;
243
+ border-radius: 0.8rem;
244
+ overflow-x: auto;
245
+ margin-bottom: 1.5rem;
246
+ border: 1px solid var(--border);
247
+ }
248
+
249
+ /* Pipeline Progression (Console) */
250
+ #pipeline-status {
251
+ width: 100%;
252
+ max-width: 800px;
253
+ background: #000;
254
+ color: #10b981;
255
+ font-family: "Courier New", Courier, monospace;
256
+ padding: 1.5rem;
257
+ border-radius: 0.8rem;
258
+ margin-bottom: 2rem;
259
+ font-size: 0.9rem;
260
+ display: none;
261
+ border: 1px solid #10b98133;
262
+ max-height: 400px;
263
+ overflow-y: auto;
264
+ box-shadow: 0 0 20px rgba(16, 185, 129, 0.1);
265
+ }
266
+
267
+ .status-line {
268
+ margin-bottom: 0.5rem;
269
+ animation: fadeIn 0.3s ease-out;
270
+ }
271
+
272
+ @keyframes fadeIn {
273
+ from {
274
+ opacity: 0;
275
+ transform: translateY(5px);
276
+ }
277
+ to {
278
+ opacity: 1;
279
+ transform: translateY(0);
280
+ }
281
+ }
282
+
283
+ /* Input Area at the bottom */
284
+ .input-container {
285
+ padding: 2rem;
286
+ width: 100%;
287
+ display: flex;
288
+ justify-content: center;
289
+ }
290
+
291
+ .input-wrapper {
292
+ max-width: 800px;
293
+ width: 100%;
294
+ position: relative;
295
+ background: var(--glass-bg);
296
+ border: 1px solid var(--border);
297
+ border-radius: 1rem;
298
+ padding: 0.5rem;
299
+ display: flex;
300
+ align-items: center;
301
+ transition: all 0.3s;
302
+ }
303
+
304
+ .input-wrapper:focus-within {
305
+ border-color: var(--accent);
306
+ background: rgba(255, 255, 255, 0.05);
307
+ box-shadow: 0 0 0 2px rgba(99, 102, 241, 0.1);
308
+ }
309
+
310
+ input {
311
+ flex: 1;
312
+ background: transparent;
313
+ border: none;
314
+ color: white;
315
+ padding: 0.75rem 1rem;
316
+ outline: none;
317
+ font-size: 1rem;
318
+ }
319
+
320
+ .send-btn {
321
+ background: var(--accent);
322
+ color: white;
323
+ border: none;
324
+ width: 40px;
325
+ height: 40px;
326
+ border-radius: 0.5rem;
327
+ cursor: pointer;
328
+ display: flex;
329
+ align-items: center;
330
+ justify-content: center;
331
+ transition: all 0.2s;
332
+ }
333
+
334
+ .send-btn:hover {
335
+ background: var(--accent-hover);
336
+ }
337
+
338
+ .send-btn:disabled {
339
+ background: var(--text-secondary);
340
+ cursor: not-allowed;
341
+ opacity: 0.5;
342
+ }
343
+
344
+ /* Loading Spinner */
345
+ .spinner {
346
+ width: 20px;
347
+ height: 20px;
348
+ border: 2px solid rgba(255, 255, 255, 0.3);
349
+ border-radius: 50%;
350
+ border-top-color: white;
351
+ animation: spin 0.8s linear infinite;
352
+ }
353
+
354
+ @keyframes spin {
355
+ to {
356
+ transform: rotate(360deg);
357
+ }
358
+ }
359
+ </style>
360
+ </head>
361
+ <body>
362
+ <aside>
363
+ <div class="logo">
364
+ <svg
365
+ width="24"
366
+ height="24"
367
+ viewBox="0 0 24 24"
368
+ fill="none"
369
+ stroke="currentColor"
370
+ stroke-width="2"
371
+ stroke-linecap="round"
372
+ stroke-linejoin="round"
373
+ >
374
+ <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4" />
375
+ <polyline points="7 10 12 15 17 10" />
376
+ <line x1="12" y1="15" x2="12" y2="3" />
377
+ </svg>
378
+ Bloggig
379
+ </div>
380
+ <button class="new-chat-btn" onclick="startNewBlog()">
381
+ <svg
382
+ width="20"
383
+ height="20"
384
+ viewBox="0 0 24 24"
385
+ fill="none"
386
+ stroke="currentColor"
387
+ stroke-width="2"
388
+ stroke-linecap="round"
389
+ stroke-linejoin="round"
390
+ >
391
+ <line x1="12" y1="5" x2="12" y2="19" />
392
+ <line x1="5" y1="12" x2="19" y2="12" />
393
+ </svg>
394
+ New Blog
395
+ </button>
396
+ <div class="history-list" id="history-container">
397
+ <!-- Blogs will be loaded here -->
398
+ </div>
399
+ </aside>
400
+
401
+ <main>
402
+ <header>
403
+ <div id="blog-title-display" style="font-weight: 600">
404
+ Blog Overview
405
+ </div>
406
+ <div class="blog-actions">
407
+ <button
408
+ class="btn-icon"
409
+ id="download-btn"
410
+ onclick="downloadCurrentBlog()"
411
+ disabled
412
+ >
413
+ <svg
414
+ width="18"
415
+ height="18"
416
+ viewBox="0 0 24 24"
417
+ fill="none"
418
+ stroke="currentColor"
419
+ stroke-width="2"
420
+ stroke-linecap="round"
421
+ stroke-linejoin="round"
422
+ >
423
+ <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4" />
424
+ <polyline points="7 10 12 15 17 10" />
425
+ <line x1="12" y1="15" x2="12" y2="3" />
426
+ </svg>
427
+ Download
428
+ </button>
429
+ <button
430
+ class="btn-icon btn-delete"
431
+ id="delete-btn"
432
+ onclick="deleteCurrentBlog()"
433
+ disabled
434
+ >
435
+ <svg
436
+ width="18"
437
+ height="18"
438
+ viewBox="0 0 24 24"
439
+ fill="none"
440
+ stroke="currentColor"
441
+ stroke-width="2"
442
+ stroke-linecap="round"
443
+ stroke-linejoin="round"
444
+ >
445
+ <polyline points="3 6 5 6 21 6" />
446
+ <path
447
+ d="M19 6v14a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6m3 0V4a2 2 0 0 1 2-2h4a2 2 0 0 1 2 2v2"
448
+ />
449
+ <line x1="10" y1="11" x2="10" y2="17" />
450
+ <line x1="14" y1="11" x2="14" y2="17" />
451
+ </svg>
452
+ Delete
453
+ </button>
454
+ </div>
455
+ </header>
456
+
457
+ <div id="content-area">
458
+ <div class="welcome-screen" id="welcome-screen">
459
+ <h1>Craft something amazing.</h1>
460
+ <p>
461
+ Welcome to Bloggig. Enter a topic below to start generating a
462
+ high-quality, research-backed blog post with AI-generated visuals.
463
+ </p>
464
+ </div>
465
+
466
+ <div id="pipeline-status"></div>
467
+ <div class="markdown-body" id="blog-content"></div>
468
+ </div>
469
+
470
+ <div class="input-container">
471
+ <div class="input-wrapper">
472
+ <input
473
+ type="text"
474
+ id="topic-input"
475
+ placeholder="Enter blog topic..."
476
+ onkeypress="if (event.key === 'Enter') startGeneration();"
477
+ />
478
+ <button
479
+ class="send-btn"
480
+ id="generate-btn"
481
+ onclick="startGeneration()"
482
+ >
483
+ <svg
484
+ id="send-icon"
485
+ width="20"
486
+ height="20"
487
+ viewBox="0 0 24 24"
488
+ fill="none"
489
+ stroke="currentColor"
490
+ stroke-width="2"
491
+ stroke-linecap="round"
492
+ stroke-linejoin="round"
493
+ >
494
+ <line x1="22" y1="2" x2="11" y2="13" />
495
+ <polyline points="22 2 15 22 11 13 2 9 22 2" />
496
+ </svg>
497
+ </button>
498
+ </div>
499
+ </div>
500
+ </main>
501
+
502
+ <script>
503
+ let currentBlogData = null; // blog currently shown; set by loadBlog/startGeneration, read by the delete and download handlers
504
+ let isGenerating = false; // true while a generation WebSocket is in flight; guards the other handlers
505
+
506
+ // Initialize marked and highlight.js
507
+ marked.setOptions({
508
+ highlight: function (code, lang) {
509
+ if (lang && hljs.getLanguage(lang)) { // use the fenced language only when highlight.js knows it
510
+ return hljs.highlight(code, { language: lang }).value;
511
+ }
512
+ return hljs.highlightAuto(code).value; // otherwise let highlight.js detect the language
513
+ },
514
+ breaks: true,
515
+ });
516
+
517
+ async function loadHistory() { // Rebuild the sidebar history list from the response of GET /blogs.
518
+ const res = await fetch("/blogs");
519
+ const blogs = await res.json(); // NOTE(review): res.ok is not checked here (loadBlog does check it); presumably an array of title strings — confirm
520
+ const container = document.getElementById("history-container");
521
+ container.innerHTML = ""; // drop the previous entries before re-adding
522
+
523
+ blogs.forEach((title) => {
524
+ const div = document.createElement("div");
525
+ div.className = "history-item";
526
+ div.textContent = title; // textContent, so the title is never parsed as HTML
527
+ div.onclick = () => loadBlog(title);
528
+ container.appendChild(div);
529
+ });
530
+ }
531
+
532
+ async function loadBlog(title) { // Fetch one saved blog by title, render it and enable the download/delete buttons.
533
+ if (isGenerating) return; // ignore sidebar clicks while a generation is running
534
+
535
+ const res = await fetch(`/blog/${encodeURIComponent(title)}`);
536
+ if (!res.ok) return; // leave the current view untouched when the request fails
537
+
538
+ const data = await res.json();
539
+ currentBlogData = { // same topic / plan.blog_title / final fields that the other handlers in this script read
540
+ topic: title,
541
+ plan: { blog_title: title },
542
+ final: data.content,
543
+ };
544
+
545
+ displayBlog(data.content);
546
+ document.getElementById("blog-title-display").textContent = title;
547
+
548
+ // Mark as active in sidebar
549
+ document.querySelectorAll(".history-item").forEach((item) => {
550
+ item.classList.toggle("active", item.textContent === title);
551
+ });
552
+
553
+ // Enable actions
554
+ document.getElementById("download-btn").disabled = false;
555
+ document.getElementById("delete-btn").disabled = false;
556
+ }
557
+
558
+ function displayBlog(markdown) { // Render a Markdown string into #blog-content and hide the welcome and console panels.
559
+ const blogContent = document.getElementById("blog-content");
560
+ const welcome = document.getElementById("welcome-screen");
561
+ const pipeline = document.getElementById("pipeline-status");
562
+
563
+ welcome.style.display = "none";
564
+ pipeline.style.display = "none";
565
+ blogContent.style.display = "block";
566
+
567
+ // Rewrite image paths to use the /images static mount if they are internal
568
+ const processedMd = markdown.replace(/\(\.\.\/images\//g, "(/images/");
569
+
570
+ blogContent.innerHTML = marked.parse(processedMd); // NOTE(review): the generated HTML is inserted as-is — confirm the Markdown source is trusted or sanitize it first
571
+ hljs.highlightAll(); // run highlight.js after the new markup is in the DOM
572
+ }
573
+
574
+ function startNewBlog() { // Reset the page to the welcome state; does nothing while a generation is running.
575
+ if (isGenerating) return;
576
+
577
+ currentBlogData = null; // nothing is selected any more
578
+ document.getElementById("welcome-screen").style.display = "flex";
579
+ document.getElementById("blog-content").style.display = "none";
580
+ document.getElementById("pipeline-status").style.display = "none";
581
+ document.getElementById("blog-title-display").textContent =
582
+ "Blog Overview";
583
+ document.getElementById("topic-input").value = "";
584
+ document.getElementById("download-btn").disabled = true; // actions need a loaded blog
585
+ document.getElementById("delete-btn").disabled = true;
586
+
587
+ document.querySelectorAll(".history-item").forEach((item) => {
588
+ item.classList.remove("active"); // clear the sidebar selection
589
+ });
590
+ }
591
+
592
+ function appendStatus(msg) { // Append one timestamped line to the #pipeline-status console and scroll it into view.
593
+ const pipeline = document.getElementById("pipeline-status");
594
+ pipeline.style.display = "block"; // make sure the console is visible
595
+ const line = document.createElement("div");
596
+ line.className = "status-line";
597
+ line.innerHTML = `<span style="color: #6ee7b7;">[${new Date().toLocaleTimeString()}]</span> ${msg}`; // msg is inserted as HTML (callers pass <span> markup), so callers must escape any untrusted text
598
+ pipeline.appendChild(line);
599
+ pipeline.scrollTop = pipeline.scrollHeight; // keep the newest line in view
600
+ }
601
+
602
+ function startGeneration() {
603
+ const topicInput = document.getElementById("topic-input");
604
+ const topic = topicInput.value.trim();
605
+ if (!topic || isGenerating) return;
606
+
607
+ isGenerating = true;
608
+ setLoading(true);
609
+
610
+ document.getElementById("welcome-screen").style.display = "none";
611
+ document.getElementById("blog-content").style.display = "none";
612
+ const pipeline = document.getElementById("pipeline-status");
613
+ pipeline.innerHTML = "";
614
+ pipeline.style.display = "block";
615
+
616
+ appendStatus(`Initializing generation for topic: "${topic}"...`);
617
+
618
+ const protocol = window.location.protocol === "https:" ? "wss:" : "ws:";
619
+ const ws = new WebSocket(
620
+ `${protocol}//${window.location.host}/ws/generate_blog`,
621
+ );
622
+
623
+ ws.onopen = () => {
624
+ ws.send(JSON.stringify({ topic }));
625
+ };
626
+
627
+ ws.onmessage = (event) => {
628
+ const data = JSON.parse(event.data);
629
+
630
+ if (data.status === "completed") {
631
+ appendStatus("Done! Finalizing blog post...");
632
+ } else if (data.final) {
633
+ // This is the final state
634
+ currentBlogData = data;
635
+ displayBlog(data.final);
636
+ loadHistory(); // Refresh history
637
+ setLoading(false);
638
+ isGenerating = false;
639
+ document.getElementById("download-btn").disabled = false;
640
+ document.getElementById("delete-btn").disabled = false;
641
+ if (data.plan && data.plan.blog_title) {
642
+ document.getElementById("blog-title-display").textContent =
643
+ data.plan.blog_title;
644
+ }
645
+ } else if (data.error) {
646
+ appendStatus(
647
+ `<span style="color: #f87171;">ERROR: ${data.error}</span>`,
648
+ );
649
+ setLoading(false);
650
+ isGenerating = false;
651
+ } else {
652
+ // Try to infer what happened from the state keys
653
+ if (data.sections && data.sections.length > 0) {
654
+ const lastSection = data.sections[data.sections.length - 1];
655
+ appendStatus(
656
+ `Generated section: ${lastSection[1].split("\n")[0].replace("## ", "")}`,
657
+ );
658
+ } else if (data.plan) {
659
+ appendStatus(
660
+ `Plan created: "${data.plan.blog_title}" with ${data.plan.tasks.length} sections.`,
661
+ );
662
+ } else if (data.queries && data.queries.length > 0) {
663
+ appendStatus(`Researching: ${data.queries.join(", ")}`);
664
+ }
665
+ }
666
+ };
667
+
668
+ ws.onerror = (err) => {
669
+ appendStatus(
670
+ `<span style="color: #f87171;">Connection error.</span>`,
671
+ );
672
+ setLoading(false);
673
+ isGenerating = false;
674
+ };
675
+
676
+ ws.onclose = () => {
677
+ if (isGenerating) {
678
+ appendStatus("Connection closed.");
679
+ setLoading(false);
680
+ isGenerating = false;
681
+ }
682
+ };
683
+ }
684
+
685
+ function setLoading(loading) {
686
+ const btn = document.getElementById("generate-btn");
687
+ const icon = document.getElementById("send-icon");
688
+ if (loading) {
689
+ btn.disabled = true;
690
+ btn.innerHTML = '<div class="spinner"></div>';
691
+ } else {
692
+ btn.disabled = false;
693
+ btn.innerHTML = `<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><line x1="22" y1="2" x2="11" y2="13"/><polyline points="22 2 15 22 11 13 2 9 22 2"/></svg>`;
694
+ }
695
+ }
696
+
697
+ async function deleteCurrentBlog() { // Ask for confirmation, then DELETE the current blog on the server and reset the UI on success.
698
+ if (
699
+ !currentBlogData ||
700
+ !confirm(
701
+ "Are you sure you want to delete this blog and all related images?",
702
+ )
703
+ )
704
+ return; // nothing is loaded, or the user cancelled the dialog
705
+
706
+ const res = await fetch("/delete_blog", {
707
+ method: "DELETE",
708
+ headers: { "Content-Type": "application/json" },
709
+ body: JSON.stringify({ data: currentBlogData }), // the whole currentBlogData object is sent under the "data" key
710
+ });
711
+
712
+ if (res.ok) {
713
+ startNewBlog(); // back to the welcome screen
714
+ loadHistory(); // refresh the sidebar list
715
+ } else {
716
+ alert("Failed to delete blog.");
717
+ }
718
+ }
719
+
720
+ function downloadCurrentBlog() { // Navigate the browser to the download endpoint for the current blog.
721
+ if (!currentBlogData) return; // no blog is loaded
722
+ const title = currentBlogData.plan // use the plan's blog title when a plan exists, otherwise the topic
723
+ ? currentBlogData.plan.blog_title
724
+ : currentBlogData.topic;
725
+ window.location.href = `/download_blog/${encodeURIComponent(title)}`;
726
+ }
727
+
728
+ // Initial history load
729
+ loadHistory();
730
+ </script>
731
+ </body>
732
+ </html>
credentials_example.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Rename this file to credentials.yaml to be able to run the scripts
2
+
3
+ url: "https://us-south.ml.cloud.ibm.com"
4
+ apikey: ""
5
+ space_id: ""
data/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Data
2
+
3
+ This folder holds the data for the project, such as training datasets. When using DVC, point it at this folder.
deployment-setup-notes.txt ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ===============================
2
+ EC2 FastAPI Deployment Notes
3
+ ===============================
4
+
5
+ PROJECT: Blog Writing Agent
6
+ STACK: FastAPI + uv + systemd + GitHub Actions CI/CD
7
+
8
+
9
+ --------------------------------------------------
10
+ 1️⃣ Create systemd Service File
11
+ --------------------------------------------------
12
+
13
+ Command:
14
+ sudo nano /etc/systemd/system/blog-agent.service
15
+
16
+ Why?
17
+ - /etc/systemd/system/ is a system-level directory
18
+ - A normal user cannot create files here
19
+ - sudo is required for admin access
20
+ - This file defines how our app runs as a service
21
+
22
+
23
+ --------------------------------------------------
24
+ 2️⃣ Service File Content
25
+ --------------------------------------------------
26
+
27
+ [Unit]
28
+ Description=Blog Agent FastAPI App
29
+ After=network.target
30
+
31
+ [Service]
32
+ User=ubuntu
33
+ WorkingDirectory=/var/www/blog-agent
34
+ ExecStart=/home/ubuntu/.local/bin/uv run uvicorn Application.app:app --host 0.0.0.0 --port 8000
35
+ Restart=always
36
+
37
+ [Install]
38
+ WantedBy=multi-user.target
39
+
40
+
41
+ Why each line?
42
+
43
+ User=ubuntu
44
+ → Runs app as ubuntu user (safer than root)
45
+
46
+ WorkingDirectory
47
+ → Tells system where project exists
48
+
49
+ ExecStart
50
+ → Exact command to start FastAPI app
51
+
52
+ Restart=always
53
+ → If app crashes, systemd restarts it automatically
54
+
55
+ WantedBy=multi-user.target
56
+ → Makes service start when system boots
57
+
58
+
59
+ --------------------------------------------------
60
+ 3️⃣ Reload systemd
61
+ --------------------------------------------------
62
+
63
+ Command:
64
+ sudo systemctl daemon-reload
65
+
66
+ Why?
67
+ - After creating new service file
68
+ - systemd must reload configuration
69
+ - Otherwise it won’t detect new service
70
+
71
+
72
+ --------------------------------------------------
73
+ 4️⃣ Enable Service (Auto Start on Reboot)
74
+ --------------------------------------------------
75
+
76
+ Command:
77
+ sudo systemctl enable blog-agent
78
+
79
+ Why?
80
+ - Makes app start automatically when server reboots
81
+ - Without this, service won’t auto start
82
+
83
+
84
+ --------------------------------------------------
85
+ 5️⃣ Start Service
86
+ --------------------------------------------------
87
+
88
+ Command:
89
+ sudo systemctl start blog-agent
90
+
91
+ Why?
92
+ - Immediately start app without reboot
93
+
94
+
95
+ --------------------------------------------------
96
+ 6️⃣ Check Status
97
+ --------------------------------------------------
98
+
99
+ Command:
100
+ sudo systemctl status blog-agent
101
+
102
+ Why?
103
+ - Verify if service is running
104
+ - Shows logs and errors if any
105
+
106
+
107
+ --------------------------------------------------
108
+ 7️⃣ Restart During Deployment (Used in CD)
109
+ --------------------------------------------------
110
+
111
+ Command:
112
+ sudo systemctl restart blog-agent
113
+
114
+ Why?
115
+ - After git pull & uv sync
116
+ - Restart loads latest code
117
+ - Clean exit code (no 143 error)
118
+ - GitHub Actions shows GREEN ✔
119
+
120
+
121
+ --------------------------------------------------
122
+ 🚀 Why We Stopped Using nohup & ?
123
+ --------------------------------------------------
124
+
125
+ Problem:
126
+ - SSH session close → background process killed
127
+ - GitHub showed "Process exited with status 143"
128
+ - Deployment looked failed even though server updated
129
+
130
+ Solution:
131
+ - Use systemd service
132
+ - App runs independently of SSH
133
+ - Clean process management
134
+ - Production ready setup
135
+
136
+
137
+ --------------------------------------------------
138
+ 🎯 Final Deployment Flow
139
+ --------------------------------------------------
140
+
141
+ GitHub Push
142
+
143
+ CI Run (Tests)
144
+
145
+ CD SSH into EC2
146
+
147
+ git pull origin main
148
+ uv sync
149
+ sudo systemctl restart blog-agent
150
+
151
+ App Updated Successfully
152
+
153
+
154
+ --------------------------------------------------
155
+ 🔥 Important Concepts Learned
156
+ --------------------------------------------------
157
+
158
+ ✔ Difference between normal user & root
159
+ ✔ What sudo does
160
+ ✔ What systemd is
161
+ ✔ Why production apps use services
162
+ ✔ Why background (&, nohup) is not production safe
163
+ ✔ Why exit code 143 happens
164
+ ✔ Proper CI/CD deployment architecture
165
+
166
+
167
+ --------------------------------------------------
168
+ 🏆 Result
169
+ --------------------------------------------------
170
+
171
+ App:
172
+ - Auto restarts if crashed
173
+ - Auto starts on server reboot
174
+ - Independent of SSH
175
+ - Clean GitHub deployment (Green tick)
176
+
177
+ Production-level deployment achieved.
docker-compose.yml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ services:
2
+ app:
3
+ build: .
4
+ image: bloggingagent:latest
5
+ ports:
6
+ - "7860:7860"
7
+ env_file:
8
+ - .env
9
+ volumes:
10
+ - ./images:/app/images
11
+ - ./results:/app/results
12
+ - ./logs:/app/logs
13
+ restart: always
14
+
15
+ # Ensure directories exist for volume mapping
16
+ # Note: Docker will create them as root if they don't exist,
17
+ # but the app might need write permissions.
graph.png ADDED
images/attention_mechanism.png ADDED

Git LFS Details

  • SHA256: 17dad1a48eebfa9ff975930cbee97d17ea1605ff8b2c7850c259e367718879f0
  • Pointer size: 132 Bytes
  • Size of remote file: 1.43 MB
images/common_mistakes.png ADDED

Git LFS Details

  • SHA256: 3457c2c085370f4c71d6f67b7597adf3b241a0af85feb2ccfff8d2cce94b5186
  • Pointer size: 132 Bytes
  • Size of remote file: 1.18 MB
images/conclusion.png ADDED

Git LFS Details

  • SHA256: 788c08ba04245231c0bddda32939b559430c0de2e0d343ce2620bb48f12cca5f
  • Pointer size: 132 Bytes
  • Size of remote file: 1.33 MB
images/transformer_application.png ADDED

Git LFS Details

  • SHA256: 6b340d8e5438de2ced43c98751d93f1d9f9f575238f192e50507b9c225daa25f
  • Pointer size: 132 Bytes
  • Size of remote file: 1.34 MB
images/transformer_architecture.png ADDED

Git LFS Details

  • SHA256: 6001351eee461b54b71d89811090d2d8724843580676fa9afaca01ddf037466f
  • Pointer size: 132 Bytes
  • Size of remote file: 1.42 MB
jenkinsfile ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pipeline {
2
+ agent any
3
+
4
+ environment {
5
+ REPO_URL = "https://github.com/VashuTheGreat/Blog-Writing-Agent.git"
6
+ PROJECT_NAME = "Blog-Writing-Agent"
7
+ SPACE_NAME="Blog-Writing-Agent"
8
+ HF_USERNAME="VashuTheGreat2"
9
+ HF_TOKEN = credentials('HF_TOKEN')
10
+
11
+ }
12
+
13
+ stages {
14
+
15
+ stage('Clone Repository') {
16
+ steps {
17
+ echo "📥 Cloning repository..."
18
+ git branch: 'main', url: "${REPO_URL}"
19
+ }
20
+ }
21
+
22
+ stage('Setup Dependencies') {
23
+ steps {
24
+ echo "🔧 Setting git identity and installing HF CLI..."
25
+ sh '''#!/bin/bash
26
+ set -e
27
+ git config --global user.name "jenkins"
28
+ git config --global user.email "jenkins@local"
29
+ export PATH=$HOME/.local/bin:$PATH
30
+ if ! command -v hf &> /dev/null; then
31
+ pip3 install --user -U huggingface_hub
32
+ fi
33
+ '''
34
+ }
35
+ }
36
+
37
+ stage('Authenticate Hugging Face') {
38
+ steps {
39
+ echo "🔐 Logging into Hugging Face..."
40
+ sh '''#!/bin/bash
41
+ set -e
42
+ export PATH=$HOME/.local/bin:$PATH
43
+ hf auth login --token "$HF_TOKEN"
44
+ '''
45
+ }
46
+ }
47
+
48
+ stage('Configure Space Meta') { // Prepend a Hugging Face Spaces YAML front-matter header to README.md.
49
+ steps { // the script writes the header to a temp file, appends the existing README.md, then moves the result over README.md
50
+ echo "📝 Injecting HF Spaces configuration into README.md..."
51
+ sh '''#!/bin/bash
52
+ set -e
53
+ TEMP_README=$(mktemp)
54
+ cat << EOF > "$TEMP_README"
55
+ ---
56
+ title: $PROJECT_NAME
57
+ emoji: 🎓
58
+ colorFrom: blue
59
+ colorTo: green
60
+ sdk: docker
61
+ app_file: main.py
62
+ pinned: false
63
+ short_description: This is the Agentic Blog Writing Agent
64
+ ---
65
+
66
+ EOF
67
+ cat README.md >> "$TEMP_README"
68
+ mv "$TEMP_README" README.md
69
+ ''' // single-quoted Groovy string: $PROJECT_NAME is expanded by bash (unquoted heredoc), not by Groovy
70
+ }
71
+ }
72
+
73
+ stage('Create App Space') { // Create the target Space; failures of the create command are ignored.
74
+ steps {
75
+ echo "🚀 Creating HF Space if it doesn't exist..."
76
+ sh '''#!/bin/bash
77
+ set -e
78
+ export PATH=$HOME/.local/bin:$PATH
79
+ hf repos create "$HF_USERNAME/$SPACE_NAME" --type space --space-sdk docker || true
80
+ ''' // "|| true" keeps the stage green whatever the create command returns; NOTE(review): this also hides real failures (bad token, network) — confirm that is acceptable
81
+ }
82
+ }
83
+
84
+ stage('Upload to HF Space') {
85
+ steps {
86
+ echo "📤 Uploading project files..."
87
+ sh '''#!/bin/bash
88
+ set -e
89
+ export PATH=$HOME/.local/bin:$PATH
90
+ hf upload "$HF_USERNAME/$SPACE_NAME" . --repo-type=space
91
+ '''
92
+ }
93
+ }
94
+
95
+ }
96
+
97
+ post {
98
+ success {
99
+ echo "✅ Pipeline executed successfully!"
100
+ }
101
+ failure {
102
+ echo "❌ Pipeline failed!"
103
+ }
104
+ }
105
+ }
main.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ load_dotenv()  # runs before the project imports below; presumably so they can read .env values at import time — TODO confirm
3
+
4
+ from src.logger import *  # NOTE(review): wildcard import; presumably kept for its logging-setup side effects — confirm
5
+ from api.app import app  # exposed here so the "main:app" import string below resolves
6
+
7
+
8
+ if __name__=="__main__":
9
+ import uvicorn as uv
10
+ uv.run("main:app",host="0.0.0.0",port=7860,reload=True)  # NOTE(review): reload=True is uvicorn's development auto-reload — confirm it is intended for the deployed container
metadata.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ author: Vansh
2
+ model_type: scikit-learn_0.23
3
+ project_name: blogging_agent
4
+ project_version: v0.1
models/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Models
2
+
3
+ Here go the trained models. DVC should use these models for versioning.
notebooks/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Notebooks
2
+
3
+ Here go the notebooks used for research and development.
notebooks/agent.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/image_placeHolder.ipynb ADDED
@@ -0,0 +1,376 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 6,
6
+ "id": "0b9ffe5f",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "from pydantic import BaseModel,Field\n",
11
+ "from typing import Literal,List\n"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 4,
17
+ "id": "cd7bb64d",
18
+ "metadata": {},
19
+ "outputs": [
20
+ {
21
+ "data": {
22
+ "text/plain": [
23
+ "True"
24
+ ]
25
+ },
26
+ "execution_count": 4,
27
+ "metadata": {},
28
+ "output_type": "execute_result"
29
+ }
30
+ ],
31
+ "source": [
32
+ "from dotenv import load_dotenv\n",
33
+ "load_dotenv()"
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "code",
38
+ "execution_count": 7,
39
+ "id": "dd8207ef",
40
+ "metadata": {},
41
+ "outputs": [],
42
+ "source": [
43
+ "class ImageSpec(BaseModel):\n",
44
+ " placeholder:str=Field(...,description=\"e.g. [[IMAGE_1]]\")\n",
45
+ " filename:str=Field(...,description=\"Save under images/, e.g. qkv_flow.png\")\n",
46
+ " prompt:str=Field(...,description=\"Prompt to send to the image model\")\n",
47
+ " size:Literal[\"1024x1024\",\"1024x1536\",\"1536x1024\"]=\"1024x1024\"  # default must be one of the Literal options (was the typo 1025x1024)\n",
48
+ " quality: Literal[\"low\", \"medium\", \"high\"] = \"medium\"\n",
49
+ "\n",
50
+ "\n",
51
+ "class GlobalImagePlan(BaseModel):\n",
52
+ " md_with_placeholders:str\n",
53
+ " images:List[ImageSpec]=Field(default_factory=list)"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": 8,
59
+ "id": "63f25031",
60
+ "metadata": {},
61
+ "outputs": [],
62
+ "source": [
63
+ "from langchain_aws import ChatBedrockConverse\n"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": 9,
69
+ "id": "255a2613",
70
+ "metadata": {},
71
+ "outputs": [],
72
+ "source": [
73
+ "LLM_MODEL_ID = \"us.meta.llama3-3-70b-instruct-v1:0\"\n",
74
+ "LLM_REGION = \"us-east-1\"\n",
75
+ "llm = ChatBedrockConverse(\n",
76
+ " model_id=LLM_MODEL_ID,\n",
77
+ " region_name=LLM_REGION\n",
78
+ ")"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": 11,
84
+ "id": "849c528a",
85
+ "metadata": {},
86
+ "outputs": [],
87
+ "source": [
88
+ "placehonder=\"\"\"You are an expert technical blog image planning assistant.\n",
89
+ "\n",
90
+ "Your job is to analyze a Markdown blog post and generate a structured image plan.\n",
91
+ "\n",
92
+ "You MUST return output strictly matching the Pydantic model `GlobalImagePlan`.\n",
93
+ "\n",
94
+ "-----------------------------------------\n",
95
+ "YOUR TASK\n",
96
+ "-----------------------------------------\n",
97
+ "\n",
98
+ "You will receive a Markdown blog as input.\n",
99
+ "\n",
100
+ "You must:\n",
101
+ "\n",
102
+ "1. Keep the Markdown EXACTLY the same.\n",
103
+ "2. DO NOT rewrite, summarize, improve, or modify any text.\n",
104
+ "3. DO NOT remove or change any formatting.\n",
105
+ "4. Only insert image placeholders where images would improve clarity.\n",
106
+ "\n",
107
+ "-----------------------------------------\n",
108
+ "WHERE TO INSERT IMAGES\n",
109
+ "-----------------------------------------\n",
110
+ "\n",
111
+ "Insert placeholders only:\n",
112
+ "- After major section headings (## or ###)\n",
113
+ "- After complex explanations\n",
114
+ "- After architecture descriptions\n",
115
+ "- After workflows\n",
116
+ "- After comparisons\n",
117
+ "- Where diagrams would help understanding\n",
118
+ "- Where visual examples would add clarity\n",
119
+ "\n",
120
+ "DO NOT:\n",
121
+ "- Add images randomly\n",
122
+ "- Add too many images\n",
123
+ "- Break code blocks\n",
124
+ "- Insert placeholders inside code blocks\n",
125
+ "- Modify existing content\n",
126
+ "\n",
127
+ "-----------------------------------------\n",
128
+ "PLACEHOLDER FORMAT\n",
129
+ "-----------------------------------------\n",
130
+ "\n",
131
+ "Use this exact format:\n",
132
+ "\n",
133
+ "[[IMAGE_1]]\n",
134
+ "[[IMAGE_2]]\n",
135
+ "[[IMAGE_3]]\n",
136
+ "\n",
137
+ "Number them sequentially.\n",
138
+ "\n",
139
+ "-----------------------------------------\n",
140
+ "IMAGE SPEC RULES\n",
141
+ "-----------------------------------------\n",
142
+ "\n",
143
+ "For each placeholder generate an ImageSpec with:\n",
144
+ "\n",
145
+ "- placeholder: exact placeholder string (e.g. [[IMAGE_1]])\n",
146
+ "- filename: save under images/ directory (example: images/attention_flow.png)\n",
147
+ "- prompt: highly detailed image generation prompt describing what the image should show\n",
148
+ "- size: choose one of:\n",
149
+ " - 1024x1024 (for square diagrams)\n",
150
+ " - 1536x1024 (for wide architecture diagrams)\n",
151
+ " - 1024x1536 (for vertical infographics)\n",
152
+ "- quality: \"medium\" unless diagram is complex → use \"high\"\n",
153
+ "\n",
154
+ "The prompt must:\n",
155
+ "- Be descriptive\n",
156
+ "- Mention diagram style\n",
157
+ "- Mention labels\n",
158
+ "- Mention arrows and flow\n",
159
+ "- Mention clean white background\n",
160
+ "- Mention professional technical illustration style\n",
161
+ "\n",
162
+ "-----------------------------------------\n",
163
+ "IMPORTANT OUTPUT RULES\n",
164
+ "-----------------------------------------\n",
165
+ "\n",
166
+ "You MUST return ONLY a valid GlobalImagePlan JSON object.\n",
167
+ "\n",
168
+ "Do NOT include:\n",
169
+ "- Explanations\n",
170
+ "- Extra text\n",
171
+ "- Markdown fences\n",
172
+ "- Comments\n",
173
+ "- Any text before or after the JSON\n",
174
+ "\n",
175
+ "-----------------------------------------\n",
176
+ "OUTPUT FORMAT\n",
177
+ "-----------------------------------------\n",
178
+ "\n",
179
+ "{\n",
180
+ " \"md_with_placeholders\": \"...full markdown with inserted placeholders...\",\n",
181
+ " \"images\": [\n",
182
+ " {\n",
183
+ " \"placeholder\": \"[[IMAGE_1]]\",\n",
184
+ " \"filename\": \"images/example.png\",\n",
185
+ " \"prompt\": \"Detailed image generation prompt...\",\n",
186
+ " \"size\": \"1536x1024\",\n",
187
+ " \"quality\": \"medium\"\n",
188
+ " }\n",
189
+ " ]\n",
190
+ "}\"\"\""
191
+ ]
192
+ },
193
+ {
194
+ "cell_type": "code",
195
+ "execution_count": 14,
196
+ "id": "332e03d8",
197
+ "metadata": {},
198
+ "outputs": [],
199
+ "source": [
200
+ "from langchain.messages import SystemMessage,HumanMessage"
201
+ ]
202
+ },
203
+ {
204
+ "cell_type": "code",
205
+ "execution_count": 16,
206
+ "id": "1a7a4167",
207
+ "metadata": {},
208
+ "outputs": [],
209
+ "source": [
210
+ "markdown=\"\"\"\n",
211
+ "# State of Multimodal LLMs in 2026\n",
212
+ "\n",
213
+ "## Introduction to Multimodal LLMs\n",
214
+ "Recent developments in multimodal LLMs have shown significant progress, with models now capable of processing and generating multiple forms of data, such as text, images, and audio [Not found in provided sources]. \n",
215
+ "* Multimodal LLMs have been applied to various tasks, including visual question answering, image captioning, and text-to-image synthesis.\n",
216
+ "* The impact of multimodal LLMs can be seen in industries like healthcare, education, and entertainment, where they are used for applications such as medical image analysis, interactive learning systems, and content creation [Not found in provided sources].\n",
217
+ "* Despite the advancements, key challenges in multimodal LLM research remain, including the need for large-scale datasets, improved model architectures, and better evaluation metrics [Not found in provided sources].\n",
218
+ "\n",
219
+ "## Recent Advances in Multimodal LLMs\n",
220
+ "Recent breakthroughs in multimodal LLM architecture have led to significant improvements in the field. \n",
221
+ "* Multimodal transformers, which combine visual and textual features, have shown promising results in tasks such as visual question answering and image-text retrieval [Not found in provided sources].\n",
222
+ "* The use of multimodal attention mechanisms has also been explored, allowing models to focus on specific parts of the input data [Not found in provided sources].\n",
223
+ "\n",
224
+ "Multimodal LLMs play a crucial role in both computer vision and natural language processing. \n",
225
+ "They can be used to analyze and understand visual data, such as images and videos, and generate text-based descriptions or summaries.\n",
226
+ "In natural language processing, multimodal LLMs can be used to improve language understanding and generation tasks, such as machine translation and text summarization.\n",
227
+ "\n",
228
+ "The potential applications of multimodal LLMs in healthcare are vast. \n",
229
+ "They can be used to analyze medical images, such as X-rays and MRIs, and generate text-based diagnoses or recommendations.\n",
230
+ "Additionally, multimodal LLMs can be used to develop personalized treatment plans and improve patient outcomes [Not found in provided sources].\n",
231
+ "Overall, the latest advancements in multimodal LLMs have the potential to revolutionize various fields, including healthcare, and improve the way we interact with and understand visual and textual data.\n",
232
+ "\n",
233
+ "## Challenges and Limitations\n",
234
+ "The development of multimodal LLMs has made significant progress, but there are still several challenges and limitations that need to be addressed. \n",
235
+ "* The limitations of current multimodal LLM models include their inability to fully understand the nuances of human communication, such as sarcasm, idioms, and figurative language [Not found in provided sources].\n",
236
+ "* Training and deploying multimodal LLMs pose significant challenges, including the need for large amounts of diverse and high-quality training data, as well as the requirement for significant computational resources [Not found in provided sources].\n",
237
+ "* Further research is needed to improve the performance and robustness of multimodal LLMs, particularly in areas such as common sense reasoning, emotional intelligence, and adaptability to new contexts and domains [Not found in provided sources]. \n",
238
+ "Overall, addressing these challenges and limitations will be crucial to unlocking the full potential of multimodal LLMs and achieving more effective and engaging human-computer interactions.\n",
239
+ "\n",
240
+ "## Future Directions\n",
241
+ "The future of multimodal LLMs holds great promise, with potential applications in areas such as [virtual assistants](Not found in provided sources) and [human-computer interaction](Not found in provided sources). \n",
242
+ "* Multimodal LLMs may be used to improve accessibility and user experience in various domains.\n",
243
+ "* The role of multimodal LLMs in shaping the future of AI is significant, as they can enable more natural and intuitive interactions between humans and machines.\n",
244
+ "* Continued research in multimodal LLMs is crucial to overcome current limitations and unlock their full potential, driving innovation and progress in the field of AI [Not found in provided sources].\n",
245
+ "\n",
246
+ "\"\"\""
247
+ ]
248
+ },
249
+ {
250
+ "cell_type": "code",
251
+ "execution_count": 18,
252
+ "id": "796739f7",
253
+ "metadata": {},
254
+ "outputs": [
255
+ {
256
+ "data": {
257
+ "text/plain": [
258
+ "GlobalImagePlan(md_with_placeholders='# State of Multimodal LLMs in 2026\\n## Introduction to Multimodal LLMs\\nRecent developments in multimodal LLMs have shown significant progress, with models now capable of processing and generating multiple forms of data, such as text, images, and audio [Not found in provided sources]. \\n* Multimodal LLMs have been applied to various tasks, including visual question answering, image captioning, and text-to-image synthesis.\\n* The impact of multimodal LLMs can be seen in industries like healthcare, education, and entertainment, where they are used for applications such as medical image analysis, interactive learning systems, and content creation [Not found in provided sources].\\n* Despite the advancements, key challenges in multimodal LLM research remain, including the need for large-scale datasets, improved model architectures, and better evaluation metrics [Not found in provided sources].\\n[[IMAGE_1]]\\n## Recent Advances in Multimodal LLMs\\nRecent breakthroughs in multimodal LLM architecture have led to significant improvements in the field. \\n* Multimodal transformers, which combine visual and textual features, have shown promising results in tasks such as visual question answering and image-text retrieval [Not found in provided sources].\\n* The use of multimodal attention mechanisms has also been explored, allowing models to focus on specific parts of the input data [Not found in provided sources].\\n[[IMAGE_2]]\\nMultimodal LLMs play a crucial role in both computer vision and natural language processing. \\nThey can be used to analyze and understand visual data, such as images and videos, and generate text-based descriptions or summaries.\\nIn natural language processing, multimodal LLMs can be used to improve language understanding and generation tasks, such as machine translation and text summarization.\\n[[IMAGE_3]]\\nThe potential applications of multimodal LLMs in healthcare are vast. 
\\nThey can be used to analyze medical images, such as X-rays and MRIs, and generate text-based diagnoses or recommendations.\\nAdditionally, multimodal LLMs can be used to develop personalized treatment plans and improve patient outcomes [Not found in provided sources].\\nOverall, the latest advancements in multimodal LLMs have the potential to revolutionize various fields, including healthcare, and improve the way we interact with and understand visual and textual data.\\n[[IMAGE_4]]\\n## Challenges and Limitations\\nThe development of multimodal LLMs has made significant progress, but there are still several challenges and limitations that need to be addressed. \\n* The limitations of current multimodal LLM models include their inability to fully understand the nuances of human communication, such as sarcasm, idioms, and figurative language [Not found in provided sources].\\n* Training and deploying multimodal LLMs pose significant challenges, including the need for large amounts of diverse and high-quality training data, as well as the requirement for significant computational resources [Not found in provided sources].\\n* Further research is needed to improve the performance and robustness of multimodal LLMs, particularly in areas such as common sense reasoning, emotional intelligence, and adaptability to new contexts and domains [Not found in provided sources]. \\nOverall, addressing these challenges and limitations will be crucial to unlocking the full potential of multimodal LLMs and achieving more effective and engaging human-computer interactions.\\n[[IMAGE_5]]\\n## Future Directions\\nThe future of multimodal LLMs holds great promise, with potential applications in areas such as [virtual assistants](Not found in provided sources) and [human-computer interaction](Not found in provided sources). 
\\n* Multimodal LLMs may be used to improve accessibility and user experience in various domains.\\n* The role of multimodal LLMs in shaping the future of AI is significant, as they can enable more natural and intuitive interactions between humans and machines.\\n* Continued research in multimodal LLMs is crucial to overcome current limitations and unlock their full potential, driving innovation and progress in the field of AI [Not found in provided sources].\\n[[IMAGE_6]]', images=[ImageSpec(placeholder='[[IMAGE_1]]', filename='images/multimodal_llm_architecture.png', prompt='A diagram showing the architecture of a multimodal LLM, with visual and textual features combined, and labels and arrows indicating the flow of data, on a clean white background, in a professional technical illustration style', size='1536x1024', quality='medium'), ImageSpec(placeholder='[[IMAGE_2]]', filename='images/multimodal_transformers.png', prompt='An illustration of multimodal transformers, with visual and textual features combined, and labels and arrows indicating the flow of data, on a clean white background, in a professional technical illustration style', size='1024x1024', quality='medium'), ImageSpec(placeholder='[[IMAGE_3]]', filename='images/multimodal_llm_applications.png', prompt='A diagram showing the various applications of multimodal LLMs, including computer vision and natural language processing, with labels and arrows indicating the relationships between the different applications, on a clean white background, in a professional technical illustration style', size='1024x1536', quality='medium'), ImageSpec(placeholder='[[IMAGE_4]]', filename='images/multimodal_llm_healthcare.png', prompt='An illustration of the potential applications of multimodal LLMs in healthcare, including medical image analysis and personalized treatment plans, with labels and arrows indicating the flow of data, on a clean white background, in a professional technical illustration style', 
size='1536x1024', quality='medium'), ImageSpec(placeholder='[[IMAGE_5]]', filename='images/multimodal_llm_challenges.png', prompt='A diagram showing the challenges and limitations of multimodal LLMs, including the need for large-scale datasets and improved model architectures, with labels and arrows indicating the relationships between the different challenges, on a clean white background, in a professional technical illustration style', size='1024x1024', quality='medium'), ImageSpec(placeholder='[[IMAGE_6]]', filename='images/multimodal_llm_future.png', prompt='An illustration of the future directions of multimodal LLMs, including potential applications in virtual assistants and human-computer interaction, with labels and arrows indicating the flow of data, on a clean white background, in a professional technical illustration style', size='1024x1536', quality='medium')])"
259
+ ]
260
+ },
261
+ "execution_count": 18,
262
+ "metadata": {},
263
+ "output_type": "execute_result"
264
+ }
265
+ ],
266
+ "source": [
267
+ "output=llm.with_structured_output(GlobalImagePlan)\\\n",
268
+ ".invoke(\n",
269
+ " [\n",
270
+ " SystemMessage(content=placehonder),\n",
271
+ " HumanMessage(content=markdown)\n",
272
+ " ]\n",
273
+ ")\n",
274
+ "\n",
275
+ "output"
276
+ ]
277
+ },
278
+ {
279
+ "cell_type": "code",
280
+ "execution_count": 20,
281
+ "id": "0e44ffd5",
282
+ "metadata": {},
283
+ "outputs": [
284
+ {
285
+ "data": {
286
+ "text/plain": [
287
+ "'# State of Multimodal LLMs in 2026\\n## Introduction to Multimodal LLMs\\nRecent developments in multimodal LLMs have shown significant progress, with models now capable of processing and generating multiple forms of data, such as text, images, and audio [Not found in provided sources]. \\n* Multimodal LLMs have been applied to various tasks, including visual question answering, image captioning, and text-to-image synthesis.\\n* The impact of multimodal LLMs can be seen in industries like healthcare, education, and entertainment, where they are used for applications such as medical image analysis, interactive learning systems, and content creation [Not found in provided sources].\\n* Despite the advancements, key challenges in multimodal LLM research remain, including the need for large-scale datasets, improved model architectures, and better evaluation metrics [Not found in provided sources].\\n[[IMAGE_1]]\\n## Recent Advances in Multimodal LLMs\\nRecent breakthroughs in multimodal LLM architecture have led to significant improvements in the field. \\n* Multimodal transformers, which combine visual and textual features, have shown promising results in tasks such as visual question answering and image-text retrieval [Not found in provided sources].\\n* The use of multimodal attention mechanisms has also been explored, allowing models to focus on specific parts of the input data [Not found in provided sources].\\n[[IMAGE_2]]\\nMultimodal LLMs play a crucial role in both computer vision and natural language processing. \\nThey can be used to analyze and understand visual data, such as images and videos, and generate text-based descriptions or summaries.\\nIn natural language processing, multimodal LLMs can be used to improve language understanding and generation tasks, such as machine translation and text summarization.\\n[[IMAGE_3]]\\nThe potential applications of multimodal LLMs in healthcare are vast. 
\\nThey can be used to analyze medical images, such as X-rays and MRIs, and generate text-based diagnoses or recommendations.\\nAdditionally, multimodal LLMs can be used to develop personalized treatment plans and improve patient outcomes [Not found in provided sources].\\nOverall, the latest advancements in multimodal LLMs have the potential to revolutionize various fields, including healthcare, and improve the way we interact with and understand visual and textual data.\\n[[IMAGE_4]]\\n## Challenges and Limitations\\nThe development of multimodal LLMs has made significant progress, but there are still several challenges and limitations that need to be addressed. \\n* The limitations of current multimodal LLM models include their inability to fully understand the nuances of human communication, such as sarcasm, idioms, and figurative language [Not found in provided sources].\\n* Training and deploying multimodal LLMs pose significant challenges, including the need for large amounts of diverse and high-quality training data, as well as the requirement for significant computational resources [Not found in provided sources].\\n* Further research is needed to improve the performance and robustness of multimodal LLMs, particularly in areas such as common sense reasoning, emotional intelligence, and adaptability to new contexts and domains [Not found in provided sources]. \\nOverall, addressing these challenges and limitations will be crucial to unlocking the full potential of multimodal LLMs and achieving more effective and engaging human-computer interactions.\\n[[IMAGE_5]]\\n## Future Directions\\nThe future of multimodal LLMs holds great promise, with potential applications in areas such as [virtual assistants](Not found in provided sources) and [human-computer interaction](Not found in provided sources). 
\\n* Multimodal LLMs may be used to improve accessibility and user experience in various domains.\\n* The role of multimodal LLMs in shaping the future of AI is significant, as they can enable more natural and intuitive interactions between humans and machines.\\n* Continued research in multimodal LLMs is crucial to overcome current limitations and unlock their full potential, driving innovation and progress in the field of AI [Not found in provided sources].\\n[[IMAGE_6]]'"
288
+ ]
289
+ },
290
+ "execution_count": 20,
291
+ "metadata": {},
292
+ "output_type": "execute_result"
293
+ }
294
+ ],
295
+ "source": [
296
+ "output.md_with_placeholders"
297
+ ]
298
+ },
299
+ {
300
+ "cell_type": "code",
301
+ "execution_count": 21,
302
+ "id": "00892f27",
303
+ "metadata": {},
304
+ "outputs": [
305
+ {
306
+ "data": {
307
+ "text/plain": [
308
+ "[ImageSpec(placeholder='[[IMAGE_1]]', filename='images/multimodal_llm_architecture.png', prompt='A diagram showing the architecture of a multimodal LLM, with visual and textual features combined, and labels and arrows indicating the flow of data, on a clean white background, in a professional technical illustration style', size='1536x1024', quality='medium'),\n",
309
+ " ImageSpec(placeholder='[[IMAGE_2]]', filename='images/multimodal_transformers.png', prompt='An illustration of multimodal transformers, with visual and textual features combined, and labels and arrows indicating the flow of data, on a clean white background, in a professional technical illustration style', size='1024x1024', quality='medium'),\n",
310
+ " ImageSpec(placeholder='[[IMAGE_3]]', filename='images/multimodal_llm_applications.png', prompt='A diagram showing the various applications of multimodal LLMs, including computer vision and natural language processing, with labels and arrows indicating the relationships between the different applications, on a clean white background, in a professional technical illustration style', size='1024x1536', quality='medium'),\n",
311
+ " ImageSpec(placeholder='[[IMAGE_4]]', filename='images/multimodal_llm_healthcare.png', prompt='An illustration of the potential applications of multimodal LLMs in healthcare, including medical image analysis and personalized treatment plans, with labels and arrows indicating the flow of data, on a clean white background, in a professional technical illustration style', size='1536x1024', quality='medium'),\n",
312
+ " ImageSpec(placeholder='[[IMAGE_5]]', filename='images/multimodal_llm_challenges.png', prompt='A diagram showing the challenges and limitations of multimodal LLMs, including the need for large-scale datasets and improved model architectures, with labels and arrows indicating the relationships between the different challenges, on a clean white background, in a professional technical illustration style', size='1024x1024', quality='medium'),\n",
313
+ " ImageSpec(placeholder='[[IMAGE_6]]', filename='images/multimodal_llm_future.png', prompt='An illustration of the future directions of multimodal LLMs, including potential applications in virtual assistants and human-computer interaction, with labels and arrows indicating the flow of data, on a clean white background, in a professional technical illustration style', size='1024x1536', quality='medium')]"
314
+ ]
315
+ },
316
+ "execution_count": 21,
317
+ "metadata": {},
318
+ "output_type": "execute_result"
319
+ }
320
+ ],
321
+ "source": [
322
+ "output.images"
323
+ ]
324
+ },
325
+ {
326
+ "cell_type": "code",
327
+ "execution_count": 23,
328
+ "id": "0b4e77e2",
329
+ "metadata": {},
330
+ "outputs": [
331
+ {
332
+ "data": {
333
+ "text/plain": [
334
+ "'A diagram showing the architecture of a multimodal LLM, with visual and textual features combined, and labels and arrows indicating the flow of data, on a clean white background, in a professional technical illustration style'"
335
+ ]
336
+ },
337
+ "execution_count": 23,
338
+ "metadata": {},
339
+ "output_type": "execute_result"
340
+ }
341
+ ],
342
+ "source": [
343
+ "output.images[0].prompt"
344
+ ]
345
+ },
346
+ {
347
+ "cell_type": "code",
348
+ "execution_count": null,
349
+ "id": "8666fa58",
350
+ "metadata": {},
351
+ "outputs": [],
352
+ "source": []
353
+ }
354
+ ],
355
+ "metadata": {
356
+ "kernelspec": {
357
+ "display_name": "bloggig-Agent (3.12.12)",
358
+ "language": "python",
359
+ "name": "python3"
360
+ },
361
+ "language_info": {
362
+ "codemirror_mode": {
363
+ "name": "ipython",
364
+ "version": 3
365
+ },
366
+ "file_extension": ".py",
367
+ "mimetype": "text/x-python",
368
+ "name": "python",
369
+ "nbconvert_exporter": "python",
370
+ "pygments_lexer": "ipython3",
371
+ "version": "3.12.12"
372
+ }
373
+ },
374
+ "nbformat": 4,
375
+ "nbformat_minor": 5
376
+ }
notebooks/understanding_self_attention.md ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Understanding Self Attention
2
+
3
+ ### Introduction to Self Attention
4
+ Self-attention, also known as intra-attention, is a mechanism used in deep learning models to allow the model to attend to different parts of the input data and weigh their importance. It's a key component of the Transformer architecture, introduced in 2017, which revolutionized the field of natural language processing (NLP). Self-attention enables the model to capture long-range dependencies and contextual relationships in the input data, making it particularly useful for sequence-to-sequence tasks such as machine translation, text summarization, and chatbots. The importance of self-attention lies in its ability to handle variable-length input sequences, parallelize computation, and improve model performance by focusing on the most relevant parts of the input data. In this blog, we'll delve deeper into the concept of self-attention, its types, and its applications in deep learning.
5
+
6
+ ### How Self Attention Works
7
+ Self-attention is a mechanism that allows a model to attend to different parts of the input sequence simultaneously and weigh their importance. It's a key component of the Transformer architecture, introduced in the paper "Attention is All You Need" by Vaswani et al.
8
+
9
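+ The self-attention mechanism takes in a set of input vectors, typically the output of an encoder or a previous layer, and computes a weighted sum of these vectors based on their similarity. The weights are computed from the inputs themselves on every forward pass; what is learned during training are the projection matrices that produce them, so the weights reflect the relative importance of each input vector for the current input.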
10
+
11
+ The mathematical formulation of self-attention can be broken down into three main steps:
12
+
13
+ 1. **Query, Key, and Value Vectors**: The input vectors are first transformed into three different vectors: Query (Q), Key (K), and Value (V). These vectors are obtained by applying linear transformations to the input vectors.
14
+ 2. **Attention Scores**: The attention scores are computed by taking the dot product of the Query and Key vectors and applying a scaling factor. The attention scores represent the similarity between the input vectors.
15
+ 3. **Weighted Sum**: The attention scores are then used to compute a weighted sum of the Value vectors. The weighted sum is the final output of the self-attention mechanism.
16
+
17
+ The self-attention mechanism can be formulated mathematically as follows:
18
+
19
+ `Attention(Q, K, V) = softmax(Q * K^T / sqrt(d)) * V`
20
+
21
+ where `Q`, `K`, and `V` are the Query, Key, and Value matrices, respectively, `d` is the dimensionality of the key vectors, and `softmax` is the softmax activation function.
22
+
23
+ The self-attention mechanism has several benefits, including:
24
+
25
+ * **Parallelization**: Self-attention can be parallelized more easily than recurrent neural networks (RNNs), making it more efficient for long-range dependencies.
26
+ * **Flexibility**: Self-attention can handle variable-length input sequences and can be used for both encoding and decoding tasks.
27
+ * **Interpretability**: The attention scores can provide insights into which parts of the input sequence are most relevant for a particular task.
28
+
29
+ ### Types of Self Attention
30
+ There are several types of self-attention mechanisms that have been proposed in the literature, each with its own strengths and weaknesses. The two main categories of self-attention are local self-attention and global self-attention.
31
+
32
+ #### Local Self Attention
33
+ Local self-attention, also known as local attention or window-based attention, focuses on a fixed-size window of the input sequence. This type of attention is useful when the relationships between nearby elements in the sequence are more important than the relationships between distant elements. Local self-attention is often used in tasks such as language modeling and machine translation.
34
+
35
+ #### Global Self Attention
36
+ Global self-attention, on the other hand, considers the entire input sequence when computing the attention weights. This type of attention is useful when the relationships between all elements in the sequence are important, regardless of their distance. Global self-attention is often used in tasks such as question answering and text classification.
37
+
38
+ #### Other Types of Self Attention
39
+ In addition to local and global self-attention, there are other variants of self-attention that have been proposed, including:
40
+ * **Hierarchical self-attention**: This type of attention uses a hierarchical representation of the input sequence, where the attention weights are computed at multiple levels of granularity.
41
+ * **Graph-based self-attention**: This type of attention is used for graph-structured data, where the attention weights are computed based on the graph structure.
42
+ * **Multi-head self-attention**: This type of attention uses multiple attention heads to capture different types of relationships between the elements in the input sequence.
43
+
44
+ ### Applications of Self Attention
45
+ Self-attention has numerous applications across various fields, including natural language processing, computer vision, and more. Some of the key applications of self-attention are:
46
+ * **Natural Language Processing (NLP)**: Self-attention is widely used in NLP tasks such as language translation, question answering, and text summarization. It helps in understanding the context and relationships between different words in a sentence.
47
+ * **Computer Vision**: Self-attention is used in computer vision tasks such as image classification, object detection, and image generation. It helps in understanding the relationships between different parts of an image.
48
+ * **Speech Recognition**: Self-attention is used in speech recognition tasks to improve the accuracy of speech-to-text models.
49
+ * **Recommendation Systems**: Self-attention is used in recommendation systems to understand the relationships between different items and recommend relevant items to users.
50
+ * **Time Series Forecasting**: Self-attention is used in time series forecasting to understand the relationships between different time steps and predict future values.
51
+ The use of self-attention has led to state-of-the-art results in many of these applications, and its potential continues to be explored in other fields.
52
+
53
+ ### Implementing Self Attention
54
+ Implementing self-attention in a deep learning model involves several key steps. Here's a step-by-step guide to help you get started:
55
+ #### Step 1: Define the Self-Attention Mechanism
56
+ The self-attention mechanism is based on the Query-Key-Value (QKV) framework. You need to define the QKV matrices and calculate the attention weights using the following formula:
57
+ $$Attention(Q, K, V) = softmax(\frac{Q \cdot K^T}{\sqrt{d_k}}) \cdot V$$
58
+ where $d_k$ is the dimensionality of the key vector.
59
+
60
+ #### Step 2: Choose the Attention Type
61
+ When implementing self-attention, you will typically choose between two formulations: scaled dot-product attention and multi-head attention. Scaled dot-product attention is the basic building block, while multi-head attention runs several scaled dot-product attentions in parallel, allowing the model to jointly attend to information from different representation subspaces.
62
+
63
+ #### Step 3: Implement the Self-Attention Layer
64
+ You can implement the self-attention layer using popular deep learning frameworks such as PyTorch or TensorFlow. The self-attention layer takes in the input sequence and outputs a weighted sum of the input elements.
65
+
66
+ #### Step 4: Integrate the Self-Attention Layer into the Model
67
+ Once you have implemented the self-attention layer, you can integrate it into your deep learning model. This typically involves adding the self-attention layer to the model architecture and adjusting the model's parameters accordingly.
68
+
69
+ #### Step 5: Train the Model
70
+ After integrating the self-attention layer, you need to train the model using a suitable optimizer and loss function. The self-attention mechanism can be trained end-to-end with the rest of the model.
71
+
72
+ #### Example Code
73
+ Here's an example code snippet in PyTorch that demonstrates how to implement a basic self-attention layer:
74
+ ```python
75
+ import torch
76
+ import torch.nn as nn
77
+ import torch.nn.functional as F
78
+
79
class SelfAttention(nn.Module):
    """Simplified single-head scaled dot-product self-attention layer.

    The input is projected into query, key and value tensors by three
    separate ``nn.Linear`` layers. Every position is scored against every
    other position, the scores are normalised with softmax, and the result
    is the attention-weighted sum of the values followed by dropout.

    Args:
        embed_dim: Size of the last dimension of the input; each projection
            maps ``embed_dim`` to ``embed_dim``.
        num_heads: Stored on the instance but not used by ``forward``; this
            simplified layer computes a single attention head.
    """

    def __init__(self, embed_dim, num_heads):
        super(SelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.query_linear = nn.Linear(embed_dim, embed_dim)
        self.key_linear = nn.Linear(embed_dim, embed_dim)
        self.value_linear = nn.Linear(embed_dim, embed_dim)
        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        """Apply self-attention to ``x``.

        Args:
            x: Tensor whose last two dimensions are (sequence, embed_dim);
                any leading dimensions are treated as batch dimensions.

        Returns:
            Tensor with the same shape as ``x``.
        """
        # Calculate Q, K, V
        Q = self.query_linear(x)
        K = self.key_linear(x)
        V = self.value_linear(x)

        # Calculate attention weights.
        # Only the last two axes are swapped: ``K.T`` would reverse *every*
        # axis and therefore break (or silently mis-align) batched input.
        # ``** 0.5`` is used for the scaling so no extra import is required.
        scores = torch.matmul(Q, K.transpose(-2, -1)) / self.embed_dim ** 0.5
        attention_weights = F.softmax(scores, dim=-1)

        # Calculate output
        output = torch.matmul(attention_weights, V)
        output = self.dropout(output)
        return output
103
+ ```
104
+ Note that this is a simplified example, and you may need to modify the code to suit your specific use case.
105
+
106
+ ### Advantages and Limitations of Self Attention
107
+ The self-attention mechanism has several advantages that make it a powerful tool in deep learning models. Some of the key benefits include:
108
+ * **Parallelization**: Self-attention allows for parallelization of sequential data, making it much faster than traditional recurrent neural networks (RNNs) for long sequences.
109
+ * **Flexibility**: Self-attention can handle variable-length input sequences and can be used for both short-term and long-term dependencies.
110
+ * **Interpretability**: The attention weights provide a way to visualize and understand which parts of the input sequence are most relevant for a particular task.
111
+
112
+ However, self-attention also has some limitations:
113
+ * **Computational Cost**: Self-attention has a high computational cost, especially for long sequences, due to the need to compute attention weights for every pair of elements.
114
+ * **Memory Requirements**: Self-attention requires a significant amount of memory to store the attention weights and the input sequence.
115
+ * **Difficulty in Handling Local Dependencies**: Self-attention can struggle to capture local dependencies, such as those found in images or text with strong spatial relationships.
116
+
117
+ Despite these limitations, there are several promising directions for future work on self-attention, including:
118
+ * **Improving Efficiency**: Researchers are exploring ways to improve the efficiency of self-attention, such as using sparse attention or hierarchical attention.
119
+ * **Combining with Other Mechanisms**: Self-attention can be combined with other mechanisms, such as convolutional neural networks (CNNs) or RNNs, to create more powerful models.
120
+ * **Applying to New Domains**: Self-attention can be applied to new domains, such as computer vision or speech recognition, to improve performance and efficiency.
notebooks/understanding_self_attention_in_deep_learning.md ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Understanding Self Attention in Deep Learning
2
+
3
+ ## Introduction to Self Attention
4
+ Self attention is a fundamental concept in deep learning, enabling models to weigh the importance of different input elements relative to each other. It plays a crucial role in deep learning models, particularly in natural language processing and computer vision tasks, by allowing the model to focus on specific parts of the input data.
5
+
6
+ The traditional attention mechanisms have a limitation - they rely on a fixed-length context, which can be restrictive for sequences with varying lengths. This fixed-length context can lead to information loss or inefficient processing, especially when dealing with long sequences.
7
+
8
+ To address this, self attention mechanisms are used, which can be implemented using the following minimal code snippet:
9
+ ```python
10
+ import torch
11
+ import torch.nn as nn
12
+ import torch.nn.functional as F
13
+
14
class SelfAttention(nn.Module):
    """Minimal single-head scaled dot-product self-attention layer.

    Three ``nn.Linear`` layers project the input into query, key and value
    tensors; the output is the softmax-weighted sum of the values.

    Args:
        embed_dim: Size of the last dimension of the input; each projection
            maps ``embed_dim`` to ``embed_dim``.
    """

    def __init__(self, embed_dim):
        super(SelfAttention, self).__init__()
        self.query_linear = nn.Linear(embed_dim, embed_dim)
        self.key_linear = nn.Linear(embed_dim, embed_dim)
        self.value_linear = nn.Linear(embed_dim, embed_dim)

    def forward(self, x):
        """Apply self-attention to ``x``.

        Args:
            x: Tensor whose last two dimensions are (sequence, embed_dim);
                any leading dimensions are treated as batch dimensions.

        Returns:
            Tensor with the same shape as ``x``.
        """
        query = self.query_linear(x)
        key = self.key_linear(x)
        value = self.value_linear(x)
        # Swap only the last two axes so batched input works; ``key.T``
        # would reverse every axis. ``** 0.5`` avoids needing ``math``.
        scale = key.size(-1) ** 0.5
        attention_scores = torch.matmul(query, key.transpose(-2, -1)) / scale
        attention_weights = F.softmax(attention_scores, dim=-1)
        output = torch.matmul(attention_weights, value)
        return output
29
+ ```
30
+ This code snippet demonstrates a basic self attention implementation, highlighting its importance in deep learning models.
31
+
32
+ ## Implementing Self Attention
33
+ To implement self attention, it's essential to understand the underlying mathematical formulation. The self attention mechanism is based on the concept of attention, which allows the model to focus on specific parts of the input data.
34
+
35
+ * The mathematical formulation of self attention involves computing the attention weights from the query, key, and value matrices. This is typically done using the following equation: `Attention(Q, K, V) = softmax(Q * K^T / sqrt(d)) * V`, where `Q`, `K`, and `V` are the query, key, and value matrices, respectively, and `d` is the dimensionality of the key vectors (dividing by `sqrt(d)` keeps the dot products from growing too large before the softmax).
36
+
37
+ The query-key-value attention mechanism is a core component of self attention. In this mechanism, the query vector represents what the current position is looking for, the key vector represents what each input element offers for matching against queries, and the value vector carries the content of each input element. The softmax of the query-key scores gives the importance assigned to each element when the values are combined.
38
+
39
+ ```python
40
+ import torch
41
+ import torch.nn as nn
42
+ import torch.nn.functional as F
43
+
44
class SelfAttention(nn.Module):
    """Query-key-value self attention with a single head.

    Args:
        embed_dim: Size of the last dimension of the input. Each of the three
            linear projections maps ``embed_dim -> embed_dim``.
    """

    def __init__(self, embed_dim):
        super(SelfAttention, self).__init__()
        self.query_linear = nn.Linear(embed_dim, embed_dim)
        self.key_linear = nn.Linear(embed_dim, embed_dim)
        self.value_linear = nn.Linear(embed_dim, embed_dim)

    def forward(self, x):
        """Compute ``softmax(Q K^T / sqrt(d)) V`` for ``x`` of shape ``(..., seq_len, embed_dim)``.

        Returns:
            A tensor with the same shape as ``x``.
        """
        Q = self.query_linear(x)
        K = self.key_linear(x)
        V = self.value_linear(x)
        # ``transpose(-2, -1)`` swaps only the sequence and feature axes, so a
        # leading batch dimension is handled; ``** 0.5`` avoids the
        # un-imported ``math`` module.
        scores = torch.matmul(Q, K.transpose(-2, -1)) / x.size(-1) ** 0.5
        attention_weights = F.softmax(scores, dim=-1)
        return torch.matmul(attention_weights, V)
57
+ ```
58
+ This code example demonstrates how to implement self attention in PyTorch, a popular deep learning framework. By using this implementation, developers can easily integrate self attention into their own models.
59
+
60
+ ## Applications of Self Attention
61
+ Self attention has numerous applications in various fields.
62
+ In natural language processing tasks, self attention is used to weigh the importance of different words in a sentence, allowing models to capture long-range dependencies and context.
63
+
64
+ * Example in computer vision: self attention can be applied to image classification models to focus on specific regions of the image, as shown in this PyTorch code snippet:
65
+ ```python
66
+ import torch
67
+ import torch.nn as nn
68
+
69
class SelfAttention(nn.Module):
    """Single-head self attention over a sequence of feature vectors.

    Args:
        embed_dim: Size of the last dimension of the input (for images, the
            feature size of each region or patch embedding).
    """

    def __init__(self, embed_dim):
        super(SelfAttention, self).__init__()
        self.query_linear = nn.Linear(embed_dim, embed_dim)
        self.key_linear = nn.Linear(embed_dim, embed_dim)
        self.value_linear = nn.Linear(embed_dim, embed_dim)

    def forward(self, x):
        """Attend over ``x`` of shape ``(..., seq_len, embed_dim)``.

        Returns:
            A tensor with the same shape as ``x``.
        """
        query = self.query_linear(x)
        key = self.key_linear(x)
        value = self.value_linear(x)
        # Transpose only the last two axes so batched inputs work, and scale
        # without ``math`` (which this snippet does not import).
        attention_scores = torch.matmul(query, key.transpose(-2, -1)) / x.size(-1) ** 0.5
        # Normalize the scores so each row is a distribution over positions;
        # without the softmax these are raw scores, not attention weights.
        attention_weights = torch.softmax(attention_scores, dim=-1)
        output = torch.matmul(attention_weights, value)
        return output
83
+ ```
84
+ Self attention can also be used in recommender systems to model user-item interactions, allowing for more accurate personalized recommendations by considering the relationships between different items.
85
+
86
+ ## Common Mistakes in Self Attention
87
+ When working with self attention models, several common pitfalls can hinder performance and lead to suboptimal results.
88
+
89
+ * Overfitting is a significant problem in self attention models, where the model becomes too specialized to the training data and fails to generalize well to new, unseen data. This can be mitigated by using techniques such as dropout and early stopping, which help to prevent the model from becoming too complex.
90
+
91
+ Proper initialization and regularization are also crucial when using self attention. Poor weight initialization can lead to slow convergence or getting stuck in poor local minima, while regularization techniques like L1 and L2 regularization can help to prevent overfitting by adding a penalty term to the loss function.
92
+
93
+ To debug self attention models, follow these steps:
94
+ * Check the input data for any inconsistencies or missing values
95
+ * Verify that the model is correctly implemented, with attention weights being properly computed and applied
96
+ * Monitor the model's performance on a validation set during training, and adjust hyperparameters as needed to prevent overfitting.
97
+ By being aware of these common mistakes and taking steps to avoid them, developers can build more effective and reliable self attention models.
98
+
99
+ ## Best Practices for Self Attention
100
+ To ensure effective use of self attention in your projects, follow this checklist for production readiness:
101
+ * Validate input data quality
102
+ * Test model performance on diverse datasets
103
+ * Monitor training time and memory usage
104
+ Monitoring performance metrics, such as accuracy and loss, is crucial for identifying potential issues.
105
+ For further learning and improvement, refer to the Transformer library documentation and research papers on self attention mechanisms.
pyproject.toml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "bloggig-agent"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "fastapi>=0.135.1",
9
+ "google-genai>=1.64.0",
10
+ "huggingface-hub>=1.4.1",
11
+ "langchain>=1.2.10",
12
+ "langchain-aws>=1.2.5",
13
+ "langchain-community>=0.4.1",
14
+ "langchain-core>=1.2.13",
15
+ "langchain-groq>=1.1.2",
16
+ "langchain-tavily>=0.2.17",
17
+ "langgraph>=1.0.8",
18
+ "pillow>=12.1.1",
19
+ "pydantic>=2.12.5",
20
+ "pytest>=9.0.2",
21
+ "streamlit>=1.54.0",
22
+ "python-dotenv>=1.0.1",
23
+ "from-root>=1.1.0",
24
+ "uvicorn>=0.41.0",
25
+ ]
references/image.png ADDED

Git LFS Details

  • SHA256: 7a23c72f42526188aae61dab1aff9091c5bac3f4f0d8a2879d41d7e26971f780
  • Pointer size: 131 Bytes
  • Size of remote file: 209 kB
requirements.txt ADDED
Binary file (4.53 kB). View file
 
results/Attention is All You Need Paper Explained.md ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Attention is All You Need Paper Explained
2
+ ## Introduction to Attention is All You Need
3
+ The concept of attention in deep learning refers to the ability of a model to focus on specific parts of the input data that are relevant for the task at hand.
4
+ * Introduce the concept of attention in deep learning: Attention allows models to selectively concentrate on certain inputs or features, improving performance and efficiency.
5
+ * Explain the limitations of traditional sequence-to-sequence models: Traditional sequence-to-sequence models rely on recurrent neural networks (RNNs) or long short-term memory (LSTM) networks, which can be limited by their sequential processing and fixed-length context.
6
+ * Highlight the key contributions of the Attention is All You Need paper: The Attention is All You Need paper introduced a novel architecture that relies entirely on self-attention mechanisms, eliminating the need for RNNs and LSTMs, and achieving state-of-the-art results in machine translation tasks.
7
+ ![attention mechanism](../images/attention_mechanism.png)
8
+ ## The Transformer Model Architecture
9
+ The Transformer model, introduced in the "Attention is All You Need" paper, revolutionized the field of natural language processing. At its core, the Transformer model consists of an encoder-decoder structure.
10
+ * The encoder takes in a sequence of tokens, such as words or characters, and generates a continuous representation of the input sequence.
11
+ * The decoder then uses this representation to generate the output sequence, one token at a time.
12
+
13
+ Self-attention mechanisms play a crucial role in the Transformer model, allowing it to weigh the importance of different tokens in the input sequence relative to each other. This is particularly useful for tasks such as machine translation, where the context of a word can greatly affect its translation.
14
+
15
+ The Transformer model also relies on positional encoding to preserve the order of the input sequence. Since the self-attention mechanism has no built-in notion of token order (permuting the input tokens simply permutes the outputs in the same way), the model would not be able to distinguish between different token orders without some form of positional information.
16
+ Positional encoding adds a fixed vector to each token's representation, based on its position in the sequence, allowing the model to capture sequential relationships between tokens.
17
+ This combination of self-attention and positional encoding enables the Transformer model to effectively process sequential data, making it a powerful tool for a wide range of NLP tasks.
18
+ ![transformer architecture](../images/transformer_architecture.png)
19
+ ## Applying the Transformer Model to Real-World Examples
20
+ The Transformer model, introduced in the "Attention is All You Need" paper, has been widely adopted in various NLP tasks. To apply this model to real-world examples, it's essential to understand its implementation and applications.
21
+ * A minimal code sketch of a Transformer model implementation can be represented as follows:
22
+ ```python
23
+ import torch
24
+ import torch.nn as nn
25
+ import torch.optim as optim
26
+
27
class TransformerModel(nn.Module):
    """Minimal encoder-decoder sketch: one encoder layer feeding one decoder layer."""

    def __init__(self):
        super().__init__()
        # Both layers use a 512-wide model dimension and 8 attention heads.
        self.encoder = nn.TransformerEncoderLayer(d_model=512, nhead=8)
        self.decoder = nn.TransformerDecoderLayer(d_model=512, nhead=8)

    def forward(self, src, tgt):
        """Encode ``src``, then decode ``tgt`` while attending to the encoded source."""
        memory = self.encoder(src)
        return self.decoder(tgt, memory)
37
+ ```
38
+ * The Transformer model has been highly effective in machine translation tasks, allowing computation to be parallelized across sequence positions during training (output tokens are still generated one at a time at inference) and improving overall translation quality.
39
+ * The Transformer model can also be applied to other NLP tasks, such as text classification, sentiment analysis, and question answering, by modifying the model architecture and training objectives to suit the specific task requirements.
40
+ ![transformer application](../images/transformer_application.png)
41
+ ## Common Mistakes and Challenges
42
+ When implementing the Transformer model, several common pitfalls can hinder its performance.
43
+ * Proper hyperparameter tuning is crucial, as it directly affects the model's ability to learn and generalize.
44
+ * Training large Transformer models can be challenging due to their complexity and computational requirements, often leading to issues like overfitting or slow training times.
45
+ * Careful evaluation metrics are necessary to accurately assess the model's performance, as misleading metrics can lead to suboptimal results or incorrect conclusions about the model's effectiveness.
46
+ By being aware of these potential issues, developers can take steps to mitigate them and ensure successful implementation of the Transformer model.
47
+ ![common mistakes](../images/common_mistakes.png)
48
+ ## Conclusion
49
+ The Attention is All You Need paper made significant contributions to the field of NLP, introducing a novel architecture that relies entirely on self-attention mechanisms.
50
+ * The main contributions of the paper include the proposal of a transformer model that replaces traditional recurrent neural network (RNN) and convolutional neural network (CNN) architectures.
51
+ * The paper's impact on NLP has been substantial, enabling state-of-the-art results in various tasks such as machine translation and text generation.
52
+ * Future directions for research and application include exploring the use of attention mechanisms in other areas of NLP, such as question answering and text summarization, and applying the transformer model to other domains like computer vision.
53
+ ![conclusion](../images/conclusion.png)
src/components/image_generation.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from huggingface_hub import InferenceClient
3
+
4
class ImageGeneration:
    """Async wrapper around the Hugging Face ``InferenceClient`` text-to-image call."""

    def __init__(self):
        # Reads the token from the environment; a missing HF_TOKEN raises
        # KeyError at construction time.
        self.client = InferenceClient(
            provider="nscale",
            api_key=os.environ["HF_TOKEN"],
        )

    async def generateImage(self, prompt: str):
        """Generate an image for ``prompt`` without blocking the event loop.

        Args:
            prompt: Text description of the image to generate.

        Returns:
            Whatever ``self.client.text_to_image`` returns for the prompt.
        """
        import asyncio  # local import: this module does not import asyncio at file level

        # ``text_to_image`` is a synchronous call; running it in a worker
        # thread keeps other coroutines responsive while it waits.
        return await asyncio.to_thread(
            self.client.text_to_image,
            prompt,
            model="stabilityai/stable-diffusion-xl-base-1.0",
        )
src/components/taivily_search.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import sys
3
+ from typing import List
4
+ from langchain_community.tools.tavily_search import TavilySearchResults
5
+ from src.exception import MyException
6
+ from src.utils.asyncHandler import asyncHandler
7
+
8
+
9
class Taivily_search:
    """Runs a Tavily web search and normalizes the hits into plain dicts."""

    def __init__(self):
        pass

    @asyncHandler
    async def _tavily_search(self, query: str, max_results: int = 5) -> List[dict]:
        """Search Tavily for ``query`` and return normalized result dicts.

        Args:
            query: Search string sent to Tavily.
            max_results: Maximum number of results requested from the tool.

        Returns:
            A list of dicts with the keys ``title``, ``url``, ``snippet``,
            ``published_at`` and ``source``.

        Raises:
            MyException: Wraps any error raised while searching or normalizing.
        """
        logging.info(f"Using Tavily to search for: {query}")
        try:
            tool = TavilySearchResults(max_results=max_results)
            results = await tool.ainvoke({"query": query})

            # NOTE(review): the tool appears to return an error string instead
            # of a list when the API call fails. Surface that text rather than
            # iterating over its characters. Confirm against the installed version.
            if isinstance(results, str) and results:
                raise RuntimeError(f"Tavily search failed: {results}")

            normalized: List[dict] = [
                {
                    "title": r.get("title") or "",
                    "url": r.get("url") or "",
                    "snippet": r.get("content") or r.get("snippet") or "",
                    "published_at": r.get("published_date") or r.get("published_at"),
                    "source": r.get("source"),
                }
                for r in results or []
                if isinstance(r, dict)  # skip malformed entries instead of crashing on .get
            ]
            logging.debug(f"Tavily search returned {len(normalized)} results")
            return normalized
        except Exception as e:
            logging.error(f"Error in Tavily_search: {str(e)}")
            raise MyException(e, sys) from e
src/constants/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ import os
2
# Folder in which generated Markdown files are saved.
FOLDER_PATH_TO_SAVE_MD="results"
src/exception/__init__.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import logging
3
+
4
def error_message_detail(error: Exception, error_detail: sys) -> str:
    """Build, log and return a message locating ``error`` by file and line.

    Args:
        error: The exception (or message) to describe; rendered with ``str()``.
        error_detail: Object exposing ``exc_info()``; callers pass the ``sys`` module.

    Returns:
        The formatted message, which is also logged at ERROR level.
    """
    traceback_obj = error_detail.exc_info()[2]

    if traceback_obj is None:
        # No exception is being handled (e.g. a manual raise): fall back to
        # the frame two levels up, i.e. where MyException was called.
        import inspect
        caller = inspect.currentframe().f_back.f_back
        file_name = caller.f_code.co_filename
        line_number = caller.f_lineno
    else:
        # Location recorded by the traceback of the exception being handled.
        file_name = traceback_obj.tb_frame.f_code.co_filename
        line_number = traceback_obj.tb_lineno

    error_message = f"Error occurred in python script: [{file_name}] at line number [{line_number}]: {str(error)}"

    # Log here so every wrapped error is recorded.
    logging.error(error_message)
    return error_message
26
+
27
+
28
class MyException(Exception):
    """Project exception whose text includes the file and line of the failure."""

    def __init__(self, error_message: str, error_detail: sys):
        """Store a detailed message built from ``error_message`` and ``error_detail``.

        Args:
            error_message: The original error (callers in this project pass
                the caught exception object).
            error_detail: Object exposing ``exc_info()``; callers pass ``sys``.
        """
        # Initialise the base Exception with the raw error first.
        super().__init__(error_message)

        # error_message_detail also logs the formatted message.
        detailed_message = error_message_detail(error_message, error_detail)
        self.error_message = detailed_message

    def __str__(self) -> str:
        """Return the detailed, location-annotated error message."""
        return self.error_message
src/graph/Compile_graph.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import asyncio
3
+ from dotenv import load_dotenv
4
+ from langgraph.graph import StateGraph,START,END
5
+ from src.models.State_model import State
6
+ from src.graph.nodes.router_node import router_node,route_next
7
+ from src.graph.nodes.reducer_node import reducer_node
8
+ from src.graph.nodes.search_node import research_node
9
+ from src.graph.nodes.orchaster_node import orchestrator_node
10
+ from src.graph.nodes.worker_node import worker_node
11
+ from src.graph.nodes.fanout_node import fanout
12
+ load_dotenv()
13
+
14
# Assemble the blog-generation workflow: router -> (research) -> orchestrator
# -> worker fan-out -> reducer.
g = StateGraph(State)
for _node_name, _node_fn in (
    ("router", router_node),
    ("research", research_node),
    ("orchestrator", orchestrator_node),
    ("worker", worker_node),
    ("reducer", reducer_node),
):
    g.add_node(_node_name, _node_fn)

# The router decides whether a research pass is needed before planning.
g.add_edge(START, "router")
g.add_conditional_edges(
    "router",
    route_next,
    {"research": "research", "orchestrator": "orchestrator"},
)
g.add_edge("research", "orchestrator")

# The orchestrator fans out to "worker" via `fanout`; workers feed the reducer.
g.add_conditional_edges("orchestrator", fanout, ["worker"])
g.add_edge("worker", "reducer")
g.add_edge("reducer", END)

app = g.compile()

# NOTE(review): this renders and writes graph.png every time the module is
# imported, and rendering may need network access - confirm this is intended.
png_data = app.get_graph().draw_mermaid_png()
with open("graph.png", "wb") as f:
    f.write(png_data)
35
async def run(topic: str):
    """Stream the blog-generation graph for ``topic``.

    This is an async generator: it yields each state snapshot emitted by
    ``app.astream`` and finishes once the graph completes.

    Args:
        topic: Subject of the blog post to generate.

    Raises:
        Exception: Any error raised during graph execution is logged and re-raised.
    """
    logging.info(f"Starting blog generation for topic: {topic}")
    initial_state = {
        "topic": topic,
        "mode": "",
        "needs_research": False,
        "queries": [],
        "evidence": [],
        "plan": None,
        "sections": [],
        "final": "",
    }
    try:
        # stream_mode="values" is required here (marked "important" by the author).
        async for snapshot in app.astream(initial_state, stream_mode="values"):
            yield snapshot
        logging.info("Blog generation completed successfully")
    except Exception as e:
        logging.error(f"Error during graph execution: {str(e)}")
        raise
70
+
71
if __name__ == "__main__":
    from src.logger import *

    async def _collect_final_state(topic: str):
        """Drain ``run`` and return the last state snapshot it yielded."""
        final_state = None
        async for snapshot in run(topic):
            final_state = snapshot
        return final_state

    # ``run`` is an async generator, and asyncio.run() only accepts a
    # coroutine, so iterate it inside a coroutine instead.
    out = asyncio.run(_collect_final_state("State of Multimodal LLMs in 2026"))
    print(out)
src/graph/graphs/reducer_subgraph.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from langgraph.graph import StateGraph,START,END
3
+ from src.models.ImageSpec_model import State
4
+ from src.graph.nodes.reducer_sub_node import reducer_sub_image,reducer_sub_llm,merge_images_and_md
5
# Linear subgraph used by the reducer:
# LLM image planning -> image generation -> merging images into the Markdown.
_builder = StateGraph(State)

_builder.add_node("reducer_sub_llm", reducer_sub_llm)
_builder.add_node("reducer_sub_image", reducer_sub_image)
_builder.add_node("merge_images_and_md", merge_images_and_md)

_builder.add_edge(START, "reducer_sub_llm")
_builder.add_edge("reducer_sub_llm", "reducer_sub_image")
_builder.add_edge("reducer_sub_image", "merge_images_and_md")
_builder.add_edge("merge_images_and_md", END)

# `app` is the compiled graph that other modules import and invoke.
app = _builder.compile()
logging.info("Reducer subgraph compiled successfully")
22
+
src/graph/nodes/fanout_node.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from src.models.State_model import State
3
+ from langgraph.types import Send
4
+
5
+
6
def fanout(state: State):
    """Create one ``Send`` to the "worker" node for every task in the plan.

    Args:
        state: Graph state; reads ``plan``, ``topic``, ``mode`` and,
            optionally, ``evidence``.

    Returns:
        A list of ``Send`` objects, one per task, each with its own payload.
    """
    logging.info("Entering fanout")
    tasks = state["plan"].tasks
    logging.debug(f"Fanning out {len(tasks)} tasks")

    dispatches = []
    for task in tasks:
        # Build a fresh payload per task so workers never share mutable objects.
        payload = {
            "task": task.model_dump(),
            "topic": state["topic"],
            "mode": state["mode"],
            "plan": state["plan"].model_dump(),
            "evidence": [e.model_dump() for e in state.get("evidence", [])],
        }
        dispatches.append(Send("worker", payload))
    return dispatches
src/graph/nodes/orchaster_node.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import sys
3
+ from src.models.State_model import State
4
+ from src.llm import llm
5
+ from src.exception import MyException
6
+ from src.models.Plan_model import Plan
7
+ from src.prompts import ORCH_SYSTEM
8
+ from langchain_core.messages import SystemMessage, HumanMessage
9
+ from src.utils.asyncHandler import asyncHandler
10
+
11
+
12
@asyncHandler
async def orchestrator_node(state: State) -> dict:
    """Ask the LLM for a structured blog ``Plan`` for the current topic.

    Args:
        state: Graph state; reads ``topic`` and, optionally, ``evidence``
            and ``mode`` (default ``"closed_book"``).

    Returns:
        ``{"plan": plan}`` with the plan returned by the LLM.

    Raises:
        MyException: Wraps any failure, including the LLM returning no plan.
    """
    logging.info("Entering orchestrator_node")
    try:
        planner = llm.with_structured_output(Plan)

        evidence = state.get("evidence", [])
        mode = state.get("mode", "closed_book")
        logging.debug(f"Mode: {mode}, Evidence count: {len(evidence)}")

        plan = await planner.ainvoke(
            [
                SystemMessage(content=ORCH_SYSTEM),
                HumanMessage(
                    content=(
                        f"Topic: {state['topic']}\n"
                        f"Mode: {mode}\n\n"
                        f"Evidence (ONLY use for fresh claims; may be empty):\n"
                        # Only the first 16 evidence items are sent to the model.
                        f"{[e.model_dump() for e in evidence][:16]}"
                    )
                ),
            ]
        )

        # Fail with a clear message instead of an AttributeError on
        # ``plan.blog_title`` when no plan came back.
        if plan is None:
            raise ValueError("LLM failed to return a valid Plan")

        logging.info(f"Orchestrator plan created: {plan.blog_title} with {len(plan.tasks)} tasks.")
        return {"plan": plan}
    except Exception as e:
        logging.error(f"Error in orchestrator_node: {str(e)}")
        raise MyException(e, sys) from e
src/graph/nodes/reducer_node.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import sys
3
+ from pathlib import Path
4
+ from src.models.State_model import State
5
+ from src.exception import MyException
6
+ from src.utils.asyncHandler import asyncHandler
7
+ from src.constants import FOLDER_PATH_TO_SAVE_MD
8
+ import os
9
+ from src.graph.graphs.reducer_subgraph import app
10
@asyncHandler
async def reducer_node(state: State) -> dict:
    """Assemble the sections into one Markdown post, add images, and save it.

    Args:
        state: Graph state; reads ``plan`` (for ``blog_title``) and ``sections``.

    Returns:
        ``{"final": final_md}`` with the Markdown produced by the reducer subgraph.

    Raises:
        MyException: Wraps any failure while assembling, merging or writing.
    """
    import re  # local import: only needed to sanitize the output filename

    logging.info("Entering reducer_node")
    try:
        plan = state["plan"]

        # Each section is a (sort key, markdown) pair; order by the key.
        ordered_sections = [md for _, md in sorted(state["sections"], key=lambda x: x[0])]
        body = "\n\n".join(ordered_sections).strip()
        final_md = f"# {plan.blog_title}\n\n{body}\n"

        # The title comes from the LLM, so replace path separators and other
        # characters that are invalid in filenames; this keeps the write
        # inside the results folder. Titles without such characters are unchanged.
        safe_title = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', "_", plan.blog_title).strip(" .")
        filename = f"{safe_title or 'untitled'}.md"
        logging.debug(f"Writing final blog to {filename}")

        logging.info("Starting image generation and merging via subgraph")
        red_f_ob = await app.ainvoke({"prompt_markdown": final_md})
        final_md = red_f_ob["final_md"]

        logging.debug(f"Final MD size after merging: {len(final_md)} characters")
        os.makedirs(FOLDER_PATH_TO_SAVE_MD, exist_ok=True)
        file_path = os.path.join(FOLDER_PATH_TO_SAVE_MD, filename)
        Path(file_path).write_text(final_md, encoding="utf-8")

        logging.info(f"Reducer node completed successfully, blog saved to {file_path}")
        return {"final": final_md}
    except Exception as e:
        logging.error(f"Error in reducer_node: {str(e)}")
        raise MyException(e, sys) from e
src/graph/nodes/reducer_sub_node.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from src.models.ImageSpec_model import State,GlobalImagePlan
3
+ from src.utils.asyncHandler import asyncHandler
4
+ from langchain.messages import SystemMessage,HumanMessage
5
+ from src.prompts import IMAGE_PLACEHOLDER_GENERATION
6
+ from src.llm import llm
7
+ from src.exception import MyException
8
+ import sys
9
+ import os
10
+ from src.components.image_generation import ImageGeneration
11
@asyncHandler
async def reducer_sub_llm(state: State) -> State:
    """Ask the LLM for an image placeholder plan and store it in ``state["output"]``.

    Args:
        state: Subgraph state; reads ``prompt_markdown``.

    Returns:
        The same state object with ``output`` set to the LLM's plan.

    Raises:
        MyException: If the LLM returns no plan.
    """
    logging.info("Calling LLM for image placeholder planning")
    planner = llm.with_structured_output(GlobalImagePlan)
    messages = [
        SystemMessage(content=IMAGE_PLACEHOLDER_GENERATION),
        HumanMessage(content=state['prompt_markdown']),
    ]
    image_plan = await planner.ainvoke(messages)

    if image_plan:
        state['output'] = image_plan
        logging.info("Successfully generated image placeholder plan")
        return state

    logging.error("LLM failed to return a valid image placeholder plan (output is None)")
    raise MyException("Failed to generate image placeholder plan from LLM", sys)
28
+
29
@asyncHandler
async def reducer_sub_image(state: State) -> State:
    """Generate and save one image for every entry in the image plan.

    Args:
        state: Subgraph state; reads ``output`` (the plan with ``images``).

    Returns:
        The unchanged state; the images are written to disk as a side effect.

    Raises:
        MyException: If ``state["output"]`` is missing or empty.
    """
    output = state['output']
    image_generator = ImageGeneration()
    if not output:
        raise MyException("output from reducer_sub not found", sys)

    os.makedirs("images", exist_ok=True)

    logging.info(f"Starting image generation for {len(output.images)} images")
    for image_con in output.images:
        logging.debug(f"Generating image: {image_con.filename} with prompt: {image_con.prompt[:50]}...")
        image = await image_generator.generateImage(prompt=image_con.prompt)
        # The filename comes from the LLM plan and may point outside
        # "images/"; create its parent folder so the save cannot fail on a
        # missing directory.
        parent_dir = os.path.dirname(image_con.filename)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        image.save(image_con.filename)
    logging.info("All images generated successfully")
    return state
45
+
46
@asyncHandler
async def merge_images_and_md(state: State) -> State:
    """Replace each image placeholder in the Markdown with an image tag.

    Args:
        state: Subgraph state; reads ``output`` (``md_with_placeholders`` and
            ``images``).

    Returns:
        The same state object with ``final_md`` set to the merged Markdown.
    """
    plan = state["output"]
    merged = plan.md_with_placeholders

    logging.info(f"Merging {len(plan.images)} images into Markdown")
    for im in plan.images:
        # Alt text is the file's base name without ".png", underscores as spaces.
        base_name = im.filename.split("/")[-1]
        alt_text = base_name.replace(".png", "").replace("_", " ")

        # The image link is written relative to the parent directory ("../").
        merged = merged.replace(im.placeholder, f"![{alt_text}](../{im.filename})")

    state["final_md"] = merged
    logging.info("Markdown merging completed")
    return state
65
+
66
+
67
+
68
+
69
+
src/graph/nodes/router_node.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import sys
3
+ import json
4
+ import re
5
+ from src.models.RouterDecision_model import RouterDecision
6
+ from langchain_core.messages import SystemMessage, HumanMessage
7
+ from src.models.State_model import State
8
+ from src.llm import llm
9
+ from src.prompts import ROUTER_SYSTEM
10
+ from src.exception import MyException
11
+ from src.utils.asyncHandler import asyncHandler
12
+
13
+
14
def _extract_json_object(content: str):
    """Return the first JSON object embedded in ``content``.

    Prefers the body of a ```json fenced block when present. Returns ``None``
    when the text has no ``{`` at all, and raises ``ValueError`` when braces
    are present but no JSON object can be decoded.
    """
    fenced = re.search(r'```json\s*(.*?)\s*```', content, re.DOTALL)
    text = fenced.group(1) if fenced else content

    start = text.find('{')
    if start == -1:
        return None

    decoder = json.JSONDecoder()
    while start != -1:
        try:
            # raw_decode parses one JSON value starting at `start` and ignores
            # whatever follows it, so trailing text is tolerated.
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = text.find('{', start + 1)

    raise ValueError("Failed to parse JSON even after structural truncation")


@asyncHandler
async def router_node(state: State):
    """Decide whether the topic needs research and which mode to use.

    Tries the LLM's structured output first; if that fails or returns nothing,
    falls back to a raw completion and extracts the JSON object by hand.

    Args:
        state: Graph state; reads ``topic``.

    Returns:
        A dict with ``needs_research``, ``mode`` and ``queries``.

    Raises:
        ValueError: If no usable JSON decision can be extracted.
        Exception: Any other error is logged and re-raised unchanged.
    """
    logging.info("Entering router_node")
    topic = state['topic']
    logging.debug(f"Topic: {topic}")

    try:
        try:
            runnable = llm.with_structured_output(RouterDecision)
            decision = await runnable.ainvoke(
                [
                    SystemMessage(content=ROUTER_SYSTEM),
                    HumanMessage(content=f"Topic: {topic}")
                ]
            )
            if decision:
                logging.info(f"Router decision (structured): needs_research={decision.needs_research}, mode={decision.mode}")
                return {
                    "needs_research": decision.needs_research,
                    "mode": decision.mode,
                    "queries": decision.queries,
                }
        except Exception as e:
            # Best effort: a structured-output failure falls through to manual parsing.
            logging.warning(f"Structured output failed: {str(e)}. Attempting manual parse.")

        raw_response = await llm.ainvoke(
            [
                SystemMessage(content=ROUTER_SYSTEM + "\n\nCRITICAL: You MUST return a valid JSON object. Do not include any text before or after the JSON."),
                HumanMessage(content=f"Topic: {topic}")
            ]
        )
        # NOTE(review): assumes `.content` is a string - confirm for the configured model.
        content = raw_response.content
        logging.debug(f"Raw LLM content for fallback: {content}")

        data = _extract_json_object(content)
        if data is not None:
            # Accept booleans as well as common string spellings of "true".
            needs_res = str(data.get("needs_research", "")).lower() in ["true", "1", "yes"]

            decision = RouterDecision(
                needs_research=needs_res,
                mode=data.get("mode", "open_book"),
                queries=data.get("queries", [])
            )
            logging.info(f"Router decision (manual): needs_research={decision.needs_research}, mode={decision.mode}")
            return {
                "needs_research": decision.needs_research,
                "mode": decision.mode,
                "queries": decision.queries,
            }

        logging.error("Failed to extract JSON from LLM response")
        raise ValueError("LLM failed to return a valid RouterDecision. Please check prompts or model output.")

    except Exception as e:
        logging.error(f"Error in router_node: {str(e)}")
        raise
98
+
99
def route_next(state: State) -> str:
    """Pick the node that follows the router.

    Returns ``"research"`` when the state flags ``needs_research`` as truthy,
    otherwise ``"orchestrator"``.
    """
    # .get() keeps routing safe when the key was never set (e.g. the router failed).
    needs_research = state.get("needs_research", False)
    logging.info(f"Routing next based on research need: {needs_research}")
    if needs_research:
        return "research"
    return "orchestrator"