Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files — This view is limited to 50 files because it contains too many changes. See raw diff
- .dockerignore +50 -0
- .env.example +11 -0
- .gitattributes +6 -0
- .github/workflows/python-ci.yaml +57 -0
- .gitignore +61 -0
- .pre-commit-config.yaml +20 -0
- .project-root +1 -0
- .pylintrc +2 -0
- .python-version +1 -0
- .vscode/settings.json +5 -0
- Dockerfile +38 -0
- LICENSE +9 -0
- README.md +92 -5
- State of Multimodal LLMs in 2026.md +31 -0
- api/app.py +125 -0
- api/templates/index.html +732 -0
- credentials_example.yaml +5 -0
- data/README.md +3 -0
- deployment-setup-notes.txt +177 -0
- docker-compose.yml +17 -0
- graph.png +0 -0
- images/attention_mechanism.png +3 -0
- images/common_mistakes.png +3 -0
- images/conclusion.png +3 -0
- images/transformer_application.png +3 -0
- images/transformer_architecture.png +3 -0
- jenkinsfile +105 -0
- main.py +10 -0
- metadata.yaml +4 -0
- models/README.md +3 -0
- notebooks/README.md +3 -0
- notebooks/agent.ipynb +0 -0
- notebooks/image_placeHolder.ipynb +376 -0
- notebooks/understanding_self_attention.md +120 -0
- notebooks/understanding_self_attention_in_deep_learning.md +105 -0
- pyproject.toml +25 -0
- references/image.png +3 -0
- requirements.txt +0 -0
- results/Attention is All You Need Paper Explained.md +53 -0
- src/components/image_generation.py +16 -0
- src/components/taivily_search.py +35 -0
- src/constants/__init__.py +2 -0
- src/exception/__init__.py +40 -0
- src/graph/Compile_graph.py +74 -0
- src/graph/graphs/reducer_subgraph.py +22 -0
- src/graph/nodes/fanout_node.py +23 -0
- src/graph/nodes/orchaster_node.py +40 -0
- src/graph/nodes/reducer_node.py +36 -0
- src/graph/nodes/reducer_sub_node.py +69 -0
- src/graph/nodes/router_node.py +103 -0
.dockerignore
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
build/
|
| 8 |
+
develop-eggs/
|
| 9 |
+
dist/
|
| 10 |
+
downloads/
|
| 11 |
+
eggs/
|
| 12 |
+
.eggs/
|
| 13 |
+
lib/
|
| 14 |
+
lib64/
|
| 15 |
+
parts/
|
| 16 |
+
sdist/
|
| 17 |
+
var/
|
| 18 |
+
wheels/
|
| 19 |
+
share/python-wheels/
|
| 20 |
+
*.egg-info/
|
| 21 |
+
.installed.cfg
|
| 22 |
+
*.egg
|
| 23 |
+
MANIFEST
|
| 24 |
+
|
| 25 |
+
# Environments
|
| 26 |
+
.env
|
| 27 |
+
.venv
|
| 28 |
+
env/
|
| 29 |
+
venv/
|
| 30 |
+
ENV/
|
| 31 |
+
env.bak/
|
| 32 |
+
venv.bak/
|
| 33 |
+
|
| 34 |
+
# Project specific
|
| 35 |
+
images/
|
| 36 |
+
results/
|
| 37 |
+
logs/
|
| 38 |
+
.pytest_cache/
|
| 39 |
+
.vscode/
|
| 40 |
+
.git/
|
| 41 |
+
.github/
|
| 42 |
+
.ipynb_checkpoints/
|
| 43 |
+
*.md
|
| 44 |
+
metadata.yaml
|
| 45 |
+
credentials_example.yaml
|
| 46 |
+
graph.png
|
| 47 |
+
|
| 48 |
+
# Docker
|
| 49 |
+
Dockerfile
|
| 50 |
+
.dockerignore
|
.env.example
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
GROQ_API_KEY =
|
| 2 |
+
Access_key_ID=
|
| 3 |
+
Secret_access_key=
|
| 4 |
+
usis=
|
| 5 |
+
HUGGINGFACEHUB_ACCESS_TOKEN=
|
| 6 |
+
PORT=
|
| 7 |
+
|
| 8 |
+
TAVILY_API_KEY=
|
| 9 |
+
Gemini_API_Key=
|
| 10 |
+
|
| 11 |
+
HF_TOKEN=
|
.gitattributes
CHANGED
|
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
images/attention_mechanism.png filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
images/common_mistakes.png filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
images/conclusion.png filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
images/transformer_application.png filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
images/transformer_architecture.png filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
references/image.png filter=lfs diff=lfs merge=lfs -text
|
.github/workflows/python-ci.yaml
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Python CI/CD
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- main
|
| 7 |
+
pull_request:
|
| 8 |
+
branches:
|
| 9 |
+
- main
|
| 10 |
+
|
| 11 |
+
jobs:
|
| 12 |
+
|
| 13 |
+
test:
|
| 14 |
+
runs-on: ubuntu-latest
|
| 15 |
+
|
| 16 |
+
steps:
|
| 17 |
+
- name: Checkout code
|
| 18 |
+
uses: actions/checkout@v3
|
| 19 |
+
|
| 20 |
+
- name: Set up Python
|
| 21 |
+
uses: actions/setup-python@v4
|
| 22 |
+
with:
|
| 23 |
+
python-version: "3.12"
|
| 24 |
+
|
| 25 |
+
- name: Install uv
|
| 26 |
+
run: curl -Ls https://astral.sh/uv/install.sh | sh
|
| 27 |
+
|
| 28 |
+
- name: Add uv to PATH
|
| 29 |
+
run: echo "$HOME/.local/bin" >> $GITHUB_PATH
|
| 30 |
+
|
| 31 |
+
- name: Install dependencies
|
| 32 |
+
run: uv sync
|
| 33 |
+
|
| 34 |
+
- name: Run tests
|
| 35 |
+
run: uv run pytest
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
deploy:
|
| 39 |
+
needs: test
|
| 40 |
+
runs-on: ubuntu-latest
|
| 41 |
+
if: github.ref == 'refs/heads/main'
|
| 42 |
+
|
| 43 |
+
steps:
|
| 44 |
+
- name: Deploy to EC2
|
| 45 |
+
uses: appleboy/ssh-action@v1.0.3
|
| 46 |
+
with:
|
| 47 |
+
host: ${{ secrets.EC2_HOST }}
|
| 48 |
+
username: ${{ secrets.EC2_USER }}
|
| 49 |
+
key: ${{ secrets.EC2_KEY }}
|
| 50 |
+
script_stop: false
|
| 51 |
+
script: |
|
| 52 |
+
export PATH="$HOME/.local/bin:$PATH"
|
| 53 |
+
cd /var/www/blog-agent
|
| 54 |
+
git pull origin main
|
| 55 |
+
uv sync
|
| 56 |
+
sudo systemctl restart blog-agent
|
| 57 |
+
echo "Deployment successful"
|
.gitignore
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*.pyc
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
env/
|
| 12 |
+
myenv/
|
| 13 |
+
build/
|
| 14 |
+
develop-eggs/
|
| 15 |
+
dist/
|
| 16 |
+
downloads/
|
| 17 |
+
eggs/
|
| 18 |
+
.eggs/
|
| 19 |
+
lib/
|
| 20 |
+
lib64/
|
| 21 |
+
parts/
|
| 22 |
+
sdist/
|
| 23 |
+
var/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
|
| 28 |
+
# PyInstaller
|
| 29 |
+
# Usually these files are written by a python script from a template
|
| 30 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 31 |
+
*.manifest
|
| 32 |
+
*.spec
|
| 33 |
+
|
| 34 |
+
# Installer logs
|
| 35 |
+
pip-log.txt
|
| 36 |
+
pip-delete-this-directory.txt
|
| 37 |
+
|
| 38 |
+
# Unit test / coverage reports
|
| 39 |
+
htmlcov/
|
| 40 |
+
.tox/
|
| 41 |
+
.coverage
|
| 42 |
+
.coverage.*
|
| 43 |
+
.cache
|
| 44 |
+
nosetests.xml
|
| 45 |
+
coverage.xml
|
| 46 |
+
*,cover
|
| 47 |
+
|
| 48 |
+
# Translations
|
| 49 |
+
*.mo
|
| 50 |
+
*.pot
|
| 51 |
+
|
| 52 |
+
# Django stuff:
|
| 53 |
+
*.log
|
| 54 |
+
|
| 55 |
+
# Sphinx documentation
|
| 56 |
+
docs/_build/
|
| 57 |
+
|
| 58 |
+
# PyBuilder
|
| 59 |
+
target/
|
| 60 |
+
|
| 61 |
+
*.env
|
.pre-commit-config.yaml
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
repos:
|
| 3 |
+
|
| 4 |
+
-
|
| 5 |
+
repo: https://github.com/ambv/black
|
| 6 |
+
rev: 20.8b1
|
| 7 |
+
hooks:
|
| 8 |
+
-
|
| 9 |
+
id: black
|
| 10 |
+
language_version: python3
|
| 11 |
+
|
| 12 |
+
- repo: local
|
| 13 |
+
hooks:
|
| 14 |
+
- id: python-tests
|
| 15 |
+
name: pytests
|
| 16 |
+
entry: pytest src/tests
|
| 17 |
+
language: python
|
| 18 |
+
additional_dependencies: [pre-commit, pytest, pandas, sklearn, matplotlib]
|
| 19 |
+
always_run: true
|
| 20 |
+
pass_filenames: false
|
.project-root
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# This file serves as an anchor for the `from-root` package to identify the project root directory.
|
.pylintrc
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[default]
|
| 2 |
+
ignored-modules
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.12
|
.vscode/settings.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"python-envs.defaultEnvManager": "ms-python.python:venv",
|
| 3 |
+
"python-envs.defaultPackageManager": "ms-python.python:pip",
|
| 4 |
+
"python-envs.pythonProjects": []
|
| 5 |
+
}
|
Dockerfile
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use a specific Python version as the base image
|
| 2 |
+
FROM python:3.12-slim-bookworm AS builder
|
| 3 |
+
|
| 4 |
+
# Set the working directory
|
| 5 |
+
WORKDIR /app
|
| 6 |
+
|
| 7 |
+
# Install uv for dependency management
|
| 8 |
+
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
|
| 9 |
+
|
| 10 |
+
# Copy the lockfile and pyproject.toml
|
| 11 |
+
COPY pyproject.toml uv.lock ./
|
| 12 |
+
|
| 13 |
+
# Install dependencies into a virtual environment
|
| 14 |
+
RUN uv sync --frozen --no-dev
|
| 15 |
+
|
| 16 |
+
# Final stage
|
| 17 |
+
FROM python:3.12-slim-bookworm
|
| 18 |
+
|
| 19 |
+
# Set the working directory
|
| 20 |
+
WORKDIR /app
|
| 21 |
+
|
| 22 |
+
# Copy the virtual environment from the builder stage
|
| 23 |
+
COPY --from=builder /app/.venv /app/.venv
|
| 24 |
+
|
| 25 |
+
# Add the virtual environment to the PATH
|
| 26 |
+
ENV PATH="/app/.venv/bin:$PATH"
|
| 27 |
+
|
| 28 |
+
# Copy the rest of the application code
|
| 29 |
+
COPY . .
|
| 30 |
+
|
| 31 |
+
# Expose the port the app runs on
|
| 32 |
+
EXPOSE 7860
|
| 33 |
+
|
| 34 |
+
# Set environment variables for the application
|
| 35 |
+
ENV PYTHONUNBUFFERED=1
|
| 36 |
+
|
| 37 |
+
# Run the application
|
| 38 |
+
CMD ["python", "Application/app.py"]
|
LICENSE
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
The MIT License (MIT)
|
| 3 |
+
Copyright (c) 2026, Vansh
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
| 6 |
+
|
| 7 |
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
| 8 |
+
|
| 9 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
README.md
CHANGED
|
@@ -1,10 +1,97 @@
|
|
| 1 |
---
|
| 2 |
-
title: Blog
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
|
|
|
| 7 |
pinned: false
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Blog-Writing-Agent
|
| 3 |
+
emoji: 🎓
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
sdk: docker
|
| 7 |
+
app_file: main.py
|
| 8 |
pinned: false
|
| 9 |
+
short_description: This is the Agentic Blog Writing Agent
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# ✍️ Bloggig - AI Blog Writing Agent
|
| 13 |
+
|
| 14 |
+
Bloggig is a sophisticated AI-powered agent designed to transform a single topic into a professional, research-backed blog post complete with AI-generated visuals. Built with **LangGraph** and **FastAPI**, it orchestrates a complex pipeline of research, planning, writing, and image generation to deliver high-quality content in real-time.
|
| 15 |
+
|
| 16 |
+

|
| 17 |
+
|
| 18 |
+
## 🚀 Key Features
|
| 19 |
+
|
| 20 |
+
- **🌐 Autonomous Research**: Integrates with Tavily AI to perform deep web searches and gather factual evidence.
|
| 21 |
+
- **📋 Intelligent Planning**: Generates structured blog plans tailored to specific audiences and tones.
|
| 22 |
+
- **✍️ Parallel Writing Pipeline**: Uses a worker-reducer architecture to generate multiple blog sections simultaneously for maximum efficiency.
|
| 23 |
+
- **🎨 AI-Generated Visuals**: Automatically plans and generates relevant images using **Stable Diffusion XL** (via Hugging Face Inference).
|
| 24 |
+
- **💻 Modern ChatGPT-like UI**: A sleek, dark-themed dashboard featuring:
|
| 25 |
+
- **Real-time Streaming**: Watch the AI's "thought process" and pipeline progression via WebSockets.
|
| 26 |
+
- **Markdown Rendering**: Beautifully formatted blog previews with syntax highlighting.
|
| 27 |
+
- **History Management**: Browse, view, and manage previously generated blogs.
|
| 28 |
+
- **📦 Export & Management**:
|
| 29 |
+
- **Download as ZIP**: Get the full markdown file along with all generated image assets.
|
| 30 |
+
- **Clean Deletion**: Permanent removal of blogs and their associated images with a single click.
|
| 31 |
+
|
| 32 |
+
## 🛠️ Tech Stack
|
| 33 |
+
|
| 34 |
+
- **Backend**: FastAPI, LangGraph, Pydantic, Uvicorn.
|
| 35 |
+
- **AI Models**: Bedrock Converse API (LLM), Stable Diffusion XL (Images).
|
| 36 |
+
- **Search Engine**: Tavily AI.
|
| 37 |
+
- **Frontend**: Semantic HTML5, Vanilla CSS (Glassmorphism), Marked.js, Highlight.js.
|
| 38 |
+
- **Tools**: UV (Python package manager), Git.
|
| 39 |
+
|
| 40 |
+
## ⚙️ Installation & Setup
|
| 41 |
+
|
| 42 |
+
### 1. Clone the Repository
|
| 43 |
+
|
| 44 |
+
```bash
|
| 45 |
+
git clone https://github.com/VashuTheGreat/Blog-Writing-Agent.git
|
| 46 |
+
cd Blog-Writing-Agent
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
### 2. Install Dependencies
|
| 50 |
+
|
| 51 |
+
Using `uv` (recommended):
|
| 52 |
+
|
| 53 |
+
```bash
|
| 54 |
+
uv sync
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
Or using `pip`:
|
| 58 |
+
|
| 59 |
+
```bash
|
| 60 |
+
pip install -r requirements.txt
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
### 3. Environment Variables
|
| 64 |
+
|
| 65 |
+
Create a `.env` file in the root directory and add your credentials:
|
| 66 |
+
|
| 67 |
+
```env
|
| 68 |
+
HF_TOKEN=your_huggingface_token
|
| 69 |
+
TAVILY_API_KEY=your_tavily_api_key
|
| 70 |
+
AWS_ACCESS_KEY_ID=your_aws_key
|
| 71 |
+
AWS_SECRET_ACCESS_KEY=your_aws_secret
|
| 72 |
+
AWS_REGION=your_aws_region
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
## 🏃 Running the Application
|
| 76 |
+
|
| 77 |
+
Start the FastAPI server:
|
| 78 |
+
|
| 79 |
+
```bash
|
| 80 |
+
python Application/app.py
|
| 81 |
+
```
|
| 82 |
+
|
| 83 |
+
The application will be available at `http://localhost:8000`.
|
| 84 |
+
|
| 85 |
+
## 📂 Project Structure
|
| 86 |
+
|
| 87 |
+
- `api/`: Contains the web server (`app.py`) and the frontend (`templates/index.html`).
|
| 88 |
+
- `src/graph/`: Core LangGraph implementation (nodes, edges, and logic).
|
| 89 |
+
- `src/components/`: External integrations (Tavily search, Image generation).
|
| 90 |
+
- `src/models/`: Pydantic models for state management and structured output.
|
| 91 |
+
- `results/`: Directory where generated markdown blogs are saved.
|
| 92 |
+
- `images/`: Directory where generated images are stored.
|
| 93 |
+
- `src/utils/`: Utility functions (e.g., blog deletion logic).
|
| 94 |
+
|
| 95 |
+
## 📄 License
|
| 96 |
+
|
| 97 |
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
State of Multimodal LLMs in 2026.md
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# State of Multimodal LLMs in 2026
|
| 2 |
+
|
| 3 |
+
## Introduction to Multimodal LLMs
|
| 4 |
+
Recent developments in multimodal LLMs have shown significant progress, with models now capable of processing and generating multiple forms of data, such as text, images, and audio [Not found in provided sources].
|
| 5 |
+
* Multimodal LLMs have been improved with new architectures and training methods, allowing for more accurate and efficient processing of diverse data types.
|
| 6 |
+
* The impact of multimodal LLMs can be seen in various industries, including healthcare, education, and entertainment, where they are being used for applications such as medical image analysis, personalized learning, and content creation [Not found in provided sources].
|
| 7 |
+
* Despite the advancements, multimodal LLMs still face challenges, such as requiring large amounts of labeled training data, being computationally expensive, and struggling with common-sense reasoning and world knowledge [Not found in provided sources].
|
| 8 |
+
|
| 9 |
+
## Recent Advances in Multimodal LLMs
|
| 10 |
+
The field of multimodal large language models (LLMs) has witnessed significant advancements in recent times.
|
| 11 |
+
* The latest models and architectures, such as [multimodal transformers](https://arxiv.org/abs/2203.10799), have shown promising results in handling multiple input modalities like text, images, and audio [Source](https://arxiv.org/abs/2203.10799).
|
| 12 |
+
* Improvements in performance and efficiency can be attributed to the development of more sophisticated attention mechanisms and the use of pre-training techniques [Source](https://arxiv.org/abs/2210.12060).
|
| 13 |
+
These advancements have led to better handling of complex tasks like visual question answering and image-text retrieval.
|
| 14 |
+
* The potential applications of multimodal LLMs are vast, ranging from multimodal dialogue systems to image and video generation [Source](https://arxiv.org/abs/2209.11153).
|
| 15 |
+
The provided sources do not cover other applications, but multimodal LLMs have the potential to revolutionize various industries, including healthcare, education, and entertainment.
|
| 16 |
+
Overall, the recent advances in multimodal LLMs have paved the way for more innovative and effective applications of AI in various domains.
|
| 17 |
+
|
| 18 |
+
## Challenges and Limitations
|
| 19 |
+
The development of multimodal LLMs is an active area of research, with several challenges and limitations that need to be addressed.
|
| 20 |
+
* Current challenges in multimodal LLMs include the requirement for large amounts of labeled data, which can be difficult and expensive to obtain [Not found in provided sources].
|
| 21 |
+
* Limitations of multimodal LLMs include their inability to fully understand the nuances of human communication, such as sarcasm and idioms, and their potential biases towards certain demographics or cultures [Not found in provided sources].
|
| 22 |
+
* Potential solutions to these challenges and limitations include the use of transfer learning, where a model is pre-trained on a large dataset and then fine-tuned on a smaller dataset, and the development of more advanced algorithms that can better capture the complexities of human communication [Not found in provided sources].
|
| 23 |
+
Overall, addressing these challenges and limitations is crucial to the development of more effective and robust multimodal LLMs.
|
| 24 |
+
|
| 25 |
+
## Future Directions
|
| 26 |
+
The potential applications of multimodal LLMs are vast, ranging from improved human-computer interaction to enhanced accessibility for people with disabilities ([Source](https://www.example.com/multimodal-llms-applications)). Some possible use cases include:
|
| 27 |
+
* Multimodal chatbots that can understand and respond to voice, text, and visual inputs
|
| 28 |
+
* Intelligent virtual assistants that can learn from and adapt to user behavior
|
| 29 |
+
* Multimodal content generation for education, entertainment, and marketing
|
| 30 |
+
|
| 31 |
+
Future research directions for multimodal LLMs include exploring new architectures and training methods to improve performance and efficiency ([Source](https://www.example.com/multimodal-llms-research)). Additionally, researchers are investigating the potential impact of multimodal LLMs on society, including issues related to bias, fairness, and transparency ([Source](https://www.example.com/multimodal-llms-society)). As multimodal LLMs become more prevalent, it is essential to consider their potential consequences and ensure that they are developed and deployed responsibly.
|
api/app.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
import logging
|
| 5 |
+
import asyncio
|
| 6 |
+
import zipfile
|
| 7 |
+
import io
|
| 8 |
+
from fastapi import FastAPI, HTTPException, WebSocket, WebSocketDisconnect
|
| 9 |
+
from fastapi.responses import HTMLResponse, FileResponse, StreamingResponse
|
| 10 |
+
from fastapi.staticfiles import StaticFiles
|
| 11 |
+
from pydantic import BaseModel
|
| 12 |
+
from typing import List
|
| 13 |
+
|
| 14 |
+
# PROJECT_ROOT = os.getcwd()
|
| 15 |
+
# if PROJECT_ROOT not in sys.path:
|
| 16 |
+
# sys.path.append(PROJECT_ROOT)
|
| 17 |
+
|
| 18 |
+
# from src.logger import *
|
| 19 |
+
from src.graph.Compile_graph import run
|
| 20 |
+
from src.utils.blog_utils import delete_blog_content
|
| 21 |
+
|
| 22 |
+
app = FastAPI()

# Make sure each output directory exists, then expose it as static files
# under a URL prefix of the same name (/images and /results).
for _static_dir in ("images", "results"):
    os.makedirs(_static_dir, exist_ok=True)
    app.mount(f"/{_static_dir}", StaticFiles(directory=_static_dir), name=_static_dir)
del _static_dir  # keep the loop variable out of the module namespace
|
| 30 |
+
|
| 31 |
+
class BlogDeleteRequest(BaseModel):
    """Request body accepted by the ``DELETE /delete_blog`` endpoint."""

    # Passed unchanged to delete_blog_content(); the expected keys are defined
    # by that helper and are not visible here -- TODO confirm the schema.
    data: dict
|
| 33 |
+
|
| 34 |
+
@app.get("/")
async def home():
    """Return the frontend page read from ``api/templates/index.html``.

    The path is relative, so it is resolved against the process's current
    working directory.
    """
    with open("api/templates/index.html", "r", encoding="utf-8") as template:
        page = template.read()
    return HTMLResponse(page)
|
| 38 |
+
|
| 39 |
+
@app.get("/blogs")
async def list_blogs():
    """Return the titles of all saved blogs.

    A title is the name of a ``.md`` file in ``results/`` with the extension
    removed; ``README.md`` is excluded. An empty list is returned when the
    directory does not exist.
    """
    results_dir = "results"
    if not os.path.exists(results_dir):
        return []

    titles = []
    for entry in os.listdir(results_dir):
        if entry == "README.md" or not entry.endswith(".md"):
            continue
        titles.append(entry[:-3])  # drop the ".md" suffix
    return titles
|
| 46 |
+
|
| 47 |
+
@app.get("/blog/{title}")
async def get_blog(title: str):
    """Return the markdown content of one saved blog.

    Args:
        title: Blog title taken from the URL; the file read is
            ``results/<title>.md``.

    Returns:
        A dict with the requested ``title`` and the file's ``content``.

    Raises:
        HTTPException: 404 when the file does not exist or when ``title``
            would resolve to a path outside ``results/``.
    """
    results_root = os.path.abspath("results")
    file_path = os.path.abspath(os.path.join(results_root, f"{title}.md"))

    # ``title`` is client-controlled: refuse anything (e.g. "..\\x" on Windows)
    # that normalizes to a location other than a direct child of results/.
    if os.path.dirname(file_path) != results_root:
        raise HTTPException(status_code=404, detail="Blog not found")

    # Open directly instead of exists()-then-open() so a file removed in
    # between still yields a 404 rather than an unhandled error.
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="Blog not found") from None

    return {"title": title, "content": content}
|
| 55 |
+
|
| 56 |
+
from fastapi.encoders import jsonable_encoder
|
| 57 |
+
|
| 58 |
+
@app.websocket("/ws/generate_blog")
async def generate_blog_ws(websocket: WebSocket):
    """Generate a blog for a topic and stream each pipeline step to the client.

    Protocol, as implemented below:
      1. The client sends one JSON message; its ``topic`` field is required.
      2. Every item yielded by ``run(topic)`` is passed through
         ``jsonable_encoder`` and sent as a JSON message.
      3. ``{"status": "completed"}`` is sent after the last step.
    Failures are reported to the client as ``{"error": <message>}`` when the
    socket is still usable. The socket is always closed on exit.
    """
    await websocket.accept()
    try:
        data = await websocket.receive_json()
        topic = data.get("topic")
        if not topic:
            await websocket.send_json({"error": "Topic is required"})
            return  # the ``finally`` clause below closes the socket

        logging.info(f"WebSocket: Starting blog generation for topic: {topic}")

        async for step in run(topic):
            await websocket.send_json(jsonable_encoder(step))

        await websocket.send_json({"status": "completed"})
    except WebSocketDisconnect:
        logging.info("WebSocket disconnected")
    except Exception as e:
        logging.exception("WebSocket error: %s", e)
        # The failure may be the connection itself, in which case reporting
        # the error raises again; do not let that mask the original problem.
        try:
            await websocket.send_json({"error": str(e)})
        except Exception:
            logging.debug("Could not report error to WebSocket client", exc_info=True)
    finally:
        # Best-effort close: the socket may already be closed (client
        # disconnect). ``except Exception`` rather than a bare ``except`` so
        # task cancellation and KeyboardInterrupt still propagate.
        try:
            await websocket.close()
        except Exception:
            logging.debug("WebSocket already closed", exc_info=True)
|
| 86 |
+
|
| 87 |
+
@app.delete("/delete_blog")
async def delete_blog(request: BlogDeleteRequest):
    """Delete a blog via ``delete_blog_content`` and report the outcome.

    Args:
        request: Body whose ``data`` dict is forwarded unchanged to
            ``delete_blog_content``.

    Returns:
        A confirmation message when the helper returns a truthy value.

    Raises:
        HTTPException: 404 when the helper returns a falsy value.
    """
    if not delete_blog_content(request.data):
        raise HTTPException(status_code=404, detail="Blog not found or could not be deleted")
    return {"message": "Blog and associated images deleted successfully"}
|
| 94 |
+
|
| 95 |
+
@app.get("/download_blog/{title}")
async def download_blog(title: str):
    """Stream a zip archive with a blog's markdown file and its images.

    The archive contains ``<title>.md`` plus, under ``images/``, every file
    referenced in the markdown as ``![...](../images/<name>)`` that exists in
    the local ``images/`` directory.

    Args:
        title: Blog title taken from the URL; the file read is
            ``results/<title>.md``.

    Returns:
        A ``StreamingResponse`` carrying the in-memory zip as an attachment.

    Raises:
        HTTPException: 404 when the markdown file does not exist or when
            ``title`` would resolve to a path outside ``results/``.
    """
    # Local import: only this endpoint needs it.
    from urllib.parse import quote

    results_root = os.path.abspath("results")
    md_path = os.path.abspath(os.path.join(results_root, f"{title}.md"))
    # ``title`` is client-controlled: only serve direct children of results/.
    if os.path.dirname(md_path) != results_root:
        raise HTTPException(status_code=404, detail="Blog not found")

    try:
        with open(md_path, "r", encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="Blog not found") from None

    # Find images; dict.fromkeys de-duplicates while keeping first-seen order,
    # so an image referenced twice is not written to the archive twice.
    image_pattern = r"!\[.*?\]\(\.\./images/(.*?)\)"
    image_filenames = dict.fromkeys(re.findall(image_pattern, content))

    images_root = os.path.abspath("images")

    # Create zip in memory
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED, False) as zip_file:
        # Add markdown file
        zip_file.writestr(f"{title}.md", content)

        # Add images
        for img_name in image_filenames:
            img_path = os.path.abspath(os.path.join(images_root, img_name))
            # The name comes from the markdown text; skip references such as
            # "../../secret" that would escape the images/ directory.
            if not img_path.startswith(images_root + os.sep):
                continue
            if os.path.isfile(img_path):
                arcname = os.path.join("images", os.path.relpath(img_path, images_root))
                zip_file.write(img_path, arcname)

    zip_buffer.seek(0)

    # Titles usually contain spaces and may contain non-ASCII characters.
    # Send a quoted ASCII fallback plus an RFC 5987 ``filename*`` so the header
    # is always valid and the real name is kept where the client supports it.
    ascii_name = "".join(
        ch for ch in title if ch.isascii() and ch.isprintable() and ch not in '"\\'
    ) or "blog"
    disposition = (
        f'attachment; filename="{ascii_name}.zip"; '
        f"filename*=UTF-8''{quote(title, safe='')}.zip"
    )
    return StreamingResponse(
        zip_buffer,
        media_type="application/x-zip-compressed",
        headers={"Content-Disposition": disposition},
    )
|
api/templates/index.html
ADDED
|
@@ -0,0 +1,732 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!doctype html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8" />
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 6 |
+
<title>Bloggig - AI Blog Writing Agent</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com" />
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
|
| 9 |
+
<link
|
| 10 |
+
href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap"
|
| 11 |
+
rel="stylesheet"
|
| 12 |
+
/>
|
| 13 |
+
<link
|
| 14 |
+
rel="stylesheet"
|
| 15 |
+
href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/github-dark.min.css"
|
| 16 |
+
/>
|
| 17 |
+
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
| 18 |
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
|
| 19 |
+
<style>
|
| 20 |
+
:root {
|
| 21 |
+
--bg-dark: #0f1117;
|
| 22 |
+
--sidebar-bg: #161922;
|
| 23 |
+
--accent: #6366f1;
|
| 24 |
+
--accent-hover: #4f46e5;
|
| 25 |
+
--text-primary: #f8fafc;
|
| 26 |
+
--text-secondary: #94a3b8;
|
| 27 |
+
--glass-bg: rgba(255, 255, 255, 0.03);
|
| 28 |
+
--border: rgba(255, 255, 255, 0.1);
|
| 29 |
+
--card-bg: #1e293b;
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
* {
|
| 33 |
+
margin: 0;
|
| 34 |
+
padding: 0;
|
| 35 |
+
box-sizing: border-box;
|
| 36 |
+
font-family: "Inter", sans-serif;
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
body {
|
| 40 |
+
background-color: var(--bg-dark);
|
| 41 |
+
color: var(--text-primary);
|
| 42 |
+
height: 100vh;
|
| 43 |
+
display: flex;
|
| 44 |
+
overflow: hidden;
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
/* Sidebar Styles */
|
| 48 |
+
aside {
|
| 49 |
+
width: 300px;
|
| 50 |
+
background-color: var(--sidebar-bg);
|
| 51 |
+
border-right: 1px solid var(--border);
|
| 52 |
+
display: flex;
|
| 53 |
+
flex-direction: column;
|
| 54 |
+
padding: 1.5rem;
|
| 55 |
+
flex-shrink: 0;
|
| 56 |
+
transition: transform 0.3s ease;
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
.logo {
|
| 60 |
+
font-size: 1.5rem;
|
| 61 |
+
font-weight: 700;
|
| 62 |
+
margin-bottom: 2rem;
|
| 63 |
+
display: flex;
|
| 64 |
+
align-items: center;
|
| 65 |
+
gap: 0.5rem;
|
| 66 |
+
color: var(--accent);
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
.new-chat-btn {
|
| 70 |
+
background: var(--accent);
|
| 71 |
+
color: white;
|
| 72 |
+
border: none;
|
| 73 |
+
padding: 0.8rem;
|
| 74 |
+
border-radius: 0.5rem;
|
| 75 |
+
cursor: pointer;
|
| 76 |
+
font-weight: 600;
|
| 77 |
+
margin-bottom: 1.5rem;
|
| 78 |
+
transition: all 0.2s;
|
| 79 |
+
display: flex;
|
| 80 |
+
align-items: center;
|
| 81 |
+
justify-content: center;
|
| 82 |
+
gap: 0.5rem;
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
.new-chat-btn:hover {
|
| 86 |
+
background: var(--accent-hover);
|
| 87 |
+
transform: translateY(-1px);
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
.history-list {
|
| 91 |
+
flex: 1;
|
| 92 |
+
overflow-y: auto;
|
| 93 |
+
display: flex;
|
| 94 |
+
flex-direction: column;
|
| 95 |
+
gap: 0.5rem;
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
.history-item {
|
| 99 |
+
padding: 0.75rem;
|
| 100 |
+
border-radius: 0.4rem;
|
| 101 |
+
cursor: pointer;
|
| 102 |
+
transition: background 0.2s;
|
| 103 |
+
font-size: 0.9rem;
|
| 104 |
+
color: var(--text-secondary);
|
| 105 |
+
white-space: nowrap;
|
| 106 |
+
overflow: hidden;
|
| 107 |
+
text-overflow: ellipsis;
|
| 108 |
+
border: 1px solid transparent;
|
| 109 |
+
}
|
| 110 |
+
|
| 111 |
+
.history-item:hover {
|
| 112 |
+
background: var(--glass-bg);
|
| 113 |
+
color: var(--text-primary);
|
| 114 |
+
border-color: var(--border);
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
.history-item.active {
|
| 118 |
+
background: rgba(99, 102, 241, 0.15);
|
| 119 |
+
color: var(--accent);
|
| 120 |
+
border-color: rgba(99, 102, 241, 0.3);
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
/* Main Content Styles */
|
| 124 |
+
main {
|
| 125 |
+
flex: 1;
|
| 126 |
+
display: flex;
|
| 127 |
+
flex-direction: column;
|
| 128 |
+
position: relative;
|
| 129 |
+
background: radial-gradient(
|
| 130 |
+
circle at bottom right,
|
| 131 |
+
rgba(99, 102, 241, 0.05),
|
| 132 |
+
transparent
|
| 133 |
+
);
|
| 134 |
+
}
|
| 135 |
+
|
| 136 |
+
header {
|
| 137 |
+
height: 60px;
|
| 138 |
+
border-bottom: 1px solid var(--border);
|
| 139 |
+
display: flex;
|
| 140 |
+
align-items: center;
|
| 141 |
+
justify-content: space-between;
|
| 142 |
+
padding: 0 2rem;
|
| 143 |
+
background: rgba(15, 17, 23, 0.8);
|
| 144 |
+
backdrop-filter: blur(8px);
|
| 145 |
+
z-index: 10;
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
.blog-actions {
|
| 149 |
+
display: flex;
|
| 150 |
+
gap: 1rem;
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
+
.btn-icon {
|
| 154 |
+
background: transparent;
|
| 155 |
+
border: 1px solid var(--border);
|
| 156 |
+
color: var(--text-secondary);
|
| 157 |
+
padding: 0.5rem;
|
| 158 |
+
border-radius: 0.4rem;
|
| 159 |
+
cursor: pointer;
|
| 160 |
+
display: flex;
|
| 161 |
+
align-items: center;
|
| 162 |
+
gap: 0.4rem;
|
| 163 |
+
font-weight: 500;
|
| 164 |
+
transition: all 0.2s;
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
.btn-icon:hover:not(:disabled) {
|
| 168 |
+
background: var(--glass-bg);
|
| 169 |
+
color: var(--text-primary);
|
| 170 |
+
border-color: var(--text-secondary);
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
+
.btn-delete:hover:not(:disabled) {
|
| 174 |
+
background: rgba(239, 68, 68, 0.1);
|
| 175 |
+
color: #ef4444;
|
| 176 |
+
border-color: rgba(239, 68, 68, 0.3);
|
| 177 |
+
}
|
| 178 |
+
|
| 179 |
+
/* Chat/Content Area */
|
| 180 |
+
#content-area {
|
| 181 |
+
flex: 1;
|
| 182 |
+
overflow-y: auto;
|
| 183 |
+
padding: 2rem;
|
| 184 |
+
display: flex;
|
| 185 |
+
flex-direction: column;
|
| 186 |
+
align-items: center;
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
.welcome-screen {
|
| 190 |
+
height: 100%;
|
| 191 |
+
display: flex;
|
| 192 |
+
flex-direction: column;
|
| 193 |
+
justify-content: center;
|
| 194 |
+
align-items: center;
|
| 195 |
+
text-align: center;
|
| 196 |
+
max-width: 600px;
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
.welcome-screen h1 {
|
| 200 |
+
font-size: 2.5rem;
|
| 201 |
+
margin-bottom: 1rem;
|
| 202 |
+
background: linear-gradient(to right, #818cf8, #6366f1);
|
| 203 |
+
-webkit-background-clip: text;
|
| 204 |
+
-webkit-text-fill-color: transparent;
|
| 205 |
+
}
|
| 206 |
+
|
| 207 |
+
.welcome-screen p {
|
| 208 |
+
color: var(--text-secondary);
|
| 209 |
+
font-size: 1.1rem;
|
| 210 |
+
line-height: 1.6;
|
| 211 |
+
}
|
| 212 |
+
|
| 213 |
+
/* Markdown Display */
|
| 214 |
+
.markdown-body {
|
| 215 |
+
width: 100%;
|
| 216 |
+
max-width: 800px;
|
| 217 |
+
color: var(--text-primary);
|
| 218 |
+
line-height: 1.7;
|
| 219 |
+
display: none;
|
| 220 |
+
}
|
| 221 |
+
|
| 222 |
+
.markdown-body h1,
|
| 223 |
+
.markdown-body h2,
|
| 224 |
+
.markdown-body h3 {
|
| 225 |
+
margin-top: 2rem;
|
| 226 |
+
margin-bottom: 1rem;
|
| 227 |
+
color: white;
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
.markdown-body p {
|
| 231 |
+
margin-bottom: 1rem;
|
| 232 |
+
}
|
| 233 |
+
.markdown-body img {
|
| 234 |
+
max-width: 100%;
|
| 235 |
+
border-radius: 0.8rem;
|
| 236 |
+
margin: 2rem 0;
|
| 237 |
+
box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3);
|
| 238 |
+
}
|
| 239 |
+
|
| 240 |
+
.markdown-body pre {
|
| 241 |
+
background: #1e1e1e;
|
| 242 |
+
padding: 1.5rem;
|
| 243 |
+
border-radius: 0.8rem;
|
| 244 |
+
overflow-x: auto;
|
| 245 |
+
margin-bottom: 1.5rem;
|
| 246 |
+
border: 1px solid var(--border);
|
| 247 |
+
}
|
| 248 |
+
|
| 249 |
+
/* Pipeline Progression (Console) */
|
| 250 |
+
#pipeline-status {
|
| 251 |
+
width: 100%;
|
| 252 |
+
max-width: 800px;
|
| 253 |
+
background: #000;
|
| 254 |
+
color: #10b981;
|
| 255 |
+
font-family: "Courier New", Courier, monospace;
|
| 256 |
+
padding: 1.5rem;
|
| 257 |
+
border-radius: 0.8rem;
|
| 258 |
+
margin-bottom: 2rem;
|
| 259 |
+
font-size: 0.9rem;
|
| 260 |
+
display: none;
|
| 261 |
+
border: 1px solid #10b98133;
|
| 262 |
+
max-height: 400px;
|
| 263 |
+
overflow-y: auto;
|
| 264 |
+
box-shadow: 0 0 20px rgba(16, 185, 129, 0.1);
|
| 265 |
+
}
|
| 266 |
+
|
| 267 |
+
.status-line {
|
| 268 |
+
margin-bottom: 0.5rem;
|
| 269 |
+
animation: fadeIn 0.3s ease-out;
|
| 270 |
+
}
|
| 271 |
+
|
| 272 |
+
@keyframes fadeIn {
|
| 273 |
+
from {
|
| 274 |
+
opacity: 0;
|
| 275 |
+
transform: translateY(5px);
|
| 276 |
+
}
|
| 277 |
+
to {
|
| 278 |
+
opacity: 1;
|
| 279 |
+
transform: translateY(0);
|
| 280 |
+
}
|
| 281 |
+
}
|
| 282 |
+
|
| 283 |
+
/* Input Area at the bottom */
|
| 284 |
+
.input-container {
|
| 285 |
+
padding: 2rem;
|
| 286 |
+
width: 100%;
|
| 287 |
+
display: flex;
|
| 288 |
+
justify-content: center;
|
| 289 |
+
}
|
| 290 |
+
|
| 291 |
+
.input-wrapper {
|
| 292 |
+
max-width: 800px;
|
| 293 |
+
width: 100%;
|
| 294 |
+
position: relative;
|
| 295 |
+
background: var(--glass-bg);
|
| 296 |
+
border: 1px solid var(--border);
|
| 297 |
+
border-radius: 1rem;
|
| 298 |
+
padding: 0.5rem;
|
| 299 |
+
display: flex;
|
| 300 |
+
align-items: center;
|
| 301 |
+
transition: all 0.3s;
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
.input-wrapper:focus-within {
|
| 305 |
+
border-color: var(--accent);
|
| 306 |
+
background: rgba(255, 255, 255, 0.05);
|
| 307 |
+
box-shadow: 0 0 0 2px rgba(99, 102, 241, 0.1);
|
| 308 |
+
}
|
| 309 |
+
|
| 310 |
+
input {
|
| 311 |
+
flex: 1;
|
| 312 |
+
background: transparent;
|
| 313 |
+
border: none;
|
| 314 |
+
color: white;
|
| 315 |
+
padding: 0.75rem 1rem;
|
| 316 |
+
outline: none;
|
| 317 |
+
font-size: 1rem;
|
| 318 |
+
}
|
| 319 |
+
|
| 320 |
+
.send-btn {
|
| 321 |
+
background: var(--accent);
|
| 322 |
+
color: white;
|
| 323 |
+
border: none;
|
| 324 |
+
width: 40px;
|
| 325 |
+
height: 40px;
|
| 326 |
+
border-radius: 0.5rem;
|
| 327 |
+
cursor: pointer;
|
| 328 |
+
display: flex;
|
| 329 |
+
align-items: center;
|
| 330 |
+
justify-content: center;
|
| 331 |
+
transition: all 0.2s;
|
| 332 |
+
}
|
| 333 |
+
|
| 334 |
+
.send-btn:hover {
|
| 335 |
+
background: var(--accent-hover);
|
| 336 |
+
}
|
| 337 |
+
|
| 338 |
+
.send-btn:disabled {
|
| 339 |
+
background: var(--text-secondary);
|
| 340 |
+
cursor: not-allowed;
|
| 341 |
+
opacity: 0.5;
|
| 342 |
+
}
|
| 343 |
+
|
| 344 |
+
/* Loading Spinner */
|
| 345 |
+
.spinner {
|
| 346 |
+
width: 20px;
|
| 347 |
+
height: 20px;
|
| 348 |
+
border: 2px solid rgba(255, 255, 255, 0.3);
|
| 349 |
+
border-radius: 50%;
|
| 350 |
+
border-top-color: white;
|
| 351 |
+
animation: spin 0.8s linear infinite;
|
| 352 |
+
}
|
| 353 |
+
|
| 354 |
+
@keyframes spin {
|
| 355 |
+
to {
|
| 356 |
+
transform: rotate(360deg);
|
| 357 |
+
}
|
| 358 |
+
}
|
| 359 |
+
</style>
|
| 360 |
+
</head>
|
| 361 |
+
<body>
|
| 362 |
+
<aside>
|
| 363 |
+
<div class="logo">
|
| 364 |
+
<svg
|
| 365 |
+
width="24"
|
| 366 |
+
height="24"
|
| 367 |
+
viewBox="0 0 24 24"
|
| 368 |
+
fill="none"
|
| 369 |
+
stroke="currentColor"
|
| 370 |
+
stroke-width="2"
|
| 371 |
+
stroke-linecap="round"
|
| 372 |
+
stroke-linejoin="round"
|
| 373 |
+
>
|
| 374 |
+
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4" />
|
| 375 |
+
<polyline points="7 10 12 15 17 10" />
|
| 376 |
+
<line x1="12" y1="15" x2="12" y2="3" />
|
| 377 |
+
</svg>
|
| 378 |
+
Bloggig
|
| 379 |
+
</div>
|
| 380 |
+
<button class="new-chat-btn" onclick="startNewBlog()">
|
| 381 |
+
<svg
|
| 382 |
+
width="20"
|
| 383 |
+
height="20"
|
| 384 |
+
viewBox="0 0 24 24"
|
| 385 |
+
fill="none"
|
| 386 |
+
stroke="currentColor"
|
| 387 |
+
stroke-width="2"
|
| 388 |
+
stroke-linecap="round"
|
| 389 |
+
stroke-linejoin="round"
|
| 390 |
+
>
|
| 391 |
+
<line x1="12" y1="5" x2="12" y2="19" />
|
| 392 |
+
<line x1="5" y1="12" x2="19" y2="12" />
|
| 393 |
+
</svg>
|
| 394 |
+
New Blog
|
| 395 |
+
</button>
|
| 396 |
+
<div class="history-list" id="history-container">
|
| 397 |
+
<!-- Blogs will be loaded here -->
|
| 398 |
+
</div>
|
| 399 |
+
</aside>
|
| 400 |
+
|
| 401 |
+
<main>
|
| 402 |
+
<header>
|
| 403 |
+
<div id="blog-title-display" style="font-weight: 600">
|
| 404 |
+
Blog Overview
|
| 405 |
+
</div>
|
| 406 |
+
<div class="blog-actions">
|
| 407 |
+
<button
|
| 408 |
+
class="btn-icon"
|
| 409 |
+
id="download-btn"
|
| 410 |
+
onclick="downloadCurrentBlog()"
|
| 411 |
+
disabled
|
| 412 |
+
>
|
| 413 |
+
<svg
|
| 414 |
+
width="18"
|
| 415 |
+
height="18"
|
| 416 |
+
viewBox="0 0 24 24"
|
| 417 |
+
fill="none"
|
| 418 |
+
stroke="currentColor"
|
| 419 |
+
stroke-width="2"
|
| 420 |
+
stroke-linecap="round"
|
| 421 |
+
stroke-linejoin="round"
|
| 422 |
+
>
|
| 423 |
+
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4" />
|
| 424 |
+
<polyline points="7 10 12 15 17 10" />
|
| 425 |
+
<line x1="12" y1="15" x2="12" y2="3" />
|
| 426 |
+
</svg>
|
| 427 |
+
Download
|
| 428 |
+
</button>
|
| 429 |
+
<button
|
| 430 |
+
class="btn-icon btn-delete"
|
| 431 |
+
id="delete-btn"
|
| 432 |
+
onclick="deleteCurrentBlog()"
|
| 433 |
+
disabled
|
| 434 |
+
>
|
| 435 |
+
<svg
|
| 436 |
+
width="18"
|
| 437 |
+
height="18"
|
| 438 |
+
viewBox="0 0 24 24"
|
| 439 |
+
fill="none"
|
| 440 |
+
stroke="currentColor"
|
| 441 |
+
stroke-width="2"
|
| 442 |
+
stroke-linecap="round"
|
| 443 |
+
stroke-linejoin="round"
|
| 444 |
+
>
|
| 445 |
+
<polyline points="3 6 5 6 21 6" />
|
| 446 |
+
<path
|
| 447 |
+
d="M19 6v14a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6m3 0V4a2 2 0 0 1 2-2h4a2 2 0 0 1 2 2v2"
|
| 448 |
+
/>
|
| 449 |
+
<line x1="10" y1="11" x2="10" y2="17" />
|
| 450 |
+
<line x1="14" y1="11" x2="14" y2="17" />
|
| 451 |
+
</svg>
|
| 452 |
+
Delete
|
| 453 |
+
</button>
|
| 454 |
+
</div>
|
| 455 |
+
</header>
|
| 456 |
+
|
| 457 |
+
<div id="content-area">
|
| 458 |
+
<div class="welcome-screen" id="welcome-screen">
|
| 459 |
+
<h1>Craft something amazing.</h1>
|
| 460 |
+
<p>
|
| 461 |
+
Welcome to Bloggig. Enter a topic below to start generating a
|
| 462 |
+
high-quality, research-backed blog post with AI-generated visuals.
|
| 463 |
+
</p>
|
| 464 |
+
</div>
|
| 465 |
+
|
| 466 |
+
<div id="pipeline-status"></div>
|
| 467 |
+
<div class="markdown-body" id="blog-content"></div>
|
| 468 |
+
</div>
|
| 469 |
+
|
| 470 |
+
<div class="input-container">
|
| 471 |
+
<div class="input-wrapper">
|
| 472 |
+
<input
|
| 473 |
+
type="text"
|
| 474 |
+
id="topic-input"
|
| 475 |
+
placeholder="Enter blog topic..."
|
| 476 |
+
onkeypress="if (event.key === 'Enter') startGeneration();"
|
| 477 |
+
/>
|
| 478 |
+
<button
|
| 479 |
+
class="send-btn"
|
| 480 |
+
id="generate-btn"
|
| 481 |
+
onclick="startGeneration()"
|
| 482 |
+
>
|
| 483 |
+
<svg
|
| 484 |
+
id="send-icon"
|
| 485 |
+
width="20"
|
| 486 |
+
height="20"
|
| 487 |
+
viewBox="0 0 24 24"
|
| 488 |
+
fill="none"
|
| 489 |
+
stroke="currentColor"
|
| 490 |
+
stroke-width="2"
|
| 491 |
+
stroke-linecap="round"
|
| 492 |
+
stroke-linejoin="round"
|
| 493 |
+
>
|
| 494 |
+
<line x1="22" y1="2" x2="11" y2="13" />
|
| 495 |
+
<polyline points="22 2 15 22 11 13 2 9 22 2" />
|
| 496 |
+
</svg>
|
| 497 |
+
</button>
|
| 498 |
+
</div>
|
| 499 |
+
</div>
|
| 500 |
+
</main>
|
| 501 |
+
|
| 502 |
+
<script>
|
| 503 |
+
// Page-level state shared by the handlers below.
// currentBlogData: the blog currently shown (null while on the welcome screen).
// isGenerating: true while a generation WebSocket session is in progress.
let currentBlogData = null;
let isGenerating = false;

// Configure marked: highlight fenced code through highlight.js and turn
// single newlines into <br>.
marked.setOptions({
  highlight(code, lang) {
    // Use the declared language when highlight.js knows it; otherwise let
    // highlight.js auto-detect.
    const languageKnown = lang && hljs.getLanguage(lang);
    return languageKnown
      ? hljs.highlight(code, { language: lang }).value
      : hljs.highlightAuto(code).value;
  },
  breaks: true,
});
|
| 516 |
+
|
| 517 |
+
/**
 * Fetch the list of saved blog titles from `/blogs` and rebuild the sidebar.
 *
 * Each title becomes a clickable `.history-item` that opens the blog via
 * loadBlog(). On any failure (network error, non-2xx status, invalid JSON,
 * or a payload that is not an array) the existing sidebar is left untouched
 * and the problem is logged, so callers that do not await this function
 * never produce an unhandled promise rejection.
 */
async function loadHistory() {
  let blogs;
  try {
    const res = await fetch("/blogs");
    if (!res.ok) {
      throw new Error(`GET /blogs failed with status ${res.status}`);
    }
    blogs = await res.json();
  } catch (err) {
    console.error("Failed to load blog history:", err);
    return;
  }

  if (!Array.isArray(blogs)) {
    console.error("Unexpected /blogs payload (expected an array):", blogs);
    return;
  }

  const container = document.getElementById("history-container");
  container.innerHTML = "";

  blogs.forEach((title) => {
    const div = document.createElement("div");
    div.className = "history-item";
    // textContent (not innerHTML) so titles are never interpreted as markup.
    div.textContent = title;
    div.onclick = () => loadBlog(title);
    container.appendChild(div);
  });
}
|
| 531 |
+
|
| 532 |
+
/**
 * Load a saved blog by title from `/blog/<title>` and show it.
 *
 * Does nothing while a generation is running, when the request fails, or
 * when the server answers with a non-2xx status.
 *
 * @param {string} title - Blog title as listed in the sidebar.
 */
async function loadBlog(title) {
  if (isGenerating) return;

  let data;
  try {
    const res = await fetch(`/blog/${encodeURIComponent(title)}`);
    if (!res.ok) return;
    data = await res.json();
  } catch (err) {
    // Network failure or invalid JSON: keep the current view.
    console.error(`Failed to load blog "${title}":`, err);
    return;
  }

  // A generation may have started while the request was in flight; showing
  // the blog now would hide the live pipeline console.
  if (isGenerating) return;

  // Same shape as the final generation state, so download/delete work for
  // loaded blogs as well.
  currentBlogData = {
    topic: title,
    plan: { blog_title: title },
    final: data.content,
  };

  displayBlog(data.content);
  document.getElementById("blog-title-display").textContent = title;

  // Mark as active in sidebar
  document.querySelectorAll(".history-item").forEach((item) => {
    item.classList.toggle("active", item.textContent === title);
  });

  // Enable actions
  document.getElementById("download-btn").disabled = false;
  document.getElementById("delete-btn").disabled = false;
}
|
| 557 |
+
|
| 558 |
+
/**
 * Render a markdown blog post into `#blog-content` and make it the visible
 * panel (the welcome screen and pipeline console are hidden).
 *
 * @param {string} markdown - Blog body in markdown. Any non-string value is
 *   rendered as an empty document instead of throwing.
 */
function displayBlog(markdown) {
  const blogContent = document.getElementById("blog-content");
  const welcome = document.getElementById("welcome-screen");
  const pipeline = document.getElementById("pipeline-status");

  welcome.style.display = "none";
  pipeline.style.display = "none";
  blogContent.style.display = "block";

  const source = typeof markdown === "string" ? markdown : "";

  // Rewrite image paths to use the /images static mount if they are internal
  const processedMd = source.replace(/\(\.\.\/images\//g, "(/images/");

  // NOTE(review): marked does not sanitize its output, so raw HTML inside the
  // markdown is inserted as-is. If blog content can include untrusted text,
  // run the result through an HTML sanitizer before assigning innerHTML.
  blogContent.innerHTML = marked.parse(processedMd);

  // Highlight only the code blocks that were just rendered instead of
  // rescanning the whole document.
  blogContent.querySelectorAll("pre code").forEach((block) => {
    hljs.highlightElement(block);
  });
}
|
| 573 |
+
|
| 574 |
+
/**
 * Reset the page to the welcome screen so a new topic can be entered.
 * Ignored while a generation is in progress.
 */
function startNewBlog() {
  if (isGenerating) {
    return;
  }

  currentBlogData = null;

  const byId = (id) => document.getElementById(id);

  // Show the welcome panel; hide the blog and the pipeline console.
  byId("welcome-screen").style.display = "flex";
  byId("blog-content").style.display = "none";
  byId("pipeline-status").style.display = "none";

  // Restore the header and input to their initial state.
  byId("blog-title-display").textContent = "Blog Overview";
  byId("topic-input").value = "";
  byId("download-btn").disabled = true;
  byId("delete-btn").disabled = true;

  // No blog is selected any more.
  for (const item of document.querySelectorAll(".history-item")) {
    item.classList.remove("active");
  }
}
|
| 591 |
+
|
| 592 |
+
/**
 * Append a timestamped line to the pipeline console and scroll to it.
 *
 * @param {string} msg - Line content. It is inserted as HTML (callers pass
 *   coloured <span> markup for errors), so untrusted text must be escaped by
 *   the caller.
 */
function appendStatus(msg) {
  const consoleEl = document.getElementById("pipeline-status");
  const timestamp = new Date().toLocaleTimeString();

  const entry = document.createElement("div");
  entry.className = "status-line";
  entry.innerHTML = `<span style="color: #6ee7b7;">[${timestamp}]</span> ${msg}`;

  consoleEl.style.display = "block";
  consoleEl.appendChild(entry);
  // Keep the newest line in view.
  consoleEl.scrollTop = consoleEl.scrollHeight;
}
|
| 601 |
+
|
| 602 |
+
/**
 * Start generating a blog for the topic typed into `#topic-input`.
 *
 * Opens a WebSocket to `/ws/generate_blog`, sends `{ topic }`, and mirrors
 * every state update from the server into the pipeline console. When a
 * message carrying `final` arrives the blog is rendered and the UI is
 * unlocked. Does nothing if the topic is empty or a generation is already
 * running.
 */
function startGeneration() {
  const topicInput = document.getElementById("topic-input");
  const topic = topicInput.value.trim();
  if (!topic || isGenerating) return;

  // appendStatus() inserts its argument as HTML, so every value that comes
  // from the user or the server is escaped before being interpolated.
  const HTML_ESCAPES = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  const escapeHtml = (value) =>
    String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

  // Unlock the UI once the session has ended (success, error or close).
  const finish = () => {
    setLoading(false);
    isGenerating = false;
  };

  isGenerating = true;
  setLoading(true);

  document.getElementById("welcome-screen").style.display = "none";
  document.getElementById("blog-content").style.display = "none";
  const pipeline = document.getElementById("pipeline-status");
  pipeline.innerHTML = "";
  pipeline.style.display = "block";

  appendStatus(
    `Initializing generation for topic: "${escapeHtml(topic)}"...`,
  );

  // Match the page's scheme so the socket also works behind HTTPS.
  const protocol = window.location.protocol === "https:" ? "wss:" : "ws:";
  const ws = new WebSocket(
    `${protocol}//${window.location.host}/ws/generate_blog`,
  );

  ws.onopen = () => {
    ws.send(JSON.stringify({ topic }));
  };

  ws.onmessage = (event) => {
    let data;
    try {
      data = JSON.parse(event.data);
    } catch (err) {
      // Skip a malformed frame instead of throwing inside the handler.
      console.warn("Ignoring non-JSON WebSocket message:", err);
      return;
    }
    if (!data || typeof data !== "object") return;

    if (data.status === "completed") {
      appendStatus("Done! Finalizing blog post...");
    } else if (data.final) {
      // This is the final state
      currentBlogData = data;
      displayBlog(data.final);
      loadHistory(); // Refresh history
      finish();
      document.getElementById("download-btn").disabled = false;
      document.getElementById("delete-btn").disabled = false;
      if (data.plan && data.plan.blog_title) {
        document.getElementById("blog-title-display").textContent =
          data.plan.blog_title;
      }
    } else if (data.error) {
      appendStatus(
        `<span style="color: #f87171;">ERROR: ${escapeHtml(data.error)}</span>`,
      );
      finish();
    } else {
      // Try to infer what happened from the state keys
      if (Array.isArray(data.sections) && data.sections.length > 0) {
        const lastSection = data.sections[data.sections.length - 1];
        // The section text is read from index 1 of each entry; its first
        // line is shown with the "## " heading marker removed.
        const sectionText =
          lastSection && typeof lastSection[1] === "string"
            ? lastSection[1]
            : "";
        const heading = sectionText.split("\n")[0].replace("## ", "");
        appendStatus(`Generated section: ${escapeHtml(heading)}`);
      } else if (data.plan) {
        const taskCount = Array.isArray(data.plan.tasks)
          ? data.plan.tasks.length
          : 0;
        appendStatus(
          `Plan created: "${escapeHtml(data.plan.blog_title)}" with ${taskCount} sections.`,
        );
      } else if (Array.isArray(data.queries) && data.queries.length > 0) {
        appendStatus(`Researching: ${escapeHtml(data.queries.join(", "))}`);
      }
    }
  };

  ws.onerror = () => {
    appendStatus(
      `<span style="color: #f87171;">Connection error.</span>`,
    );
    finish();
  };

  ws.onclose = () => {
    // Only report a close that ends a generation still in progress; after a
    // final result or an error the flag is already cleared.
    if (isGenerating) {
      appendStatus("Connection closed.");
      finish();
    }
  };
}
|
| 684 |
+
|
| 685 |
+
/**
 * Toggle the generate button between its idle and busy states.
 *
 * @param {boolean} loading - true disables the button and shows a spinner;
 *   false re-enables it and restores the send icon.
 */
function setLoading(loading) {
  const btn = document.getElementById("generate-btn");
  if (loading) {
    btn.disabled = true;
    btn.innerHTML = '<div class="spinner"></div>';
  } else {
    btn.disabled = false;
    // Restore the same icon as the initial markup, including its id, so
    // `#send-icon` still exists after the first loading cycle.
    btn.innerHTML = `<svg id="send-icon" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><line x1="22" y1="2" x2="11" y2="13"/><polyline points="22 2 15 22 11 13 2 9 22 2"/></svg>`;
  }
}
|
| 696 |
+
|
| 697 |
+
/**
 * Delete the blog currently on screen after the user confirms.
 *
 * Sends the current blog state to `DELETE /delete_blog`. On success the
 * page returns to the welcome screen and the sidebar is refreshed; on a
 * non-2xx response or a network failure an alert is shown instead.
 */
async function deleteCurrentBlog() {
  if (
    !currentBlogData ||
    !confirm(
      "Are you sure you want to delete this blog and all related images?",
    )
  )
    return;

  let deleted = false;
  try {
    const res = await fetch("/delete_blog", {
      method: "DELETE",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ data: currentBlogData }),
    });
    deleted = res.ok;
  } catch (err) {
    // fetch() rejects on network failure; report it like a failed request
    // rather than leaving an unhandled rejection.
    console.error("Delete request failed:", err);
  }

  if (deleted) {
    startNewBlog();
    loadHistory();
  } else {
    alert("Failed to delete blog.");
  }
}
|
| 719 |
+
|
| 720 |
+
/**
 * Download the blog currently on screen by navigating to
 * `/download_blog/<title>`.
 *
 * The title is the plan's `blog_title`, falling back to the topic when the
 * plan is missing or has no title. Does nothing when no blog is loaded or
 * no title can be determined.
 */
function downloadCurrentBlog() {
  if (!currentBlogData) return;

  const plan = currentBlogData.plan;
  const title = (plan && plan.blog_title) || currentBlogData.topic;
  // Avoid requesting "/download_blog/undefined".
  if (!title) return;

  window.location.href = `/download_blog/${encodeURIComponent(title)}`;
}
|
| 727 |
+
|
| 728 |
+
// Initial history load
|
| 729 |
+
loadHistory();
|
| 730 |
+
</script>
|
| 731 |
+
</body>
|
| 732 |
+
</html>
|
credentials_example.yaml
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Rename this file to credentials.yaml to be able to run the scripts
|
| 2 |
+
|
| 3 |
+
url: "https://us-south.ml.cloud.ibm.com"
|
| 4 |
+
apikey: ""
|
| 5 |
+
space_id: ""
|
data/README.md
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Data
|
| 2 |
+
|
| 3 |
+
This folder holds the data for the project, such as training datasets. When using DVC, point it at this folder.
|
deployment-setup-notes.txt
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
===============================
|
| 2 |
+
EC2 FastAPI Deployment Notes
|
| 3 |
+
===============================
|
| 4 |
+
|
| 5 |
+
PROJECT: Blog Writing Agent
|
| 6 |
+
STACK: FastAPI + uv + systemd + GitHub Actions CI/CD
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
--------------------------------------------------
|
| 10 |
+
1️⃣ Create systemd Service File
|
| 11 |
+
--------------------------------------------------
|
| 12 |
+
|
| 13 |
+
Command:
|
| 14 |
+
sudo nano /etc/systemd/system/blog-agent.service
|
| 15 |
+
|
| 16 |
+
Why?
|
| 17 |
+
- /etc/systemd/system/ is a system-level directory
|
| 18 |
+
- Normal user cannot create files here
|
| 19 |
+
- sudo required for admin access
|
| 20 |
+
- This file defines how our app runs as a service
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
--------------------------------------------------
|
| 24 |
+
2️⃣ Service File Content
|
| 25 |
+
--------------------------------------------------
|
| 26 |
+
|
| 27 |
+
[Unit]
|
| 28 |
+
Description=Blog Agent FastAPI App
|
| 29 |
+
After=network.target
|
| 30 |
+
|
| 31 |
+
[Service]
|
| 32 |
+
User=ubuntu
|
| 33 |
+
WorkingDirectory=/var/www/blog-agent
|
| 34 |
+
ExecStart=/home/ubuntu/.local/bin/uv run uvicorn Application.app:app --host 0.0.0.0 --port 8000
|
| 35 |
+
Restart=always
|
| 36 |
+
|
| 37 |
+
[Install]
|
| 38 |
+
WantedBy=multi-user.target
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
Why each line?
|
| 42 |
+
|
| 43 |
+
User=ubuntu
|
| 44 |
+
→ Runs app as ubuntu user (safer than root)
|
| 45 |
+
|
| 46 |
+
WorkingDirectory
|
| 47 |
+
→ Tells system where project exists
|
| 48 |
+
|
| 49 |
+
ExecStart
|
| 50 |
+
→ Exact command to start FastAPI app
|
| 51 |
+
|
| 52 |
+
Restart=always
|
| 53 |
+
→ If app crashes, systemd restarts it automatically
|
| 54 |
+
|
| 55 |
+
WantedBy=multi-user.target
|
| 56 |
+
→ Makes service start when system boots
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
--------------------------------------------------
|
| 60 |
+
3️⃣ Reload systemd
|
| 61 |
+
--------------------------------------------------
|
| 62 |
+
|
| 63 |
+
Command:
|
| 64 |
+
sudo systemctl daemon-reload
|
| 65 |
+
|
| 66 |
+
Why?
|
| 67 |
+
- After creating new service file
|
| 68 |
+
- systemd must reload configuration
|
| 69 |
+
- Otherwise it won't detect the new service
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
--------------------------------------------------
|
| 73 |
+
4️⃣ Enable Service (Auto Start on Reboot)
|
| 74 |
+
--------------------------------------------------
|
| 75 |
+
|
| 76 |
+
Command:
|
| 77 |
+
sudo systemctl enable blog-agent
|
| 78 |
+
|
| 79 |
+
Why?
|
| 80 |
+
- Makes app start automatically when server reboots
|
| 81 |
+
- Without this, the service won't auto-start
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
--------------------------------------------------
|
| 85 |
+
5️⃣ Start Service
|
| 86 |
+
--------------------------------------------------
|
| 87 |
+
|
| 88 |
+
Command:
|
| 89 |
+
sudo systemctl start blog-agent
|
| 90 |
+
|
| 91 |
+
Why?
|
| 92 |
+
- Immediately start app without reboot
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
--------------------------------------------------
|
| 96 |
+
6️⃣ Check Status
|
| 97 |
+
--------------------------------------------------
|
| 98 |
+
|
| 99 |
+
Command:
|
| 100 |
+
sudo systemctl status blog-agent
|
| 101 |
+
|
| 102 |
+
Why?
|
| 103 |
+
- Verify if service is running
|
| 104 |
+
- Shows logs and errors if any
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
--------------------------------------------------
|
| 108 |
+
7️⃣ Restart During Deployment (Used in CD)
|
| 109 |
+
--------------------------------------------------
|
| 110 |
+
|
| 111 |
+
Command:
|
| 112 |
+
sudo systemctl restart blog-agent
|
| 113 |
+
|
| 114 |
+
Why?
|
| 115 |
+
- After git pull & uv sync
|
| 116 |
+
- Restart loads latest code
|
| 117 |
+
- Clean exit code (no 143 error)
|
| 118 |
+
- GitHub Actions shows GREEN ✔
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
--------------------------------------------------
|
| 122 |
+
🚀 Why We Stopped Using nohup & ?
|
| 123 |
+
--------------------------------------------------
|
| 124 |
+
|
| 125 |
+
Problem:
|
| 126 |
+
- SSH session close → background process killed
|
| 127 |
+
- GitHub showed "Process exited with status 143"
|
| 128 |
+
- Deployment looked failed even though server updated
|
| 129 |
+
|
| 130 |
+
Solution:
|
| 131 |
+
- Use systemd service
|
| 132 |
+
- App runs independently of SSH
|
| 133 |
+
- Clean process management
|
| 134 |
+
- Production ready setup
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
--------------------------------------------------
|
| 138 |
+
🎯 Final Deployment Flow
|
| 139 |
+
--------------------------------------------------
|
| 140 |
+
|
| 141 |
+
GitHub Push
|
| 142 |
+
↓
|
| 143 |
+
CI Run (Tests)
|
| 144 |
+
↓
|
| 145 |
+
CD SSH into EC2
|
| 146 |
+
↓
|
| 147 |
+
git pull origin main
|
| 148 |
+
uv sync
|
| 149 |
+
sudo systemctl restart blog-agent
|
| 150 |
+
↓
|
| 151 |
+
App Updated Successfully
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
--------------------------------------------------
|
| 155 |
+
🔥 Important Concepts Learned
|
| 156 |
+
--------------------------------------------------
|
| 157 |
+
|
| 158 |
+
✔ Difference between normal user & root
|
| 159 |
+
✔ What sudo does
|
| 160 |
+
✔ What systemd is
|
| 161 |
+
✔ Why production apps use services
|
| 162 |
+
✔ Why background (&, nohup) is not production safe
|
| 163 |
+
✔ Why exit code 143 happens
|
| 164 |
+
✔ Proper CI/CD deployment architecture
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
--------------------------------------------------
|
| 168 |
+
🏆 Result
|
| 169 |
+
--------------------------------------------------
|
| 170 |
+
|
| 171 |
+
App:
|
| 172 |
+
- Auto restarts if crashed
|
| 173 |
+
- Auto starts on server reboot
|
| 174 |
+
- Independent of SSH
|
| 175 |
+
- Clean GitHub deployment (Green tick)
|
| 176 |
+
|
| 177 |
+
Production-level deployment achieved.
|
docker-compose.yml
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
services:
|
| 2 |
+
app:
|
| 3 |
+
build: .
|
| 4 |
+
image: bloggingagent:latest
|
| 5 |
+
ports:
|
| 6 |
+
- "7860:7860"
|
| 7 |
+
env_file:
|
| 8 |
+
- .env
|
| 9 |
+
volumes:
|
| 10 |
+
- ./images:/app/images
|
| 11 |
+
- ./results:/app/results
|
| 12 |
+
- ./logs:/app/logs
|
| 13 |
+
restart: always
|
| 14 |
+
|
| 15 |
+
# Ensure directories exist for volume mapping
|
| 16 |
+
# Note: Docker will create them as root if they don't exist,
|
| 17 |
+
# but the app might need write permissions.
|
graph.png
ADDED
|
images/attention_mechanism.png
ADDED
|
Git LFS Details
|
images/common_mistakes.png
ADDED
|
Git LFS Details
|
images/conclusion.png
ADDED
|
Git LFS Details
|
images/transformer_application.png
ADDED
|
Git LFS Details
|
images/transformer_architecture.png
ADDED
|
Git LFS Details
|
jenkinsfile
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pipeline {
|
| 2 |
+
agent any
|
| 3 |
+
|
| 4 |
+
environment {
|
| 5 |
+
REPO_URL = "https://github.com/VashuTheGreat/Blog-Writing-Agent.git"
|
| 6 |
+
PROJECT_NAME = "Blog-Writing-Agent"
|
| 7 |
+
SPACE_NAME="Blog-Writing-Agent"
|
| 8 |
+
HF_USERNAME="VashuTheGreat2"
|
| 9 |
+
HF_TOKEN = credentials('HF_TOKEN')
|
| 10 |
+
|
| 11 |
+
}
|
| 12 |
+
|
| 13 |
+
stages {
|
| 14 |
+
|
| 15 |
+
stage('Clone Repository') {
|
| 16 |
+
steps {
|
| 17 |
+
echo "📥 Cloning repository..."
|
| 18 |
+
git branch: 'main', url: "${REPO_URL}"
|
| 19 |
+
}
|
| 20 |
+
}
|
| 21 |
+
|
| 22 |
+
stage('Setup Dependencies') {
|
| 23 |
+
steps {
|
| 24 |
+
echo "🔧 Setting git identity and installing HF CLI..."
|
| 25 |
+
sh '''#!/bin/bash
|
| 26 |
+
set -e
|
| 27 |
+
git config --global user.name "jenkins"
|
| 28 |
+
git config --global user.email "jenkins@local"
|
| 29 |
+
export PATH=$HOME/.local/bin:$PATH
|
| 30 |
+
if ! command -v hf &> /dev/null; then
|
| 31 |
+
pip3 install --user -U huggingface_hub
|
| 32 |
+
fi
|
| 33 |
+
'''
|
| 34 |
+
}
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
stage('Authenticate Hugging Face') {
|
| 38 |
+
steps {
|
| 39 |
+
echo "🔐 Logging into Hugging Face..."
|
| 40 |
+
sh '''#!/bin/bash
|
| 41 |
+
set -e
|
| 42 |
+
export PATH=$HOME/.local/bin:$PATH
|
| 43 |
+
hf auth login --token "$HF_TOKEN"
|
| 44 |
+
'''
|
| 45 |
+
}
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
stage('Configure Space Meta') {
|
| 49 |
+
steps {
|
| 50 |
+
echo "📝 Injecting HF Spaces configuration into README.md..."
|
| 51 |
+
sh '''#!/bin/bash
|
| 52 |
+
set -e
|
| 53 |
+
TEMP_README=$(mktemp)
|
| 54 |
+
cat << EOF > "$TEMP_README"
|
| 55 |
+
---
|
| 56 |
+
title: $PROJECT_NAME
|
| 57 |
+
emoji: 🎓
|
| 58 |
+
colorFrom: blue
|
| 59 |
+
colorTo: green
|
| 60 |
+
sdk: docker
|
| 61 |
+
app_file: main.py
|
| 62 |
+
pinned: false
|
| 63 |
+
short_description: This is the Agentic Blog Writing Agent
|
| 64 |
+
---
|
| 65 |
+
|
| 66 |
+
EOF
|
| 67 |
+
cat README.md >> "$TEMP_README"
|
| 68 |
+
mv "$TEMP_README" README.md
|
| 69 |
+
'''
|
| 70 |
+
}
|
| 71 |
+
}
|
| 72 |
+
|
| 73 |
+
stage('Create App Space') {
|
| 74 |
+
steps {
|
| 75 |
+
echo "🚀 Creating HF Space if it doesn't exist..."
|
| 76 |
+
sh '''#!/bin/bash
|
| 77 |
+
set -e
|
| 78 |
+
export PATH=$HOME/.local/bin:$PATH
|
| 79 |
+
hf repos create "$HF_USERNAME/$SPACE_NAME" --type space --space-sdk docker || true
|
| 80 |
+
'''
|
| 81 |
+
}
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
stage('Upload to HF Space') {
|
| 85 |
+
steps {
|
| 86 |
+
echo "📤 Uploading project files..."
|
| 87 |
+
sh '''#!/bin/bash
|
| 88 |
+
set -e
|
| 89 |
+
export PATH=$HOME/.local/bin:$PATH
|
| 90 |
+
hf upload "$HF_USERNAME/$SPACE_NAME" . --repo-type=space
|
| 91 |
+
'''
|
| 92 |
+
}
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
post {
|
| 98 |
+
success {
|
| 99 |
+
echo "✅ Pipeline executed successfully!"
|
| 100 |
+
}
|
| 101 |
+
failure {
|
| 102 |
+
echo "❌ Pipeline failed!"
|
| 103 |
+
}
|
| 104 |
+
}
|
| 105 |
+
}
|
main.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dotenv import load_dotenv

# Load variables from .env into the process environment first, so they are
# already set when the project imports below execute.
load_dotenv()

# NOTE(review): star import is presumably kept for the logger package's
# import-time setup — confirm against src/logger before narrowing it.
from src.logger import *
from api.app import app


if __name__ == "__main__":
    import uvicorn

    # The app is passed as the import string "main:app" rather than the object
    # itself; uvicorn requires an import string when reload is enabled.
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=7860,
        reload=True,
    )
|
metadata.yaml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
author: Vansh
|
| 2 |
+
model_type: scikit-learn_0.23
|
| 3 |
+
project_name: blogging_agent
|
| 4 |
+
project_version: v0.1
|
models/README.md
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Models
|
| 2 |
+
|
| 3 |
+
This is where the trained models go. DVC should be used to version these models.
|
notebooks/README.md
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Notebooks
|
| 2 |
+
|
| 3 |
+
This is where the notebooks used for research and development go.
|
notebooks/agent.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
notebooks/image_placeHolder.ipynb
ADDED
|
@@ -0,0 +1,376 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 6,
|
| 6 |
+
"id": "0b9ffe5f",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [],
|
| 9 |
+
"source": [
|
| 10 |
+
"from pydantic import BaseModel,Field\n",
|
| 11 |
+
"from typing import Literal,List\n"
|
| 12 |
+
]
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"execution_count": 4,
|
| 17 |
+
"id": "cd7bb64d",
|
| 18 |
+
"metadata": {},
|
| 19 |
+
"outputs": [
|
| 20 |
+
{
|
| 21 |
+
"data": {
|
| 22 |
+
"text/plain": [
|
| 23 |
+
"True"
|
| 24 |
+
]
|
| 25 |
+
},
|
| 26 |
+
"execution_count": 4,
|
| 27 |
+
"metadata": {},
|
| 28 |
+
"output_type": "execute_result"
|
| 29 |
+
}
|
| 30 |
+
],
|
| 31 |
+
"source": [
|
| 32 |
+
"from dotenv import load_dotenv\n",
|
| 33 |
+
"load_dotenv()"
|
| 34 |
+
]
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"cell_type": "code",
|
| 38 |
+
"execution_count": 7,
|
| 39 |
+
"id": "dd8207ef",
|
| 40 |
+
"metadata": {},
|
| 41 |
+
"outputs": [],
|
| 42 |
+
"source": [
|
| 43 |
+
"class ImageSpec(BaseModel):\n",
|
| 44 |
+
" placeholder:str=Field(...,description=\"e.g. [[IMAGE_1]]\")\n",
|
| 45 |
+
" filename:str=Field(...,description=\"Save under images/, e.g. qkv_flow.png\")\n",
|
| 46 |
+
" prompt:str=Field(...,description=\"Prompt to send to the image model\")\n",
|
| 47 |
+
" size:Literal[\"1024x1024\",\"1024x1536\",\"1536x1024\"]=\"1024x1024\"\n",
|
| 48 |
+
" quality: Literal[\"low\", \"medium\", \"high\"] = \"medium\"\n",
|
| 49 |
+
"\n",
|
| 50 |
+
"\n",
|
| 51 |
+
"class GlobalImagePlan(BaseModel):\n",
|
| 52 |
+
" md_with_placeholders:str\n",
|
| 53 |
+
" images:List[ImageSpec]=Field(default_factory=list)"
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"cell_type": "code",
|
| 58 |
+
"execution_count": 8,
|
| 59 |
+
"id": "63f25031",
|
| 60 |
+
"metadata": {},
|
| 61 |
+
"outputs": [],
|
| 62 |
+
"source": [
|
| 63 |
+
"from langchain_aws import ChatBedrockConverse\n"
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"cell_type": "code",
|
| 68 |
+
"execution_count": 9,
|
| 69 |
+
"id": "255a2613",
|
| 70 |
+
"metadata": {},
|
| 71 |
+
"outputs": [],
|
| 72 |
+
"source": [
|
| 73 |
+
"LLM_MODEL_ID = \"us.meta.llama3-3-70b-instruct-v1:0\"\n",
|
| 74 |
+
"LLM_REGION = \"us-east-1\"\n",
|
| 75 |
+
"llm = ChatBedrockConverse(\n",
|
| 76 |
+
" model_id=LLM_MODEL_ID,\n",
|
| 77 |
+
" region_name=LLM_REGION\n",
|
| 78 |
+
")"
|
| 79 |
+
]
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"cell_type": "code",
|
| 83 |
+
"execution_count": 11,
|
| 84 |
+
"id": "849c528a",
|
| 85 |
+
"metadata": {},
|
| 86 |
+
"outputs": [],
|
| 87 |
+
"source": [
|
| 88 |
+
"placehonder=\"\"\"You are an expert technical blog image planning assistant.\n",
|
| 89 |
+
"\n",
|
| 90 |
+
"Your job is to analyze a Markdown blog post and generate a structured image plan.\n",
|
| 91 |
+
"\n",
|
| 92 |
+
"You MUST return output strictly matching the Pydantic model `GlobalImagePlan`.\n",
|
| 93 |
+
"\n",
|
| 94 |
+
"-----------------------------------------\n",
|
| 95 |
+
"YOUR TASK\n",
|
| 96 |
+
"-----------------------------------------\n",
|
| 97 |
+
"\n",
|
| 98 |
+
"You will receive a Markdown blog as input.\n",
|
| 99 |
+
"\n",
|
| 100 |
+
"You must:\n",
|
| 101 |
+
"\n",
|
| 102 |
+
"1. Keep the Markdown EXACTLY the same.\n",
|
| 103 |
+
"2. DO NOT rewrite, summarize, improve, or modify any text.\n",
|
| 104 |
+
"3. DO NOT remove or change any formatting.\n",
|
| 105 |
+
"4. Only insert image placeholders where images would improve clarity.\n",
|
| 106 |
+
"\n",
|
| 107 |
+
"-----------------------------------------\n",
|
| 108 |
+
"WHERE TO INSERT IMAGES\n",
|
| 109 |
+
"-----------------------------------------\n",
|
| 110 |
+
"\n",
|
| 111 |
+
"Insert placeholders only:\n",
|
| 112 |
+
"- After major section headings (## or ###)\n",
|
| 113 |
+
"- After complex explanations\n",
|
| 114 |
+
"- After architecture descriptions\n",
|
| 115 |
+
"- After workflows\n",
|
| 116 |
+
"- After comparisons\n",
|
| 117 |
+
"- Where diagrams would help understanding\n",
|
| 118 |
+
"- Where visual examples would add clarity\n",
|
| 119 |
+
"\n",
|
| 120 |
+
"DO NOT:\n",
|
| 121 |
+
"- Add images randomly\n",
|
| 122 |
+
"- Add too many images\n",
|
| 123 |
+
"- Break code blocks\n",
|
| 124 |
+
"- Insert placeholders inside code blocks\n",
|
| 125 |
+
"- Modify existing content\n",
|
| 126 |
+
"\n",
|
| 127 |
+
"-----------------------------------------\n",
|
| 128 |
+
"PLACEHOLDER FORMAT\n",
|
| 129 |
+
"-----------------------------------------\n",
|
| 130 |
+
"\n",
|
| 131 |
+
"Use this exact format:\n",
|
| 132 |
+
"\n",
|
| 133 |
+
"[[IMAGE_1]]\n",
|
| 134 |
+
"[[IMAGE_2]]\n",
|
| 135 |
+
"[[IMAGE_3]]\n",
|
| 136 |
+
"\n",
|
| 137 |
+
"Number them sequentially.\n",
|
| 138 |
+
"\n",
|
| 139 |
+
"-----------------------------------------\n",
|
| 140 |
+
"IMAGE SPEC RULES\n",
|
| 141 |
+
"-----------------------------------------\n",
|
| 142 |
+
"\n",
|
| 143 |
+
"For each placeholder generate an ImageSpec with:\n",
|
| 144 |
+
"\n",
|
| 145 |
+
"- placeholder: exact placeholder string (e.g. [[IMAGE_1]])\n",
|
| 146 |
+
"- filename: save under images/ directory (example: images/attention_flow.png)\n",
|
| 147 |
+
"- prompt: highly detailed image generation prompt describing what the image should show\n",
|
| 148 |
+
"- size: choose one of:\n",
|
| 149 |
+
" - 1024x1024 (for square diagrams)\n",
|
| 150 |
+
" - 1536x1024 (for wide architecture diagrams)\n",
|
| 151 |
+
" - 1024x1536 (for vertical infographics)\n",
|
| 152 |
+
"- quality: \"medium\" unless diagram is complex → use \"high\"\n",
|
| 153 |
+
"\n",
|
| 154 |
+
"The prompt must:\n",
|
| 155 |
+
"- Be descriptive\n",
|
| 156 |
+
"- Mention diagram style\n",
|
| 157 |
+
"- Mention labels\n",
|
| 158 |
+
"- Mention arrows and flow\n",
|
| 159 |
+
"- Mention clean white background\n",
|
| 160 |
+
"- Mention professional technical illustration style\n",
|
| 161 |
+
"\n",
|
| 162 |
+
"-----------------------------------------\n",
|
| 163 |
+
"IMPORTANT OUTPUT RULES\n",
|
| 164 |
+
"-----------------------------------------\n",
|
| 165 |
+
"\n",
|
| 166 |
+
"You MUST return ONLY a valid GlobalImagePlan JSON object.\n",
|
| 167 |
+
"\n",
|
| 168 |
+
"Do NOT include:\n",
|
| 169 |
+
"- Explanations\n",
|
| 170 |
+
"- Extra text\n",
|
| 171 |
+
"- Markdown fences\n",
|
| 172 |
+
"- Comments\n",
|
| 173 |
+
"- Any text before or after the JSON\n",
|
| 174 |
+
"\n",
|
| 175 |
+
"-----------------------------------------\n",
|
| 176 |
+
"OUTPUT FORMAT\n",
|
| 177 |
+
"-----------------------------------------\n",
|
| 178 |
+
"\n",
|
| 179 |
+
"{\n",
|
| 180 |
+
" \"md_with_placeholders\": \"...full markdown with inserted placeholders...\",\n",
|
| 181 |
+
" \"images\": [\n",
|
| 182 |
+
" {\n",
|
| 183 |
+
" \"placeholder\": \"[[IMAGE_1]]\",\n",
|
| 184 |
+
" \"filename\": \"images/example.png\",\n",
|
| 185 |
+
" \"prompt\": \"Detailed image generation prompt...\",\n",
|
| 186 |
+
" \"size\": \"1536x1024\",\n",
|
| 187 |
+
" \"quality\": \"medium\"\n",
|
| 188 |
+
" }\n",
|
| 189 |
+
" ]\n",
|
| 190 |
+
"}\"\"\""
|
| 191 |
+
]
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"cell_type": "code",
|
| 195 |
+
"execution_count": 14,
|
| 196 |
+
"id": "332e03d8",
|
| 197 |
+
"metadata": {},
|
| 198 |
+
"outputs": [],
|
| 199 |
+
"source": [
|
| 200 |
+
"from langchain.messages import SystemMessage,HumanMessage"
|
| 201 |
+
]
|
| 202 |
+
},
|
| 203 |
+
{
|
| 204 |
+
"cell_type": "code",
|
| 205 |
+
"execution_count": 16,
|
| 206 |
+
"id": "1a7a4167",
|
| 207 |
+
"metadata": {},
|
| 208 |
+
"outputs": [],
|
| 209 |
+
"source": [
|
| 210 |
+
"markdown=\"\"\"\n",
|
| 211 |
+
"# State of Multimodal LLMs in 2026\n",
|
| 212 |
+
"\n",
|
| 213 |
+
"## Introduction to Multimodal LLMs\n",
|
| 214 |
+
"Recent developments in multimodal LLMs have shown significant progress, with models now capable of processing and generating multiple forms of data, such as text, images, and audio [Not found in provided sources]. \n",
|
| 215 |
+
"* Multimodal LLMs have been applied to various tasks, including visual question answering, image captioning, and text-to-image synthesis.\n",
|
| 216 |
+
"* The impact of multimodal LLMs can be seen in industries like healthcare, education, and entertainment, where they are used for applications such as medical image analysis, interactive learning systems, and content creation [Not found in provided sources].\n",
|
| 217 |
+
"* Despite the advancements, key challenges in multimodal LLM research remain, including the need for large-scale datasets, improved model architectures, and better evaluation metrics [Not found in provided sources].\n",
|
| 218 |
+
"\n",
|
| 219 |
+
"## Recent Advances in Multimodal LLMs\n",
|
| 220 |
+
"Recent breakthroughs in multimodal LLM architecture have led to significant improvements in the field. \n",
|
| 221 |
+
"* Multimodal transformers, which combine visual and textual features, have shown promising results in tasks such as visual question answering and image-text retrieval [Not found in provided sources].\n",
|
| 222 |
+
"* The use of multimodal attention mechanisms has also been explored, allowing models to focus on specific parts of the input data [Not found in provided sources].\n",
|
| 223 |
+
"\n",
|
| 224 |
+
"Multimodal LLMs play a crucial role in both computer vision and natural language processing. \n",
|
| 225 |
+
"They can be used to analyze and understand visual data, such as images and videos, and generate text-based descriptions or summaries.\n",
|
| 226 |
+
"In natural language processing, multimodal LLMs can be used to improve language understanding and generation tasks, such as machine translation and text summarization.\n",
|
| 227 |
+
"\n",
|
| 228 |
+
"The potential applications of multimodal LLMs in healthcare are vast. \n",
|
| 229 |
+
"They can be used to analyze medical images, such as X-rays and MRIs, and generate text-based diagnoses or recommendations.\n",
|
| 230 |
+
"Additionally, multimodal LLMs can be used to develop personalized treatment plans and improve patient outcomes [Not found in provided sources].\n",
|
| 231 |
+
"Overall, the latest advancements in multimodal LLMs have the potential to revolutionize various fields, including healthcare, and improve the way we interact with and understand visual and textual data.\n",
|
| 232 |
+
"\n",
|
| 233 |
+
"## Challenges and Limitations\n",
|
| 234 |
+
"The development of multimodal LLMs has made significant progress, but there are still several challenges and limitations that need to be addressed. \n",
|
| 235 |
+
"* The limitations of current multimodal LLM models include their inability to fully understand the nuances of human communication, such as sarcasm, idioms, and figurative language [Not found in provided sources].\n",
|
| 236 |
+
"* Training and deploying multimodal LLMs pose significant challenges, including the need for large amounts of diverse and high-quality training data, as well as the requirement for significant computational resources [Not found in provided sources].\n",
|
| 237 |
+
"* Further research is needed to improve the performance and robustness of multimodal LLMs, particularly in areas such as common sense reasoning, emotional intelligence, and adaptability to new contexts and domains [Not found in provided sources]. \n",
|
| 238 |
+
"Overall, addressing these challenges and limitations will be crucial to unlocking the full potential of multimodal LLMs and achieving more effective and engaging human-computer interactions.\n",
|
| 239 |
+
"\n",
|
| 240 |
+
"## Future Directions\n",
|
| 241 |
+
"The future of multimodal LLMs holds great promise, with potential applications in areas such as [virtual assistants](Not found in provided sources) and [human-computer interaction](Not found in provided sources). \n",
|
| 242 |
+
"* Multimodal LLMs may be used to improve accessibility and user experience in various domains.\n",
|
| 243 |
+
"* The role of multimodal LLMs in shaping the future of AI is significant, as they can enable more natural and intuitive interactions between humans and machines.\n",
|
| 244 |
+
"* Continued research in multimodal LLMs is crucial to overcome current limitations and unlock their full potential, driving innovation and progress in the field of AI [Not found in provided sources].\n",
|
| 245 |
+
"\n",
|
| 246 |
+
"\"\"\""
|
| 247 |
+
]
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"cell_type": "code",
|
| 251 |
+
"execution_count": 18,
|
| 252 |
+
"id": "796739f7",
|
| 253 |
+
"metadata": {},
|
| 254 |
+
"outputs": [
|
| 255 |
+
{
|
| 256 |
+
"data": {
|
| 257 |
+
"text/plain": [
|
| 258 |
+
"GlobalImagePlan(md_with_placeholders='# State of Multimodal LLMs in 2026\\n## Introduction to Multimodal LLMs\\nRecent developments in multimodal LLMs have shown significant progress, with models now capable of processing and generating multiple forms of data, such as text, images, and audio [Not found in provided sources]. \\n* Multimodal LLMs have been applied to various tasks, including visual question answering, image captioning, and text-to-image synthesis.\\n* The impact of multimodal LLMs can be seen in industries like healthcare, education, and entertainment, where they are used for applications such as medical image analysis, interactive learning systems, and content creation [Not found in provided sources].\\n* Despite the advancements, key challenges in multimodal LLM research remain, including the need for large-scale datasets, improved model architectures, and better evaluation metrics [Not found in provided sources].\\n[[IMAGE_1]]\\n## Recent Advances in Multimodal LLMs\\nRecent breakthroughs in multimodal LLM architecture have led to significant improvements in the field. \\n* Multimodal transformers, which combine visual and textual features, have shown promising results in tasks such as visual question answering and image-text retrieval [Not found in provided sources].\\n* The use of multimodal attention mechanisms has also been explored, allowing models to focus on specific parts of the input data [Not found in provided sources].\\n[[IMAGE_2]]\\nMultimodal LLMs play a crucial role in both computer vision and natural language processing. \\nThey can be used to analyze and understand visual data, such as images and videos, and generate text-based descriptions or summaries.\\nIn natural language processing, multimodal LLMs can be used to improve language understanding and generation tasks, such as machine translation and text summarization.\\n[[IMAGE_3]]\\nThe potential applications of multimodal LLMs in healthcare are vast. 
\\nThey can be used to analyze medical images, such as X-rays and MRIs, and generate text-based diagnoses or recommendations.\\nAdditionally, multimodal LLMs can be used to develop personalized treatment plans and improve patient outcomes [Not found in provided sources].\\nOverall, the latest advancements in multimodal LLMs have the potential to revolutionize various fields, including healthcare, and improve the way we interact with and understand visual and textual data.\\n[[IMAGE_4]]\\n## Challenges and Limitations\\nThe development of multimodal LLMs has made significant progress, but there are still several challenges and limitations that need to be addressed. \\n* The limitations of current multimodal LLM models include their inability to fully understand the nuances of human communication, such as sarcasm, idioms, and figurative language [Not found in provided sources].\\n* Training and deploying multimodal LLMs pose significant challenges, including the need for large amounts of diverse and high-quality training data, as well as the requirement for significant computational resources [Not found in provided sources].\\n* Further research is needed to improve the performance and robustness of multimodal LLMs, particularly in areas such as common sense reasoning, emotional intelligence, and adaptability to new contexts and domains [Not found in provided sources]. \\nOverall, addressing these challenges and limitations will be crucial to unlocking the full potential of multimodal LLMs and achieving more effective and engaging human-computer interactions.\\n[[IMAGE_5]]\\n## Future Directions\\nThe future of multimodal LLMs holds great promise, with potential applications in areas such as [virtual assistants](Not found in provided sources) and [human-computer interaction](Not found in provided sources). 
\\n* Multimodal LLMs may be used to improve accessibility and user experience in various domains.\\n* The role of multimodal LLMs in shaping the future of AI is significant, as they can enable more natural and intuitive interactions between humans and machines.\\n* Continued research in multimodal LLMs is crucial to overcome current limitations and unlock their full potential, driving innovation and progress in the field of AI [Not found in provided sources].\\n[[IMAGE_6]]', images=[ImageSpec(placeholder='[[IMAGE_1]]', filename='images/multimodal_llm_architecture.png', prompt='A diagram showing the architecture of a multimodal LLM, with visual and textual features combined, and labels and arrows indicating the flow of data, on a clean white background, in a professional technical illustration style', size='1536x1024', quality='medium'), ImageSpec(placeholder='[[IMAGE_2]]', filename='images/multimodal_transformers.png', prompt='An illustration of multimodal transformers, with visual and textual features combined, and labels and arrows indicating the flow of data, on a clean white background, in a professional technical illustration style', size='1024x1024', quality='medium'), ImageSpec(placeholder='[[IMAGE_3]]', filename='images/multimodal_llm_applications.png', prompt='A diagram showing the various applications of multimodal LLMs, including computer vision and natural language processing, with labels and arrows indicating the relationships between the different applications, on a clean white background, in a professional technical illustration style', size='1024x1536', quality='medium'), ImageSpec(placeholder='[[IMAGE_4]]', filename='images/multimodal_llm_healthcare.png', prompt='An illustration of the potential applications of multimodal LLMs in healthcare, including medical image analysis and personalized treatment plans, with labels and arrows indicating the flow of data, on a clean white background, in a professional technical illustration style', 
size='1536x1024', quality='medium'), ImageSpec(placeholder='[[IMAGE_5]]', filename='images/multimodal_llm_challenges.png', prompt='A diagram showing the challenges and limitations of multimodal LLMs, including the need for large-scale datasets and improved model architectures, with labels and arrows indicating the relationships between the different challenges, on a clean white background, in a professional technical illustration style', size='1024x1024', quality='medium'), ImageSpec(placeholder='[[IMAGE_6]]', filename='images/multimodal_llm_future.png', prompt='An illustration of the future directions of multimodal LLMs, including potential applications in virtual assistants and human-computer interaction, with labels and arrows indicating the flow of data, on a clean white background, in a professional technical illustration style', size='1024x1536', quality='medium')])"
|
| 259 |
+
]
|
| 260 |
+
},
|
| 261 |
+
"execution_count": 18,
|
| 262 |
+
"metadata": {},
|
| 263 |
+
"output_type": "execute_result"
|
| 264 |
+
}
|
| 265 |
+
],
|
| 266 |
+
"source": [
|
| 267 |
+
"output=llm.with_structured_output(GlobalImagePlan)\\\n",
|
| 268 |
+
".invoke(\n",
|
| 269 |
+
" [\n",
|
| 270 |
+
" SystemMessage(content=placehonder),\n",
|
| 271 |
+
" HumanMessage(content=markdown)\n",
|
| 272 |
+
" ]\n",
|
| 273 |
+
")\n",
|
| 274 |
+
"\n",
|
| 275 |
+
"output"
|
| 276 |
+
]
|
| 277 |
+
},
|
| 278 |
+
{
|
| 279 |
+
"cell_type": "code",
|
| 280 |
+
"execution_count": 20,
|
| 281 |
+
"id": "0e44ffd5",
|
| 282 |
+
"metadata": {},
|
| 283 |
+
"outputs": [
|
| 284 |
+
{
|
| 285 |
+
"data": {
|
| 286 |
+
"text/plain": [
|
| 287 |
+
"'# State of Multimodal LLMs in 2026\\n## Introduction to Multimodal LLMs\\nRecent developments in multimodal LLMs have shown significant progress, with models now capable of processing and generating multiple forms of data, such as text, images, and audio [Not found in provided sources]. \\n* Multimodal LLMs have been applied to various tasks, including visual question answering, image captioning, and text-to-image synthesis.\\n* The impact of multimodal LLMs can be seen in industries like healthcare, education, and entertainment, where they are used for applications such as medical image analysis, interactive learning systems, and content creation [Not found in provided sources].\\n* Despite the advancements, key challenges in multimodal LLM research remain, including the need for large-scale datasets, improved model architectures, and better evaluation metrics [Not found in provided sources].\\n[[IMAGE_1]]\\n## Recent Advances in Multimodal LLMs\\nRecent breakthroughs in multimodal LLM architecture have led to significant improvements in the field. \\n* Multimodal transformers, which combine visual and textual features, have shown promising results in tasks such as visual question answering and image-text retrieval [Not found in provided sources].\\n* The use of multimodal attention mechanisms has also been explored, allowing models to focus on specific parts of the input data [Not found in provided sources].\\n[[IMAGE_2]]\\nMultimodal LLMs play a crucial role in both computer vision and natural language processing. \\nThey can be used to analyze and understand visual data, such as images and videos, and generate text-based descriptions or summaries.\\nIn natural language processing, multimodal LLMs can be used to improve language understanding and generation tasks, such as machine translation and text summarization.\\n[[IMAGE_3]]\\nThe potential applications of multimodal LLMs in healthcare are vast. 
\\nThey can be used to analyze medical images, such as X-rays and MRIs, and generate text-based diagnoses or recommendations.\\nAdditionally, multimodal LLMs can be used to develop personalized treatment plans and improve patient outcomes [Not found in provided sources].\\nOverall, the latest advancements in multimodal LLMs have the potential to revolutionize various fields, including healthcare, and improve the way we interact with and understand visual and textual data.\\n[[IMAGE_4]]\\n## Challenges and Limitations\\nThe development of multimodal LLMs has made significant progress, but there are still several challenges and limitations that need to be addressed. \\n* The limitations of current multimodal LLM models include their inability to fully understand the nuances of human communication, such as sarcasm, idioms, and figurative language [Not found in provided sources].\\n* Training and deploying multimodal LLMs pose significant challenges, including the need for large amounts of diverse and high-quality training data, as well as the requirement for significant computational resources [Not found in provided sources].\\n* Further research is needed to improve the performance and robustness of multimodal LLMs, particularly in areas such as common sense reasoning, emotional intelligence, and adaptability to new contexts and domains [Not found in provided sources]. \\nOverall, addressing these challenges and limitations will be crucial to unlocking the full potential of multimodal LLMs and achieving more effective and engaging human-computer interactions.\\n[[IMAGE_5]]\\n## Future Directions\\nThe future of multimodal LLMs holds great promise, with potential applications in areas such as [virtual assistants](Not found in provided sources) and [human-computer interaction](Not found in provided sources). 
\\n* Multimodal LLMs may be used to improve accessibility and user experience in various domains.\\n* The role of multimodal LLMs in shaping the future of AI is significant, as they can enable more natural and intuitive interactions between humans and machines.\\n* Continued research in multimodal LLMs is crucial to overcome current limitations and unlock their full potential, driving innovation and progress in the field of AI [Not found in provided sources].\\n[[IMAGE_6]]'"
|
| 288 |
+
]
|
| 289 |
+
},
|
| 290 |
+
"execution_count": 20,
|
| 291 |
+
"metadata": {},
|
| 292 |
+
"output_type": "execute_result"
|
| 293 |
+
}
|
| 294 |
+
],
|
| 295 |
+
"source": [
|
| 296 |
+
"output.md_with_placeholders"
|
| 297 |
+
]
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"cell_type": "code",
|
| 301 |
+
"execution_count": 21,
|
| 302 |
+
"id": "00892f27",
|
| 303 |
+
"metadata": {},
|
| 304 |
+
"outputs": [
|
| 305 |
+
{
|
| 306 |
+
"data": {
|
| 307 |
+
"text/plain": [
|
| 308 |
+
"[ImageSpec(placeholder='[[IMAGE_1]]', filename='images/multimodal_llm_architecture.png', prompt='A diagram showing the architecture of a multimodal LLM, with visual and textual features combined, and labels and arrows indicating the flow of data, on a clean white background, in a professional technical illustration style', size='1536x1024', quality='medium'),\n",
|
| 309 |
+
" ImageSpec(placeholder='[[IMAGE_2]]', filename='images/multimodal_transformers.png', prompt='An illustration of multimodal transformers, with visual and textual features combined, and labels and arrows indicating the flow of data, on a clean white background, in a professional technical illustration style', size='1024x1024', quality='medium'),\n",
|
| 310 |
+
" ImageSpec(placeholder='[[IMAGE_3]]', filename='images/multimodal_llm_applications.png', prompt='A diagram showing the various applications of multimodal LLMs, including computer vision and natural language processing, with labels and arrows indicating the relationships between the different applications, on a clean white background, in a professional technical illustration style', size='1024x1536', quality='medium'),\n",
|
| 311 |
+
" ImageSpec(placeholder='[[IMAGE_4]]', filename='images/multimodal_llm_healthcare.png', prompt='An illustration of the potential applications of multimodal LLMs in healthcare, including medical image analysis and personalized treatment plans, with labels and arrows indicating the flow of data, on a clean white background, in a professional technical illustration style', size='1536x1024', quality='medium'),\n",
|
| 312 |
+
" ImageSpec(placeholder='[[IMAGE_5]]', filename='images/multimodal_llm_challenges.png', prompt='A diagram showing the challenges and limitations of multimodal LLMs, including the need for large-scale datasets and improved model architectures, with labels and arrows indicating the relationships between the different challenges, on a clean white background, in a professional technical illustration style', size='1024x1024', quality='medium'),\n",
|
| 313 |
+
" ImageSpec(placeholder='[[IMAGE_6]]', filename='images/multimodal_llm_future.png', prompt='An illustration of the future directions of multimodal LLMs, including potential applications in virtual assistants and human-computer interaction, with labels and arrows indicating the flow of data, on a clean white background, in a professional technical illustration style', size='1024x1536', quality='medium')]"
|
| 314 |
+
]
|
| 315 |
+
},
|
| 316 |
+
"execution_count": 21,
|
| 317 |
+
"metadata": {},
|
| 318 |
+
"output_type": "execute_result"
|
| 319 |
+
}
|
| 320 |
+
],
|
| 321 |
+
"source": [
|
| 322 |
+
"output.images"
|
| 323 |
+
]
|
| 324 |
+
},
|
| 325 |
+
{
|
| 326 |
+
"cell_type": "code",
|
| 327 |
+
"execution_count": 23,
|
| 328 |
+
"id": "0b4e77e2",
|
| 329 |
+
"metadata": {},
|
| 330 |
+
"outputs": [
|
| 331 |
+
{
|
| 332 |
+
"data": {
|
| 333 |
+
"text/plain": [
|
| 334 |
+
"'A diagram showing the architecture of a multimodal LLM, with visual and textual features combined, and labels and arrows indicating the flow of data, on a clean white background, in a professional technical illustration style'"
|
| 335 |
+
]
|
| 336 |
+
},
|
| 337 |
+
"execution_count": 23,
|
| 338 |
+
"metadata": {},
|
| 339 |
+
"output_type": "execute_result"
|
| 340 |
+
}
|
| 341 |
+
],
|
| 342 |
+
"source": [
|
| 343 |
+
"output.images[0].prompt"
|
| 344 |
+
]
|
| 345 |
+
},
|
| 346 |
+
{
|
| 347 |
+
"cell_type": "code",
|
| 348 |
+
"execution_count": null,
|
| 349 |
+
"id": "8666fa58",
|
| 350 |
+
"metadata": {},
|
| 351 |
+
"outputs": [],
|
| 352 |
+
"source": []
|
| 353 |
+
}
|
| 354 |
+
],
|
| 355 |
+
"metadata": {
|
| 356 |
+
"kernelspec": {
|
| 357 |
+
"display_name": "bloggig-Agent (3.12.12)",
|
| 358 |
+
"language": "python",
|
| 359 |
+
"name": "python3"
|
| 360 |
+
},
|
| 361 |
+
"language_info": {
|
| 362 |
+
"codemirror_mode": {
|
| 363 |
+
"name": "ipython",
|
| 364 |
+
"version": 3
|
| 365 |
+
},
|
| 366 |
+
"file_extension": ".py",
|
| 367 |
+
"mimetype": "text/x-python",
|
| 368 |
+
"name": "python",
|
| 369 |
+
"nbconvert_exporter": "python",
|
| 370 |
+
"pygments_lexer": "ipython3",
|
| 371 |
+
"version": "3.12.12"
|
| 372 |
+
}
|
| 373 |
+
},
|
| 374 |
+
"nbformat": 4,
|
| 375 |
+
"nbformat_minor": 5
|
| 376 |
+
}
|
notebooks/understanding_self_attention.md
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Understanding Self Attention
|
| 2 |
+
|
| 3 |
+
### Introduction to Self Attention
|
| 4 |
+
Self-attention, also known as intra-attention, is a mechanism used in deep learning models to allow the model to attend to different parts of the input data and weigh their importance. It's a key component of the Transformer architecture, introduced in 2017, which revolutionized the field of natural language processing (NLP). Self-attention enables the model to capture long-range dependencies and contextual relationships in the input data, making it particularly useful for sequence-to-sequence tasks such as machine translation, text summarization, and chatbots. The importance of self-attention lies in its ability to handle variable-length input sequences, parallelize computation, and improve model performance by focusing on the most relevant parts of the input data. In this blog, we'll delve deeper into the concept of self-attention, its types, and its applications in deep learning.
|
| 5 |
+
|
| 6 |
+
### How Self Attention Works
|
| 7 |
+
Self-attention is a mechanism that allows a model to attend to different parts of the input sequence simultaneously and weigh their importance. It's a key component of the Transformer architecture, introduced in the paper "Attention is All You Need" by Vaswani et al.
|
| 8 |
+
|
| 9 |
+
The self-attention mechanism takes in a set of input vectors, typically the output of an encoder or a previous layer, and computes a weighted sum of these vectors based on their similarity. The weights are learned during training and reflect the relative importance of each input vector.
|
| 10 |
+
|
| 11 |
+
The mathematical formulation of self-attention can be broken down into three main steps:
|
| 12 |
+
|
| 13 |
+
1. **Query, Key, and Value Vectors**: The input vectors are first transformed into three different vectors: Query (Q), Key (K), and Value (V). These vectors are obtained by applying linear transformations to the input vectors.
|
| 14 |
+
2. **Attention Scores**: The attention scores are computed by taking the dot product of the Query and Key vectors and applying a scaling factor. The attention scores represent the similarity between the input vectors.
|
| 15 |
+
3. **Weighted Sum**: The attention scores are then used to compute a weighted sum of the Value vectors. The weighted sum is the final output of the self-attention mechanism.
|
| 16 |
+
|
| 17 |
+
The self-attention mechanism can be formulated mathematically as follows:
|
| 18 |
+
|
| 19 |
+
`Attention(Q, K, V) = softmax(Q * K^T / sqrt(d)) * V`
|
| 20 |
+
|
| 21 |
+
where `Q`, `K`, and `V` are the Query, Key, and Value matrices, respectively, `d` is the dimensionality of the key vectors (written $d_k$ in the formula later in this post), and `softmax` is the softmax function applied over each row of scores.
|
| 22 |
+
|
| 23 |
+
The self-attention mechanism has several benefits, including:
|
| 24 |
+
|
| 25 |
+
* **Parallelization**: Self-attention can be parallelized more easily than recurrent neural networks (RNNs), making it more efficient for long-range dependencies.
|
| 26 |
+
* **Flexibility**: Self-attention can handle variable-length input sequences and can be used for both encoding and decoding tasks.
|
| 27 |
+
* **Interpretability**: The attention scores can provide insights into which parts of the input sequence are most relevant for a particular task.
|
| 28 |
+
|
| 29 |
+
### Types of Self Attention
|
| 30 |
+
There are several types of self-attention mechanisms that have been proposed in the literature, each with its own strengths and weaknesses. The two main categories of self-attention are local self-attention and global self-attention.
|
| 31 |
+
|
| 32 |
+
#### Local Self Attention
|
| 33 |
+
Local self-attention, also known as local attention or window-based attention, focuses on a fixed-size window of the input sequence. This type of attention is useful when the relationships between nearby elements in the sequence are more important than the relationships between distant elements. Local self-attention is often used in tasks such as language modeling and machine translation.
|
| 34 |
+
|
| 35 |
+
#### Global Self Attention
|
| 36 |
+
Global self-attention, on the other hand, considers the entire input sequence when computing the attention weights. This type of attention is useful when the relationships between all elements in the sequence are important, regardless of their distance. Global self-attention is often used in tasks such as question answering and text classification.
|
| 37 |
+
|
| 38 |
+
#### Other Types of Self Attention
|
| 39 |
+
In addition to local and global self-attention, there are other variants of self-attention that have been proposed, including:
|
| 40 |
+
* **Hierarchical self-attention**: This type of attention uses a hierarchical representation of the input sequence, where the attention weights are computed at multiple levels of granularity.
|
| 41 |
+
* **Graph-based self-attention**: This type of attention is used for graph-structured data, where the attention weights are computed based on the graph structure.
|
| 42 |
+
* **Multi-head self-attention**: This type of attention uses multiple attention heads to capture different types of relationships between the elements in the input sequence.
|
| 43 |
+
|
| 44 |
+
### Applications of Self Attention
|
| 45 |
+
Self-attention has numerous applications across various fields, including natural language processing, computer vision, and more. Some of the key applications of self-attention are:
|
| 46 |
+
* **Natural Language Processing (NLP)**: Self-attention is widely used in NLP tasks such as language translation, question answering, and text summarization. It helps in understanding the context and relationships between different words in a sentence.
|
| 47 |
+
* **Computer Vision**: Self-attention is used in computer vision tasks such as image classification, object detection, and image generation. It helps in understanding the relationships between different parts of an image.
|
| 48 |
+
* **Speech Recognition**: Self-attention is used in speech recognition tasks to improve the accuracy of speech-to-text models.
|
| 49 |
+
* **Recommendation Systems**: Self-attention is used in recommendation systems to understand the relationships between different items and recommend relevant items to users.
|
| 50 |
+
* **Time Series Forecasting**: Self-attention is used in time series forecasting to understand the relationships between different time steps and predict future values.
|
| 51 |
+
The use of self-attention has led to state-of-the-art results in many of these applications, and its potential continues to be explored in other fields.
|
| 52 |
+
|
| 53 |
+
### Implementing Self Attention
|
| 54 |
+
Implementing self-attention in a deep learning model involves several key steps. Here's a step-by-step guide to help you get started:
|
| 55 |
+
#### Step 1: Define the Self-Attention Mechanism
|
| 56 |
+
The self-attention mechanism is based on the Query-Key-Value (QKV) framework. You need to define the QKV matrices and calculate the attention weights using the following formula:
|
| 57 |
+
$$Attention(Q, K, V) = softmax(\frac{Q \cdot K^T}{\sqrt{d_k}}) \cdot V$$
|
| 58 |
+
where $d_k$ is the dimensionality of the key vector.
|
| 59 |
+
|
| 60 |
+
#### Step 2: Choose the Attention Type
|
| 61 |
+
There are two main types of self-attention: scaled dot-product attention and multi-head attention. Scaled dot-product attention is a basic form of self-attention, while multi-head attention allows the model to jointly attend to information from different representation subspaces.
|
| 62 |
+
|
| 63 |
+
#### Step 3: Implement the Self-Attention Layer
|
| 64 |
+
You can implement the self-attention layer using popular deep learning frameworks such as PyTorch or TensorFlow. The self-attention layer takes in the input sequence and outputs a weighted sum of the input elements.
|
| 65 |
+
|
| 66 |
+
#### Step 4: Integrate the Self-Attention Layer into the Model
|
| 67 |
+
Once you have implemented the self-attention layer, you can integrate it into your deep learning model. This typically involves adding the self-attention layer to the model architecture and adjusting the model's parameters accordingly.
|
| 68 |
+
|
| 69 |
+
#### Step 5: Train the Model
|
| 70 |
+
After integrating the self-attention layer, you need to train the model using a suitable optimizer and loss function. The self-attention mechanism can be trained end-to-end with the rest of the model.
|
| 71 |
+
|
| 72 |
+
#### Example Code
|
| 73 |
+
Here's an example code snippet in PyTorch that demonstrates how to implement a basic self-attention layer:
|
| 74 |
+
```python
|
| 75 |
+
import torch
|
| 76 |
+
import torch.nn as nn
|
| 77 |
+
import torch.nn.functional as F
|
| 78 |
+
|
| 79 |
+
class SelfAttention(nn.Module):
    """Single-head scaled dot-product self-attention followed by dropout.

    Args:
        embed_dim: Size of the last dimension of the input; Q, K and V are
            all projected to this same size.
        num_heads: Stored on the module but not used by this simplified,
            single-head example.
    """

    def __init__(self, embed_dim, num_heads):
        super(SelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.query_linear = nn.Linear(embed_dim, embed_dim)
        self.key_linear = nn.Linear(embed_dim, embed_dim)
        self.value_linear = nn.Linear(embed_dim, embed_dim)
        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        """Apply self-attention to ``x``.

        Args:
            x: Tensor whose last dimension is ``embed_dim`` and which has at
                least two dimensions, e.g. ``(seq_len, embed_dim)`` or
                ``(batch, seq_len, embed_dim)``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        # Calculate Q, K, V
        Q = self.query_linear(x)
        K = self.key_linear(x)
        V = self.value_linear(x)

        # Calculate attention weights.
        # transpose(-2, -1) swaps only the last two axes, so batched inputs
        # work; `K.T` would reverse every axis of a 3-D tensor.
        # `** 0.5` avoids depending on the `math` module, which the snippet
        # never imports.
        scores = torch.matmul(Q, K.transpose(-2, -1)) / (self.embed_dim ** 0.5)
        attention_weights = F.softmax(scores, dim=-1)

        # Calculate output
        output = torch.matmul(attention_weights, V)
        output = self.dropout(output)
        return output
|
| 103 |
+
```
|
| 104 |
+
Note that this is a simplified example, and you may need to modify the code to suit your specific use case.
|
| 105 |
+
|
| 106 |
+
### Advantages and Limitations of Self Attention
|
| 107 |
+
The self-attention mechanism has several advantages that make it a powerful tool in deep learning models. Some of the key benefits include:
|
| 108 |
+
* **Parallelization**: Self-attention allows for parallelization of sequential data, making it much faster than traditional recurrent neural networks (RNNs) for long sequences.
|
| 109 |
+
* **Flexibility**: Self-attention can handle variable-length input sequences and can be used for both short-term and long-term dependencies.
|
| 110 |
+
* **Interpretability**: The attention weights provide a way to visualize and understand which parts of the input sequence are most relevant for a particular task.
|
| 111 |
+
|
| 112 |
+
However, self-attention also has some limitations:
|
| 113 |
+
* **Computational Cost**: Self-attention has a high computational cost, especially for long sequences, due to the need to compute attention weights for every pair of elements.
|
| 114 |
+
* **Memory Requirements**: Self-attention requires a significant amount of memory to store the attention weights and the input sequence.
|
| 115 |
+
* **Difficulty in Handling Local Dependencies**: Self-attention can struggle to capture local dependencies, such as those found in images or text with strong spatial relationships.
|
| 116 |
+
|
| 117 |
+
Despite these limitations, there are several promising directions for future work on self-attention, including:
|
| 118 |
+
* **Improving Efficiency**: Researchers are exploring ways to improve the efficiency of self-attention, such as using sparse attention or hierarchical attention.
|
| 119 |
+
* **Combining with Other Mechanisms**: Self-attention can be combined with other mechanisms, such as convolutional neural networks (CNNs) or RNNs, to create more powerful models.
|
| 120 |
+
* **Applying to New Domains**: Self-attention can be applied to new domains, such as computer vision or speech recognition, to improve performance and efficiency.
|
notebooks/understanding_self_attention_in_deep_learning.md
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Understanding Self Attention in Deep Learning
|
| 2 |
+
|
| 3 |
+
## Introduction to Self Attention
|
| 4 |
+
Self attention is a fundamental concept in deep learning, enabling models to weigh the importance of different input elements relative to each other. It plays a crucial role in deep learning models, particularly in natural language processing and computer vision tasks, by allowing the model to focus on specific parts of the input data.
|
| 5 |
+
|
| 6 |
+
The traditional attention mechanisms have a limitation - they rely on a fixed-length context, which can be restrictive for sequences with varying lengths. This fixed-length context can lead to information loss or inefficient processing, especially when dealing with long sequences.
|
| 7 |
+
|
| 8 |
+
To address this, self attention mechanisms are used, which can be implemented using the following minimal code snippet:
|
| 9 |
+
```python
|
| 10 |
+
import torch
|
| 11 |
+
import torch.nn as nn
|
| 12 |
+
import torch.nn.functional as F
|
| 13 |
+
|
| 14 |
+
class SelfAttention(nn.Module):
    """Minimal single-head scaled dot-product self-attention.

    Args:
        embed_dim: Size of the last dimension of the input; the query, key
            and value projections all keep this size.
    """

    def __init__(self, embed_dim):
        super(SelfAttention, self).__init__()
        self.query_linear = nn.Linear(embed_dim, embed_dim)
        self.key_linear = nn.Linear(embed_dim, embed_dim)
        self.value_linear = nn.Linear(embed_dim, embed_dim)

    def forward(self, x):
        """Return the attention-weighted sum of value vectors for ``x``.

        ``x`` must have at least two dimensions with ``embed_dim`` last,
        e.g. ``(seq_len, embed_dim)`` or ``(batch, seq_len, embed_dim)``.
        The output has the same shape as ``x``.
        """
        query = self.query_linear(x)
        key = self.key_linear(x)
        value = self.value_linear(x)
        # Swap only the last two axes so batched inputs work (`key.T` would
        # reverse all axes), and scale with `** 0.5` because `math` is not
        # imported by this snippet.
        scale = key.size(-1) ** 0.5
        attention_scores = torch.matmul(query, key.transpose(-2, -1)) / scale
        attention_weights = F.softmax(attention_scores, dim=-1)
        output = torch.matmul(attention_weights, value)
        return output
|
| 29 |
+
```
|
| 30 |
+
This code snippet demonstrates a basic self attention implementation, highlighting its importance in deep learning models.
|
| 31 |
+
|
| 32 |
+
## Implementing Self Attention
|
| 33 |
+
To implement self attention, it's essential to understand the underlying mathematical formulation. The self attention mechanism is based on the concept of attention, which allows the model to focus on specific parts of the input data.
|
| 34 |
+
|
| 35 |
+
* The mathematical formulation of self attention involves computing the attention weights based on the query, key, and value vectors. This is typically done using the following equation: `Attention(Q, K, V) = softmax(Q * K^T / sqrt(d)) * V`, where `Q`, `K`, and `V` are the query, key, and value matrices, respectively, and `d` is the dimensionality of the key vectors.
|
| 36 |
+
|
| 37 |
+
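The query-key-value attention mechanism is a core component of self attention. In this mechanism, the query vector represents what the current position is looking for, the key vectors represent what each input element offers for matching against the query, and the value vectors carry the content that is actually aggregated. The importance of each input element is given by the attention weights, i.e. the softmax of the scaled query-key dot products.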
|
| 38 |
+
|
| 39 |
+
```python
|
| 40 |
+
import torch
|
| 41 |
+
import torch.nn as nn
|
| 42 |
+
import torch.nn.functional as F
|
| 43 |
+
|
| 44 |
+
class SelfAttention(nn.Module):
    """Single-head scaled dot-product self-attention in PyTorch.

    Args:
        embed_dim: Size of the last dimension of the input; Q, K and V are
            projected to this same size.
    """

    def __init__(self, embed_dim):
        super(SelfAttention, self).__init__()
        self.query_linear = nn.Linear(embed_dim, embed_dim)
        self.key_linear = nn.Linear(embed_dim, embed_dim)
        self.value_linear = nn.Linear(embed_dim, embed_dim)

    def forward(self, x):
        """Compute ``softmax(Q K^T / sqrt(d)) V`` for the input ``x``.

        ``x`` must have at least two dimensions with ``embed_dim`` last;
        the result has the same shape as ``x``.
        """
        Q = self.query_linear(x)
        K = self.key_linear(x)
        V = self.value_linear(x)
        # transpose(-2, -1) keeps any leading batch axes in place (`K.T`
        # reverses every axis); `** 0.5` replaces math.sqrt because `math`
        # is never imported in this snippet.
        scores = torch.matmul(Q, K.transpose(-2, -1)) / (x.size(-1) ** 0.5)
        attention_weights = F.softmax(scores, dim=-1)
        return torch.matmul(attention_weights, V)
|
| 57 |
+
```
|
| 58 |
+
This code example demonstrates how to implement self attention in PyTorch, a popular deep learning framework. By using this implementation, developers can easily integrate self attention into their own models.
|
| 59 |
+
|
| 60 |
+
## Applications of Self Attention
|
| 61 |
+
Self attention has numerous applications in various fields.
|
| 62 |
+
In natural language processing tasks, self attention is used to weigh the importance of different words in a sentence, allowing models to capture long-range dependencies and context.
|
| 63 |
+
|
| 64 |
+
* Example in computer vision: self attention can be applied to image classification models to focus on specific regions of the image, as shown in this PyTorch code snippet:
|
| 65 |
+
```python
|
| 66 |
+
import torch
|
| 67 |
+
import torch.nn as nn
|
| 68 |
+
|
| 69 |
+
class SelfAttention(nn.Module):
    """Single-head scaled dot-product self-attention.

    Args:
        embed_dim: Size of the last dimension of the input; the query, key
            and value projections all keep this size.
    """

    def __init__(self, embed_dim):
        super(SelfAttention, self).__init__()
        self.query_linear = nn.Linear(embed_dim, embed_dim)
        self.key_linear = nn.Linear(embed_dim, embed_dim)
        self.value_linear = nn.Linear(embed_dim, embed_dim)

    def forward(self, x):
        """Return the attention-weighted combination of value vectors.

        ``x`` must have at least two dimensions with ``embed_dim`` last
        (for images, presumably a sequence of patch/region embeddings —
        confirm against the caller). The output has the same shape as ``x``.
        """
        query = self.query_linear(x)
        key = self.key_linear(x)
        value = self.value_linear(x)
        # Swap only the last two axes so batched inputs work, and scale with
        # `** 0.5` since this snippet does not import `math`.
        scores = torch.matmul(query, key.transpose(-2, -1)) / (x.size(-1) ** 0.5)
        # Normalise the scores into weights that sum to 1 per query; without
        # the softmax the raw scores would be used as unnormalised weights.
        # torch.softmax is used because this snippet does not import `F`.
        attention_weights = torch.softmax(scores, dim=-1)
        output = torch.matmul(attention_weights, value)
        return output
|
| 83 |
+
```
|
| 84 |
+
Self attention can also be used in recommender systems to model user-item interactions, allowing for more accurate personalized recommendations by considering the relationships between different items.
|
| 85 |
+
|
| 86 |
+
## Common Mistakes in Self Attention
|
| 87 |
+
When working with self attention models, several common pitfalls can hinder performance and lead to suboptimal results.
|
| 88 |
+
|
| 89 |
+
* Overfitting is a significant problem in self attention models, where the model becomes too specialized to the training data and fails to generalize well to new, unseen data. This can be mitigated by using techniques such as dropout and early stopping, which help to prevent the model from becoming too complex.
|
| 90 |
+
|
| 91 |
+
Proper initialization and regularization are also crucial when using self attention. Poorly scaled weight initialization can lead to slow convergence or unstable training, while regularization techniques like L1 and L2 regularization can help to prevent overfitting by adding a penalty term to the loss function.
|
| 92 |
+
|
| 93 |
+
To debug self attention models, follow these steps:
|
| 94 |
+
* Check the input data for any inconsistencies or missing values
|
| 95 |
+
* Verify that the model is correctly implemented, with attention weights being properly computed and applied
|
| 96 |
+
* Monitor the model's performance on a validation set during training, and adjust hyperparameters as needed to prevent overfitting.
|
| 97 |
+
By being aware of these common mistakes and taking steps to avoid them, developers can build more effective and reliable self attention models.
|
| 98 |
+
|
| 99 |
+
## Best Practices for Self Attention
|
| 100 |
+
To ensure effective use of self attention in your projects, follow this checklist for production readiness:
|
| 101 |
+
* Validate input data quality
|
| 102 |
+
* Test model performance on diverse datasets
|
| 103 |
+
* Monitor training time and memory usage
|
| 104 |
+
Monitoring performance metrics, such as accuracy and loss, is crucial for identifying potential issues.
|
| 105 |
+
For further learning and improvement, refer to the Transformer library documentation and research papers on self attention mechanisms.
|
pyproject.toml
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "bloggig-agent"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Add your description here"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.12"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"fastapi>=0.135.1",
|
| 9 |
+
"google-genai>=1.64.0",
|
| 10 |
+
"huggingface-hub>=1.4.1",
|
| 11 |
+
"langchain>=1.2.10",
|
| 12 |
+
"langchain-aws>=1.2.5",
|
| 13 |
+
"langchain-community>=0.4.1",
|
| 14 |
+
"langchain-core>=1.2.13",
|
| 15 |
+
"langchain-groq>=1.1.2",
|
| 16 |
+
"langchain-tavily>=0.2.17",
|
| 17 |
+
"langgraph>=1.0.8",
|
| 18 |
+
"pillow>=12.1.1",
|
| 19 |
+
"pydantic>=2.12.5",
|
| 20 |
+
"pytest>=9.0.2",
|
| 21 |
+
"streamlit>=1.54.0",
|
| 22 |
+
"python-dotenv>=1.0.1",
|
| 23 |
+
"from-root>=1.1.0",
|
| 24 |
+
"uvicorn>=0.41.0",
|
| 25 |
+
]
|
references/image.png
ADDED
|
Git LFS Details
|
requirements.txt
ADDED
|
Binary file (4.53 kB). View file
|
|
|
results/Attention is All You Need Paper Explained.md
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Attention is All You Need Paper Explained
|
| 2 |
+
## Introduction to Attention is All You Need
|
| 3 |
+
The concept of attention in deep learning refers to the ability of a model to focus on specific parts of the input data that are relevant for the task at hand.
|
| 4 |
+
* **What attention does**: Attention allows models to selectively concentrate on certain inputs or features, improving performance and efficiency.
|
| 5 |
+
* **Limitations of traditional sequence-to-sequence models**: Traditional sequence-to-sequence models rely on recurrent neural networks (RNNs) or long short-term memory (LSTM) networks, which can be limited by their sequential processing and fixed-length context.
|
| 6 |
+
* **Key contributions of the paper**: The Attention is All You Need paper introduced a novel architecture that relies entirely on self-attention mechanisms, eliminating the need for RNNs and LSTMs, and achieving state-of-the-art results in machine translation tasks.
|
| 7 |
+

|
| 8 |
+
## The Transformer Model Architecture
|
| 9 |
+
The Transformer model, introduced in the "Attention is All You Need" paper, revolutionized the field of natural language processing. At its core, the Transformer model consists of an encoder-decoder structure.
|
| 10 |
+
* The encoder takes in a sequence of tokens, such as words or characters, and generates a continuous representation of the input sequence.
|
| 11 |
+
* The decoder then uses this representation to generate the output sequence, one token at a time.
|
| 12 |
+
|
| 13 |
+
Self-attention mechanisms play a crucial role in the Transformer model, allowing it to weigh the importance of different tokens in the input sequence relative to each other. This is particularly useful for tasks such as machine translation, where the context of a word can greatly affect its translation.
|
| 14 |
+
|
| 15 |
+
The Transformer model also relies on positional encoding to preserve the order of the input sequence. Since the self-attention mechanism is permutation-equivariant (reordering the input tokens simply reorders the outputs in the same way), the model would not be able to distinguish between different token orders without some form of positional information.
|
| 16 |
+
Positional encoding adds a fixed vector to each token's representation, based on its position in the sequence, allowing the model to capture sequential relationships between tokens.
|
| 17 |
+
This combination of self-attention and positional encoding enables the Transformer model to effectively process sequential data, making it a powerful tool for a wide range of NLP tasks.
|
| 18 |
+

|
| 19 |
+
## Applying the Transformer Model to Real-World Examples
|
| 20 |
+
The Transformer model, introduced in the "Attention is All You Need" paper, has been widely adopted in various NLP tasks. To apply this model to real-world examples, it's essential to understand its implementation and applications.
|
| 21 |
+
* A minimal code sketch of a Transformer model implementation can be represented as follows:
|
| 22 |
+
```python
|
| 23 |
+
import torch
|
| 24 |
+
import torch.nn as nn
|
| 25 |
+
import torch.optim as optim
|
| 26 |
+
|
| 27 |
+
class TransformerModel(nn.Module):
    """One Transformer encoder layer feeding one Transformer decoder layer.

    Both layers are built with ``d_model=512`` and ``nhead=8``.
    """

    def __init__(self):
        super().__init__()
        # Encoder is created first, then decoder.
        self.encoder = nn.TransformerEncoderLayer(d_model=512, nhead=8)
        self.decoder = nn.TransformerDecoderLayer(d_model=512, nhead=8)

    def forward(self, src, tgt):
        """Encode ``src`` and decode ``tgt`` against the encoded result.

        Args:
            src: Source tensor passed to the encoder layer.
            tgt: Target tensor passed to the decoder layer.

        Returns:
            The decoder layer's output.
        """
        memory = self.encoder(src)
        return self.decoder(tgt, memory)
|
| 37 |
+
```
|
| 38 |
+
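* The Transformer model has been highly effective in machine translation tasks: because it has no recurrence, training can be parallelized across all positions in a sequence (decoding at inference time still generates one token at a time), and overall translation quality is improved.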
|
| 39 |
+
* The Transformer model can also be applied to other NLP tasks, such as text classification, sentiment analysis, and question answering, by modifying the model architecture and training objectives to suit the specific task requirements.
|
| 40 |
+

|
| 41 |
+
## Common Mistakes and Challenges
|
| 42 |
+
When implementing the Transformer model, several common pitfalls can hinder its performance.
|
| 43 |
+
* Proper hyperparameter tuning is crucial, as it directly affects the model's ability to learn and generalize.
|
| 44 |
+
* Training large Transformer models can be challenging due to their complexity and computational requirements, often leading to issues like overfitting or slow training times.
|
| 45 |
+
* Careful evaluation metrics are necessary to accurately assess the model's performance, as misleading metrics can lead to suboptimal results or incorrect conclusions about the model's effectiveness.
|
| 46 |
+
By being aware of these potential issues, developers can take steps to mitigate them and ensure successful implementation of the Transformer model.
|
| 47 |
+

|
| 48 |
+
## Conclusion
|
| 49 |
+
The Attention is All You Need paper made significant contributions to the field of NLP, introducing a novel architecture that relies entirely on self-attention mechanisms.
|
| 50 |
+
* The main contributions of the paper include the proposal of a transformer model that replaces traditional recurrent neural network (RNN) and convolutional neural network (CNN) architectures.
|
| 51 |
+
* The paper's impact on NLP has been substantial, enabling state-of-the-art results in various tasks such as machine translation and text generation.
|
| 52 |
+
* Future directions for research and application include exploring the use of attention mechanisms in other areas of NLP, such as question answering and text summarization, and applying the transformer model to other domains like computer vision.
|
| 53 |
+

|
src/components/image_generation.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from huggingface_hub import InferenceClient
|
| 3 |
+
|
| 4 |
+
class ImageGeneration:
    """Thin wrapper around a Hugging Face ``InferenceClient`` for text-to-image calls."""

    def __init__(self):
        """Create the inference client.

        Raises:
            KeyError: If the ``HF_TOKEN`` environment variable is not set.
        """
        self.client = InferenceClient(
            provider="nscale",
            api_key=os.environ["HF_TOKEN"],
        )

    async def generateImage(self, prompt: str):
        """Generate an image for *prompt* without blocking the event loop.

        ``self.client.text_to_image`` is a plain (non-awaitable) call, so it is
        run in a worker thread; calling it directly inside this coroutine would
        stall every other task on the loop for the duration of the request.

        Args:
            prompt: Text description of the image to generate.

        Returns:
            Whatever ``text_to_image`` returns (callers in this project call
            ``.save(path)`` on it).
        """
        # Local import: asyncio is not part of this module's import block.
        import asyncio

        return await asyncio.to_thread(
            self.client.text_to_image,
            prompt,
            model="stabilityai/stable-diffusion-xl-base-1.0",
        )
|
src/components/taivily_search.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import sys
|
| 3 |
+
from typing import List
|
| 4 |
+
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 5 |
+
from src.exception import MyException
|
| 6 |
+
from src.utils.asyncHandler import asyncHandler
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class Taivily_search:
    """Runs Tavily web searches and reshapes the hits into a uniform dict layout."""

    def __init__(self):
        """No state is kept; a search tool is created per call."""

    @asyncHandler
    async def _tavily_search(self, query: str, max_results: int = 5) -> List[dict]:
        """Search Tavily for *query* and return normalized result dicts.

        Args:
            query: The search string sent to Tavily.
            max_results: Upper bound passed to ``TavilySearchResults``.

        Returns:
            One dict per hit with the keys ``title``, ``url``, ``snippet``,
            ``published_at`` and ``source``. Missing text fields become ``""``;
            ``published_at`` and ``source`` may be ``None``.

        Raises:
            MyException: Wrapping any error raised while searching or normalizing.
        """
        logging.info(f"Using Tavily to search for: {query}")
        try:
            search_tool = TavilySearchResults(max_results=max_results)
            hits = await search_tool.ainvoke({"query": query})
            if not hits:
                # A falsy response (None / empty) is treated as "no results".
                hits = []

            normalized: List[dict] = [
                {
                    "title": hit.get("title") or "",
                    "url": hit.get("url") or "",
                    "snippet": hit.get("content") or hit.get("snippet") or "",
                    "published_at": hit.get("published_date") or hit.get("published_at"),
                    "source": hit.get("source"),
                }
                for hit in hits
            ]
            logging.debug(f"Tavily search returned {len(normalized)} results")
            return normalized
        except Exception as e:
            logging.error(f"Error in Tavily_search: {str(e)}")
            raise MyException(e, sys)
|
src/constants/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
# Relative directory name under which generated Markdown files are saved.
FOLDER_PATH_TO_SAVE_MD="results"
|
src/exception/__init__.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import logging
|
| 3 |
+
|
| 4 |
+
def error_message_detail(error: Exception, error_detail: sys) -> str:
    """Build and log a "file / line / message" description of *error*.

    Args:
        error: The exception (or plain message) being reported; it is rendered
            with ``str()``.
        error_detail: The ``sys`` module (anything exposing ``exc_info()``).

    Returns:
        The formatted message, which is also logged at ERROR level.
    """
    _, _, exc_tb = error_detail.exc_info()

    if exc_tb is not None:
        # An exception is being handled: report the location recorded by the
        # first traceback entry.
        file_name = exc_tb.tb_frame.f_code.co_filename
        line_number = exc_tb.tb_lineno
    else:
        # No active exception (e.g. a manual raise). Report the frame two levels
        # up, which is the code that constructed MyException when this function
        # is reached through MyException.__init__.
        import inspect

        frame = inspect.currentframe()
        for _ in range(2):
            # f_back is None at the bottom of the stack; stop at the outermost
            # frame available instead of failing with AttributeError.
            if frame is None or frame.f_back is None:
                break
            frame = frame.f_back

        if frame is None:
            # Interpreter without frame support: no location can be reported.
            file_name = "<unknown>"
            line_number = 0
        else:
            file_name = frame.f_code.co_filename
            line_number = frame.f_lineno

    # Single formatted line carrying file name, line number and the error text.
    error_message = f"Error occurred in python script: [{file_name}] at line number [{line_number}]: {str(error)}"

    # Log here so the location is recorded even if the caller swallows the error.
    logging.error(error_message)

    return error_message
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class MyException(Exception):
    """Project exception whose string form includes file and line context."""

    def __init__(self, error_message: str, error_detail: sys):
        """Record the detailed, already-logged description of the failure.

        Args:
            error_message: The message or underlying exception being wrapped.
            error_detail: The ``sys`` module, used to read the active traceback.
        """
        # Keep this call directly inside __init__: the no-traceback fallback in
        # error_message_detail locates the caller by stack depth.
        detailed_message = error_message_detail(error_message, error_detail)

        # The base class keeps the raw message in ``args``.
        super().__init__(error_message)
        self.error_message = detailed_message

    def __str__(self) -> str:
        """Return the detailed message built at construction time."""
        return self.error_message
|
src/graph/Compile_graph.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import asyncio
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
from langgraph.graph import StateGraph,START,END
|
| 5 |
+
from src.models.State_model import State
|
| 6 |
+
from src.graph.nodes.router_node import router_node,route_next
|
| 7 |
+
from src.graph.nodes.reducer_node import reducer_node
|
| 8 |
+
from src.graph.nodes.search_node import research_node
|
| 9 |
+
from src.graph.nodes.orchaster_node import orchestrator_node
|
| 10 |
+
from src.graph.nodes.worker_node import worker_node
|
| 11 |
+
from src.graph.nodes.fanout_node import fanout
|
| 12 |
+
load_dotenv()
|
| 13 |
+
|
| 14 |
+
# Top-level blog-generation graph:
#   START -> router -> (research ->) orchestrator -> worker (fan-out) -> reducer -> END
g = StateGraph(State)
g.add_node("router", router_node)
g.add_node("research", research_node)
g.add_node("orchestrator", orchestrator_node)
g.add_node("worker", worker_node)
g.add_node("reducer", reducer_node)

g.add_edge(START, "router")
# route_next returns "research" or "orchestrator"; the mapping names the target nodes.
g.add_conditional_edges("router", route_next, {"research": "research", "orchestrator": "orchestrator"})
g.add_edge("research", "orchestrator")

# fanout returns the Send objects that dispatch work to the "worker" node.
g.add_conditional_edges("orchestrator", fanout, ["worker"])
g.add_edge("worker", "reducer")
g.add_edge("reducer", END)

app = g.compile()

# The diagram is a convenience artifact. Rendering runs at import time, so a
# failure here must not prevent `app` from being imported and used.
# NOTE(review): draw_mermaid_png may rely on an external renderer - confirm.
try:
    png_data = app.get_graph().draw_mermaid_png()
    with open("graph.png", "wb") as f:
        f.write(png_data)
except Exception as exc:  # best-effort: log and continue
    logging.warning(f"Could not render graph.png: {exc}")
|
| 35 |
+
async def run(topic: str):
    """Stream the compiled graph for *topic*, yielding every item it emits.

    This is an async generator: iterate it with ``async for``.

    Args:
        topic: Subject of the blog post to generate.

    Yields:
        Each item produced by ``app.astream`` for the initial state.

    Raises:
        Exception: Any error raised during graph execution is logged and re-raised.
    """
    logging.info(f"Starting blog generation for topic: {topic}")

    # Every state key starts empty; only the topic is known up front.
    initial_state = {
        "topic": topic,
        "mode": "",
        "needs_research": False,
        "queries": [],
        "evidence": [],
        "plan": None,
        "sections": [],
        "final": "",
    }

    try:
        # Keep stream_mode="values" (marked as important in the original).
        # NOTE(review): presumably so each item is the full state - confirm.
        async for snapshot in app.astream(initial_state, stream_mode="values"):
            yield snapshot
        logging.info("Blog generation completed successfully")
    except Exception as e:
        logging.error(f"Error during graph execution: {str(e)}")
        raise
|
| 70 |
+
|
| 71 |
+
if __name__ == "__main__":
    from src.logger import *

    async def _collect_final_state(topic: str):
        """Drain run() and return the last item it yielded (None if it yielded nothing)."""
        final_state = None
        async for step in run(topic):
            final_state = step
        return final_state

    # run() is an async generator, which asyncio.run() rejects ("a coroutine
    # was expected"); drive it from a real coroutine instead.
    out = asyncio.run(_collect_final_state("State of Multimodal LLMs in 2026"))
    print(out)
|
src/graph/graphs/reducer_subgraph.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from langgraph.graph import StateGraph,START,END
|
| 3 |
+
from src.models.ImageSpec_model import State
|
| 4 |
+
from src.graph.nodes.reducer_sub_node import reducer_sub_image,reducer_sub_llm,merge_images_and_md
|
| 5 |
+
# Linear image pipeline:
#   START -> reducer_sub_llm -> reducer_sub_image -> merge_images_and_md -> END
_builder = StateGraph(State)

_builder.add_node("reducer_sub_llm", reducer_sub_llm)
_builder.add_node("reducer_sub_image", reducer_sub_image)
_builder.add_node("merge_images_and_md", merge_images_and_md)

_builder.add_edge(START, "reducer_sub_llm")
_builder.add_edge("reducer_sub_llm", "reducer_sub_image")
_builder.add_edge("reducer_sub_image", "merge_images_and_md")
_builder.add_edge("merge_images_and_md", END)

# `app` is the compiled subgraph that other modules import.
app = _builder.compile()
logging.info("Reducer subgraph compiled successfully")
|
| 22 |
+
|
src/graph/nodes/fanout_node.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from src.models.State_model import State
|
| 3 |
+
from langgraph.types import Send
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def fanout(state: State):
    """Create one ``Send`` to the "worker" node per task in the plan.

    Args:
        state: Graph state; reads ``plan``, ``topic``, ``mode`` and, optionally,
            ``evidence``.

    Returns:
        A list of ``Send("worker", payload)`` objects, one per ``plan.tasks``
        entry. Each payload carries freshly dumped copies of the task, plan and
        evidence.
    """
    logging.info("Entering fanout")
    plan = state["plan"]
    tasks = plan.tasks
    logging.debug(f"Fanning out {len(tasks)} tasks")

    dispatches = []
    for task in tasks:
        # Dump per task so every worker receives its own dict objects.
        payload = {
            "task": task.model_dump(),
            "topic": state["topic"],
            "mode": state["mode"],
            "plan": plan.model_dump(),
            "evidence": [item.model_dump() for item in state.get("evidence", [])],
        }
        dispatches.append(Send("worker", payload))
    return dispatches
|
src/graph/nodes/orchaster_node.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import sys
|
| 3 |
+
from src.models.State_model import State
|
| 4 |
+
from src.llm import llm
|
| 5 |
+
from src.exception import MyException
|
| 6 |
+
from src.models.Plan_model import Plan
|
| 7 |
+
from src.prompts import ORCH_SYSTEM
|
| 8 |
+
from langchain_core.messages import SystemMessage, HumanMessage
|
| 9 |
+
from src.utils.asyncHandler import asyncHandler
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@asyncHandler
async def orchestrator_node(state: State) -> dict:
    """Ask the LLM for a structured ``Plan`` covering the requested topic.

    Args:
        state: Graph state; reads ``topic`` and, optionally, ``evidence`` and
            ``mode`` (defaulting to ``"closed_book"``).

    Returns:
        ``{"plan": plan}`` with the object returned by the structured-output call.

    Raises:
        MyException: Wrapping any error raised while building the plan.
    """
    logging.info("Entering orchestrator_node")
    try:
        planner = llm.with_structured_output(Plan)

        evidence = state.get("evidence", [])
        mode = state.get("mode", "closed_book")
        logging.debug(f"Mode: {mode}, Evidence count: {len(evidence)}")

        system_message = SystemMessage(content=ORCH_SYSTEM)

        # Only the first 16 dumped evidence items are shown to the model.
        evidence_preview = [e.model_dump() for e in evidence][:16]
        user_prompt = (
            f"Topic: {state['topic']}\n"
            f"Mode: {mode}\n\n"
            f"Evidence (ONLY use for fresh claims; may be empty):\n"
            f"{evidence_preview}"
        )
        human_message = HumanMessage(content=user_prompt)

        plan = await planner.ainvoke([system_message, human_message])

        logging.info(f"Orchestrator plan created: {plan.blog_title} with {len(plan.tasks)} tasks.")
        return {"plan": plan}
    except Exception as e:
        logging.error(f"Error in orchestrator_node: {str(e)}")
        raise MyException(e, sys)
|
src/graph/nodes/reducer_node.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import sys
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from src.models.State_model import State
|
| 5 |
+
from src.exception import MyException
|
| 6 |
+
from src.utils.asyncHandler import asyncHandler
|
| 7 |
+
from src.constants import FOLDER_PATH_TO_SAVE_MD
|
| 8 |
+
import os
|
| 9 |
+
from src.graph.graphs.reducer_subgraph import app
|
| 10 |
+
@asyncHandler
async def reducer_node(state: State) -> dict:
    """Assemble the written sections into the final blog post and save it.

    Sections are sorted by the first element of each ``state["sections"]`` pair,
    joined under a title heading, passed through the reducer subgraph, and the
    result is written as UTF-8 under ``FOLDER_PATH_TO_SAVE_MD``.

    Args:
        state: Graph state; reads ``plan`` and ``sections``.

    Returns:
        ``{"final": final_md}`` with the Markdown returned by the subgraph.

    Raises:
        MyException: Wrapping any error raised while assembling or saving.
    """
    # Local import: `re` is not part of this module's import block.
    import re

    logging.info("Entering reducer_node")
    try:
        plan = state["plan"]

        ordered_sections = [md for _, md in sorted(state["sections"], key=lambda x: x[0])]
        body = "\n\n".join(ordered_sections).strip()
        final_md = f"# {plan.blog_title}\n\n{body}\n"

        # The title is free text. Path separators and characters that are invalid
        # in file names (e.g. "/" or ":") would make the write fail or land in a
        # different directory, so replace them. Spaces are kept.
        safe_title = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", plan.blog_title).strip(" .") or "blog"
        filename = f"{safe_title}.md"
        logging.debug(f"Writing final blog to {filename}")

        logging.info("Starting image generation and merging via subgraph")
        red_f_ob = await app.ainvoke({"prompt_markdown": final_md})
        final_md = red_f_ob["final_md"]

        logging.debug(f"Final MD size after merging: {len(final_md)} characters")
        os.makedirs(FOLDER_PATH_TO_SAVE_MD, exist_ok=True)
        file_path = os.path.join(FOLDER_PATH_TO_SAVE_MD, filename)
        Path(file_path).write_text(final_md, encoding="utf-8")

        logging.info(f"Reducer node completed successfully, blog saved to {file_path}")
        return {"final": final_md}
    except Exception as e:
        logging.error(f"Error in reducer_node: {str(e)}")
        raise MyException(e, sys)
|
src/graph/nodes/reducer_sub_node.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from src.models.ImageSpec_model import State,GlobalImagePlan
|
| 3 |
+
from src.utils.asyncHandler import asyncHandler
|
| 4 |
+
from langchain.messages import SystemMessage,HumanMessage
|
| 5 |
+
from src.prompts import IMAGE_PLACEHOLDER_GENERATION
|
| 6 |
+
from src.llm import llm
|
| 7 |
+
from src.exception import MyException
|
| 8 |
+
import sys
|
| 9 |
+
import os
|
| 10 |
+
from src.components.image_generation import ImageGeneration
|
| 11 |
+
@asyncHandler
async def reducer_sub_llm(state:State)->State:
    """Ask the LLM for a ``GlobalImagePlan`` and store it in the state.

    Args:
        state: Subgraph state; reads ``prompt_markdown``.

    Returns:
        The same state object with ``output`` set to the LLM's plan.

    Raises:
        MyException: If the structured-output call returns a falsy result.
    """
    logging.info("Calling LLM for image placeholder planning")

    image_planner = llm.with_structured_output(GlobalImagePlan)
    messages = [
        SystemMessage(content=IMAGE_PLACEHOLDER_GENERATION),
        HumanMessage(content=state['prompt_markdown']),
    ]
    output = await image_planner.ainvoke(messages)

    if output:
        state['output'] = output
        logging.info("Successfully generated image placeholder plan")
        return state

    # Nothing usable came back from the model.
    logging.error("LLM failed to return a valid image placeholder plan (output is None)")
    raise MyException("Failed to generate image placeholder plan from LLM", sys)
|
| 28 |
+
|
| 29 |
+
@asyncHandler
async def reducer_sub_image(state:State)->State:
    """Generate and save one image per entry in the image plan.

    Args:
        state: Subgraph state; reads ``output`` (the image plan).

    Returns:
        The same state object, unchanged.

    Raises:
        MyException: If ``output`` is falsy.
    """
    output = state['output']
    # Validate before constructing the client, so a missing plan is reported
    # as such rather than masked by a client-construction error.
    if not output:
        raise MyException("output from reducer_sub not found", sys)

    image_generator = ImageGeneration()
    os.makedirs("images", exist_ok=True)

    logging.info(f"Starting image generation for {len(output.images)} images")
    for image_con in output.images:
        logging.debug(f"Generating image: {image_con.filename} with prompt: {image_con.prompt[:50]}...")
        image = await image_generator.generateImage(prompt=image_con.prompt)

        # The plan chooses the path; make sure its directory exists, since it
        # is not guaranteed to be "images".
        target_dir = os.path.dirname(image_con.filename)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        image.save(image_con.filename)
    logging.info("All images generated successfully")
    return state
|
| 45 |
+
|
| 46 |
+
@asyncHandler
async def merge_images_and_md(state: State) -> State:
    """Replace each image placeholder in the Markdown with an image tag.

    Args:
        state: Subgraph state; reads ``output`` (``md_with_placeholders`` and
            ``images``).

    Returns:
        The same state object with ``final_md`` set to the merged Markdown.
    """
    output = state["output"]
    md = output.md_with_placeholders

    logging.info(f"Merging {len(output.images)} images into Markdown")
    for im in output.images:
        # Alt text is the file's base name without ".png", underscores as spaces.
        alt_text = (
            im.filename.split("/")[-1]
            .replace(".png", "")
            .replace("_", " ")
        )

        # Emit a real Markdown image tag; an empty string would delete the
        # placeholder and leave alt_text unused.
        # NOTE(review): the "../" prefix assumes the Markdown is saved one level
        # below the directory the image paths are relative to - confirm.
        md_image_tag = f"![{alt_text}](../{im.filename})"
        md = md.replace(im.placeholder, md_image_tag)

    state["final_md"] = md
    logging.info("Markdown merging completed")
    return state
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
|
src/graph/nodes/router_node.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import sys
|
| 3 |
+
import json
|
| 4 |
+
import re
|
| 5 |
+
from src.models.RouterDecision_model import RouterDecision
|
| 6 |
+
from langchain_core.messages import SystemMessage, HumanMessage
|
| 7 |
+
from src.models.State_model import State
|
| 8 |
+
from src.llm import llm
|
| 9 |
+
from src.prompts import ROUTER_SYSTEM
|
| 10 |
+
from src.exception import MyException
|
| 11 |
+
from src.utils.asyncHandler import asyncHandler
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def _extract_json_value(content: str):
    """Return the JSON value embedded in *content*, or None if no candidate text exists.

    A fenced ```json block is preferred; otherwise the text between the first
    "{" and the last "}" is used.

    Raises:
        ValueError: If candidate text was found but no JSON value can be decoded
            from its start.
    """
    fenced = re.search(r'```json\s*(.*?)\s*```', content, re.DOTALL)
    if fenced:
        json_str = fenced.group(1)
    else:
        start = content.find('{')
        end = content.rfind('}')
        json_str = content[start:end + 1] if start != -1 and end != -1 else ""

    if not json_str:
        return None

    try:
        return json.loads(json_str)
    except json.JSONDecodeError:
        # Trailing text after the JSON value is the usual failure. raw_decode
        # parses the first complete value and ignores what follows, so the
        # closing brace is never cut off.
        try:
            value, _ = json.JSONDecoder().raw_decode(json_str.lstrip())
        except json.JSONDecodeError as exc:
            raise ValueError("Failed to parse JSON even after structural truncation") from exc
        return value


@asyncHandler
async def router_node(state: State):
    """Decide whether the topic needs research and which mode and queries to use.

    A structured-output call is tried first. If it raises or returns nothing,
    the model is asked again for raw JSON, which is parsed manually.

    Args:
        state: Graph state; reads ``topic``.

    Returns:
        A dict with the keys ``needs_research``, ``mode`` and ``queries``.

    Raises:
        ValueError: If no usable JSON can be extracted from the fallback response.
        Exception: Any other error is logged and re-raised unchanged.
    """
    logging.info("Entering router_node")
    topic = state['topic']
    logging.debug(f"Topic: {topic}")

    try:
        try:
            runnable = llm.with_structured_output(RouterDecision)
            decision = await runnable.ainvoke(
                [
                    SystemMessage(content=ROUTER_SYSTEM),
                    HumanMessage(content=f"Topic: {topic}")
                ]
            )
            if decision:
                logging.info(f"Router decision (structured): needs_research={decision.needs_research}, mode={decision.mode}")
                return {
                    "needs_research": decision.needs_research,
                    "mode": decision.mode,
                    "queries": decision.queries,
                }
        except Exception as e:
            # Deliberate best-effort: fall through to the manual JSON path.
            logging.warning(f"Structured output failed: {str(e)}. Attempting manual parse.")

        raw_response = await llm.ainvoke(
            [
                SystemMessage(content=ROUTER_SYSTEM + "\n\nCRITICAL: You MUST return a valid JSON object. Do not include any text before or after the JSON."),
                HumanMessage(content=f"Topic: {topic}")
            ]
        )
        content = raw_response.content
        logging.debug(f"Raw LLM content for fallback: {content}")

        data = _extract_json_value(content)
        if data is None:
            logging.error("Failed to extract JSON from LLM response")
            raise ValueError("LLM failed to return a valid RouterDecision. Please check prompts or model output.")

        # Accept booleans as well as common string/number spellings of "true".
        needs_res = str(data.get("needs_research", "")).lower() in ["true", "1", "yes"]

        decision = RouterDecision(
            needs_research=needs_res,
            mode=data.get("mode", "open_book"),
            queries=data.get("queries", [])
        )
        logging.info(f"Router decision (manual): needs_research={decision.needs_research}, mode={decision.mode}")
        return {
            "needs_research": decision.needs_research,
            "mode": decision.mode,
            "queries": decision.queries,
        }

    except Exception as e:
        logging.error(f"Error in router_node: {str(e)}")
        raise
|
| 98 |
+
|
| 99 |
+
def route_next(state: State) -> str:
    """Name the node that follows the router.

    Args:
        state: Graph state; reads the optional ``needs_research`` flag.

    Returns:
        ``"research"`` when the flag is truthy, otherwise ``"orchestrator"``.
    """
    # .get() keeps routing working when the key is absent (treated as False).
    needs_research = state.get("needs_research", False)
    logging.info(f"Routing next based on research need: {needs_research}")
    if needs_research:
        return "research"
    return "orchestrator"
|