import subprocess  # NOTE(review): unused here — kept for a future mermaid-cli (mmdc) conversion step
import os

# Directory the generated diagram artefacts are written to.
OUTPUT_DIR = r"C:\Users\rdeva\Downloads\vqa_coes"

# Mermaid flowchart describing the VQA system architecture.
# One statement per line (Mermaid does not parse multiple node/edge
# statements joined on a single line); label line breaks use <br/>.
mermaid_code = """graph TD
    %% Styling
    classDef default fill:#1A1A1A,stroke:#444,stroke-width:2px,color:#FFF,rx:8px,ry:8px,font-family:arial;
    classDef mobile fill:#003366,stroke:#0055AA,stroke-width:2px,color:#FFF;
    classDef preproc fill:#333333,stroke:#555,stroke-width:2px,color:#FFF;
    classDef model fill:#4B0082,stroke:#8A2BE2,stroke-width:2px,color:#FFF;
    classDef condition fill:#2B2B2B,stroke:#F4A460,stroke-width:2px,color:#FFF,shape:rhombus;
    classDef external fill:#004d00,stroke:#009900,stroke-width:2px,color:#FFF;
    classDef final fill:#660000,stroke:#CC0000,stroke-width:2px,color:#FFF;

    %% Nodes
    UserApp[📱 Mobile App]:::mobile
    ImgUpload[🖼️ Image]:::preproc
    Question[⌨️ Question Text]:::preproc
    PIL[🐍 PIL Preprocessing<br/>RGB conversion]:::preproc
    CLIP[👁️ OpenAI CLIP ViT-B/32<br/>Image Features 512-dim]:::model
    GPT2[🤗 DistilGPT-2<br/>Tokenized Question]:::model
    Route1{Question<br/>spatial?}:::condition
    Spatial[📐 Spatial VQA Model<br/>8-head attention]:::model
    Base[🧠 Base VQA Model<br/>General VQA]:::model
    Decoder[🤗 GPT-2 Decoder<br/>vocab decode]:::model
    NeuralAns[💬 Neural Answer]:::final
    Route2{Knowledge<br/>question?}:::condition
    ObjDet[👁️ CLIP Object Detector<br/>Top-3 objects]:::model
    Wikidata[🌍 Wikidata SPARQL<br/>P31, P186, P366]:::external
    GroqV[⚡ Groq Llama-3.3<br/>Verbalizer]:::external
    KGAns[🧩 KG Enhancement]:::final
    FastAPI[🚀 FastAPI]:::preproc
    GroqA[⚡ Groq Llama-3.3<br/>Accessibility]:::external
    Audio[🔊 2-sentence description]:::final

    %% Edges
    UserApp -- "Image uploaded" --> ImgUpload
    UserApp -- "Question typed" --> Question
    ImgUpload --> PIL
    PIL --> CLIP
    Question --> GPT2
    CLIP & GPT2 --> Route1
    Route1 -- "YES" --> Spatial
    Route1 -- "NO" --> Base
    Spatial & Base -- "Beam search (width=5)" --> Decoder
    Decoder --> NeuralAns
    CLIP -- "Anchor similarity" --> Route2
    Route2 -- "YES" --> ObjDet
    ObjDet -- "Detected objects" --> Wikidata
    Wikidata -- "Structured facts" --> GroqV
    GroqV --> KGAns
    FastAPI -- "Narration request" --> GroqA
    GroqA --> Audio
    NeuralAns & KGAns & Audio -- "JSON output" --> FastAPI
    FastAPI --> UserApp
"""


def build_mermaid_html(diagram: str) -> str:
    """Return a self-contained HTML page that renders *diagram* with mermaid.js.

    The page loads mermaid from a CDN, so it renders in any modern browser
    without a local mermaid-cli install.

    Args:
        diagram: Mermaid source text to embed in the page.

    Returns:
        The complete HTML document as a string.
    """
    # Literal braces in the JS config object must be doubled inside the f-string.
    return f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>VQA Architecture Draft</title>
<script type="module">
import mermaid from "https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.esm.min.mjs";
mermaid.initialize({{ startOnLoad: true, theme: "dark" }});
</script>
<style>body {{ background: #111; }}</style>
</head>
<body>
<pre class="mermaid">
{diagram}
</pre>
</body>
</html>
"""


def write_outputs(out_dir: str = OUTPUT_DIR) -> tuple:
    """Write the .mmd source and an HTML viewer for it into *out_dir*.

    Args:
        out_dir: Destination directory (must already exist).

    Returns:
        A ``(mmd_path, html_path)`` tuple of the files written.
    """
    mmd_path = os.path.join(out_dir, "architecture_draft.mmd")
    with open(mmd_path, "w", encoding="utf-8") as f:
        f.write(mermaid_code)
    print(f"Mermaid file saved to {mmd_path}")

    # Note: in a real environment we would use mermaid-cli (mmdc) to convert
    # this to SVG/PNG. Since it might not be installed globally, we generate
    # an HTML wrapper that renders the diagram in the browser instead.
    html_path = os.path.join(out_dir, "architecture_draft.html")
    with open(html_path, "w", encoding="utf-8") as f:
        f.write(build_mermaid_html(mermaid_code))
    print(f"HTML viewer saved to {html_path}")

    return mmd_path, html_path


if __name__ == "__main__":
    write_outputs()