File size: 3,770 Bytes
bb8f662
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import subprocess
import os

# Mermaid flowchart source for the VQA architecture diagram. The string is
# written verbatim to the .mmd file and embedded in the HTML viewer further down.
#
# NOTE(review): the node-label emoji had been corrupted into mojibake (UTF-8
# bytes decoded through a Greek/Windows code page) and are restored here.
# Two of them could not be recovered mechanically and are best guesses --
# the ruler on the "Spatial" node (its last byte was lost) and the lightning
# bolt on the two Groq nodes (it had turned into U+2691) -- please confirm.
mermaid_code = """
graph TD
    %% Styling
    classDef default fill:#1A1A1A,stroke:#444,stroke-width:2px,color:#FFF,rx:8px,ry:8px,font-family:arial;
    classDef mobile fill:#003366,stroke:#0055AA,stroke-width:2px,color:#FFF;
    classDef preproc fill:#333333,stroke:#555,stroke-width:2px,color:#FFF;
    classDef model fill:#4B0082,stroke:#8A2BE2,stroke-width:2px,color:#FFF;
    classDef condition fill:#2B2B2B,stroke:#F4A460,stroke-width:2px,color:#FFF,shape:rhombus;
    classDef external fill:#004d00,stroke:#009900,stroke-width:2px,color:#FFF;
    classDef final fill:#660000,stroke:#CC0000,stroke-width:2px,color:#FFF;

    %% Nodes
    UserApp[📱 Mobile App]:::mobile
    
    ImgUpload[🖼️ Image]:::preproc
    Question[⌨️ Question Text]:::preproc
    
    PIL[🐍 PIL Preprocessing<br/>RGB conversion]:::preproc
    
    CLIP[👁️ OpenAI CLIP ViT-B/32<br/>Image Features 512-dim]:::model
    GPT2[🤗 DistilGPT-2<br/>Tokenized Question]:::model
    
    Route1{Question<br/>spatial?}:::condition
    
    Spatial[📐 Spatial VQA Model<br/>8-head attention]:::model
    Base[🧠 Base VQA Model<br/>General VQA]:::model
    
    Decoder[🤗 GPT-2 Decoder<br/>vocab decode]:::model
    NeuralAns[💬 Neural Answer]:::final
    
    Route2{Knowledge<br/>question?}:::condition
    
    ObjDet[👁️ CLIP Object Detector<br/>Top-3 objects]:::model
    Wikidata[🌍 Wikidata SPARQL<br/>P31, P186, P366]:::external
    GroqV[⚡ Groq Llama-3.3<br/>Verbalizer]:::external
    KGAns[🧩 KG Enhancement]:::final
    
    FastAPI[🚀 FastAPI]:::preproc
    GroqA[⚡ Groq Llama-3.3<br/>Accessibility]:::external
    Audio[🔊 2-sentence description]:::final

    %% Edges
    UserApp -- "Image uploaded" --> ImgUpload
    UserApp -- "Question typed" --> Question
    
    ImgUpload --> PIL
    PIL --> CLIP
    Question --> GPT2
    
    CLIP & GPT2 --> Route1
    
    Route1 -- "YES" --> Spatial
    Route1 -- "NO" --> Base
    
    Spatial & Base -- "Beam search (width=5)" --> Decoder
    Decoder --> NeuralAns
    
    CLIP -- "Anchor similarity" --> Route2
    
    Route2 -- "YES" --> ObjDet
    ObjDet -- "Detected objects" --> Wikidata
    Wikidata -- "Structured facts" --> GroqV
    GroqV --> KGAns
    
    FastAPI -- "Narration request" --> GroqA
    GroqA --> Audio
    
    NeuralAns & KGAns & Audio -- "JSON output" --> FastAPI
    FastAPI --> UserApp
"""

# Destination of the raw Mermaid source.
# NOTE(review): this is an absolute, machine-specific location -- confirm it is
# still the intended output folder before running elsewhere.
file_path = r"C:\Users\rdeva\Downloads\vqa_coes\architecture_draft.mmd"

# open(..., "w") does not create missing folders, so make sure the target
# directory exists first instead of failing with FileNotFoundError.
# The guard skips the call when the path has no directory component.
output_dir = os.path.dirname(file_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Written as UTF-8 because the diagram labels contain non-ASCII characters.
with open(file_path, "w", encoding="utf-8") as f:
    f.write(mermaid_code)

print(f"Mermaid file saved to {file_path}")

# mermaid-cli (mmdc) could convert the .mmd file straight to SVG/PNG, but it may
# not be installed globally. Instead, generate a self-contained HTML page that
# loads Mermaid from a CDN and renders the diagram in the browser.

html_path = r"C:\Users\rdeva\Downloads\vqa_coes\architecture_draft.html"

# This is an f-string: literal JS/CSS braces are doubled ({{ }}), and the only
# interpolated value is {mermaid_code}.
# The <meta charset> declaration matters: the page is saved as UTF-8 and the
# diagram labels contain non-ASCII characters, so without it a browser opening
# the file from disk may fall back to a legacy encoding and garble the labels.
html_content = f"""
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>VQA Architecture Draft</title>
    <script type="module">
      import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.esm.min.mjs';
      mermaid.initialize({{ startOnLoad: true, theme: 'dark', flowchart: {{ curve: 'basis' }} }});
    </script>
    <style>
        body {{ background-color: #0D1117; color: white; font-family: sans-serif; display: flex; justify-content: center; padding: 20px; }}
        .mermaid {{ background-color: #161B22; padding: 20px; border-radius: 10px; box-shadow: 0 4px 8px rgba(0,0,0,0.5); }}
    </style>
</head>
<body>
    <div class="mermaid">
{mermaid_code}
    </div>
</body>
</html>
"""

# Encoding here must match the charset declared in the page's <head>.
with open(html_path, "w", encoding="utf-8") as f:
    f.write(html_content)

print(f"HTML viewer saved to {html_path}")