# vqa-backend / draft_generator.py
# Author: Deva8 — "Deploy VQA Space with model downloader" (commit bb8f662)
import subprocess
import os
# Mermaid "graph TD" flowchart describing the VQA system architecture:
# mobile app input -> PIL preprocessing -> CLIP image features + GPT-2 question
# tokens -> a spatial/base model routing decision -> decoded neural answer,
# plus a Wikidata/Groq knowledge-graph branch and a Groq accessibility-audio
# branch, all returned to the app as JSON via FastAPI.
# The literal is runtime data (written to disk and embedded in HTML below),
# so its content must not be reformatted.
mermaid_code = """
graph TD
%% Styling
classDef default fill:#1A1A1A,stroke:#444,stroke-width:2px,color:#FFF,rx:8px,ry:8px,font-family:arial;
classDef mobile fill:#003366,stroke:#0055AA,stroke-width:2px,color:#FFF;
classDef preproc fill:#333333,stroke:#555,stroke-width:2px,color:#FFF;
classDef model fill:#4B0082,stroke:#8A2BE2,stroke-width:2px,color:#FFF;
classDef condition fill:#2B2B2B,stroke:#F4A460,stroke-width:2px,color:#FFF,shape:rhombus;
classDef external fill:#004d00,stroke:#009900,stroke-width:2px,color:#FFF;
classDef final fill:#660000,stroke:#CC0000,stroke-width:2px,color:#FFF;
%% Nodes
UserApp[πŸ“± Mobile App]:::mobile
ImgUpload[πŸ–ΌοΈ Image]:::preproc
Question[⌨️ Question Text]:::preproc
PIL[🐍 PIL Preprocessing<br/>RGB conversion]:::preproc
CLIP[πŸ‘οΈ OpenAI CLIP ViT-B/32<br/>Image Features 512-dim]:::model
GPT2[πŸ€— DistilGPT-2<br/>Tokenized Question]:::model
Route1{Question<br/>spatial?}:::condition
Spatial[πŸ“ Spatial VQA Model<br/>8-head attention]:::model
Base[🧠 Base VQA Model<br/>General VQA]:::model
Decoder[πŸ€— GPT-2 Decoder<br/>vocab decode]:::model
NeuralAns[πŸ’¬ Neural Answer]:::final
Route2{Knowledge<br/>question?}:::condition
ObjDet[πŸ‘οΈ CLIP Object Detector<br/>Top-3 objects]:::model
Wikidata[🌍 Wikidata SPARQL<br/>P31, P186, P366]:::external
GroqV[⚑ Groq Llama-3.3<br/>Verbalizer]:::external
KGAns[🧩 KG Enhancement]:::final
FastAPI[πŸš€ FastAPI]:::preproc
GroqA[⚑ Groq Llama-3.3<br/>Accessibility]:::external
Audio[πŸ”Š 2-sentence description]:::final
%% Edges
UserApp -- "Image uploaded" --> ImgUpload
UserApp -- "Question typed" --> Question
ImgUpload --> PIL
PIL --> CLIP
Question --> GPT2
CLIP & GPT2 --> Route1
Route1 -- "YES" --> Spatial
Route1 -- "NO" --> Base
Spatial & Base -- "Beam search (width=5)" --> Decoder
Decoder --> NeuralAns
CLIP -- "Anchor similarity" --> Route2
Route2 -- "YES" --> ObjDet
ObjDet -- "Detected objects" --> Wikidata
Wikidata -- "Structured facts" --> GroqV
GroqV --> KGAns
FastAPI -- "Narration request" --> GroqA
GroqA --> Audio
NeuralAns & KGAns & Audio -- "JSON output" --> FastAPI
FastAPI --> UserApp
"""
# Output location for the raw Mermaid source.
# NOTE(review): hard-coded Windows user path — parameterize (CLI arg or env var)
# before reusing this script on another machine.
file_path = r"C:\Users\rdeva\Downloads\vqa_coes\architecture_draft.mmd"
# Create the target directory if missing; otherwise open(..., "w") raises
# FileNotFoundError on a fresh machine.
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with open(file_path, "w", encoding="utf-8") as f:
    f.write(mermaid_code)
print(f"Mermaid file saved to {file_path}")
# Note: In a real environment, we would use mermaid-cli (mmdc) to convert this to SVG/PNG.
# Since it might not be installed globally, we will just provide the mermaid file and
# instructions, or generate an HTML wrapper that renders it in browser.
# Output location for a self-contained HTML viewer that renders the diagram
# client-side via the Mermaid ESM bundle from the jsDelivr CDN, so no local
# mermaid-cli installation is required.
# NOTE(review): hard-coded Windows user path — parameterize before reuse.
html_path = r"C:\Users\rdeva\Downloads\vqa_coes\architecture_draft.html"
# f-string: doubled braces {{ }} are literal braces in the emitted JS/CSS;
# {mermaid_code} interpolates the diagram source into the <div class="mermaid">.
html_content = f"""
<!DOCTYPE html>
<html>
<head>
<title>VQA Architecture Draft</title>
<script type="module">
import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.esm.min.mjs';
mermaid.initialize({{ startOnLoad: true, theme: 'dark', flowchart: {{ curve: 'basis' }} }});
</script>
<style>
body {{ background-color: #0D1117; color: white; font-family: sans-serif; display: flex; justify-content: center; padding: 20px; }}
.mermaid {{ background-color: #161B22; padding: 20px; border-radius: 10px; box-shadow: 0 4px 8px rgba(0,0,0,0.5); }}
</style>
</head>
<body>
<div class="mermaid">
{mermaid_code}
</div>
</body>
</html>
"""
# Create the target directory if missing; otherwise open(..., "w") raises
# FileNotFoundError on a fresh machine.
os.makedirs(os.path.dirname(html_path), exist_ok=True)
with open(html_path, "w", encoding="utf-8") as f:
    f.write(html_content)
print(f"HTML viewer saved to {html_path}")