vqa-backend / enterprise_architecture.drawio
Deva8's picture
Deploy VQA Space with model downloader
bb8f662
<?xml version="1.0" encoding="UTF-8"?>
<mxGraphModel dx="1800" dy="1100" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="1920" pageHeight="1080" math="0" shadow="1">
<root>
<!-- Structural cells: id "0" has no parent and id "1" is parented to it. Top-level shapes and edges in this file use parent="1". -->
<mxCell id="0" />
<mxCell id="1" parent="0" />
<!-- Canvas backdrop and title banner. The backdrop overhangs the 1920x1080 page by 20 units on every side. -->
<mxCell id="bg" value="" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#0D1117;strokeColor=none;" vertex="1" parent="1">
<mxGeometry x="-20" y="-20" width="1960" height="1120" as="geometry" />
</mxCell>
<!-- "rounded" is a 0/1 switch, so the corner size is carried by arcSize (absolute units, original number kept). -->
<mxCell id="title_bg" value="" style="rounded=1;absoluteArcSize=1;arcSize=10;whiteSpace=wrap;html=1;fillColor=#161B22;strokeColor=#30363D;" vertex="1" parent="1">
<mxGeometry x="20" y="20" width="1880" height="70" as="geometry" />
</mxCell>
<!-- Title text is a separate borderless cell laid exactly over title_bg (same geometry). -->
<mxCell id="title" value="&lt;font style=&quot;font-size:24px;font-weight:bold;&quot; color=&quot;#58A6FF&quot;&gt;Semantic Neuro-Symbolic VQA -- Enterprise Architecture&lt;/font&gt;&lt;br&gt;&lt;font style=&quot;font-size:11px;&quot; color=&quot;#8B949E&quot;&gt;React Native Mobile UI | FastAPI (Uvicorn) | PyTorch | OpenAI CLIP | Wikidata SPARQL | Groq LLM (Llama-3.3-70B-Versatile)&lt;/font&gt;" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;" vertex="1" parent="1">
<mxGeometry x="20" y="20" width="1880" height="70" as="geometry" />
</mxCell>
<!-- ===================== CLIENT LAYER ===================== -->
<!--
  Swimlane container [1] and its children (all children use parent="client_layer").
  Style notes:
    * html=1 is set on the swimlane so the <font> markup in its title is rendered instead of shown as text.
    * "rounded" is a 0/1 switch; corner size is given by arcSize with absoluteArcSize=1 (original numbers kept).
-->
<mxCell id="client_layer" value="&lt;font style=&quot;font-size:14px;font-weight:bold;&quot; color=&quot;#79C0FF&quot;&gt;[1] CLIENT LAYER&lt;/font&gt;" style="swimlane;html=1;startSize=30;fillColor=#161B22;strokeColor=#1F6FEB;fontColor=#FFFFFF;fontStyle=1;fontSize=13;rounded=1;absoluteArcSize=1;arcSize=10;" vertex="1" parent="1">
<mxGeometry x="20" y="110" width="350" height="870" as="geometry" />
</mxCell>
<mxCell id="mobile_label" value="[React Native / Expo]" style="text;html=1;fontSize=20;align=center;fillColor=none;strokeColor=none;fontColor=#58A6FF;" vertex="1" parent="client_layer">
<mxGeometry x="80" y="38" width="190" height="35" as="geometry" />
</mxCell>
<mxCell id="mobile_app" value="&lt;b&gt;React Native Mobile App&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;Expo Framework | iOS and Android&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=10;whiteSpace=wrap;html=1;fillColor=#1C2128;strokeColor=#30363D;fontColor=#FFFFFF;fontSize=12;" vertex="1" parent="client_layer">
<mxGeometry x="30" y="85" width="290" height="60" as="geometry" />
</mxCell>
<!-- Screens -->
<mxCell id="screen_login" value="&lt;b&gt;LoginScreen.js&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;Auth | Session Management&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=8;whiteSpace=wrap;html=1;fillColor=#0D2137;strokeColor=#1F6FEB;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="client_layer">
<mxGeometry x="30" y="165" width="290" height="50" as="geometry" />
</mxCell>
<mxCell id="screen_camera" value="&lt;b&gt;CameraScreen.js&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;Image Capture | Upload&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=8;whiteSpace=wrap;html=1;fillColor=#0D2137;strokeColor=#1F6FEB;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="client_layer">
<mxGeometry x="30" y="225" width="290" height="50" as="geometry" />
</mxCell>
<mxCell id="screen_home" value="&lt;b&gt;HomeScreen.js&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;Main Dashboard | History&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=8;whiteSpace=wrap;html=1;fillColor=#0D2137;strokeColor=#1F6FEB;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="client_layer">
<mxGeometry x="30" y="285" width="290" height="50" as="geometry" />
</mxCell>
<mxCell id="screen_qa" value="&lt;b&gt;QuestionScreen.js&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;Q and A Interface | Conversation&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=8;whiteSpace=wrap;html=1;fillColor=#0D2137;strokeColor=#1F6FEB;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="client_layer">
<mxGeometry x="30" y="345" width="290" height="50" as="geometry" />
</mxCell>
<mxCell id="screen_result" value="&lt;b&gt;ResultScreen.js&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;Answer Display | KG Enhancement&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=8;whiteSpace=wrap;html=1;fillColor=#0D2137;strokeColor=#1F6FEB;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="client_layer">
<mxGeometry x="30" y="405" width="290" height="50" as="geometry" />
</mxCell>
<!-- API service box; it is the source of edge flow_1 into the FastAPI backend. -->
<mxCell id="api_js" value="&lt;b&gt;api.js (API Service)&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;Axios | FormData | Session Tokens&lt;br&gt;REST calls to FastAPI backend&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=10;whiteSpace=wrap;html=1;fillColor=#1A2820;strokeColor=#3FB950;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="client_layer">
<mxGeometry x="30" y="478" width="290" height="70" as="geometry" />
</mxCell>
<!-- Endpoint badges, laid out as a 2x2 grid under api_js. -->
<mxCell id="ep1" value="POST /api/answer" style="rounded=1;absoluteArcSize=1;arcSize=5;whiteSpace=wrap;html=1;fillColor=#0D1117;strokeColor=#3FB950;fontColor=#3FB950;fontSize=10;" vertex="1" parent="client_layer">
<mxGeometry x="30" y="565" width="135" height="30" as="geometry" />
</mxCell>
<mxCell id="ep2" value="POST /api/conversation/answer" style="rounded=1;absoluteArcSize=1;arcSize=5;whiteSpace=wrap;html=1;fillColor=#0D1117;strokeColor=#3FB950;fontColor=#3FB950;fontSize=10;" vertex="1" parent="client_layer">
<mxGeometry x="177" y="565" width="143" height="30" as="geometry" />
</mxCell>
<mxCell id="ep3" value="GET /api/models/info" style="rounded=1;absoluteArcSize=1;arcSize=5;whiteSpace=wrap;html=1;fillColor=#0D1117;strokeColor=#3FB950;fontColor=#3FB950;fontSize=10;" vertex="1" parent="client_layer">
<mxGeometry x="30" y="605" width="135" height="30" as="geometry" />
</mxCell>
<mxCell id="ep4" value="GET/DELETE /api/conversation/{id}" style="rounded=1;absoluteArcSize=1;arcSize=5;whiteSpace=wrap;html=1;fillColor=#0D1117;strokeColor=#3FB950;fontColor=#3FB950;fontSize=10;" vertex="1" parent="client_layer">
<mxGeometry x="177" y="605" width="143" height="30" as="geometry" />
</mxCell>
<mxCell id="client_tech" value="&lt;b&gt;Tech:&lt;/b&gt; Expo | React Navigation | Axios | FormData&lt;br&gt;&lt;b&gt;Auth:&lt;/b&gt; Session tokens | Context API" style="rounded=1;absoluteArcSize=1;arcSize=8;whiteSpace=wrap;html=1;fillColor=#161B22;strokeColor=#21262D;fontColor=#8B949E;fontSize=10;" vertex="1" parent="client_layer">
<mxGeometry x="30" y="660" width="290" height="55" as="geometry" />
</mxCell>
<!-- ===================== API GATEWAY LAYER ===================== -->
<!--
  Swimlane container [2] and its children (all children use parent="api_layer").
  Style notes:
    * html=1 is set on the swimlane so the <font> markup in its title is rendered instead of shown as text.
    * "rounded" is a 0/1 switch; corner size is given by arcSize with absoluteArcSize=1 (original numbers kept).
-->
<mxCell id="api_layer" value="&lt;font style=&quot;font-size:14px;font-weight:bold;&quot; color=&quot;#56D364&quot;&gt;[2] API GATEWAY LAYER&lt;/font&gt;" style="swimlane;html=1;startSize=30;fillColor=#161B22;strokeColor=#3FB950;fontColor=#FFFFFF;fontStyle=1;fontSize=13;rounded=1;absoluteArcSize=1;arcSize=10;" vertex="1" parent="1">
<mxGeometry x="400" y="110" width="360" height="870" as="geometry" />
</mxCell>
<mxCell id="apigw_label" value="[FastAPI + Uvicorn]" style="text;html=1;fontSize=20;align=center;fillColor=none;strokeColor=none;fontColor=#3FB950;" vertex="1" parent="api_layer">
<mxGeometry x="85" y="38" width="190" height="35" as="geometry" />
</mxCell>
<!-- Backend entry box; target of flow_1 and source of flow_2 and flow_14. -->
<mxCell id="fastapi_main" value="&lt;b&gt;FastAPI Backend (Uvicorn)&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;backend_api.py&lt;br&gt;Host: 0.0.0.0 | Port: 8000&lt;br&gt;CORS enabled | Auto-reload dev mode&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=10;whiteSpace=wrap;html=1;fillColor=#162415;strokeColor=#3FB950;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="api_layer">
<mxGeometry x="20" y="88" width="320" height="80" as="geometry" />
</mxCell>
<mxCell id="startup" value="&lt;b&gt;Startup Event&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;Load checkpoints | Init models&lt;br&gt;Init Groq service | Health check&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=8;whiteSpace=wrap;html=1;fillColor=#1C2128;strokeColor=#30363D;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="api_layer">
<mxGeometry x="20" y="188" width="320" height="60" as="geometry" />
</mxCell>
<!-- Endpoint boxes -->
<mxCell id="ep_health" value="GET /health&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;Model status check&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=6;whiteSpace=wrap;html=1;fillColor=#0D2137;strokeColor=#1F6FEB;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="api_layer">
<mxGeometry x="20" y="268" width="145" height="50" as="geometry" />
</mxCell>
<mxCell id="ep_root" value="GET /&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;API info and docs&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=6;whiteSpace=wrap;html=1;fillColor=#0D2137;strokeColor=#1F6FEB;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="api_layer">
<mxGeometry x="175" y="268" width="145" height="50" as="geometry" />
</mxCell>
<mxCell id="ep_answer" value="POST /api/answer&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;image + question -&gt; JSON answer&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=6;whiteSpace=wrap;html=1;fillColor=#132D0E;strokeColor=#3FB950;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="api_layer">
<mxGeometry x="20" y="328" width="300" height="50" as="geometry" />
</mxCell>
<mxCell id="ep_conv" value="POST /api/conversation/answer&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;Multi-turn | session_id | pronouns&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=6;whiteSpace=wrap;html=1;fillColor=#132D0E;strokeColor=#3FB950;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="api_layer">
<mxGeometry x="20" y="388" width="300" height="50" as="geometry" />
</mxCell>
<mxCell id="ep_hist" value="GET /api/conversation/{id}/history" style="rounded=1;absoluteArcSize=1;arcSize=6;whiteSpace=wrap;html=1;fillColor=#1C2128;strokeColor=#30363D;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="api_layer">
<mxGeometry x="20" y="448" width="300" height="38" as="geometry" />
</mxCell>
<mxCell id="ep_del" value="DELETE /api/conversation/{id}" style="rounded=1;absoluteArcSize=1;arcSize=6;whiteSpace=wrap;html=1;fillColor=#1C2128;strokeColor=#30363D;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="api_layer">
<mxGeometry x="20" y="496" width="300" height="38" as="geometry" />
</mxCell>
<mxCell id="ep_models" value="GET /api/models/info" style="rounded=1;absoluteArcSize=1;arcSize=6;whiteSpace=wrap;html=1;fillColor=#1C2128;strokeColor=#30363D;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="api_layer">
<mxGeometry x="20" y="544" width="300" height="38" as="geometry" />
</mxCell>
<mxCell id="middleware" value="&lt;b&gt;Middleware&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;CORS | Error handling | HTTP 400/503/500&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=8;whiteSpace=wrap;html=1;fillColor=#1C2128;strokeColor=#30363D;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="api_layer">
<mxGeometry x="20" y="600" width="320" height="50" as="geometry" />
</mxCell>
<!-- Conversation state box; linked to ensemble_vqa by the double-headed edge flow_17. -->
<mxCell id="conv_manager" value="&lt;b&gt;ConversationManager&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;conversation_manager.py&lt;br&gt;Session 30min timeout | Pronoun resolution&lt;br&gt;History storage | Context retrieval&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=10;whiteSpace=wrap;html=1;fillColor=#1A1A2E;strokeColor=#7B2FBE;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="api_layer">
<mxGeometry x="20" y="670" width="320" height="80" as="geometry" />
</mxCell>
<!-- ===================== ML INFERENCE ENGINE ===================== -->
<!--
  Swimlane container [3] and its children (all children use parent="ml_layer").
  Style notes:
    * html=1 is set on the swimlane so the <font> markup in its title is rendered instead of shown as text.
    * "rounded" is a 0/1 switch; corner size is given by arcSize with absoluteArcSize=1 (original numbers kept).
-->
<mxCell id="ml_layer" value="&lt;font style=&quot;font-size:14px;font-weight:bold;&quot; color=&quot;#FFA657&quot;&gt;[3] ML INFERENCE ENGINE&lt;/font&gt;" style="swimlane;html=1;startSize=30;fillColor=#161B22;strokeColor=#D29922;fontColor=#FFFFFF;fontStyle=1;fontSize=13;rounded=1;absoluteArcSize=1;arcSize=10;" vertex="1" parent="1">
<mxGeometry x="800" y="110" width="380" height="870" as="geometry" />
</mxCell>
<mxCell id="ml_label" value="[PyTorch + CLIP + DistilGPT-2]" style="text;html=1;fontSize=16;align=center;fillColor=none;strokeColor=none;fontColor=#D29922;" vertex="1" parent="ml_layer">
<mxGeometry x="40" y="38" width="300" height="35" as="geometry" />
</mxCell>
<!-- Ensemble entry box; target of flow_2 and flow_17, source of flow_3 and flow_5. -->
<mxCell id="ensemble_vqa" value="&lt;b&gt;ProductionEnsembleVQA&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;ensemble_vqa_app.py&lt;br&gt;Device: CUDA / CPU auto-detect&lt;br&gt;Beam Search width=5 | Top-K Decoding&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=10;whiteSpace=wrap;html=1;fillColor=#2D2000;strokeColor=#D29922;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="ml_layer">
<mxGeometry x="20" y="88" width="340" height="80" as="geometry" />
</mxCell>
<mxCell id="router" value="&lt;b&gt;Question Router (Keyword Classifier)&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;is_spatial_question()&lt;br&gt;Spatial keywords: left, right, above, below, next to...&lt;br&gt;Routes to Base or Spatial model&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=8;whiteSpace=wrap;html=1;fillColor=#1E1E00;strokeColor=#D29922;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="ml_layer">
<mxGeometry x="20" y="188" width="340" height="75" as="geometry" />
</mxCell>
<!-- The two model boxes sit side by side below the router (targets of flow_4a and flow_4b). -->
<mxCell id="base_model_box" value="&lt;b&gt;Base VQA Model&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;model.py | VQAModel&lt;br&gt;CLIP ViT-B/32 + GPT-2&lt;br&gt;vqa_checkpoint.pt (731 MB)&lt;br&gt;hidden=512 | layers=2 | acc~50%&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=10;whiteSpace=wrap;html=1;fillColor=#162415;strokeColor=#3FB950;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="ml_layer">
<mxGeometry x="20" y="285" width="158" height="120" as="geometry" />
</mxCell>
<mxCell id="spatial_model_box" value="&lt;b&gt;Spatial VQA Model&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;model_spatial.py&lt;br&gt;SpatialAdapter + 8-head attn&lt;br&gt;vqa_spatial_checkpoint.pt (739 MB)&lt;br&gt;dropout=0.3 | acc~40%&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=10;whiteSpace=wrap;html=1;fillColor=#0D2137;strokeColor=#1F6FEB;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="ml_layer">
<mxGeometry x="192" y="285" width="168" height="120" as="geometry" />
</mxCell>
<mxCell id="gpt2" value="&lt;b&gt;DistilGPT-2 Tokenizer&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;Text tokenization | Vocab&lt;br&gt;BOS / EOS / PAD tokens | Beam search decoding&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=8;whiteSpace=wrap;html=1;fillColor=#1C2128;strokeColor=#30363D;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="ml_layer">
<mxGeometry x="20" y="425" width="340" height="65" as="geometry" />
</mxCell>
<mxCell id="clip_box" value="&lt;b&gt;OpenAI CLIP (ViT-B/32)&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;Image encoder + Text encoder&lt;br&gt;Zero-shot object detection (80+ nouns)&lt;br&gt;Question routing: visual vs knowledge&lt;br&gt;Anchor similarity | Softmax x10&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=10;whiteSpace=wrap;html=1;fillColor=#1A1A0D;strokeColor=#E3B341;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="ml_layer">
<mxGeometry x="20" y="508" width="340" height="90" as="geometry" />
</mxCell>
<mxCell id="img_proc" value="&lt;b&gt;Image Preprocessor (PIL)&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;JPEG/PNG -&gt; RGB | CLIP preprocess | Tensor&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=8;whiteSpace=wrap;html=1;fillColor=#1C2128;strokeColor=#30363D;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="ml_layer">
<mxGeometry x="20" y="615" width="340" height="55" as="geometry" />
</mxCell>
<mxCell id="pt_files" value="&lt;b&gt;PyTorch Checkpoints (Local Disk)&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;vqa_checkpoint.pt (731 MB)&lt;br&gt;vqa_spatial_checkpoint.pt (739 MB)&lt;br&gt;state_dict | vocab | tokenizer config&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=10;whiteSpace=wrap;html=1;fillColor=#251A00;strokeColor=#D29922;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="ml_layer">
<mxGeometry x="20" y="688" width="340" height="80" as="geometry" />
</mxCell>
<mxCell id="gpu_badge" value="GPU: CUDA | ~4 GB VRAM | 2x Model Parallel loading" style="rounded=1;absoluteArcSize=1;arcSize=5;whiteSpace=wrap;html=1;fillColor=#0D1117;strokeColor=#D29922;fontColor=#E3B341;fontSize=10;" vertex="1" parent="ml_layer">
<mxGeometry x="20" y="785" width="340" height="28" as="geometry" />
</mxCell>
<!-- ===================== NEURO-SYMBOLIC PIPELINE ===================== -->
<!--
  Swimlane container [4] and its children (all children use parent="ns_layer").
  The boxes are stacked top to bottom in step order and chained by edges flow_6 to flow_11.
  Style notes:
    * html=1 is set on the swimlane so the <font> markup in its title is rendered instead of shown as text.
    * "rounded" is a 0/1 switch; corner size is given by arcSize with absoluteArcSize=1 (original numbers kept).
-->
<mxCell id="ns_layer" value="&lt;font style=&quot;font-size:14px;font-weight:bold;&quot; color=&quot;#BC8CFF&quot;&gt;[4] NEURO-SYMBOLIC PIPELINE&lt;/font&gt;" style="swimlane;html=1;startSize=30;fillColor=#161B22;strokeColor=#8957E5;fontColor=#FFFFFF;fontStyle=1;fontSize=13;rounded=1;absoluteArcSize=1;arcSize=10;" vertex="1" parent="1">
<mxGeometry x="1220" y="110" width="370" height="870" as="geometry" />
</mxCell>
<mxCell id="ns_label" value="[CLIP + Wikidata SPARQL + Groq LLM]" style="text;html=1;fontSize=14;align=center;fillColor=none;strokeColor=none;fontColor=#8957E5;" vertex="1" parent="ns_layer">
<mxGeometry x="15" y="38" width="340" height="35" as="geometry" />
</mxCell>
<mxCell id="ns_main" value="&lt;b&gt;SemanticNeurosymbolicVQA&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;semantic_neurosymbolic_vqa.py&lt;br&gt;Neural -&gt; Symbolic -&gt; Verbalize pipeline&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=10;whiteSpace=wrap;html=1;fillColor=#1A0D2E;strokeColor=#8957E5;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="ns_layer">
<mxGeometry x="20" y="88" width="330" height="65" as="geometry" />
</mxCell>
<mxCell id="ns_step1" value="&lt;b&gt;Step 1: CLIP Routing&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;should_use_neurosymbolic()&lt;br&gt;VISUAL anchor vs KNOWLEDGE anchor&lt;br&gt;Temperature softmax x10&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=8;whiteSpace=wrap;html=1;fillColor=#0D1A30;strokeColor=#1F6FEB;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="ns_layer">
<mxGeometry x="20" y="173" width="330" height="78" as="geometry" />
</mxCell>
<!-- Decision diamond between routing (flow_7 in) and object detection (flow_8 out). -->
<mxCell id="route_decision" value="VISUAL question?&lt;br&gt;-&gt; Neural VQA only&lt;br&gt;KNOWLEDGE question?&lt;br&gt;-&gt; Neuro-Symbolic" style="rhombus;whiteSpace=wrap;html=1;fillColor=#21262D;strokeColor=#8957E5;fontColor=#FFFFFF;fontSize=10;" vertex="1" parent="ns_layer">
<mxGeometry x="75" y="268" width="220" height="88" as="geometry" />
</mxCell>
<mxCell id="ns_step2" value="&lt;b&gt;Step 2: CLIP Object Detection&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;detect_objects_with_clip()&lt;br&gt;80+ noun vocabulary | Top-3 objects&lt;br&gt;Cosine similarity | prompt: &apos;a photo of a {label}&apos;&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=8;whiteSpace=wrap;html=1;fillColor=#0D1A30;strokeColor=#1F6FEB;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="ns_layer">
<mxGeometry x="20" y="375" width="330" height="80" as="geometry" />
</mxCell>
<mxCell id="wikidata_box" value="&lt;b&gt;Step 3: WikidataKnowledgeBase&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;SPARQL: query.wikidata.org&lt;br&gt;P31 (category) | P186 (material) | P366 (uses)&lt;br&gt;P2101 (melting pt) | P2054 (density)&lt;br&gt;lru_cache(500) | timeout=10s&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=10;whiteSpace=wrap;html=1;fillColor=#0D2E2E;strokeColor=#2EA8A8;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="ns_layer">
<mxGeometry x="20" y="473" width="330" height="100" as="geometry" />
</mxCell>
<mxCell id="groq_box" value="&lt;b&gt;Step 4: Groq LLM Verbalizer&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;WikidataGroqAnswerer&lt;br&gt;Model: llama-3.3-70b-versatile&lt;br&gt;Temp=0.1 | max_tokens=180 | top_p=0.9&lt;br&gt;Answers ONLY from Wikidata facts&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=10;whiteSpace=wrap;html=1;fillColor=#1A2B1A;strokeColor=#F85149;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="ns_layer">
<mxGeometry x="20" y="592" width="330" height="95" as="geometry" />
</mxCell>
<!-- Accessibility narration box; target of flow_14 from the FastAPI backend and source of flow_13. -->
<mxCell id="groq_access" value="&lt;b&gt;Groq Accessibility Service&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;groq_service.py | GroqDescriptionService&lt;br&gt;2-sentence narrations for blind users&lt;br&gt;Temp=0.7 | max_tokens=150&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=10;whiteSpace=wrap;html=1;fillColor=#1A2B1A;strokeColor=#F85149;fontColor=#FFFFFF;fontSize=11;" vertex="1" parent="ns_layer">
<mxGeometry x="20" y="706" width="330" height="85" as="geometry" />
</mxCell>
<mxCell id="groq_badge" value="Groq API | Llama-3.3-70B-Versatile | GROQ_API_KEY env var" style="rounded=1;absoluteArcSize=1;arcSize=5;whiteSpace=wrap;html=1;fillColor=#0D1117;strokeColor=#F85149;fontColor=#F85149;fontSize=10;" vertex="1" parent="ns_layer">
<mxGeometry x="20" y="808" width="330" height="28" as="geometry" />
</mxCell>
<!-- ===================== EXTERNAL SERVICES ===================== -->
<!--
  Three free-standing boxes in the right-hand column (x=1640, parent="1").
  They are the targets of edges flow_10 (Wikidata), flow_12 / flow_13 (Groq) and flow_15 (model weights).
  "rounded" is a 0/1 switch; corner size is given by arcSize with absoluteArcSize=1 (original number kept).
-->
<mxCell id="wikidata_ext" value="&lt;b&gt;Wikidata SPARQL API&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;query.wikidata.org/sparql&lt;br&gt;wikidata.org/w/api.php&lt;br&gt;Entity lookup | Property values&lt;br&gt;Free and Open Knowledge Base&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=10;whiteSpace=wrap;html=1;fillColor=#0A2525;strokeColor=#2EA8A8;fontColor=#FFFFFF;fontSize=12;" vertex="1" parent="1">
<mxGeometry x="1640" y="200" width="250" height="130" as="geometry" />
</mxCell>
<mxCell id="groq_cloud" value="&lt;b&gt;Groq Cloud API&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;api.groq.com&lt;br&gt;Llama-3.3-70B-Versatile&lt;br&gt;Ultra-low latency inference&lt;br&gt;chat.completions endpoint&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=10;whiteSpace=wrap;html=1;fillColor=#1A0A0A;strokeColor=#F85149;fontColor=#FFFFFF;fontSize=12;" vertex="1" parent="1">
<mxGeometry x="1640" y="385" width="250" height="130" as="geometry" />
</mxCell>
<mxCell id="hf_clip" value="&lt;b&gt;OpenAI / HuggingFace Hub&lt;/b&gt;&lt;br&gt;&lt;font color=&quot;#8B949E&quot;&gt;CLIP ViT-B/32 weights&lt;br&gt;GPT-2 / DistilGPT-2 tokenizer&lt;br&gt;Cached locally after first download&lt;/font&gt;" style="rounded=1;absoluteArcSize=1;arcSize=10;whiteSpace=wrap;html=1;fillColor=#1A1000;strokeColor=#E3B341;fontColor=#FFFFFF;fontSize=12;" vertex="1" parent="1">
<mxGeometry x="1640" y="565" width="250" height="105" as="geometry" />
</mxCell>
<!-- ===================== LEGEND ===================== -->
<!--
  Key for the layer colours and the three arrow styles used by the edges.
  "rounded" is a 0/1 switch; corner size is given by arcSize with absoluteArcSize=1 (original number kept).
-->
<mxCell id="legend" value="&lt;b&gt;LEGEND&lt;/b&gt;&lt;br&gt;[1] Blue = Client Layer (React Native)&lt;br&gt;[2] Green = API Gateway (FastAPI)&lt;br&gt;[3] Orange = ML Inference (PyTorch)&lt;br&gt;[4] Purple = Neuro-Symbolic Pipeline&lt;br&gt;Solid arrow = Primary data flow&lt;br&gt;Dashed arrow = Conditional / supplement&lt;br&gt;Animated = Live request flow" style="rounded=1;absoluteArcSize=1;arcSize=8;whiteSpace=wrap;html=1;fillColor=#161B22;strokeColor=#30363D;fontColor=#8B949E;fontSize=11;align=left;" vertex="1" parent="1">
<mxGeometry x="1640" y="710" width="250" height="155" as="geometry" />
</mxCell>
<!-- ===================== EDGES / ANIMATED FLOWS ===================== -->
<!--
  All connectors between the boxes. Every edge uses parent="1" and refers to its endpoints by cell id.
  Style notes:
    * flowAnimation=1 is the draw.io key for an animated connector (the legend's "Animated" entry).
    * html=1 lets the <font> markup in the edge labels render as formatting.
    * dashed=1 marks the conditional / supplementary paths named in the legend.
-->
<!-- 1. api.js -> FastAPI (HTTP REST) -->
<mxCell id="flow_1" value="&lt;font color=&quot;#3FB950&quot;&gt;HTTP REST (JSON/FormData)&lt;/font&gt;" style="edgeStyle=orthogonalEdgeStyle;html=1;rounded=1;orthogonalLoop=1;jettySize=auto;strokeColor=#3FB950;strokeWidth=3;fontSize=10;fontColor=#3FB950;flowAnimation=1;endArrow=block;endFill=1;" edge="1" parent="1" source="api_js" target="fastapi_main">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<!-- 2. FastAPI -> Ensemble VQA -->
<mxCell id="flow_2" value="&lt;font color=&quot;#FFA657&quot;&gt;answer()&lt;/font&gt;" style="edgeStyle=orthogonalEdgeStyle;html=1;rounded=1;orthogonalLoop=1;jettySize=auto;strokeColor=#D29922;strokeWidth=3;fontSize=10;fontColor=#FFA657;flowAnimation=1;endArrow=block;endFill=1;" edge="1" parent="1" source="fastapi_main" target="ensemble_vqa">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<!-- 3. Ensemble -> Router -->
<mxCell id="flow_3" value="" style="edgeStyle=orthogonalEdgeStyle;html=1;rounded=1;strokeColor=#D29922;strokeWidth=2;flowAnimation=1;endArrow=block;endFill=1;" edge="1" parent="1" source="ensemble_vqa" target="router">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<!-- 4a. Router -> Base Model -->
<mxCell id="flow_4a" value="&lt;font color=&quot;#3FB950&quot;&gt;General Q&lt;/font&gt;" style="edgeStyle=orthogonalEdgeStyle;html=1;rounded=1;strokeColor=#3FB950;strokeWidth=2;flowAnimation=1;endArrow=block;endFill=1;fontSize=10;fontColor=#3FB950;" edge="1" parent="1" source="router" target="base_model_box">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<!-- 4b. Router -> Spatial Model -->
<mxCell id="flow_4b" value="&lt;font color=&quot;#58A6FF&quot;&gt;Spatial Q&lt;/font&gt;" style="edgeStyle=orthogonalEdgeStyle;html=1;rounded=1;strokeColor=#1F6FEB;strokeWidth=2;flowAnimation=1;endArrow=block;endFill=1;fontSize=10;fontColor=#58A6FF;" edge="1" parent="1" source="router" target="spatial_model_box">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<!-- 5. Ensemble -> NS Pipeline (supplement) -->
<mxCell id="flow_5" value="&lt;font color=&quot;#BC8CFF&quot;&gt;NS supplement&lt;/font&gt;" style="edgeStyle=orthogonalEdgeStyle;html=1;rounded=1;orthogonalLoop=1;jettySize=auto;strokeColor=#8957E5;strokeWidth=3;fontSize=10;fontColor=#BC8CFF;flowAnimation=1;dashed=1;endArrow=block;endFill=1;" edge="1" parent="1" source="ensemble_vqa" target="ns_main">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<!-- 6. NS main -> CLIP Routing -->
<mxCell id="flow_6" value="" style="edgeStyle=orthogonalEdgeStyle;html=1;rounded=1;strokeColor=#8957E5;strokeWidth=2;flowAnimation=1;endArrow=block;endFill=1;" edge="1" parent="1" source="ns_main" target="ns_step1">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<!-- 7. CLIP Routing -> Decision diamond -->
<mxCell id="flow_7" value="" style="edgeStyle=orthogonalEdgeStyle;html=1;rounded=1;strokeColor=#8957E5;strokeWidth=2;flowAnimation=1;endArrow=block;endFill=1;" edge="1" parent="1" source="ns_step1" target="route_decision">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<!-- 8. Decision -> Object Detection -->
<mxCell id="flow_8" value="&lt;font color=&quot;#BC8CFF&quot;&gt;Knowledge Q&lt;/font&gt;" style="edgeStyle=orthogonalEdgeStyle;html=1;rounded=1;strokeColor=#8957E5;strokeWidth=2;flowAnimation=1;dashed=1;endArrow=block;endFill=1;fontSize=10;fontColor=#BC8CFF;" edge="1" parent="1" source="route_decision" target="ns_step2">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<!-- 9. Object Detection -> Wikidata box -->
<mxCell id="flow_9" value="" style="edgeStyle=orthogonalEdgeStyle;html=1;rounded=1;strokeColor=#2EA8A8;strokeWidth=2;flowAnimation=1;endArrow=block;endFill=1;" edge="1" parent="1" source="ns_step2" target="wikidata_box">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<!-- 10. Wikidata box -> Wikidata external API -->
<mxCell id="flow_10" value="&lt;font color=&quot;#2EA8A8&quot;&gt;SPARQL queries&lt;/font&gt;" style="edgeStyle=orthogonalEdgeStyle;html=1;rounded=1;strokeColor=#2EA8A8;strokeWidth=3;fontSize=10;fontColor=#2EA8A8;flowAnimation=1;endArrow=block;endFill=1;" edge="1" parent="1" source="wikidata_box" target="wikidata_ext">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<!-- 11. Wikidata facts -> Groq verbalizer -->
<mxCell id="flow_11" value="" style="edgeStyle=orthogonalEdgeStyle;html=1;rounded=1;strokeColor=#F85149;strokeWidth=2;flowAnimation=1;endArrow=block;endFill=1;" edge="1" parent="1" source="wikidata_box" target="groq_box">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<!-- 12. Groq box -> Groq Cloud -->
<mxCell id="flow_12" value="&lt;font color=&quot;#F85149&quot;&gt;API call | Llama-3.3-70B&lt;/font&gt;" style="edgeStyle=orthogonalEdgeStyle;html=1;rounded=1;strokeColor=#F85149;strokeWidth=3;fontSize=10;fontColor=#F85149;flowAnimation=1;endArrow=block;endFill=1;" edge="1" parent="1" source="groq_box" target="groq_cloud">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<!-- 13. Groq accessibility -> Groq Cloud -->
<mxCell id="flow_13" value="" style="edgeStyle=orthogonalEdgeStyle;html=1;rounded=1;strokeColor=#F85149;strokeWidth=2;flowAnimation=1;dashed=1;endArrow=block;endFill=1;" edge="1" parent="1" source="groq_access" target="groq_cloud">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<!-- 14. FastAPI -> Groq Accessibility (top arc) -->
<!--
  Leaves the top centre of fastapi_main and enters the top centre of groq_access via two waypoints at y=140.
  The first waypoint (x=580) is the centre of fastapi_main: 400 + 20 + 320/2.
  The second waypoint (x=1405) is the centre of groq_access: 1220 + 20 + 330/2, so the final drop is one straight vertical.
-->
<mxCell id="flow_14" value="&lt;font color=&quot;#F85149&quot;&gt;accessibility narration&lt;/font&gt;" style="edgeStyle=orthogonalEdgeStyle;html=1;rounded=1;strokeColor=#F85149;strokeWidth=2;fontSize=10;fontColor=#F85149;flowAnimation=1;dashed=1;endArrow=block;endFill=1;exitX=0.5;exitY=0;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" edge="1" parent="1" source="fastapi_main" target="groq_access">
<mxGeometry relative="1" as="geometry">
<Array as="points">
<mxPoint x="580" y="140" />
<mxPoint x="1405" y="140" />
</Array>
</mxGeometry>
</mxCell>
<!-- 15. CLIP box -> HuggingFace (model weights) -->
<!-- Dashed and not animated. -->
<mxCell id="flow_15" value="&lt;font color=&quot;#E3B341&quot;&gt;model weights (cached)&lt;/font&gt;" style="edgeStyle=orthogonalEdgeStyle;html=1;rounded=1;strokeColor=#E3B341;strokeWidth=2;fontSize=10;fontColor=#E3B341;dashed=1;endArrow=block;endFill=1;" edge="1" parent="1" source="clip_box" target="hf_clip">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<!-- 16a. Base model -> GPT2 Tokenizer -->
<!-- 16a and 16b are thin grey links without animation. -->
<mxCell id="flow_16a" value="" style="edgeStyle=orthogonalEdgeStyle;html=1;rounded=1;strokeColor=#30363D;strokeWidth=1;endArrow=block;endFill=1;" edge="1" parent="1" source="base_model_box" target="gpt2">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<!-- 16b. Spatial model -> GPT2 Tokenizer -->
<mxCell id="flow_16b" value="" style="edgeStyle=orthogonalEdgeStyle;html=1;rounded=1;strokeColor=#30363D;strokeWidth=1;endArrow=block;endFill=1;" edge="1" parent="1" source="spatial_model_box" target="gpt2">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<!-- 17. Conv Manager <-> Ensemble VQA -->
<!-- Double-headed: both startArrow and endArrow are set. -->
<mxCell id="flow_17" value="" style="edgeStyle=orthogonalEdgeStyle;html=1;rounded=1;strokeColor=#7B2FBE;strokeWidth=2;flowAnimation=1;dashed=1;endArrow=block;endFill=1;startArrow=block;startFill=1;" edge="1" parent="1" source="conv_manager" target="ensemble_vqa">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<!-- ===================== PHASE ANNOTATIONS ===================== -->
<!--
  Four numbered captions on one row at y=988, just under the layer columns (which end at y=980).
  Each caption takes the title colour of the layer it sits beneath.
-->
<mxCell id="ann1"
        parent="1"
        vertex="1"
        style="text;html=1;strokeColor=none;fillColor=#0D1117;fontColor=#58A6FF;fontSize=11;fontStyle=1;align=center;"
        value="(1) User uploads image + question">
  <mxGeometry as="geometry" x="100" y="988" width="250" height="28" />
</mxCell>
<mxCell id="ann2"
        parent="1"
        vertex="1"
        style="text;html=1;strokeColor=none;fillColor=#0D1117;fontColor=#3FB950;fontSize=11;fontStyle=1;align=center;"
        value="(2) REST API routes to ensemble">
  <mxGeometry as="geometry" x="460" y="988" width="240" height="28" />
</mxCell>
<mxCell id="ann3"
        parent="1"
        vertex="1"
        style="text;html=1;strokeColor=none;fillColor=#0D1117;fontColor=#FFA657;fontSize=11;fontStyle=1;align=center;"
        value="(3) Neural model answers question">
  <mxGeometry as="geometry" x="860" y="988" width="250" height="28" />
</mxCell>
<mxCell id="ann4"
        parent="1"
        vertex="1"
        style="text;html=1;strokeColor=none;fillColor=#0D1117;fontColor=#BC8CFF;fontSize=11;fontStyle=1;align=center;"
        value="(4) Symbolic + Groq enriches answer">
  <mxGeometry as="geometry" x="1270" y="988" width="260" height="28" />
</mxCell>
</root>
</mxGraphModel>