# vqa-backend / draft_generator.py
# Author: Deva8 — "Deploy VQA Space with model downloader" (commit bb8f662)
import subprocess
import os
# Mermaid "graph TD" flowchart describing the VQA system architecture:
# mobile app input -> PIL preprocessing -> CLIP image features + GPT-2 question
# tokens -> a spatial/base model routing decision -> decoded neural answer,
# plus a Wikidata/Groq knowledge-graph branch and a Groq accessibility-audio
# branch, all returned to the app as JSON via FastAPI.
# The literal is runtime data (written to disk and embedded in HTML below),
# so its content must not be reformatted.
mermaid_code = """
graph TD
%% Styling
classDef default fill:#1A1A1A,stroke:#444,stroke-width:2px,color:#FFF,rx:8px,ry:8px,font-family:arial;
classDef mobile fill:#003366,stroke:#0055AA,stroke-width:2px,color:#FFF;
classDef preproc fill:#333333,stroke:#555,stroke-width:2px,color:#FFF;
classDef model fill:#4B0082,stroke:#8A2BE2,stroke-width:2px,color:#FFF;
classDef condition fill:#2B2B2B,stroke:#F4A460,stroke-width:2px,color:#FFF,shape:rhombus;
classDef external fill:#004d00,stroke:#009900,stroke-width:2px,color:#FFF;
classDef final fill:#660000,stroke:#CC0000,stroke-width:2px,color:#FFF;
%% Nodes
UserApp[πŸ“± Mobile App]:::mobile
ImgUpload[πŸ–ΌοΈ Image]:::preproc
Question[⌨️ Question Text]:::preproc
PIL[🐍 PIL Preprocessing<br/>RGB conversion]:::preproc
CLIP[πŸ‘οΈ OpenAI CLIP ViT-B/32<br/>Image Features 512-dim]:::model
GPT2[πŸ€— DistilGPT-2<br/>Tokenized Question]:::model
Route1{Question<br/>spatial?}:::condition
Spatial[πŸ“ Spatial VQA Model<br/>8-head attention]:::model
Base[🧠 Base VQA Model<br/>General VQA]:::model
Decoder[πŸ€— GPT-2 Decoder<br/>vocab decode]:::model
NeuralAns[πŸ’¬ Neural Answer]:::final
Route2{Knowledge<br/>question?}:::condition
ObjDet[πŸ‘οΈ CLIP Object Detector<br/>Top-3 objects]:::model
Wikidata[🌍 Wikidata SPARQL<br/>P31, P186, P366]:::external
GroqV[⚑ Groq Llama-3.3<br/>Verbalizer]:::external
KGAns[🧩 KG Enhancement]:::final
FastAPI[πŸš€ FastAPI]:::preproc
GroqA[⚑ Groq Llama-3.3<br/>Accessibility]:::external
Audio[πŸ”Š 2-sentence description]:::final
%% Edges
UserApp -- "Image uploaded" --> ImgUpload
UserApp -- "Question typed" --> Question
ImgUpload --> PIL
PIL --> CLIP
Question --> GPT2
CLIP & GPT2 --> Route1
Route1 -- "YES" --> Spatial
Route1 -- "NO" --> Base
Spatial & Base -- "Beam search (width=5)" --> Decoder
Decoder --> NeuralAns
CLIP -- "Anchor similarity" --> Route2
Route2 -- "YES" --> ObjDet
ObjDet -- "Detected objects" --> Wikidata
Wikidata -- "Structured facts" --> GroqV
GroqV --> KGAns
FastAPI -- "Narration request" --> GroqA
GroqA --> Audio
NeuralAns & KGAns & Audio -- "JSON output" --> FastAPI
FastAPI --> UserApp
"""
# Output location for the raw Mermaid source.
# NOTE(review): hard-coded Windows user path — parameterize (CLI arg or env var)
# before reusing this script on another machine.
file_path = r"C:\Users\rdeva\Downloads\vqa_coes\architecture_draft.mmd"
# Create the target directory if missing; otherwise open(..., "w") raises
# FileNotFoundError on a fresh machine.
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with open(file_path, "w", encoding="utf-8") as f:
    f.write(mermaid_code)
print(f"Mermaid file saved to {file_path}")
# Note: In a real environment, we would use mermaid-cli (mmdc) to convert this to SVG/PNG.
# Since it might not be installed globally, we will just provide the mermaid file and
# instructions, or generate an HTML wrapper that renders it in browser.
# Output location for a self-contained HTML viewer that renders the diagram
# client-side via the Mermaid ESM bundle from the jsDelivr CDN, so no local
# mermaid-cli installation is required.
# NOTE(review): hard-coded Windows user path — parameterize before reuse.
html_path = r"C:\Users\rdeva\Downloads\vqa_coes\architecture_draft.html"
# f-string: doubled braces {{ }} are literal braces in the emitted JS/CSS;
# {mermaid_code} interpolates the diagram source into the <div class="mermaid">.
html_content = f"""
<!DOCTYPE html>
<html>
<head>
<title>VQA Architecture Draft</title>
<script type="module">
import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.esm.min.mjs';
mermaid.initialize({{ startOnLoad: true, theme: 'dark', flowchart: {{ curve: 'basis' }} }});
</script>
<style>
body {{ background-color: #0D1117; color: white; font-family: sans-serif; display: flex; justify-content: center; padding: 20px; }}
.mermaid {{ background-color: #161B22; padding: 20px; border-radius: 10px; box-shadow: 0 4px 8px rgba(0,0,0,0.5); }}
</style>
</head>
<body>
<div class="mermaid">
{mermaid_code}
</div>
</body>
</html>
"""
# Create the target directory if missing; otherwise open(..., "w") raises
# FileNotFoundError on a fresh machine.
os.makedirs(os.path.dirname(html_path), exist_ok=True)
with open(html_path, "w", encoding="utf-8") as f:
    f.write(html_content)
print(f"HTML viewer saved to {html_path}")