Spaces:

lablab-ai-amd-developer-hackathon
/

ForgeSight

Sleeping

App Files Files Community

ForgeSight / backend /amd_hackathon /track3_vision.py

rasAli02

git add, commit, push

307f1c5 20 days ago

raw

history blame contribute delete

4.42 kB

	import torch
	from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
	from PIL import Image

	def analyze_construction_site(image_path: str, device: str = "cuda") -> str:
	    """Describe a construction-site image with Qwen2-VL (Track 3).

	    The image is sent to the ``Qwen/Qwen2-VL-7B-Instruct`` vision-language model
	    together with an inspector-style prompt. The generated description is then
	    wrapped by ``_format_for_track2`` so it can be used as the 'Context' input
	    of the Track 2 Compliance Auditor model.

	    Args:
	        image_path: Path of the image file to analyse (e.g. a drone photo).
	        device: Value passed as ``device_map`` when loading the model.

	    Returns:
	        The formatted context string. If the model or processor cannot be
	        loaded, the canned report from ``_mocked_vision_output`` is returned
	        instead and ``image_path`` is never opened.

	    Raises:
	        ValueError: If the image cannot be opened or converted to RGB.
	    """
	    model_id = "Qwen/Qwen2-VL-7B-Instruct"

	    print(f"Loading {model_id} on {device}...")
	    # The model is loaded before the image is touched on purpose: when the
	    # weights are unavailable the function falls back to the mocked report
	    # even if image_path does not exist (the __main__ smoke test relies on it).
	    try:
	        model = Qwen2VLForConditionalGeneration.from_pretrained(
	            model_id,
	            torch_dtype=torch.bfloat16,
	            device_map=device,
	        )
	        processor = AutoProcessor.from_pretrained(model_id)
	    except Exception as e:
	        print(f"Model loading failed (this is expected if weights aren't downloaded): {e}")
	        # Return a mocked structured output for demonstration purposes in the hackathon
	        return _mocked_vision_output()

	    # Load the image. The context manager closes the underlying file as soon
	    # as the RGB copy has been made.
	    try:
	        with Image.open(image_path) as raw_image:
	            image = raw_image.convert("RGB")
	    except Exception as e:
	        # Chain the original error so the root cause stays in the traceback.
	        raise ValueError(f"Could not load image at {image_path}: {e}") from e

	    # Prompt tailored for technical extraction.
	    prompt = (
	        "You are an expert construction site inspector. Describe the structural elements, "
	        "materials, and construction practices visible in this image. Focus on technical "
	        "details like concrete pouring, rebar placement, structural steel connections, "
	        "and any visible environmental exposure factors. Be highly descriptive and objective."
	    )

	    # Qwen2-VL chat format: one user turn holding the image and the text prompt.
	    messages = [
	        {
	            "role": "user",
	            "content": [
	                {"type": "image", "image": image},
	                {"type": "text", "text": prompt},
	            ],
	        }
	    ]

	    # Render the chat template to text only; tokenisation happens in the
	    # processor call below.
	    text = processor.apply_chat_template(
	        messages, tokenize=False, add_generation_prompt=True
	    )
	    # Hand the PIL image straight to the processor, which runs the image
	    # processor itself. Pre-running processor.image_processor(image) and
	    # passing its output back in as `images` would give the processor a
	    # feature dict where it expects an image.
	    inputs = processor(
	        text=[text],
	        images=[image],
	        padding=True,
	        return_tensors="pt",
	    )
	    # Follow the model's actual placement rather than the raw `device` string,
	    # which is a device_map value and need not be a valid tensor device.
	    inputs = inputs.to(model.device)

	    # Generate output
	    print("Analyzing image...")
	    with torch.no_grad():
	        generated_ids = model.generate(**inputs, max_new_tokens=256)

	    # generate() returns prompt + completion; drop the prompt tokens so only
	    # the newly generated text is decoded.
	    generated_ids_trimmed = [
	        out_ids[len(in_ids):]
	        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
	    ]

	    output_text = processor.batch_decode(
	        generated_ids_trimmed,
	        skip_special_tokens=True,
	        clean_up_tokenization_spaces=False,
	    )[0]

	    return _format_for_track2(output_text)

	def _mocked_vision_output() -> str:
	    """Return a canned inspection report for runs without the heavy VLM weights.

	    The fixed description is passed through ``_format_for_track2`` before
	    being returned.
	    """
	    # One entry per sentence; joined with single spaces to form the report.
	    sentences = (
	        "A bridge pier is constructed using concrete.",
	        "Reinforcement bars are visible with approximately 50mm of concrete cover.",
	        "The pier is located directly in a tidal splash zone (marine environment).",
	        "Concrete surface appears to have minor honeycombing at the base.",
	    )
	    return _format_for_track2(" ".join(sentences))

	def _format_for_track2(vision_text: str) -> str:
	"""
	Structures the vision output so it can be seamlessly passed
	as input 'Context' to the fine-tuned 35B model.
	"""
	structured_context = (
	"### VISUAL INSPECTION REPORT (TRACK 3)\n"
	f"{vision_text}\n\n"
	"### TASK\n"
	"Based on the visual inspection report above, identify any violations of structural codes "
	"(e.g., Eurocodes, ASTM, ISO 9001). Provide a label of 'Compliant' or 'Non-Compliant' "
	"followed by a detailed reasoning trace."
	)
	return structured_context

	if __name__ == "__main__":
	    # Smoke test: run the pipeline on a placeholder image path and print the
	    # resulting Track 2 context, or the error if the run fails.
	    test_image = "dummy_construction_site.jpg"
	    print(f"Testing Multimodal Pipeline with {test_image}")
	    try:
	        track2_context = analyze_construction_site(test_image)
	        # A single print with a newline separator emits the same text as
	        # three consecutive print calls would.
	        print(
	            "\n--- Structured Output for Track 2 Model ---\n",
	            track2_context,
	            "\n-------------------------------------------\n",
	            sep="\n",
	        )
	    except Exception as e:
	        print(f"Error: {e}")