# Hugging Face Spaces app. (The original paste was prefixed with the Spaces
# UI banner "Spaces: Runtime error" — log text, not part of the program.)
# ==========================================
# CRITICAL: Monkey patch BEFORE any imports that use transformers
# This fixes: TypeError: argument of type 'NoneType' is not iterable
#
# On some transformers builds, the module-level `extractors` mapping in
# transformers/models/auto/video_processing_auto.py is None, so
# `video_processor_class_from_name` crashes on `if class_name in extractors:`.
# ==========================================
import sys
import types  # kept for compatibility; the rest of the file may rely on it

# The crash happens inside a *function* of the module, not at module import
# time, so it is safe to import the module here and patch it in place. Doing
# this before the `gradio`/`transformers` top-level imports below guarantees
# that every later importer receives the patched module from sys.modules.
try:
    import transformers.models.auto.video_processing_auto as _vpa

    # Only repair a missing/None mapping. Never clobber a dict that a healthy
    # transformers install already populated (the original code reset it to {}
    # unconditionally when the module was pre-imported, wiping valid entries).
    if getattr(_vpa, "extractors", None) is None:
        _vpa.extractors = {}
except ImportError:
    # transformers is not installed (or its module layout changed):
    # nothing to patch — the real import below will surface the error.
    pass
| import gradio as gr | |
| import torch | |
| from PIL import Image | |
| import os | |
| from transformers import AutoProcessor, Glm4vForConditionalGeneration | |
# Configuration: model repository on the Hugging Face Hub.
MODEL_PATH = "zai-org/GLM-4.6V-Flash"

# Load Model at module level so `processor` / `model` are available to the
# Gradio handlers below. Failures are logged with extra context, then re-raised
# so the Space shows the real error instead of starting half-initialized.
print(f"Loading model: {MODEL_PATH}...")
try:
    processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
    model = Glm4vForConditionalGeneration.from_pretrained(
        MODEL_PATH,
        torch_dtype=torch.bfloat16,  # bf16 halves memory vs fp32
        low_cpu_mem_usage=True,
        trust_remote_code=True,
        device_map="auto",           # shard across whatever devices exist
    )
    print("Model loaded successfully.")
except Exception as e:
    print(f"Error loading model: {e}")
    # If it failed, print the extractors state for debugging logs
    # (helps confirm whether the pre-import patch above took effect).
    try:
        import transformers.models.auto.video_processing_auto as vpa
        print(f"DEBUG: vpa.extractors is {getattr(vpa, 'extractors', 'MISSING')}")
    except ImportError:
        # Was a bare `except:`, which also swallowed SystemExit/KeyboardInterrupt;
        # an ImportError is the only failure this debug re-import can produce.
        pass
    raise
def predict(image, text, history_state):
    """Run one multimodal chat turn against the loaded GLM-4V model.

    Args:
        image: Optional PIL image for the current turn (may be None).
        text: User message text (may be empty).
        history_state: Gradio session state — a list of prior turns shaped
            {"user": <content list>, "assistant": <content list>}, or None
            on the very first call.

    Returns:
        (reply_text, updated_history) — on failure, reply_text carries the
        error message and the history is returned unchanged.
    """
    if not text and not image:
        return "Please upload an image or enter text.", history_state

    # Gradio hands us None before the state has ever been set.
    history = history_state if history_state is not None else []

    # Replay prior turns so the model sees the whole conversation.
    messages = []
    for turn in history:
        if isinstance(turn, dict) and "user" in turn and "assistant" in turn:
            messages.append({"role": "user", "content": turn["user"]})
            messages.append({"role": "assistant", "content": turn["assistant"]})

    # Assemble the current user turn: image part first, then text part.
    content = []
    if image is not None:
        content.append({"type": "image", "image": image})
    if text:
        content.append({"type": "text", "text": text})
    messages.append({"role": "user", "content": content})

    try:
        model_inputs = processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(model.device)

        # token_type_ids sometimes trips generate(); drop it when present.
        model_inputs.pop("token_type_ids", None)

        with torch.no_grad():
            generated = model.generate(
                **model_inputs,
                max_new_tokens=1024,
                do_sample=True,
                temperature=0.7,
            )

        # Decode only the newly generated tokens, skipping the prompt prefix.
        prompt_len = model_inputs["input_ids"].shape[1]
        output_text = processor.decode(
            generated[0][prompt_len:],
            skip_special_tokens=True,
        )

        # Record the completed turn in the session history (in place).
        history.append({
            "user": content,
            "assistant": [{"type": "text", "text": output_text}],
        })
        return output_text, history
    except Exception as e:
        return f"Error during generation: {str(e)}", history
# ---- Gradio interface ----------------------------------------------------
# Component creation order matters inside gr.Blocks (it defines the layout),
# so the structure below mirrors the conversation flow top-to-bottom.
with gr.Blocks() as demo:
    gr.Markdown(f"# {MODEL_PATH}")
    gr.Markdown("Multimodal chat with conversation history support.")

    # Per-session conversation history; starts as an empty list.
    chat_state = gr.State([])

    with gr.Row():
        with gr.Column():
            image_box = gr.Image(type="pil", label="Upload Image (Optional)")
            message_box = gr.Textbox(label="Message", placeholder="Enter text here...")
            send_button = gr.Button("Submit", variant="primary")
            reset_button = gr.Button("Clear Conversation")
        with gr.Column():
            response_view = gr.Markdown(label="Response")

    send_button.click(
        fn=predict,
        inputs=[image_box, message_box, chat_state],
        outputs=[response_view, chat_state],
    )

    def _clear_conversation():
        # Fresh history plus a confirmation message for the response pane.
        return [], "Conversation cleared."

    reset_button.click(_clear_conversation, outputs=[chat_state, response_view])

    gr.Markdown("""
### API Usage
Supports multimodal inputs (text + image).
**Note**: Includes pre-import monkey patch for transformers `video_processing_auto.extractors` bug.
""")

if __name__ == "__main__":
    demo.launch()