# GLM-4.6V-Flash multimodal chat demo (Hugging Face Spaces app.py).
# NOTE: Spaces-viewer page metadata ("Runtime error", file size, blame hashes)
# was stripped from this scrape; see the Space's git history for provenance.
# ==========================================
# CRITICAL: Monkey patch BEFORE any imports that use transformers
# This fixes: TypeError: argument of type 'NoneType' is not iterable
#
# In affected transformers versions, video_processor_class_from_name() in
# transformers/models/auto/video_processing_auto.py does
# `if class_name in extractors:` while the module-level `extractors` is None.
# The crash happens inside that function call, NOT at module import time, so
# the module can be imported safely, patched, and left cached in sys.modules
# before gradio (or anything else) transitively imports it.
# ==========================================
import sys


def patch_video_extractors() -> bool:
    """Ensure transformers' video_processing_auto module has a usable
    ``extractors`` mapping.

    Returns:
        True when the module was found (already loaded, or importable) and
        now carries a dict-valued ``extractors``; False when transformers is
        unavailable or too old to have this module.
    """
    mod_name = "transformers.models.auto.video_processing_auto"
    mod = sys.modules.get(mod_name)
    if mod is None:
        try:
            # Safe to import eagerly: the failure is inside
            # video_processor_class_from_name, not at module import time.
            import importlib

            mod = importlib.import_module(mod_name)
        except ImportError:
            # transformers not installed, or this sub-module doesn't exist
            # in the installed version; nothing to patch.
            return False
    # Only repair a broken (missing or None) mapping — never clobber a
    # valid extractors dict that a healthy transformers build provides.
    if getattr(mod, "extractors", None) is None:
        mod.extractors = {}
    return True


# Apply the patch NOW, before gradio/transformers imports below.
patch_video_extractors()
import gradio as gr
import torch
from PIL import Image
import os
from transformers import AutoProcessor, Glm4vForConditionalGeneration
# Configuration
MODEL_PATH = "zai-org/GLM-4.6V-Flash"

# Load Model (downloads weights on first run; requires network + GPU/CPU RAM).
print(f"Loading model: {MODEL_PATH}...")
try:
    processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
    model = Glm4vForConditionalGeneration.from_pretrained(
        MODEL_PATH,
        torch_dtype=torch.bfloat16,  # bf16 halves memory vs fp32
        low_cpu_mem_usage=True,
        trust_remote_code=True,
        device_map="auto",  # shard/placement across available devices
    )
    print("Model loaded successfully.")
except Exception as e:
    print(f"Error loading model: {e}")
    # If it failed, print the extractors state for debugging logs — this is
    # the exact field the pre-import monkey patch above is meant to repair.
    try:
        import transformers.models.auto.video_processing_auto as vpa

        print(f"DEBUG: vpa.extractors is {getattr(vpa, 'extractors', 'MISSING')}")
    except Exception:
        # Best-effort debug only. Was a bare `except:`, which also swallowed
        # SystemExit/KeyboardInterrupt; Exception keeps those propagating.
        pass
    raise
def predict(image, text, history_state):
    """Run one multimodal chat turn against the loaded GLM-4V model.

    Args:
        image: Optional PIL image attached to this turn.
        text: The user's message text (may be empty if only an image is sent).
        history_state: Prior turns as a list of dicts with "user"/"assistant"
            content lists, or None on the very first call.

    Returns:
        A ``(reply_text, updated_history)`` tuple for the Gradio outputs.
    """
    if not text and not image:
        return "Please upload an image or enter text.", history_state

    # Initialize history if None (first run)
    if history_state is None:
        history_state = []

    # Replay prior turns (user message, then assistant reply, per turn) so
    # the model sees the whole conversation; skip malformed entries.
    conversation = [
        {"role": role, "content": turn[role]}
        for turn in history_state
        if isinstance(turn, dict) and "user" in turn and "assistant" in turn
        for role in ("user", "assistant")
    ]

    # Assemble the current user turn from whichever parts were provided.
    current_turn = []
    if image is not None:
        current_turn.append({"type": "image", "image": image})
    if text:
        current_turn.append({"type": "text", "text": text})
    conversation.append({"role": "user", "content": current_turn})

    try:
        # Tokenize the full conversation with the chat template.
        model_inputs = processor.apply_chat_template(
            conversation,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(model.device)

        # token_type_ids sometimes trips up generate(); drop it if present.
        model_inputs.pop("token_type_ids", None)

        prompt_len = model_inputs["input_ids"].shape[1]
        with torch.no_grad():
            generated = model.generate(
                **model_inputs,
                max_new_tokens=1024,
                do_sample=True,
                temperature=0.7,
            )

        # Decode only the newly generated tail, not the echoed prompt.
        reply = processor.decode(
            generated[0][prompt_len:],
            skip_special_tokens=True,
        )

        # Persist this exchange for the next turn.
        history_state.append(
            {
                "user": current_turn,
                "assistant": [{"type": "text", "text": reply}],
            }
        )
        return reply, history_state
    except Exception as e:
        return f"Error during generation: {str(e)}", history_state
# Create Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown(f"# {MODEL_PATH}")
    gr.Markdown("Multimodal chat with conversation history support.")
    # Proper state initialization
    # gr.State holds the per-session conversation history (list of turn
    # dicts) that predict() reads and extends on every submit.
    state = gr.State([])
    with gr.Row():
        with gr.Column():
            # Left column: inputs and actions.
            image_input = gr.Image(type="pil", label="Upload Image (Optional)")
            text_input = gr.Textbox(label="Message", placeholder="Enter text here...")
            submit_btn = gr.Button("Submit", variant="primary")
            clear_btn = gr.Button("Clear Conversation")
        with gr.Column():
            # Right column: model reply rendered as markdown.
            output = gr.Markdown(label="Response")
    # Submit: predict consumes (image, text, history) and returns
    # (reply markdown, updated history).
    submit_btn.click(
        fn=predict,
        inputs=[image_input, text_input, state],
        outputs=[output, state]
    )
    # Clear: reset both the stored history and the visible response area.
    clear_btn.click(lambda: ([], "Conversation cleared."), outputs=[state, output])
    gr.Markdown("""
    ### API Usage
    Supports multimodal inputs (text + image).
    **Note**: Includes pre-import monkey patch for transformers `video_processing_auto.extractors` bug.
    """)

# Launch the server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()