Spaces:

DarnClanker
/

TrueFrame

Runtime error

App Files Files Community

NeelakshSaxena committed on Apr 5

Commit

b3b4bd5

1 Parent(s): b9d0270

Replace tiny LLaVA with stable CPU fallback mode

Browse files

Files changed (1) hide show

app.py +79 -99

app.py CHANGED Viewed

@@ -10,7 +10,7 @@ import gradio as gr
 import torch
 from dotenv import load_dotenv
 from PIL import Image, ImageOps
-from transformers import AutoProcessor, LlavaForConditionalGeneration
 ROOT_DIR = Path(__file__).resolve().parent
 SCRIPTS_DIR = ROOT_DIR / "scripts"
@@ -25,25 +25,23 @@ BASE_MODEL_ID = "llava-hf/llava-1.5-7b-hf"
 ADAPTER_PATH = ROOT_DIR / "final-production-weights" / "best_model"
 ADAPTER_REPO_ID = os.getenv("ADAPTER_REPO_ID", "Werrewulf/TMOS-DD")
 ADAPTER_SUBFOLDER = os.getenv("ADAPTER_SUBFOLDER", "")
-MODEL_MODE = os.getenv("MODEL_MODE", "tiny").strip().lower()
-TINY_BASE_MODEL_ID = os.getenv("TINY_BASE_MODEL_ID", "bczhou/tiny-llava-v1-hf")
 TMOS_PROMPT = "USER: <image>\nIs this video real or produced by AI?\nASSISTANT:"
-TINY_PROMPT = "Answer with one word only: Real or Fake."
 TARGET_IMAGE_SIZE = 336
 THRESHOLD = 0.5
 model = None
 processor = None
 inference_device = None
-inference_mode = MODEL_MODE
 def resolve_inference_device(model_obj) -> torch.device:
     if torch.cuda.is_available():
         return torch.device("cuda")
-    # With device_map='auto', some parameters can live on 'meta' while offloaded.
-    # For CPU Spaces, inputs must stay on CPU.
     device_map = getattr(model_obj, "hf_device_map", None)
     if isinstance(device_map, dict):
         for mapped in device_map.values():
@@ -117,36 +115,16 @@ def load_remote_adapter_config(repo_id: str, subfolder: str) -> dict | None:
 def select_torch_dtype() -> torch.dtype:
     if torch.cuda.is_available():
         return torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
-    # float16 on CPU is numerically unstable for this model and can produce NaNs.
     return torch.float32
-def load_model_and_processor():
     global model, processor, inference_device
-    if model is not None and processor is not None and inference_device is not None:
-        return model, processor, inference_device
-    if MODEL_MODE == "tiny":
-        dtype = select_torch_dtype()
-        print(f"Loading low-memory tiny model from {TINY_BASE_MODEL_ID} with dtype={dtype}...")
-        model = LlavaForConditionalGeneration.from_pretrained(
-            TINY_BASE_MODEL_ID,
-            torch_dtype=dtype,
-            low_cpu_mem_usage=True,
-            device_map="auto",
-            token=HF_TOKEN,
         )
-        model.eval()
-        processor = AutoProcessor.from_pretrained(TINY_BASE_MODEL_ID, token=HF_TOKEN)
-        processor.patch_size = 14
-        processor.vision_feature_select_strategy = "default"
-        inference_device = resolve_inference_device(model)
-        print(f"TMOS-DD tiny fallback ready on {inference_device}.")
-        return model, processor, inference_device
     from peft import PeftModel
     from tmos_classifier import TMOSClassifier
@@ -225,7 +203,6 @@ def load_model_and_processor():
                             "Classifier weights did not change after loading adapter; adapter likely incompatible."
                         )
-                # Merge LoRA into the base network so inference always uses adapted weights.
                 model = loaded_model.merge_and_unload()
                 selected_subfolder = subfolder
                 print(
@@ -250,16 +227,43 @@ def load_model_and_processor():
         print(f"Loaded TMOS local adapter (lora_layers={lora_layer_count})")
     model.eval()
     processor = AutoProcessor.from_pretrained(BASE_MODEL_ID, token=HF_TOKEN)
     processor.patch_size = 14
     processor.vision_feature_select_strategy = "default"
     inference_device = resolve_inference_device(model)
     if adapter_source == ADAPTER_REPO_ID:
         print(f"TMOS-DD ready on {inference_device} using remote subfolder '{selected_subfolder or '.'}'.")
     else:
         print(f"TMOS-DD ready on {inference_device} using local production adapter.")
     return model, processor, inference_device
@@ -293,28 +297,33 @@ def confidence_card(prob_fake: float, label: str) -> str:
     """
-def score_binary_logits(logits: torch.Tensor, tokenizer) -> tuple[float, str]:
-    fake_ids = tokenizer(" Fake", add_special_tokens=False).input_ids or tokenizer("Fake", add_special_tokens=False).input_ids
-    real_ids = tokenizer(" Real", add_special_tokens=False).input_ids or tokenizer("Real", add_special_tokens=False).input_ids
-    if not fake_ids or not real_ids:
-        return 0.5, "Real"
-    fake_id = fake_ids[0]
-    real_id = real_ids[0]
-    candidate_logits = torch.tensor([logits[real_id].item(), logits[fake_id].item()], dtype=torch.float32)
-    probs = torch.softmax(candidate_logits, dim=0)
-    prob_fake = float(probs[1].item())
-    label = "Fake" if prob_fake >= THRESHOLD else "Real"
-    return prob_fake, label
-def build_prompt(processor_obj, user_text: str) -> str:
-    if MODEL_MODE == "tiny":
-        # Tiny LLaVA forward expects an explicit image placeholder token.
-        return f"<image>\n{user_text}"
-    return user_text
 def infer_image(image: Image.Image):
@@ -323,27 +332,7 @@ def infer_image(image: Image.Image):
             return None, "Error: please upload an image.", None, None, None, "<div style='color:#f87171;'>Please upload an image before running detection.</div>"
         model_obj, processor_obj, device = load_model_and_processor()
         prepared_image = preprocess_image(image)
-        prompt_text = build_prompt(processor_obj, TINY_PROMPT if MODEL_MODE == "tiny" else TMOS_PROMPT)
-        inputs = processor_obj(text=prompt_text, images=prepared_image, return_tensors="pt", padding=True)
-        inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
-        if MODEL_MODE == "tiny":
-            image_token_id = getattr(model_obj.config, "image_token_index", None)
-            if image_token_id is not None and "input_ids" in inputs:
-                image_token_count = int((inputs["input_ids"] == image_token_id).sum().item())
-                if image_token_count == 0:
-                    # Defensive recovery if tokenizer template/path strips the image placeholder.
-                    fallback_prompt = f"<image>\n{TINY_PROMPT}"
-                    inputs = processor_obj(text=fallback_prompt, images=prepared_image, return_tensors="pt", padding=True)
-                    inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
-        # Keep pixel dtype stable across CPU/GPU to avoid backend kernel errors.
-        if "pixel_values" in inputs:
-            inputs["pixel_values"] = inputs["pixel_values"].to(
-                dtype=select_torch_dtype() if device.type == "cuda" else torch.float32
-            )
         autocast_context = (
             torch.autocast(device_type="cuda", dtype=select_torch_dtype())
@@ -353,44 +342,35 @@ def infer_image(image: Image.Image):
         start_time = time.perf_counter()
         with torch.inference_mode(), autocast_context:
-            if MODEL_MODE == "tiny":
-                outputs = model_obj(
-                    input_ids=inputs["input_ids"],
-                    pixel_values=inputs["pixel_values"],
-                    attention_mask=inputs.get("attention_mask"),
-                    return_dict=True,
-                )
-            else:
                 outputs = model_obj(
                     input_ids=inputs["input_ids"],
                     pixel_values=inputs["pixel_values"],
                     attention_mask=inputs["attention_mask"],
                 )
         if device.type == "cuda":
             torch.cuda.synchronize()
         elapsed_ms = (time.perf_counter() - start_time) * 1000.0
-        if MODEL_MODE == "tiny":
-            next_token_logits = outputs.logits[:, -1, :].squeeze(0).detach().float().cpu()
-            prob_fake, label = score_binary_logits(next_token_logits, processor_obj.tokenizer)
-            logit = float(torch.logit(torch.tensor(prob_fake, dtype=torch.float32), eps=1e-6).item())
-        else:
-            logit = float(outputs["logit"].squeeze().detach().float().cpu().item())
-            if not math.isfinite(logit):
-                raise gr.Error("Model produced a non-finite logit (NaN/Inf). Please retry.")
-            prob_fake = float(torch.sigmoid(torch.tensor(logit)).item())
-            if not math.isfinite(prob_fake):
-                raise gr.Error("Model produced a non-finite probability (NaN/Inf). Please retry.")
-            label = "Fake" if prob_fake >= THRESHOLD else "Real"
         if not math.isfinite(prob_fake):
             raise gr.Error("Model produced a non-finite probability (NaN/Inf). Please retry.")
-        confidence = prob_fake if label == "Fake" else 1.0 - prob_fake
         return prepared_image, label, round(prob_fake, 6), round(confidence * 100.0, 2), round(elapsed_ms, 2), confidence_card(prob_fake, label)
     except Exception as exc:
         err = f"Inference failed: {type(exc).__name__}: {exc}"
@@ -402,9 +382,9 @@ load_model_and_processor()
 with gr.Blocks(title="TMOS Deepfake Detector", theme=gr.themes.Soft()) as demo:
     demo_description = (
-        "Low-memory fallback mode using tiny LLaVA for stable Space execution.\n\n"
-        if MODEL_MODE == "tiny"
-        else "Research demo for image-based deepfake detection with a deterministic classification head on top of LLaVA-1.5-7B.\n\n"
     )
     gr.Markdown(
         "# TMOS Deepfake Detector\n"

 import torch
 from dotenv import load_dotenv
 from PIL import Image, ImageOps
+from transformers import AutoProcessor, AutoImageProcessor, AutoModelForImageClassification
 ROOT_DIR = Path(__file__).resolve().parent
 SCRIPTS_DIR = ROOT_DIR / "scripts"
 ADAPTER_PATH = ROOT_DIR / "final-production-weights" / "best_model"
 ADAPTER_REPO_ID = os.getenv("ADAPTER_REPO_ID", "Werrewulf/TMOS-DD")
 ADAPTER_SUBFOLDER = os.getenv("ADAPTER_SUBFOLDER", "")
+MODEL_MODE = os.getenv("MODEL_MODE", "cpu-fallback").strip().lower()
+CPU_FALLBACK_MODEL_ID = os.getenv("CPU_FALLBACK_MODEL_ID", "DaMsTaR/Detecto-DeepFake_Image_Detector")
 TMOS_PROMPT = "USER: <image>\nIs this video real or produced by AI?\nASSISTANT:"
 TARGET_IMAGE_SIZE = 336
 THRESHOLD = 0.5
 model = None
 processor = None
 inference_device = None
 def resolve_inference_device(model_obj) -> torch.device:
     if torch.cuda.is_available():
         return torch.device("cuda")
     device_map = getattr(model_obj, "hf_device_map", None)
     if isinstance(device_map, dict):
         for mapped in device_map.values():
 def select_torch_dtype() -> torch.dtype:
     """Pick the tensor dtype for the current hardware.

     Returns ``torch.float32`` when CUDA is unavailable. On CUDA, returns
     ``torch.bfloat16`` if the device reports bf16 support, else
     ``torch.float16``.
     """
     if not torch.cuda.is_available():
         return torch.float32
     if torch.cuda.is_bf16_supported():
         return torch.bfloat16
     return torch.float16
+def load_tmos_model():
     global model, processor, inference_device
+    if not torch.cuda.is_available():
+        raise RuntimeError(
+            "TMOS mode requires GPU hardware. Set MODEL_MODE=cpu-fallback for free-tier CPU execution."
         )
     from peft import PeftModel
     from tmos_classifier import TMOSClassifier
                             "Classifier weights did not change after loading adapter; adapter likely incompatible."
                         )
                 model = loaded_model.merge_and_unload()
                 selected_subfolder = subfolder
                 print(
         print(f"Loaded TMOS local adapter (lora_layers={lora_layer_count})")
     model.eval()
     processor = AutoProcessor.from_pretrained(BASE_MODEL_ID, token=HF_TOKEN)
     processor.patch_size = 14
     processor.vision_feature_select_strategy = "default"
     inference_device = resolve_inference_device(model)
     if adapter_source == ADAPTER_REPO_ID:
         print(f"TMOS-DD ready on {inference_device} using remote subfolder '{selected_subfolder or '.'}'.")
     else:
         print(f"TMOS-DD ready on {inference_device} using local production adapter.")
+def load_cpu_fallback_model():
+    global model, processor, inference_device
+    print(f"Loading CPU fallback model from {CPU_FALLBACK_MODEL_ID}...")
+    processor = AutoImageProcessor.from_pretrained(CPU_FALLBACK_MODEL_ID, token=HF_TOKEN)
+    model = AutoModelForImageClassification.from_pretrained(
+        CPU_FALLBACK_MODEL_ID,
+        torch_dtype=torch.float32,
+        low_cpu_mem_usage=True,
+        token=HF_TOKEN,
+    )
+    model.to("cpu").eval()
+    inference_device = torch.device("cpu")
+    print("CPU fallback classifier ready.")
+def load_model_and_processor():
+    global model, processor, inference_device
+    if model is not None and processor is not None and inference_device is not None:
+        return model, processor, inference_device
+    if MODEL_MODE == "tmos":
+        load_tmos_model()
+    else:
+        load_cpu_fallback_model()
     return model, processor, inference_device
     """
+def score_fallback_logits(logits: torch.Tensor, id2label: dict) -> tuple[float, str]:
+    probs = torch.softmax(logits.float(), dim=0)
+    fake_indices = []
+    real_indices = []
+    for idx in range(len(probs)):
+        label = str(id2label.get(idx, "")).lower()
+        if any(key in label for key in ["fake", "deepfake", "ai", "synthetic"]):
+            fake_indices.append(idx)
+        if any(key in label for key in ["real", "authentic", "genuine"]):
+            real_indices.append(idx)
+    if len(probs) == 2 and not fake_indices and not real_indices:
+        fake_indices = [1]
+        real_indices = [0]
+    fake_prob = float(probs[fake_indices].sum().item()) if fake_indices else 0.0
+    real_prob = float(probs[real_indices].sum().item()) if real_indices else 0.0
+    total = fake_prob + real_prob
+    if total > 0:
+        prob_fake = fake_prob / total
+    else:
+        prob_fake = float(probs.max().item()) if len(probs) == 1 else float(probs[1].item()) if len(probs) > 1 else 0.5
+    label = "Fake" if prob_fake >= THRESHOLD else "Real"
+    return prob_fake, label
 def infer_image(image: Image.Image):
             return None, "Error: please upload an image.", None, None, None, "<div style='color:#f87171;'>Please upload an image before running detection.</div>"
         model_obj, processor_obj, device = load_model_and_processor()
         prepared_image = preprocess_image(image)
         autocast_context = (
             torch.autocast(device_type="cuda", dtype=select_torch_dtype())
         start_time = time.perf_counter()
         with torch.inference_mode(), autocast_context:
+            if MODEL_MODE == "tmos":
+                inputs = processor_obj(text=TMOS_PROMPT, images=prepared_image, return_tensors="pt", padding=True)
+                inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
                 outputs = model_obj(
                     input_ids=inputs["input_ids"],
                     pixel_values=inputs["pixel_values"],
                     attention_mask=inputs["attention_mask"],
                 )
+                logit = float(outputs["logit"].squeeze().detach().float().cpu().item())
+                if not math.isfinite(logit):
+                    raise gr.Error("Model produced a non-finite logit (NaN/Inf). Please retry.")
+                prob_fake = float(torch.sigmoid(torch.tensor(logit)).item())
+                label = "Fake" if prob_fake >= THRESHOLD else "Real"
+            else:
+                inputs = processor_obj(images=prepared_image, return_tensors="pt")
+                inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
+                outputs = model_obj(**inputs)
+                logits = outputs.logits.squeeze(0).detach().float().cpu()
+                id2label = getattr(model_obj.config, "id2label", {}) or {}
+                prob_fake, label = score_fallback_logits(logits, id2label)
         if device.type == "cuda":
             torch.cuda.synchronize()
         elapsed_ms = (time.perf_counter() - start_time) * 1000.0
         if not math.isfinite(prob_fake):
             raise gr.Error("Model produced a non-finite probability (NaN/Inf). Please retry.")
+        confidence = prob_fake if label == "Fake" else 1.0 - prob_fake
         return prepared_image, label, round(prob_fake, 6), round(confidence * 100.0, 2), round(elapsed_ms, 2), confidence_card(prob_fake, label)
     except Exception as exc:
         err = f"Inference failed: {type(exc).__name__}: {exc}"
 with gr.Blocks(title="TMOS Deepfake Detector", theme=gr.themes.Soft()) as demo:
     demo_description = (
+        "TMOS mode (GPU required) enabled.\n\n"
+        if MODEL_MODE == "tmos"
+        else f"CPU fallback mode using {CPU_FALLBACK_MODEL_ID}.\n\n"
     )
     gr.Markdown(
         "# TMOS Deepfake Detector\n"