import gradio as gr
from transformers import pipeline

# ----------------------------
# Load TEXT detector (upgradeable)
# ----------------------------
TEXT_MODEL_ID = "wangkevin02/AI_Detect_Model"  # swap if you try another model
text_pipe = pipeline("text-classification", model=TEXT_MODEL_ID)
def _canonical(label: str) -> str | None:
    """Map raw label names to 'AI' or 'HUMAN' when possible."""
    if not label:
        return None
    l = label.strip().lower()
    # Common explicit names
    if any(k in l for k in ["ai", "machine", "generated", "fake", "synthetic", "gpt"]):
        return "AI"
    if any(k in l for k in ["human", "real", "authentic", "organic"]):
        return "HUMAN"
    # Try LABEL_X -> use id2label if present
    if l.startswith("label_"):
        try:
            idx = int(l.split("_")[-1])
        except ValueError:
            return None
        id2label = getattr(text_pipe.model.config, "id2label", None)
        if isinstance(id2label, dict) and idx in id2label:
            return _canonical(str(id2label[idx]))
    # Sometimes labels are just "0"/"1"
    if l in {"0", "1"}:
        id2label = getattr(text_pipe.model.config, "id2label", None)
        if isinstance(id2label, dict):
            mapped = id2label.get(int(l))
            if mapped:
                return _canonical(str(mapped))
    return None
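
# Illustrative mappings (the label strings below are examples, not guaranteed
# outputs of the model loaded above):
#   _canonical("Fake")     -> "AI"         (keyword match)
#   _canonical("Human")    -> "HUMAN"      (keyword match)
#   _canonical("LABEL_1")  -> whatever config.id2label[1] resolves to, or None
#   _canonical("mystery")  -> None         (nothing matched)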
def _aggregate_probs(raw_results):
    """
    Convert pipeline outputs into {'AI': p, 'HUMAN': p, 'raw': {...}} robustly.

    Ensures both keys exist and sum <= 1.0 (may be < 1 if labels don't map).
    """
    # text-classification with top_k=None returns a list of dicts,
    # e.g. [{'label': 'AI', 'score': 0.82}, {'label': 'HUMAN', 'score': 0.18}]
    if isinstance(raw_results, list) and raw_results and isinstance(raw_results[0], dict):
        label_scores = {d["label"]: float(d["score"]) for d in raw_results}
    elif isinstance(raw_results, list) and raw_results and isinstance(raw_results[0], list):
        # return_all_scores=True style: [[{label, score}, {label, score}, ...]]
        label_scores = {d["label"]: float(d["score"]) for d in raw_results[0]}
    else:
        label_scores = {}

    ai_p = 0.0
    human_p = 0.0
    for lbl, sc in label_scores.items():
        canon = _canonical(lbl)
        if canon == "AI":
            ai_p += sc
        elif canon == "HUMAN":
            human_p += sc

    # If nothing accumulated a nonzero score, fall back to the top label.
    # (Note this only helps when the top label itself is mappable; fully
    # unmapped labels leave both probabilities at 0 and surface via 'raw'.)
    if ai_p == 0.0 and human_p == 0.0 and label_scores:
        top_lbl = max(label_scores, key=label_scores.get)
        top_sc = label_scores[top_lbl]
        canon = _canonical(top_lbl)
        if canon == "AI":
            ai_p = top_sc
            human_p = 1.0 - top_sc
        elif canon == "HUMAN":
            human_p = top_sc
            ai_p = 1.0 - top_sc
    return {"AI": round(ai_p, 6), "HUMAN": round(human_p, 6), "raw": label_scores}
def _verdict(ai_p: float, human_p: float, n_words: int) -> str:
    conf = max(ai_p, human_p)
    if n_words < 120:
        band = "LOW (short text)"
    elif conf < 0.60:
        band = "LOW (uncertain)"
    elif conf < 0.80:
        band = "MEDIUM"
    else:
        band = "HIGH"
    if ai_p > human_p:
        return f"🤖 Likely AI - Confidence: {band}"
    elif human_p > ai_p:
        return f"🙂 Likely Human - Confidence: {band}"
    else:
        return "❓ Uncertain - Confidence: LOW"
def detect_text(input_text: str):
    text = (input_text or "").strip()
    if not text:
        return {}, "❌ Please enter some text."
    try:
        # Get ALL label scores so we can map correctly; truncate to the model's
        # max length so very long inputs don't raise a tensor-size error.
        results = text_pipe(text, top_k=None, truncation=True)
        agg = _aggregate_probs(results)
        ai_p, human_p = float(agg["AI"]), float(agg["HUMAN"])
        # Round for display, but keep the raw mapping available too
        probs_out = {
            "AI-generated": round(ai_p, 4),
            "Human-written": round(human_p, 4),
        }
        # Optional: include raw labels so you can debug mappings in the UI
        # probs_out.update({f"raw::{k}": round(v, 4) for k, v in agg["raw"].items()})
        verdict = _verdict(ai_p, human_p, n_words=len(text.split()))
        return probs_out, verdict
    except Exception as e:
        return {}, f"❌ Error: {e}"
# ----------------------------
# (Optional) IMAGE detector - won't crash if the model is unavailable
# ----------------------------
try:
    from PIL import Image  # noqa: F401 - only verifies Pillow is installed

    image_pipe = pipeline("image-classification", model="umm-maybe/ai-vs-human-images")
except Exception:
    image_pipe = None
def detect_image(img):
    if image_pipe is None:
        return {}, "⚠️ Image detector not available on this Space."
    try:
        results = image_pipe(img)
        label_scores = {d["label"]: float(d["score"]) for d in results}
        best = max(label_scores, key=label_scores.get)
        if any(k in best.lower() for k in ["ai", "fake", "generated", "synthetic"]):
            return label_scores, "🤖 This image looks AI-generated"
        else:
            return label_scores, "📷 This image looks Human/Real"
    except Exception as e:
        return {}, f"❌ Error: {e}"
# ----------------------------
# UI
# ----------------------------
with gr.Blocks() as demo:
    gr.Markdown(
        "# 🔍 AI Content Detector\n"
        "Detect whether **text** (and optionally images) is AI-generated or human-made."
    )
    with gr.Tab("📝 Text"):
        txt = gr.Textbox(label="Enter text", lines=10, placeholder="Paste text here…")
        out_probs = gr.Label(label="Probabilities")
        out_verdict = gr.Textbox(label="Verdict", interactive=False)
        btn = gr.Button("Analyze", variant="primary")
        btn.click(detect_text, inputs=txt, outputs=[out_probs, out_verdict])
    with gr.Tab("📷 Image"):
        img_in = gr.Image(type="pil", label="Upload an image")
        img_probs = gr.Label(label="Probabilities")
        img_verdict = gr.Textbox(label="Verdict", interactive=False)
        btn2 = gr.Button("Analyze Image")
        btn2.click(detect_image, inputs=img_in, outputs=[img_probs, img_verdict])
if __name__ == "__main__":
    demo.launch()
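
# ----------------------------
# Suggested requirements.txt for this Space (an assumption; the Space's actual
# dependency file is not shown above):
#   gradio
#   transformers
#   torch
#   pillow
# ----------------------------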