File size: 6,048 Bytes
c375f14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1607a94
c375f14
 
 
 
 
 
 
1607a94
c375f14
 
1607a94
c375f14
 
 
 
4afc6ee
 
1607a94
 
 
 
 
 
 
c375f14
1607a94
4afc6ee
1607a94
 
95690ea
 
 
 
 
 
c375f14
 
95690ea
c375f14
 
 
 
 
 
1607a94
c375f14
 
1607a94
c375f14
 
 
 
 
 
 
 
 
1607a94
51a4856
 
c375f14
1607a94
51a4856
95690ea
c375f14
 
 
1607a94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281e535
1607a94
 
 
 
c375f14
 
 
 
 
 
 
 
 
 
 
 
76ded8f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import gradio as gr
from divscore import DivScore
import torch
import os

# Set TOKENIZERS_PARALLELISM to "false" for this process at import time, i.e.
# before load_model() is ever called.
# NOTE(review): presumably this disables parallelism in the Hugging Face
# tokenizers library (and its fork warning) — confirm against its docs.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

def load_model():
    """Construct the DivScore detector, or return None if that fails.

    The detector is built from the general LM
    "mistral-community/Mistral-7B-v0.2" and the enhanced LM
    "RichardChenZH/DivScore_combined". It is placed on "cuda:0" when torch
    reports CUDA as available and on "cpu" otherwise, with
    ``use_bfloat16=True``.

    Returns:
        The constructed ``DivScore`` instance, or ``None`` if any exception
        was raised while building it. The exception is not re-raised; its
        message is printed to stdout instead.
    """
    try:
        return DivScore(
            generalLM_name_or_path="mistral-community/Mistral-7B-v0.2",
            enhancedLM_name_or_path="RichardChenZH/DivScore_combined",
            device="cpu" if not torch.cuda.is_available() else "cuda:0",
            # bfloat16 is requested for better memory efficiency.
            use_bfloat16=True,
        )
    except Exception as err:
        # Best-effort: report the failure and let the caller handle None.
        print(f"Error loading model: {err}")
    return None

# Module-level cache for the DivScore detector. It starts as None and is
# filled in by detect_ai_text() through load_model() the first time it is
# needed; it stays None if loading fails, so the next call retries.
detector = None

def detect_ai_text(text):
    """Score *text* with DivScore and report whether it looks AI-generated.

    The detector is created lazily on the first call that actually needs it
    and cached in the module-level ``detector`` variable.

    Args:
        text: The text to analyze. ``None``, empty, or whitespace-only input
            is rejected before the model is loaded or run.

    Returns:
        A ``(report, verdict)`` pair of Markdown strings, one per Markdown
        output component this function is wired to. Both items are always
        strings:

        * rejected input: ``("Please enter some text to analyze.", "")``
        * model failed to load: an error message and ``"Error in analysis"``
        * scoring raised: ``"Error occurred: ..."`` and ``"Error in analysis"``
        * success: the score report and the AI/human verdict
    """
    global detector

    # Cutoff used by this demo (per the original comment, based on the
    # paper's findings): scores below it are treated as AI-generated.
    # Defined once so the comparison and the displayed text cannot drift.
    threshold = 0.19

    # Validate first: it is cheap and avoids loading the models just to
    # reject empty input. Also tolerates None from the UI layer.
    if text is None or not text.strip():
        return "Please enter some text to analyze.", ""

    # Initialize the detector if not already done.
    if detector is None:
        detector = load_model()
        if detector is None:
            return (
                "Error: Failed to load the model. Please try again later.",
                "Error in analysis",
            )

    try:
        score, entropy_score, ce_score = detector.compute_score(text)

        is_ai_generated = score < threshold

        # Built line by line so the emitted Markdown starts at column 0
        # regardless of how this function is indented.
        result = (
            "\n"
            "### Analysis Results\n"
            f"- **DivScore**: {score:.4f}\n"
            f"- **Entropy Score**: {entropy_score:.4f}\n"
            f"- **CE Score**: {ce_score:.4f}\n"
            "\n"
            "### Interpretation\n"
            f"- **Threshold**: {threshold} (scores below this are likely AI-generated)\n"
            f"- **Current Score**: {score:.4f}\n"
        )

        if is_ai_generated:
            detection_result = "## 🔴 AI-Generated Text Detected\nThis text is likely to have been generated by an AI model."
        else:
            detection_result = "## 🟢 Human-Written Text Detected\nThis text appears to be written by a human."

        return result, detection_result

    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing
        # the request.
        return f"Error occurred: {str(e)}", "Error in analysis"

# Build the Gradio Blocks UI: intro text, an input box with an analyze button,
# two side-by-side Markdown outputs, two sample inputs, and the click handler.
# Components are laid out in the order they are created inside each context
# manager, so statement order here is significant.
with gr.Blocks(title="DivScore AI Text Detector") as demo:
    # Page header and usage note.
    gr.Markdown("""
    # DivScore AI Text Detector
    
    This demo uses the DivScore model to detect if specialized domain text (i.e. medical and legal text) was generated by an AI model.
    Enter your text below to analyze it.
    
    **Note:** The demo may take quite a few moments to run as it runs on Huggingface free CPUs. Running on local GPUs provides much faster speed.
    """)
    
    # Input row: the text to analyze and the button that triggers analysis.
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Input Text",
                placeholder="Enter text to analyze...",
                lines=5
            )
            submit_btn = gr.Button("Analyze Text", variant="primary")
    
    # Output row: two columns, one per value returned by detect_ai_text
    # (score report on the left, verdict on the right).
    with gr.Row():
        with gr.Column():
            result_output = gr.Markdown(label="Analysis Results")
        with gr.Column():
            ai_generated = gr.Markdown(label="AI Generation Detection")
    
    # Two sample passages wired to text_input: one about sleep-disorders
    # research and one about a parking violation in Pennsylvania.
    gr.Examples(
        examples=[
            ["""Adjustment for reporting heterogeneity can be necessary in sleep disorders research to ensure accurate and reliable results.
Reporting heterogeneity refers to variations in how information is documented or reported, which can stem from differences
in assessment tools, study design, participant demographics, cultural differences, and subjective interpretations of symptoms.
In sleep disorders, symptoms and their severity can be perceived differently by individuals due to personal, cultural, or
situational factors. Without adjusting for these differences, there can be inconsistencies in data interpretation, potentially
leading to skewed or misleading findings. Therefore, employing methods to adjust for reporting heterogeneity can enhance
the validity and comparability of research outcomes across different studies or populations."""],
            ["""In Pennsylvania, as in many other jurisdictions, parking violations, including those for street sweeper offenses, are generally
considered civil infractions rather than criminal offenses. This means that the violation is typically against the vehicle and
not directly against the individual who parked it. As such, the burden of proof is generally on the city to show that the
vehicle was parked in violation of the law, rather than proving who specifically parked the car. The registered letter you
received offering options to plead not guilty or guilty and pay a bond amount is a common procedural step in handling such
civil infractions. While parking tickets are civil matters, failure to address them can lead to additional consequences, such
as fines, vehicle registration holds, or even a bench warrant for failing to respond to the citation, which can create legal
complications. Regarding jurisdiction, if the owner of the vehicle resides out of state, such as in Texas, Pennsylvania cannot
extradite the individual solely for an unpaid parking ticket. Extradition typically applies to criminal offenses. However,
unresolved parking tickets can lead to complications like increased fines or affecting the owner's ability to register their
vehicle, depending on interstate compacts or agreements. If you believe that the ticket was issued in error or you have
compelling reasons to challenge it, you may wish to contest the ticket through the legal process outlined in the letter. It can
also be helpful to seek legal advice to understand the specific options and implications based on the circumstances of the
case."""]
        ],
        inputs=text_input
    )
    
    # On click, pass the textbox content to detect_ai_text; its two return
    # values fill result_output and ai_generated, in that order.
    submit_btn.click(
        fn=detect_ai_text,
        inputs=text_input,
        outputs=[result_output, ai_generated]
    )

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.queue()  # Enable queuing for better handling of multiple requests
    demo.launch()