File size: 7,259 Bytes
84669bc
 
 
 
71bcf84
 
936bfca
c22ec41
 
 
 
 
 
 
 
 
 
 
84669bc
 
936bfca
84669bc
 
936bfca
84669bc
ce95412
35244e7
10dc1f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29fb5d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bbd048e
 
84669bc
66c390d
10dc1f6
66c390d
 
 
 
 
 
10dc1f6
 
66c390d
 
 
 
 
6d59000
66c390d
10dc1f6
 
 
6d59000
 
66c390d
6d59000
10dc1f6
 
 
f367cad
ffa6b11
 
 
6d59000
84669bc
 
10dc1f6
84669bc
 
 
f0fc36b
84669bc
776fa07
10dc1f6
84669bc
 
99b3c08
936bfca
 
 
99b3c08
9f91f15
84669bc
99b3c08
84669bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# import dependencies
import gradio as gr
from openai import OpenAI
import os
import random
import string

import nltk
from nltk.corpus import wordnet, stopwords
import random
import string

# Fetch the NLTK resources this module requests, in the same order as before.
for _nltk_resource in ('punkt', 'averaged_perceptron_tagger', 'wordnet', 'stopwords'):
    nltk.download(_nltk_resource)
del _nltk_resource  # keep the loop variable out of the module namespace

# OpenAI key taken from the environment (None when OPENAI_API_KEY is unset).
api_key = os.environ.get("OPENAI_API_KEY")

# Shared OpenAI client used for the chat-completion request below.
client = OpenAI(api_key=api_key)

# Model name passed to the chat-completions endpoint.
finetuned_model = "gpt-3.5-turbo"







# text processing functions
def random_capitalize(word):
    """Return ``word`` capitalized roughly 10% of the time, else unchanged.

    Only purely alphabetic words are candidates; for any other word the
    input is returned as-is and no random number is drawn.
    """
    should_capitalize = word.isalpha() and random.random() < 0.1
    return word.capitalize() if should_capitalize else word

def random_remove_punctuation(text):
    """With 20% probability, drop up to three punctuation characters.

    The characters to drop are sampled at random from every position whose
    character is in ``string.punctuation``. Otherwise ``text`` is returned
    unchanged.
    """
    if random.random() >= 0.2:
        return text

    punct_positions = [pos for pos, ch in enumerate(text) if ch in string.punctuation]
    doomed = set()
    if punct_positions:
        how_many = min(3, len(punct_positions))
        doomed.update(random.sample(punct_positions, how_many))

    # Rebuild the string while skipping the sampled positions.
    return ''.join(ch for pos, ch in enumerate(text) if pos not in doomed)

def random_double_period(text):
    """With 20% probability, turn the first three ``.`` into ``..``.

    Returns ``text`` unchanged the rest of the time.
    """
    triggered = random.random() < 0.2
    return text.replace('.', '..', 3) if triggered else text

def random_double_space(text):
    """With 20% probability, widen a few gaps between words.

    When triggered, the text is split on whitespace and two spaces are
    appended to a randomly chosen word (never the last one) up to three
    times; the same word may be picked more than once. The words are then
    re-joined with single spaces, so original whitespace runs are
    normalized. Otherwise ``text`` is returned unchanged.
    """
    if random.random() >= 0.2:
        return text

    tokens = text.split()
    last_eligible = len(tokens) - 2          # index of the second-to-last word
    rounds = min(3, len(tokens) - 1)         # zero (or negative) -> no padding
    for _ in range(rounds):
        target = random.randint(0, last_eligible)
        tokens[target] = tokens[target] + '  '
    return ' '.join(tokens)

def random_replace_comma_space(text, period_replace_percentage=0.33):
    """Move the space in front of some ``", "`` and ``". "`` occurrences.

    About one third of the ``", "`` occurrences (at least one, when any
    exist) are rewritten to ``" ,"``, and likewise ``". "`` to ``" ."``.
    Which occurrences are rewritten is chosen at random.

    Args:
        text: The text to alter.
        period_replace_percentage: Accepted for compatibility; it is not
            used by the implementation (the one-third share is fixed).

    Returns:
        The altered text; its length always equals ``len(text)``.
    """
    def positions_of(token):
        # Every index at which ``token`` starts inside ``text``.
        return [pos for pos in range(len(text)) if text.startswith(token, pos)]

    comma_positions = positions_of(", ")
    period_positions = positions_of(". ")

    # One third of the occurrences, but never fewer than one.
    comma_quota = max(1, text.count(", ") // 3)
    period_quota = max(1, text.count(". ") // 3)

    chosen_commas = random.sample(
        comma_positions, min(comma_quota, len(comma_positions))
    )
    chosen_periods = random.sample(
        period_positions, min(period_quota, len(period_positions))
    )

    # Each chosen position holds "<mark><space>". Two such pairs can never
    # overlap, so swapping the two characters in place is the same as
    # replacing ", " with " ," (or ". " with " .") at that position.
    chars = list(text)
    for pos in chosen_commas + chosen_periods:
        chars[pos], chars[pos + 1] = chars[pos + 1], chars[pos]
    return ''.join(chars)

def transform_paragraph(paragraph):
    """Apply the random text perturbations to one paragraph.

    Paragraphs of more than 12 whitespace-separated words are re-joined with
    single spaces after per-word random capitalization, then passed through
    the punctuation-removal, double-period, double-space and comma/period
    spacing steps, in that order. Shorter paragraphs skip all of that.

    In every case, all ``#`` and ``*`` characters are removed from the result.
    """
    tokens = paragraph.split()
    if len(tokens) <= 12:
        result = paragraph
    else:
        result = ' '.join(random_capitalize(token) for token in tokens)
        noise_steps = (
            random_remove_punctuation,
            random_double_period,
            random_double_space,
            random_replace_comma_space,
        )
        for apply_step in noise_steps:
            result = apply_step(result)

    # Strip markdown-style markers.
    return result.replace("#", "").replace("*", "")

def transform_text(text):
    """Run ``transform_paragraph`` on each newline-separated line of ``text``.

    The lines are re-joined with ``"\\n"``, so the number of lines is kept.
    """
    return '\n'.join(map(transform_paragraph, text.split('\n')))

import nltk
from nltk.corpus import wordnet, stopwords

# The NLTK resources (punkt, averaged_perceptron_tagger, wordnet, stopwords)
# are already downloaded by the nltk.download calls that follow the imports at
# the top of this file, so the identical calls are not repeated here.

def get_synonyms(word):
    """Collect short, single-word WordNet lemma names for ``word``.

    Every lemma of every synset returned by ``wordnet.synsets(word)`` is
    considered. Underscores in lemma names are turned into spaces, and a name
    is kept only if it is purely alphabetic and at most 10 characters long.
    The result is not filtered against ``word``, so it may contain the input.

    Returns:
        A set of strings (empty when nothing qualifies).
    """
    lemma_names = (
        lemma.name().replace('_', ' ')
        for synset in wordnet.synsets(word)
        for lemma in synset.lemmas()
    )
    # str.isalpha() is False for anything containing a space, so it also
    # enforces the "single word" requirement.
    return {name for name in lemma_names if name.isalpha() and len(name) <= 10}

def paraphrase_text(text, replace_ratio=0.6):
    """Paraphrase ``text`` by swapping some words for WordNet synonyms.

    The text is split on whitespace, so line breaks and repeated spaces come
    out as single spaces. Each token that is not an English stopword is
    replaced, with probability ``replace_ratio``, by a randomly chosen entry
    from ``get_synonyms``.

    Punctuation attached to a token (``"quick,"``, ``"(fast)"``) is set aside
    for the stopword/synonym lookup and re-attached afterwards; without this,
    such tokens never match the stopword list or WordNet. Candidates equal to
    the original word are ignored, and a leading capital letter is carried
    over to the replacement.

    Args:
        text: The text to paraphrase.
        replace_ratio: Probability, per non-stopword token, of attempting a
            replacement.

    Returns:
        The paraphrased text. With probability 0.1 its first ``.`` is doubled
        as a deliberate "human-like" slip.
    """
    stop_words = set(stopwords.words("english"))
    paraphrased_words = []

    for word in text.split():
        # Look up the word without its surrounding punctuation.
        core = word.strip(string.punctuation)
        if not core or random.random() >= replace_ratio or core.lower() in stop_words:
            paraphrased_words.append(word)
            continue

        # Sorted so that a seeded random generator gives reproducible picks
        # (set iteration order is not stable between interpreter runs).
        candidates = sorted(
            synonym for synonym in get_synonyms(core)
            if synonym.lower() != core.lower()
        )
        if not candidates:
            paraphrased_words.append(word)  # nothing usable: keep the original
            continue

        replacement = random.choice(candidates)
        if core[0].isupper():
            replacement = replacement[0].upper() + replacement[1:]

        # Re-attach whatever punctuation surrounded the original word.
        start = len(word) - len(word.lstrip(string.punctuation))
        paraphrased_words.append(
            word[:start] + replacement + word[start + len(core):]
        )

    text = " ".join(paraphrased_words)
    text = text.replace("  ", " ")  # defensive: collapse any double space
    if random.random() < 0.1:
        text = text.replace(".", "..", 1)  # occasional doubled period
    return text


import re

def humanize_text(AI_text):
    """Rewrite AI-generated text with the chat model, then paraphrase it.

    Args:
        AI_text: The text to rewrite.

    Returns:
        The model's rewrite after ``paraphrase_text`` post-processing. An
        empty string is returned, without contacting the API, when
        ``AI_text`` is ``None``, empty or whitespace-only, and also when the
        model returns no text.
    """
    # Nothing to rewrite: skip the API round-trip entirely.
    if not AI_text or not AI_text.strip():
        return ""

    response = client.chat.completions.create(
        model=finetuned_model,
        temperature=1.1,  # above 1.0 for more variation
        max_tokens=500,
        top_p=0.95,
        frequency_penalty=0.3,
        presence_penalty=0.5,
        messages=[
            {"role": "system", "content": """
            You are an advanced AI text rewriter that makes AI-generated text sound fully human-written.
            - Use natural synonyms, contractions, and varied sentence structures.
            - Restructure sentences to be complex and nuanced.
            - Avoid robotic phrasing or overly formal structures.
            - Ensure the text feels like it was written by a real person.
            """},
            {"role": "user", "content": f"Rewrite this text to make it more human:\n\n{AI_text}"}
        ]
    )

    # ``message.content`` may be None; treat that like an empty reply
    # instead of crashing on ``None.strip()``.
    gpt_output = (response.choices[0].message.content or "").strip()
    if not gpt_output:
        return ""

    # Apply additional synonym-based paraphrasing to the model output.
    return paraphrase_text(gpt_output)


# Define the main function to process text
def main_function(AI_text):
    """Return the humanized form of ``AI_text`` by delegating to ``humanize_text``."""
    humanized = humanize_text(AI_text)
    return humanized

# Gradio interface: a single textbox in, a single textbox out, with
# main_function handling each submission.
interface = gr.Interface(
    fn=main_function,
    inputs="textbox",
    outputs="textbox",
    title="AI Text Humanizer",
    # User-facing blurb (spelling of "available" and the missing article fixed).
    description="Enter AI-generated text and get a human-written version. This space is available for a limited time only so contact farhan.sid1111@gmail.com to put this application in production.",
)


# Launch the Gradio app (runs at import time, as before).
interface.launch(debug=True)






# import gradio as gr

# # Function to handle text submission
# def contact_info(text):
#     return "Contact farhan.sid1111@gmail.com for Humanizer Application service"

# # Gradio interface definition
# interface = gr.Interface(
#     fn=contact_info,
#     inputs="textbox",
#     outputs="text",
#     title="AI TEXT HUMANIZER",
#     description="Enter AI text and get its humanizer equivalent"
# )

# # Launch the Gradio app
# if __name__ == "__main__":
#     interface.launch()