# Humanizer Space — app.py (last update by debbaghmehdi1, commit 66c390d)
# import dependencies
import gradio as gr
from openai import OpenAI
import os
import random
import string
import nltk
from nltk.corpus import wordnet, stopwords
import random
import string
# Download necessary NLTK data
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
nltk.download('stopwords')
# Read the OpenAI API key from the environment (typically set as a Space secret).
api_key = os.getenv("OPENAI_API_KEY")
# make an instance of the openai client
client = OpenAI(api_key = api_key)
# Chat model used for rewriting.  NOTE(review): despite the variable name this
# is the stock gpt-3.5-turbo, not an actual fine-tune — confirm before renaming.
finetuned_model = "gpt-3.5-turbo"
# text processing functions
def random_capitalize(word):
    """With 10% probability, capitalize an alphabetic word; otherwise return it unchanged.

    Note that str.capitalize() also lowercases the rest of the word, which is
    part of the intended "human error" effect.
    """
    should_flip = word.isalpha() and random.random() < 0.1
    return word.capitalize() if should_flip else word
def random_remove_punctuation(text):
    """With 20% probability, delete up to 3 randomly chosen punctuation characters."""
    if random.random() >= 0.2:
        return text
    chars = list(text)
    punct_positions = [i for i, ch in enumerate(chars) if ch in string.punctuation]
    if punct_positions:
        chosen = random.sample(punct_positions, min(3, len(punct_positions)))
        # Delete right-to-left so earlier positions stay valid.
        for pos in sorted(chosen, reverse=True):
            del chars[pos]
    return ''.join(chars)
def random_double_period(text):
    """With 20% probability, turn up to the first 3 periods into double periods."""
    if random.random() < 0.2:
        return text.replace('.', '..', 3)
    return text
def random_double_space(text):
    """With 20% probability, inject up to 3 double spaces between words."""
    if random.random() >= 0.2:
        return text
    words = text.split()
    # Appending a trailing space to a word makes the join produce two spaces.
    for _ in range(min(3, len(words) - 1)):
        pick = random.randint(0, len(words) - 2)
        words[pick] += ' '
    return ' '.join(words)
def random_replace_comma_space(text, period_replace_percentage=0.33):
    """Move the space in roughly a third of ", " / ". " occurrences to before
    the punctuation (", " -> " ,", ". " -> " ."), mimicking sloppy typing.

    NOTE(review): `period_replace_percentage` is accepted but never read — the
    replacement fraction is the hard-coded `// 3` below.  Kept for interface
    compatibility; confirm intent before wiring it in.
    """
    comma_total = text.count(", ")
    period_total = text.count(". ")
    # At least one replacement is attempted even for a single occurrence.
    want_commas = max(1, comma_total // 3)
    want_periods = max(1, period_total // 3)
    comma_spots = [i for i in range(len(text)) if text.startswith(", ", i)]
    period_spots = [i for i in range(len(text)) if text.startswith(". ", i)]
    picked_commas = random.sample(comma_spots, min(want_commas, len(comma_spots)))
    picked_periods = random.sample(period_spots, min(want_periods, len(period_spots)))
    # Replace right-to-left; each swap is length-preserving, so the remaining
    # indices stay valid.
    for spot in sorted(picked_commas + picked_periods, reverse=True):
        if text.startswith(", ", spot):
            text = text[:spot] + " ," + text[spot + 2:]
        if text.startswith(". ", spot):
            text = text[:spot] + " ." + text[spot + 2:]
    return text
def transform_paragraph(paragraph):
    """Apply the randomized 'humanizing' noise to a single paragraph.

    Paragraphs of 12 words or fewer pass through untouched (short lines such
    as headings would be mangled disproportionately).  Markdown markers are
    stripped in all cases.
    """
    tokens = paragraph.split()
    if len(tokens) > 12:
        noisy = ' '.join(random_capitalize(tok) for tok in tokens)
        for mutate in (random_remove_punctuation,
                       random_double_period,
                       random_double_space,
                       random_replace_comma_space):
            noisy = mutate(noisy)
    else:
        noisy = paragraph
    # '#' -> '*' and then all '*' removed: the net effect strips both markers.
    return noisy.replace("#", "*").replace("*", "")
def transform_text(text):
    """Humanize each newline-separated paragraph independently and rejoin."""
    return '\n'.join(transform_paragraph(p) for p in text.split('\n'))
# NOTE: nltk, wordnet and stopwords are already imported and their corpora
# downloaded at the top of this file.  The re-imports below are harmless
# no-ops and are kept, but the duplicated nltk.download() calls were removed
# so startup does not hit the network twice.
import nltk
from nltk.corpus import wordnet, stopwords
def get_synonyms(word):
    """Return a set of short, single-word WordNet synonyms for *word*."""
    found = set()
    for synset in wordnet.synsets(word):
        for lemma in synset.lemmas():
            candidate = lemma.name().replace('_', ' ')
            # Keep only plain, short alternatives; isalpha() already rejects
            # anything containing a space, so the split() check is belt-and-braces.
            if candidate.isalpha() and len(candidate.split()) == 1 and len(candidate) <= 10:
                found.add(candidate)
    return found
def paraphrase_text(text, replace_ratio=0.6):
    """Swap roughly *replace_ratio* of the non-stopword tokens for a random
    WordNet synonym, then sprinkle in small 'human-like' typing errors."""
    stop_words = set(stopwords.words("english"))
    rewritten = []
    for token in text.split():
        # Draw the random number first (matches short-circuit order), then
        # skip stopwords so common words stay recognizable.
        swap = random.random() < replace_ratio and token.lower() not in stop_words
        if swap:
            options = get_synonyms(token)
            rewritten.append(random.choice(list(options)) if options else token)
        else:
            rewritten.append(token)
    result = " ".join(rewritten)
    # NOTE(review): this replace() maps a single space to a single space — a
    # no-op.  The original comment ("Double spaces") suggests the intended
    # call was result.replace(" ", "  "); confirm before changing behavior.
    result = result.replace(" ", " ")
    if random.random() < 0.1:
        result = result.replace(".", "..", 1)  # occasional double period
    return result
import re
def humanize_text(AI_text):
    """Humanize AI-generated text: rewrite it with the chat model, then run a
    local paraphrasing pass over the reply."""
    system_prompt = """
You are an advanced AI text rewriter that makes AI-generated text sound fully human-written.
- Use natural synonyms, contractions, and varied sentence structures.
- Restructure sentences to be complex and nuanced.
- Avoid robotic phrasing or overly formal structures.
- Ensure the text feels like it was written by a real person.
"""
    user_prompt = f"Rewrite this text to make it more human:\n\n{AI_text}"
    # Sampling knobs favor variety: high temperature plus frequency/presence
    # penalties discourage the model from echoing the input verbatim.
    response = client.chat.completions.create(
        model=finetuned_model,  # gpt-3.5-turbo (see module-level constant)
        temperature=1.1,
        max_tokens=500,
        top_p=0.95,
        frequency_penalty=0.3,
        presence_penalty=0.5,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )
    gpt_output = response.choices[0].message.content.strip()
    # A second, local paraphrasing pass roughens the GPT output further.
    return paraphrase_text(gpt_output)
# Gradio entry point.
def main_function(AI_text):
    """Delegate to the GPT + paraphrasing pipeline."""
    return humanize_text(AI_text)
# Gradio UI: a single textbox in, humanized text out.
# Fix: user-facing typo "availabe" -> "available" in the description.
interface = gr.Interface(
    fn=main_function,
    inputs="textbox",
    outputs="textbox",
    title="AI Text Humanizer",
    description=(
        "Enter AI-generated text and get a human-written version. "
        "This space is available for limited time only so contact "
        "farhan.sid1111@gmail.com to put this application in production."
    ),
)
# Launch with debug=True so tracebacks surface in the Space logs.
interface.launch(debug=True)
# import gradio as gr
# # Function to handle text submission
# def contact_info(text):
# return "Contact farhan.sid1111@gmail.com for Humanizer Application service"
# # Gradio interface definition
# interface = gr.Interface(
# fn=contact_info,
# inputs="textbox",
# outputs="text",
# title="AI TEXT HUMANIZER",
# description="Enter AI text and get its humanizer equivalent"
# )
# # Launch the Gradio app
# if __name__ == "__main__":
# interface.launch()