humanize

Runtime error

File size: 4,606 Bytes

# import dependencies
import gradio as gr
from openai import OpenAI
import os
import re
import random
import string


# Read the OpenAI API key from the environment (None when the variable is unset)
api_key = os.environ.get("OPENAI_API_KEY")

# Shared OpenAI client used for every completion request
client = OpenAI(api_key=api_key)


# Identifier of the fine-tuned chat model that performs the rewriting
finetuned_model = "ft:gpt-3.5-turbo-0125:cedarbyte-business-solutions::9f4vd1FP"


def random_capitalize(word):
    """Return ``word.capitalize()`` about 10% of the time for alphabetic words.

    Words containing any non-letter character are returned untouched and do
    not consume a random draw.
    """
    if not word.isalpha():
        return word
    # Only alphabetic words reach the random draw.
    if random.random() < 0.1:
        return word.capitalize()
    return word

def random_remove_punctuation(text):
    """With probability 0.2, delete up to three randomly chosen punctuation marks.

    Punctuation means any character in ``string.punctuation``. When the random
    gate does not fire, or the text holds no punctuation, the text comes back
    unchanged.
    """
    if random.random() >= 0.2:
        return text

    punct_positions = [pos for pos, ch in enumerate(text) if ch in string.punctuation]
    if not punct_positions:
        return text

    # Pick at most three distinct positions, then rebuild the string without them.
    doomed = set(random.sample(punct_positions, min(3, len(punct_positions))))
    return ''.join(ch for pos, ch in enumerate(text) if pos not in doomed)

def random_double_period(text):
    """With probability 0.2, turn the first three periods into double periods."""
    if random.random() >= 0.2:
        return text
    # Splitting on at most three periods and rejoining with ".." doubles
    # exactly those first three occurrences.
    return '..'.join(text.split('.', 3))

def random_double_space(text):
    """With probability 0.2, widen up to three word gaps to a double space.

    The text is split on whitespace and rejoined, so when the random gate
    fires all other gaps are normalized to a single space. Up to three
    distinct gaps are chosen, and each becomes exactly two spaces.

    Args:
        text: The text to modify.

    Returns:
        The text with up to three double-spaced gaps, or the original text
        when the random gate does not fire.
    """
    if random.random() >= 0.2:
        return text

    words = text.split()
    gap_count = len(words) - 1
    if gap_count < 1:
        # Zero or one word: there is no gap to widen.
        return ' '.join(words)

    # Sample distinct gaps so the same gap is never widened more than once.
    widened = set(random.sample(range(gap_count), min(3, gap_count)))

    pieces = []
    for position, word in enumerate(words):
        pieces.append(word)
        if position < gap_count:
            pieces.append('  ' if position in widened else ' ')
    return ''.join(pieces)

def random_replace_comma_space(text, period_replace_percentage=0.33):
    """Move the space in front of some ", " and ". " occurrences.

    For each of the two patterns, roughly one third of the occurrences (at
    least one, when any exist) are picked at random and rewritten as " ," or
    " ." respectively.

    Args:
        text: The text to modify.
        period_replace_percentage: Accepted for interface compatibility; the
            body does not read it (the one-third ratio is fixed).

    Returns:
        The text with the selected punctuation/space pairs swapped.
    """
    def positions(token):
        # Every index at which `token` starts, in ascending order.
        found = []
        at = text.find(token)
        while at != -1:
            found.append(at)
            at = text.find(token, at + 1)
        return found

    def quota(hits):
        # A third of the hits, but at least one and never more than exist.
        return min(max(1, len(hits) // 3), len(hits))

    comma_at = positions(", ")
    period_at = positions(". ")

    chosen = random.sample(comma_at, quota(comma_at))
    chosen += random.sample(period_at, quota(period_at))

    # Each chosen index holds a punctuation mark followed by a space; the
    # pairs never overlap, so swapping the two characters in place is enough.
    chars = list(text)
    for at in chosen:
        chars[at], chars[at + 1] = chars[at + 1], chars[at]
    return ''.join(chars)

def transform_paragraph(paragraph):
    """Apply the random noise pipeline to one paragraph and strip markdown marks.

    Paragraphs of more than 12 whitespace-separated words are re-joined with
    single spaces after per-word random capitalization, then passed through
    the punctuation-removal, double-period, double-space and comma/period
    spacing steps in that order. Shorter paragraphs skip the pipeline.
    In both cases every "#" and "*" character is removed from the result.
    """
    tokens = paragraph.split()

    if len(tokens) <= 12:
        result = paragraph
    else:
        result = ' '.join(map(random_capitalize, tokens))
        pipeline = (
            random_remove_punctuation,
            random_double_period,
            random_double_space,
            random_replace_comma_space,
        )
        for mutate in pipeline:
            result = mutate(result)

    # Drop heading and emphasis markers in a single pass.
    return result.translate(str.maketrans('', '', '#*'))

def transform_text(text):
    """Run ``transform_paragraph`` on every newline-separated line of ``text``.

    The lines are re-joined with "\\n", so the number of lines is preserved.
    """
    return '\n'.join(map(transform_paragraph, text.split('\n')))


# function to humanize the text
def humanize_text(AI_text):
    """Humanize the provided AI text using the fine-tuned model.

    Sends ``AI_text`` to the chat-completions endpoint through the
    module-level ``client`` and ``finetuned_model``, then passes the reply
    through ``transform_text``.

    Args:
        AI_text: The text to humanize (the Gradio textbox value).

    Returns:
        The post-processed model reply, or an empty string when the model
        returns no text content.
    """
    response = client.chat.completions.create(
        model=finetuned_model,
        temperature=0.9,
        messages=[
            {"role": "system", "content": """
    You are a text humanizer.
    You humanize AI generated text.
    The text must appear like humanly written.
    THE INPUT AND THE OUTPUT TEXT SHOULD HAVE THE SAME FORMAT.
    THE HEADINGS AND THE BULLETS IN THE INPUT SHOULD REMAIN IN PLACE"""},
            {"role": "user", "content": "THE LANGUAGE OF THE INPUT AND THE OUTPUT MUST BE SAME. THE SENTENCES SHOULD NOT BE SHORT LENGTH - THEY SHOULD BE SAME AS IN THE INPUT. ALSO THE PARAGRAPHS SHOULD NOT BE SHORT EITHER - PARAGRAPHS MUST HAVE THE SAME LENGTH"},
            {"role": "user", "content": f"Humanize the text. Keep the output format i.e. the bullets and the headings as it is and dont use the list of words that are not permissible. \nTEXT: {AI_text}"},
        ],
    )

    # The message content is optional in the API response; fall back to an
    # empty string so `.strip()` cannot fail on None.
    reply = response.choices[0].message.content or ""
    return transform_text(reply.strip())


# Build the Gradio UI: one textbox in, one textbox out, backed by humanize_text
interface = gr.Interface(fn=humanize_text, inputs="textbox", outputs="textbox")

# Start the Gradio app with debug and share enabled
interface.launch(debug=True, share=True)