File size: 4,606 Bytes
9f91f15
99b3c08
 
35244e7
0f52e8d
a32c355
 
 
35244e7
9f91f15
35244e7
99b3c08
9f91f15
dcf3cfa
 
f367cad
9f91f15
3540b16
99b3c08
a32c355
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f91f15
99b3c08
 
 
 
a32c355
99b3c08
dcf3cfa
776fa07
 
 
dcf3cfa
 
776fa07
dcf3cfa
99b3c08
 
776fa07
1305d94
a32c355
99b3c08
a32c355
99b3c08
9f91f15
99b3c08
 
 
 
 
 
 
 
eb2d776
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# import dependencies
import gradio as gr
from openai import OpenAI
import os
import re
import random
import string


# Read the OpenAI API key from the OPENAI_API_KEY environment variable
# (None when the variable is not set).
api_key = os.environ.get("OPENAI_API_KEY")

# OpenAI client instance built with that key
client = OpenAI(api_key=api_key)


# Identifier of the fine-tuned chat model passed to the completions endpoint
finetuned_model = "ft:gpt-3.5-turbo-0125:cedarbyte-business-solutions::9f4vd1FP"


def random_capitalize(word):
    """Return ``word`` capitalized about 10% of the time, otherwise unchanged.

    Only purely alphabetic words are candidates; for any other word no
    random number is drawn and the word is returned as-is.
    """
    if not word.isalpha():
        return word
    if random.random() >= 0.1:
        return word
    return word.capitalize()

def random_remove_punctuation(text):
    """With 20% probability, drop up to three punctuation characters.

    The characters to drop are picked at random among the positions of
    ``text`` holding a ``string.punctuation`` character. The text is
    returned unchanged when the draw fails or there is no punctuation.
    """
    if random.random() >= 0.2:
        return text

    punct_positions = [pos for pos, ch in enumerate(text) if ch in string.punctuation]
    if not punct_positions:
        return text

    doomed = set(random.sample(punct_positions, min(3, len(punct_positions))))
    return ''.join(ch for pos, ch in enumerate(text) if pos not in doomed)

def random_double_period(text):
    """With 20% probability, turn the first three '.' of ``text`` into '..'."""
    should_double = random.random() < 0.2
    return text.replace('.', '..', 3) if should_double else text

def random_double_space(text):
    """With 20% probability, widen up to three word gaps to a double space.

    When the draw succeeds the text is split on whitespace and re-joined, so
    every other gap becomes a single space. Up to three *distinct* gaps are
    chosen and each receives exactly two spaces.

    Texts with fewer than two words have no gap to widen and are returned
    unchanged, as is any text for which the draw fails.
    """
    if random.random() >= 0.2:
        return text

    words = text.split()
    gap_count = len(words) - 1
    if gap_count < 1:
        return text

    # Sample without replacement so the same gap is never widened twice.
    widened = set(random.sample(range(gap_count), min(3, gap_count)))

    pieces = []
    for position, word in enumerate(words):
        pieces.append(word)
        if position < gap_count:
            pieces.append('  ' if position in widened else ' ')
    return ''.join(pieces)

def random_replace_comma_space(text, period_replace_percentage=0.33):
    """Randomly move the space in front of some commas and periods.

    A chosen ``", "`` becomes ``" ,"`` and a chosen ``". "`` becomes ``" ."``.

    Args:
        text: The text to alter.
        period_replace_percentage: Fraction (0.0-1.0) of the ``". "``
            occurrences to rewrite. Values outside that range are clamped.
            A value of 0 leaves periods alone; any positive value rewrites
            at least one period when one is present.

    Returns:
        The text with the selected separators rewritten. Roughly one third
        of the ``", "`` occurrences (at least one when any exist) are
        rewritten regardless of ``period_replace_percentage``.
    """

    def _positions(haystack, separator):
        # Start index of every occurrence of ``separator`` in ``haystack``.
        found = []
        pos = haystack.find(separator)
        while pos != -1:
            found.append(pos)
            pos = haystack.find(separator, pos + len(separator))
        return found

    def _pick(positions, wanted):
        # Never ask random.sample for more items than are available.
        return random.sample(positions, min(wanted, len(positions)))

    comma_positions = _positions(text, ", ")
    period_positions = _positions(text, ". ")

    fraction = min(max(period_replace_percentage, 0.0), 1.0)
    comma_quota = max(1, len(comma_positions) // 3)
    if fraction > 0:
        period_quota = max(1, int(len(period_positions) * fraction))
    else:
        period_quota = 0

    # Commas are sampled before periods.
    chosen = _pick(comma_positions, comma_quota) + _pick(period_positions, period_quota)

    # Each rewrite swaps two characters in place, so the length is stable;
    # walking right-to-left simply mirrors a safe editing order.
    for pos in sorted(chosen, reverse=True):
        text = text[:pos] + " " + text[pos] + text[pos + 2:]

    return text

def transform_paragraph(paragraph):
    """Apply the random "noise" transformations to one paragraph.

    Paragraphs of more than 12 whitespace-separated words are re-joined with
    single spaces after random capitalization, then passed through the
    punctuation, double-period, double-space and comma/period-spacing
    helpers, in that order. Shorter paragraphs skip all of that. In both
    cases every '#' and '*' character is stripped from the result.
    """
    tokens = paragraph.split()
    result = paragraph

    if len(tokens) > 12:
        result = ' '.join(map(random_capitalize, tokens))
        pipeline = (
            random_remove_punctuation,
            random_double_period,
            random_double_space,
            random_replace_comma_space,
        )
        for mutate in pipeline:
            result = mutate(result)

    # Drop markdown markers ('#' and '*') in a single pass.
    return result.translate(str.maketrans('', '', '#*'))

def transform_text(text):
    """Run ``transform_paragraph`` on every newline-separated line of ``text``.

    The line structure is preserved: the transformed lines are joined back
    together with '\\n'.
    """
    return '\n'.join(map(transform_paragraph, text.split('\n')))


# function to humanize the text
def humanize_text(AI_text):
    """Humanize the provided AI text using the fine-tuned model.

    The text is sent to the chat completions endpoint with the fine-tuned
    model, and the reply is then post-processed by ``transform_text``.

    Args:
        AI_text: The text to humanize.

    Returns:
        The post-processed model reply, or an empty string when the input is
        empty/whitespace-only or the model returns no content.
    """
    # Nothing to rewrite: skip the API call entirely.
    if not AI_text or not AI_text.strip():
        return ""

    response = client.chat.completions.create(
        model=finetuned_model,
        temperature=0.9,
        messages=[
            {"role": "system", "content": """
    You are a text humanizer.
    You humanize AI generated text.
    The text must appear like humanly written.
    THE INPUT AND THE OUTPUT TEXT SHOULD HAVE THE SAME FORMAT.
    THE HEADINGS AND THE BULLETS IN THE INPUT SHOULD REMAIN IN PLACE"""},
            {"role": "user", "content": "THE LANGUAGE OF THE INPUT AND THE OUTPUT MUST BE SAME. THE SENTENCES SHOULD NOT BE SHORT LENGTH - THEY SHOULD BE SAME AS IN THE INPUT. ALSO THE PARAGRAPHS SHOULD NOT BE SHORT EITHER - PARAGRAPHS MUST HAVE THE SAME LENGTH"},
            {"role": "user", "content": f"Humanize the text. Keep the output format i.e. the bullets and the headings as it is and dont use the list of words that are not permissible. \nTEXT: {AI_text}"},
        ],
    )

    # message.content is optional in the API response; treat a missing
    # reply as empty text instead of failing on None.strip().
    humanized_text = (response.choices[0].message.content or "").strip()

    return transform_text(humanized_text)


# Gradio UI: one text box in, one text box out, backed by humanize_text
interface = gr.Interface(fn=humanize_text, inputs="textbox", outputs="textbox")

# Start the Gradio app with debug and share both enabled
interface.launch(share=True, debug=True)