youngtsai committed on
Commit
8749812
·
1 Parent(s): babfde6

def validate_article(generated_article, lesson_words, base_chars, original_word_count):

Browse files
Files changed (1) hide show
  1. app.py +49 -10
app.py CHANGED
@@ -32,12 +32,31 @@ def extract_article_from_content(article_text):
32
  return article_text
33
 
34
  def validate_article(generated_article, lesson_words, base_chars, original_word_count):
35
- clean_article = "".join(char for char in generated_article if char not in ",。!?;:「」『』()《》【】'\n'")
36
  not_every_new_word_is_used = not all(word in clean_article for word in lesson_words.split())
37
  word_out_of_range = not set(clean_article).issubset(set(lesson_words + base_chars))
38
  new_word_count = len(clean_article)
39
  word_count_error = not (0.9 * original_word_count <= new_word_count <= 1.1 * original_word_count)
40
- return not_every_new_word_is_used, word_out_of_range, word_count_error
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  def generate_new_article(lesson_words, original_article, original_word_count, base_chars, model_name):
43
  attempt = 0
@@ -81,9 +100,11 @@ def generate_new_article(lesson_words, original_article, original_word_count, ba
81
  generated_text = response.choices[0].message.content.strip()
82
  generated_article = extract_article_from_content(generated_text)
83
 
84
- not_every_new_word_is_used, word_out_of_range, word_count_error = validate_article(
85
- generated_article, lesson_words, base_chars, original_word_count)
86
-
 
 
87
  print("====validate_article====")
88
  print(f"not_every_new_word_is_used: {not_every_new_word_is_used}")
89
  print(f"word_out_of_range: {word_out_of_range}")
@@ -93,8 +114,20 @@ def generate_new_article(lesson_words, original_article, original_word_count, ba
93
  if not not_every_new_word_is_used and not word_out_of_range and not word_count_error:
94
  print("Generated article is valid")
95
  break
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
- return generated_article
98
 
99
  def load_csv(file):
100
  try:
@@ -112,7 +145,6 @@ def load_csv(file):
112
  return "", "", 0, ""
113
 
114
  with gr.Blocks() as demo:
115
- gr.Markdown("import CSV file to load data.")
116
  csv_file_input = gr.File(label="Upload CSV file (Columns: lesson_words, original_article, original_word_count, base_chars)")
117
  load_button = gr.Button("Load from CSV")
118
 
@@ -129,33 +161,40 @@ with gr.Blocks() as demo:
129
  generate_button1 = gr.Button("Generate Article - gpt-4-0125-preview")
130
  with gr.Column():
131
  output_text1 = gr.Textbox(label="Generated Article - gpt-4-0125-preview")
 
 
132
  with gr.Row():
133
  with gr.Column():
134
  model_2 = gr.Dropdown(label="Model 2", choices=model_list, value="gpt-3.5-turbo")
135
  generate_button2 = gr.Button("Generate Article - gpt-3.5-turbo")
136
  with gr.Column():
137
  output_text2 = gr.Textbox(label="Generated Article - gpt-3.5-turbo")
 
 
138
  with gr.Row():
139
  with gr.Column():
140
  model_3 = gr.Dropdown(label="Model 3", choices=model_list, value="gpt-4")
141
  generate_button3 = gr.Button("Generate Article - gpt-4")
142
  with gr.Column():
143
  output_text3 = gr.Textbox(label="Generated Article - gpt-4")
 
 
 
144
 
145
  generate_button1.click(
146
  generate_new_article,
147
  inputs=[lesson_words_input, original_article_input, original_word_count_input, base_chars_input, model_1],
148
- outputs=output_text1
149
  )
150
  generate_button2.click(
151
  generate_new_article,
152
  inputs=[lesson_words_input, original_article_input, original_word_count_input, base_chars_input, model_2],
153
- outputs=output_text2
154
  )
155
  generate_button3.click(
156
  generate_new_article,
157
  inputs=[lesson_words_input, original_article_input, original_word_count_input, base_chars_input, model_3],
158
- outputs=output_text3
159
  )
160
 
161
  # 為其他模型添加點擊事件
 
32
  return article_text
33
 
34
  def validate_article(generated_article, lesson_words, base_chars, original_word_count):
35
+ clean_article = "".join(char for char in generated_article if char not in "、,。!?;:「」『』()《》【】'\n'")
36
  not_every_new_word_is_used = not all(word in clean_article for word in lesson_words.split())
37
  word_out_of_range = not set(clean_article).issubset(set(lesson_words + base_chars))
38
  new_word_count = len(clean_article)
39
  word_count_error = not (0.9 * original_word_count <= new_word_count <= 1.1 * original_word_count)
40
+
41
+ lesson_words_not_in_new_article = [word for word in [char for char in lesson_words] if word not in clean_article]
42
+ words_not_in_both = [word for word in lesson_words_not_in_new_article if word not in base_chars]
43
+ additional_words = set([word for word in [char for char in clean_article] if word not in [char for char in lesson_words] and word not in [char for char in base_chars]])
44
+ count_of_words_in_new_article = len(clean_article)
45
+
46
+ result = {
47
+ "not_every_new_word_is_used": not_every_new_word_is_used,
48
+ "word_out_of_range": word_out_of_range,
49
+ "word_count_error": word_count_error,
50
+
51
+ "lesson_words_not_in_new_article": lesson_words_not_in_new_article,
52
+ "words_not_in_both": words_not_in_both,
53
+ "additional_words": additional_words,
54
+ "count_of_words_in_new_article": count_of_words_in_new_article
55
+ }
56
+
57
+
58
+
59
+ return result
60
 
61
  def generate_new_article(lesson_words, original_article, original_word_count, base_chars, model_name):
62
  attempt = 0
 
100
  generated_text = response.choices[0].message.content.strip()
101
  generated_article = extract_article_from_content(generated_text)
102
 
103
+ validate_article_result = validate_article(generated_article, lesson_words, base_chars, original_word_count)
104
+ not_every_new_word_is_used = validate_article_result['not_every_new_word_is_used']
105
+ word_out_of_range = validate_article_result['word_out_of_range']
106
+ word_count_error = validate_article_result['word_count_error']
107
+
108
  print("====validate_article====")
109
  print(f"not_every_new_word_is_used: {not_every_new_word_is_used}")
110
  print(f"word_out_of_range: {word_out_of_range}")
 
114
  if not not_every_new_word_is_used and not word_out_of_range and not word_count_error:
115
  print("Generated article is valid")
116
  break
117
+ else:
118
+ print("Generated article is invalid")
119
+ error_messages = []
120
+ if not_every_new_word_is_used:
121
+ error_messages.append("Not every new word is used in the article.")
122
+ if word_out_of_range:
123
+ error_messages.append("The article contains words that are not in the new words or word library.")
124
+ if word_count_error:
125
+ error_messages.append(f"The word count of the new article deviates more than 10% from the original ({original_word_count}).")
126
+
127
+ prompt += "\n".join(error_messages) + "\n" # Append the error messages to the prompt for the next attempt
128
+
129
 
130
+ return generated_article, validate_article_result
131
 
132
  def load_csv(file):
133
  try:
 
145
  return "", "", 0, ""
146
 
147
  with gr.Blocks() as demo:
 
148
  csv_file_input = gr.File(label="Upload CSV file (Columns: lesson_words, original_article, original_word_count, base_chars)")
149
  load_button = gr.Button("Load from CSV")
150
 
 
161
  generate_button1 = gr.Button("Generate Article - gpt-4-0125-preview")
162
  with gr.Column():
163
  output_text1 = gr.Textbox(label="Generated Article - gpt-4-0125-preview")
164
+ with gr.Column():
165
+ validate_article_result_1 = gr.JSON()
166
  with gr.Row():
167
  with gr.Column():
168
  model_2 = gr.Dropdown(label="Model 2", choices=model_list, value="gpt-3.5-turbo")
169
  generate_button2 = gr.Button("Generate Article - gpt-3.5-turbo")
170
  with gr.Column():
171
  output_text2 = gr.Textbox(label="Generated Article - gpt-3.5-turbo")
172
+ with gr.Column():
173
+ validate_article_result_2 = gr.JSON()
174
  with gr.Row():
175
  with gr.Column():
176
  model_3 = gr.Dropdown(label="Model 3", choices=model_list, value="gpt-4")
177
  generate_button3 = gr.Button("Generate Article - gpt-4")
178
  with gr.Column():
179
  output_text3 = gr.Textbox(label="Generated Article - gpt-4")
180
+ with gr.Column():
181
+ # validate_article_result_3 Json format
182
+ validate_article_result_3 = gr.JSON()
183
 
184
  generate_button1.click(
185
  generate_new_article,
186
  inputs=[lesson_words_input, original_article_input, original_word_count_input, base_chars_input, model_1],
187
+ outputs=[output_text1, validate_article_result_1]
188
  )
189
  generate_button2.click(
190
  generate_new_article,
191
  inputs=[lesson_words_input, original_article_input, original_word_count_input, base_chars_input, model_2],
192
+ outputs=[output_text2, validate_article_result_2]
193
  )
194
  generate_button3.click(
195
  generate_new_article,
196
  inputs=[lesson_words_input, original_article_input, original_word_count_input, base_chars_input, model_3],
197
+ outputs=[output_text3, validate_article_result_3]
198
  )
199
 
200
  # 為其他模型添加點擊事件