File size: 13,107 Bytes
f55a64f
 
 
 
5514040
f55a64f
911fc5f
 
 
 
 
 
f55a64f
3f6a4af
 
96d3772
 
911fc5f
 
96d3772
5514040
911fc5f
 
96d3772
911fc5f
 
 
 
 
 
 
 
 
 
 
 
 
 
5514040
f55a64f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8749812
67eb293
f55a64f
 
 
8749812
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f55a64f
8e3c243
15c45c4
 
 
 
f55a64f
 
 
 
 
 
e22a348
 
 
 
 
911fc5f
 
f55a64f
 
 
fe3dc6c
f55a64f
 
 
 
 
 
 
 
 
 
 
 
 
 
911fc5f
df2624f
911fc5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f55a64f
 
8749812
 
 
 
67eb293
8749812
e22a348
 
 
 
 
f55a64f
 
e22a348
f55a64f
8749812
 
 
 
 
 
 
 
 
 
67eb293
 
 
 
 
 
 
 
 
f55a64f
8749812
f55a64f
0647147
5514040
 
 
 
 
f55a64f
 
c70b978
 
d8a3a63
 
c70b978
 
f55a64f
 
0647147
 
 
 
 
 
 
 
 
 
 
 
d7227a1
0647147
 
 
 
 
 
 
fe3dc6c
8e3c243
 
 
 
 
0e1d843
fe3dc6c
911fc5f
b02eb78
3f6a4af
fe3dc6c
b02eb78
fe3dc6c
8749812
 
fe3dc6c
b02eb78
3f6a4af
fe3dc6c
b02eb78
fe3dc6c
8749812
 
fe3dc6c
fab361d
3f6a4af
fe3dc6c
fab361d
fe3dc6c
8749812
 
 
911fc5f
 
 
 
 
 
 
 
8e3c243
 
 
 
8749812
8e3c243
 
d7227a1
8e3c243
8749812
d7227a1
fe3dc6c
 
 
8749812
fe3dc6c
911fc5f
 
 
 
 
d7227a1
8e3c243
0647147
 
 
babfde6
 
0647147
 
 
 
d7227a1
 
 
f55a64f
 
8e3c243
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
import gradio as gr
import os
import pandas as pd
from openai import OpenAI
import json


from google.cloud import aiplatform
from vertexai.preview.generative_models import GenerativeModel
from google.oauth2.service_account import Credentials


# Load API credentials: from local.json when IS_ENV_LOCAL is set, otherwise
# from environment variables.
# NOTE(review): os.getenv returns a string, so any non-empty value (including
# "0" or "false") selects the local branch -- confirm that is intended.
IS_ENV_LOCAL = os.getenv("IS_ENV_LOCAL", False)
if IS_ENV_LOCAL:
    # Use a context manager so the file handle is closed deterministically,
    # and read as UTF-8 instead of the platform's locale encoding.
    with open("local.json", encoding="utf-8") as local_file:
        local_json = json.load(local_file)
    openai_api_key = local_json["OPENAI_API_KEY"]
    # In local.json the service-account info is already a parsed object.
    GOOGLE_SERVICE_ACCOUNT_INFO = local_json["GBQ_TOKEN"]
    google_service_account_info_dict = GOOGLE_SERVICE_ACCOUNT_INFO
else:
    openai_api_key = os.getenv("OPENAI_API_KEY")
    # In the environment the service-account info is a JSON string.
    # json.loads raises TypeError if GBQ_TOKEN is not set.
    GOOGLE_SERVICE_ACCOUNT_INFO = os.getenv("GBQ_TOKEN")
    google_service_account_info_dict = json.loads(GOOGLE_SERVICE_ACCOUNT_INFO)

# OPENAI
OPENAI_CLIENT = OpenAI(api_key=openai_api_key)

# GOOGLE
GOOGPE_SCOPES = ["https://www.googleapis.com/auth/cloud-platform"]
google_creds = Credentials.from_service_account_info(
    google_service_account_info_dict, scopes=GOOGPE_SCOPES
)
# NOTE(review): `service_account` receives the whole info dict here; verify
# against the aiplatform.init documentation whether an e-mail string is expected.
aiplatform.init(
    project="junyiacademy",
    service_account=google_service_account_info_dict,
    credentials=google_creds,
)
GEMINI_MODEL = GenerativeModel("gemini-pro")


def extract_article_from_content(article_text):
    """Extract the article body from a raw model reply.

    The markers "新文章:", "New Article:" and "Here it is:" are tried in that
    order. For the first marker found, the text after it is returned with
    leading newlines skipped, cut at the first "\\nThank you" that follows
    (if any), and right-stripped.

    Args:
        article_text: Full text returned by the model.

    Returns:
        The extracted article, or ``article_text`` unchanged when none of the
        start markers is present.
    """
    start_markers = ["新文章:", "New Article:", "Here it is:"]
    end_marker = "\nThank you"
    text_length = len(article_text)

    for start_marker in start_markers:
        marker_index = article_text.find(start_marker)
        if marker_index == -1:
            continue

        start_index = marker_index + len(start_marker)
        # Skip newlines right after the marker. The bounds check matters when
        # the marker (plus newlines) is the very end of the text; indexing
        # past the end would raise IndexError.
        while start_index < text_length and article_text[start_index] == "\n":
            start_index += 1

        end_index = article_text.find(end_marker, start_index)
        if end_index != -1:
            return article_text[start_index:end_index].rstrip()
        return article_text[start_index:].rstrip()

    return article_text

def validate_article(generated_article, lesson_words, base_chars, original_word_count):
    """Check a generated article against the vocabulary and length rules.

    Punctuation and newlines are removed from the article first; every
    remaining character counts as one "word".

    Args:
        generated_article: Article text to validate.
        lesson_words: Characters that must all appear in the article.
        base_chars: Additional characters the article is allowed to use.
        original_word_count: Character count of the original article; the new
            article must be within +/-10% of it.

    Returns:
        A JSON-serializable dict with three boolean flags
        (``not_every_new_word_is_used``, ``word_out_of_range``,
        ``word_count_error``) and supporting details
        (``lesson_words_not_in_new_article``, ``words_not_in_both``,
        ``additional_words`` as a sorted list,
        ``count_of_words_in_new_article``).
    """
    punctuation = set("、,。!?;:「」『』()《》【】'\n'")
    clean_article = "".join(char for char in generated_article if char not in punctuation)

    # Build the lookup sets once instead of rebuilding lists per character.
    article_chars = set(clean_article)
    lesson_chars = set(lesson_words)
    base_char_set = set(base_chars or "")

    # Punctuation is stripped from the article above, so a separator inside
    # lesson_words (e.g. "、") could never be found; do not require it.
    required_words = [char for char in lesson_words if char not in punctuation]
    lesson_words_not_in_new_article = [
        word for word in required_words if word not in article_chars
    ]
    words_not_in_both = [
        word for word in lesson_words_not_in_new_article if word not in base_char_set
    ]
    # Sorted list rather than a set: the result is displayed as JSON, and a
    # set is not JSON-serializable (sorting also makes the output stable).
    additional_words = sorted(article_chars - lesson_chars - base_char_set)

    count_of_words_in_new_article = len(clean_article)
    lower_bound = 0.9 * original_word_count
    upper_bound = 1.1 * original_word_count
    word_count_error = not (lower_bound <= count_of_words_in_new_article <= upper_bound)

    return {
        "not_every_new_word_is_used": bool(lesson_words_not_in_new_article),
        "word_out_of_range": bool(additional_words),
        "word_count_error": word_count_error,

        "lesson_words_not_in_new_article": lesson_words_not_in_new_article,
        "words_not_in_both": words_not_in_both,
        "additional_words": additional_words,
        "count_of_words_in_new_article": count_of_words_in_new_article,
    }

def generate_new_article(lesson_words, original_article, original_word_count, base_chars, model_name):
    """Generate a new article with the chosen model, retrying on failed validation.

    Up to three attempts are made. After each invalid attempt the rejected
    article and the validation errors are appended to the prompt so the next
    attempt can correct them.

    Args:
        lesson_words: Characters that must all be used in the article.
        original_article: Reference article whose style is imitated.
        original_word_count: Character count of the reference article.
        base_chars: Additional characters the article may use.
        model_name: "gemini-pro" or an OpenAI chat model name ("gpt-...").

    Returns:
        A tuple ``(generated_article, validate_article_result)`` from the last
        attempt, whether or not it passed validation.

    Raises:
        gr.Error: If a required input is missing or the model is unsupported.
    """
    # check lesson_words, original_article, original_word_count exist
    if not lesson_words or not original_article or not original_word_count:
        raise gr.Error("lesson_words, original_article, original_word_count are required. Please upload the lesson csv file.")

    # Pick the backend up front. A fixed allow-list missed models such as
    # "gpt-4-0125-preview", which left `generated_text` unbound and crashed.
    is_openai_model = isinstance(model_name, str) and model_name.startswith("gpt-")
    if not is_openai_model and model_name != "gemini-pro":
        raise gr.Error(f"Unsupported model: {model_name}")

    max_attempts = 3
    generated_article = ""

    system_prompt = "You are a creative writer specialized in Chinese Children book. You will help me write Chinese Articles."

    # Built once, outside the retry loop, so the feedback appended after a
    # failed attempt is still part of the prompt on the next attempt.
    prompt = f"""
            Please write a new and original Chinese article tailored for first-grade students. Here's a summary of the key points that you should follow:

            Use Traditional Chinese (ZH-TW) Characters: The article should be written in Traditional Chinese(ZH-TW), not Simplified Chinese.

            Adherence to the Original Article: The new creation should closely follow the spirit, style, and rhythmic pattern of the provided original article. The number of words, excluding punctuation marks, should be similar to that of the original, approximately {original_word_count} words.

            Incorporate "New Words": Every word listed under "new words" must be used in the article. These words are: {lesson_words}.

            Utilize the "Word Library": Additional words required for the article can be selected from the provided "word library," which includes: {base_chars}.

            Restriction on Vocabulary: Do not use any words outside the "new words" or the "word library".

            Originality: The new article must be unique and original, not a copy of the original work.

            "Original Article" for Reference: The example provided is {original_article}。This article serves as a model for the spirit, style, and rhythmic pattern to be emulated.
        """

    for attempt in range(1, max_attempts + 1):
        print("================Attempt=====================")
        print(f"Attempt {attempt} to generate new article")
        print("===========================================")

        if is_openai_model:
            response = OPENAI_CLIENT.chat.completions.create(
                model=model_name,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=1000
            )
            generated_text = response.choices[0].message.content.strip()
        else:
            # Gemini is given the system prompt and the user prompt as one string.
            model_response = GEMINI_MODEL.generate_content(
                f"{system_prompt}, {prompt}"
            )
            generated_text = model_response.candidates[0].content.parts[0].text

        generated_article = extract_article_from_content(generated_text)

        validate_article_result = validate_article(generated_article, lesson_words, base_chars, original_word_count)
        not_every_new_word_is_used = validate_article_result['not_every_new_word_is_used']
        word_out_of_range = validate_article_result['word_out_of_range']
        word_count_error = validate_article_result['word_count_error']
        count_of_words_in_new_article = validate_article_result['count_of_words_in_new_article']

        print("====validate_article====")
        print(f"not_every_new_word_is_used: {not_every_new_word_is_used}")
        print(f"word_out_of_range: {word_out_of_range}")
        print(f"word_count_error: {word_count_error}")
        print("=========================")

        if not (not_every_new_word_is_used or word_out_of_range or word_count_error):
            print("Generated article is valid")
            break

        print("Generated article is invalid")
        error_messages = []
        if not_every_new_word_is_used:
            error_messages.append("Not every new word is used in the article.")
        if word_out_of_range:
            error_messages.append("The article contains words that are not in the new words or word library.")
        if word_count_error:
            error_messages.append(f"The word count of the new article deviates more than 10% from the original ({original_word_count}).")

        # Append the rejected article and its errors to the prompt for the next attempt.
        error_messages_str = "\n".join(error_messages) + "\n"
        prompt += f"""
                The new article is {generated_article}.
                word_count is {count_of_words_in_new_article}.
                But the generated article is invalid. The following issues were found:
                {error_messages_str}
                please follow the summary of the key points and fix the errors to generate a new article.
            """
        print(f"Prompt for next attempt: {prompt}")

    return generated_article, validate_article_result

def load_lesson_csv(file):
    """Load the lesson words and the original article from the first CSV row.

    Args:
        file: Anything ``pandas.read_csv`` accepts, or an object exposing the
            file path as ``.name``. ``None`` (no file selected) is allowed.

    Returns:
        A tuple ``(lesson_words, original_article, original_word_count,
        base_chars)``. ``original_word_count`` is the number of characters in
        the article excluding punctuation and line breaks; ``base_chars`` is
        always ``""`` here. ``("", "", 0, "")`` is returned when there is no
        file or the CSV has no data rows.

    Raises:
        KeyError: If the ``lesson_words`` or ``lesson_article`` column is missing.
    """
    # No file selected (e.g. the upload was cleared): nothing to load.
    if file is None:
        return "", "", 0, ""

    try:
        df = pd.read_csv(file, encoding='utf-8')
    except Exception:
        # Fall back to the object's path; presumably this covers upload
        # wrappers that read_csv cannot consume directly. Without a path
        # there is no fallback, so surface the original error.
        if not hasattr(file, "name"):
            raise
        df = pd.read_csv(file.name, encoding='utf-8')

    if df.empty:
        return "", "", 0, ""

    first_row = df.iloc[0]
    lesson_words = first_row['lesson_words']
    original_article = first_row['lesson_article']
    # Count only real characters: drop punctuation and line breaks.
    clean_original_article = [char for char in original_article if char not in "、,。!?;:「」『』()《》【】'\n''\r'"]
    original_word_count = len(clean_original_article)
    base_chars = ''  # This should be defined or extracted from some column or external source
    return lesson_words, original_article, original_word_count, base_chars

def load_base_chars_csv(file):
    """Load the base character library from the first row of a CSV file.

    Args:
        file: Anything ``pandas.read_csv`` accepts, or an object exposing the
            file path as ``.name``. ``None`` (no file selected) is allowed.

    Returns:
        The value of the ``words`` column in the first row, or ``""`` when
        there is no file or the CSV has no data rows.

    Raises:
        KeyError: If the ``words`` column is missing.
    """
    # No file selected (e.g. the upload was cleared): nothing to load.
    if file is None:
        return ""

    try:
        df = pd.read_csv(file, encoding='utf-8')
    except Exception:
        # Fall back to the object's path; presumably this covers upload
        # wrappers that read_csv cannot consume directly. Without a path
        # there is no fallback, so surface the original error.
        if not hasattr(file, "name"):
            raise
        df = pd.read_csv(file.name, encoding='utf-8')

    if df.empty:
        return ""

    return df.iloc[0]['words']

# UI layout: two CSV uploads, the editable input fields, and one row per model
# with a model dropdown, a generate button, the article and its validation.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Original Lesson CSV File")
            # The label lists the columns load_lesson_csv actually reads.
            lesson_csv_file_input = gr.File(label="Upload CSV file (Columns: lesson_words, lesson_article)")
        with gr.Column():
            gr.Markdown("### Base Characters CSV File")
            base_chars_csv_file_input = gr.File(label="Upload Base Characters File")

    with gr.Row():
        lesson_words_input = gr.Textbox(label="Lesson Words")
        original_article_input = gr.Textbox(label="Original Article")
        original_word_count_input = gr.Number(label="Original Word Count")
        base_chars_input = gr.Textbox(label="Base Characters")

    with gr.Row():
        model_list = ["gpt-4-0125-preview", "gpt-3.5-turbo", "gpt-4", "gemini-pro"]
        with gr.Column():
            model_1 = gr.Dropdown(label="Model 1", choices=model_list, value="gpt-4-0125-preview")
            generate_button1 = gr.Button("Generate Article - gpt-4-0125-preview")
        with gr.Column():
            output_text1 = gr.Textbox(label="Generated Article - gpt-4-0125-preview")
        with gr.Column():
            validate_article_result_1 = gr.JSON()
    with gr.Row():
        with gr.Column():
            model_2 = gr.Dropdown(label="Model 2", choices=model_list, value="gpt-3.5-turbo")
            generate_button2 = gr.Button("Generate Article - gpt-3.5-turbo")
        with gr.Column():
            output_text2 = gr.Textbox(label="Generated Article - gpt-3.5-turbo")
        with gr.Column():
            validate_article_result_2 = gr.JSON()
    with gr.Row():
        with gr.Column():
            model_3 = gr.Dropdown(label="Model 3", choices=model_list, value="gpt-4")
            generate_button3 = gr.Button("Generate Article - gpt-4")
        with gr.Column():
            output_text3 = gr.Textbox(label="Generated Article - gpt-4")
        with gr.Column():
            # Validation result of the third model, shown as JSON.
            validate_article_result_3 = gr.JSON()
    with gr.Row():
        with gr.Column():
            model_4 = gr.Dropdown(label="Model 4", choices=model_list, value="gemini-pro")
            generate_button4 = gr.Button("Generate Article - gemini-pro")
        with gr.Column():
            output_text4 = gr.Textbox(label="Generated Article - gemini-pro")
        with gr.Column():
            validate_article_result_4 = gr.JSON()

    # Every generate button runs the same handler on the shared inputs plus
    # the model selected in its own row, and writes to its own outputs.
    shared_inputs = [lesson_words_input, original_article_input, original_word_count_input, base_chars_input]
    model_rows = [
        (generate_button1, model_1, output_text1, validate_article_result_1),
        (generate_button2, model_2, output_text2, validate_article_result_2),
        (generate_button3, model_3, output_text3, validate_article_result_3),
        (generate_button4, model_4, output_text4, validate_article_result_4),
    ]
    for row_button, row_model, row_output, row_validation in model_rows:
        row_button.click(
            generate_new_article,
            inputs=[*shared_inputs, row_model],
            outputs=[row_output, row_validation]
        )

    # Fill the input fields whenever a CSV file is uploaded or changed.
    lesson_csv_file_input.change(
        load_lesson_csv,
        inputs=[lesson_csv_file_input],
        outputs=[lesson_words_input, original_article_input, original_word_count_input, base_chars_input]
    )
    base_chars_csv_file_input.change(
        load_base_chars_csv,
        inputs=[base_chars_csv_file_input],
        outputs=[base_chars_input]
    )

demo.launch()