Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,6 +15,35 @@ output_dir = "./question_bank"
|
|
| 15 |
# Load the question-bank index (question_bank_dict).
# Format: { "<grade>年級_<term>學期": [list of question-bank file names] }
# The file is opened in a `with` block so the handle is closed right after
# parsing, and it is decoded explicitly as UTF-8 because the keys are Chinese
# text and must not depend on the platform's default encoding.
with open(f"{output_dir}/question_bank_dict.json", "r", encoding="utf-8") as index_file:
    question_bank_dict = json.load(index_file)
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
def generate_math_questions(grade, term, qtype="Unspecified", num_questions=10):
|
| 19 |
"""
|
| 20 |
根據年級、學期以及指定的題型(qtype)和題目數量(num_questions),
|
|
@@ -33,17 +62,14 @@ def generate_math_questions(grade, term, qtype="Unspecified", num_questions=10):
|
|
| 33 |
grade_semester = f"{grades[grade]}年級_{terms[term]}學期"
|
| 34 |
|
| 35 |
# 根據題庫字典從指定年級學期中取得所有檔名,並嘗試讀取其對應的 markdown 檔案內容
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
|
| 40 |
-
filename = filename.replace('.pdf', '.md')
|
| 41 |
-
if not os.path.exists(f"{output_dir}/md/{filename}"): continue
|
| 42 |
-
|
| 43 |
-
raw_questions.append(open(f"{output_dir}/md/{filename}").read())
|
| 44 |
|
| 45 |
-
print(len(raw_questions))
|
| 46 |
-
|
| 47 |
# 隨機選取並限制字串長度的題庫內容
|
| 48 |
input_question_bank = random_questions_with_limit(raw_questions, 20000)
|
| 49 |
|
|
@@ -98,7 +124,6 @@ def generate_math_questions(grade, term, qtype="Unspecified", num_questions=10):
|
|
| 98 |
]
|
| 99 |
|
| 100 |
# 使用 InferenceClient 呼叫 API 模型產生新題目
|
| 101 |
-
# 請自行挑選使用哪一個開源大語言模型,並自行調整模型輸入參數,看看會產生什麼結果
|
| 102 |
completion = client.chat.completions.create(
|
| 103 |
model="mistralai/Mistral-Nemo-Instruct-2407",
|
| 104 |
messages=messages,
|
|
@@ -108,33 +133,6 @@ def generate_math_questions(grade, term, qtype="Unspecified", num_questions=10):
|
|
| 108 |
# 傳回模型產生的文本
|
| 109 |
return completion.choices[0].message.content
|
| 110 |
|
| 111 |
-
def random_questions_with_limit(data, limit=20000):
    """
    Randomly pick entries from ``data`` while keeping their combined length
    within ``limit`` characters.

    The entries are visited in a random order. Each one is kept only if adding
    it does not push the running total over ``limit``; an entry that is too
    long is skipped, and later (shorter) entries may still be selected. Only
    the lengths of the entries count toward the limit, not the separators
    used in the joined string.

    Args:
        data: Iterable of strings to choose from. It is not modified; the
            shuffle is performed on a private copy.
        limit: Maximum total number of characters across the selected
            entries. Values below 5000 are raised to 5000.

    Returns:
        A tuple ``(result_list, result_str, count)``:
        - result_list: the selected entries, in the order they were picked
        - result_str: the selected entries joined with a blank line between them
        - count: the number of selected entries
    """
    # Never work with a limit smaller than 5000 characters.
    limit = max(limit, 5000)

    # Shuffle a copy so the caller's list keeps its original order.
    candidates = list(data)
    random.shuffle(candidates)

    result_list = []
    current_length = 0
    for item in candidates:
        # Keep the entry only if the running total stays within the limit.
        if current_length + len(item) <= limit:
            result_list.append(item)
            current_length += len(item)

    return result_list, "\n\n".join(result_list), len(result_list)
|
| 137 |
-
|
| 138 |
# 建立 Gradio 介面
|
| 139 |
with gr.Blocks() as app:
|
| 140 |
# 介面標題區
|
|
|
|
| 15 |
# Load the question-bank index (question_bank_dict).
# Format: { "<grade>年級_<term>學期": [list of question-bank file names] }
# The file is opened in a `with` block so the handle is closed right after
# parsing, and it is decoded explicitly as UTF-8 because the keys are Chinese
# text and must not depend on the platform's default encoding.
with open(f"{output_dir}/question_bank_dict.json", "r", encoding="utf-8") as index_file:
    question_bank_dict = json.load(index_file)

# Dump the loaded index to stdout at import time.
print(question_bank_dict)
|
| 19 |
+
|
| 20 |
+
def random_questions_with_limit(data, limit=20000):
    """
    Randomly pick entries from ``data`` while keeping their combined length
    within ``limit`` characters.

    The entries are visited in a random order. Each one is kept only if adding
    it does not push the running total over ``limit``; an entry that is too
    long is skipped, and later (shorter) entries may still be selected. Only
    the lengths of the entries count toward the limit, not the separators
    used in the joined string.

    Args:
        data: Iterable of strings to choose from. It is not modified; the
            shuffle is performed on a private copy.
        limit: Maximum total number of characters across the selected
            entries. Values below 5000 are raised to 5000.

    Returns:
        A tuple ``(result_list, result_str, count)``:
        - result_list: the selected entries, in the order they were picked
        - result_str: the selected entries joined with a blank line between them
        - count: the number of selected entries
    """
    # Never work with a limit smaller than 5000 characters.
    limit = max(limit, 5000)

    # Shuffle a copy so the caller's list keeps its original order.
    candidates = list(data)
    random.shuffle(candidates)

    result_list = []
    current_length = 0
    for item in candidates:
        # Keep the entry only if the running total stays within the limit.
        if current_length + len(item) <= limit:
            result_list.append(item)
            current_length += len(item)

    return result_list, "\n\n".join(result_list), len(result_list)
|
| 46 |
+
|
| 47 |
def generate_math_questions(grade, term, qtype="Unspecified", num_questions=10):
|
| 48 |
"""
|
| 49 |
根據年級、學期以及指定的題型(qtype)和題目數量(num_questions),
|
|
|
|
| 62 |
grade_semester = f"{grades[grade]}年級_{terms[term]}學期"
|
| 63 |
|
| 64 |
# 根據題庫字典從指定年級學期中取得所有檔名,並嘗試讀取其對應的 markdown 檔案內容
|
| 65 |
+
raw_questions = [
|
| 66 |
+
open(f"{output_dir}/md/{doc_path.replace('.pdf', '.md')}").read()
|
| 67 |
+
for doc_path in question_bank_dict[grade_semester]
|
| 68 |
+
if os.path.exists(f"{output_dir}/md/{doc_path.replace('.pdf', '.md')}")
|
| 69 |
+
]
|
| 70 |
|
| 71 |
+
print(raw_questions)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
|
|
|
|
|
|
| 73 |
# 隨機選取並限制字串長度的題庫內容
|
| 74 |
input_question_bank = random_questions_with_limit(raw_questions, 20000)
|
| 75 |
|
|
|
|
| 124 |
]
|
| 125 |
|
| 126 |
# 使用 InferenceClient 呼叫 API 模型產生新題目
|
|
|
|
| 127 |
completion = client.chat.completions.create(
|
| 128 |
model="mistralai/Mistral-Nemo-Instruct-2407",
|
| 129 |
messages=messages,
|
|
|
|
| 133 |
# 傳回模型產生的文本
|
| 134 |
return completion.choices[0].message.content
|
| 135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
# 建立 Gradio 介面
|
| 137 |
with gr.Blocks() as app:
|
| 138 |
# 介面標題區
|