TranVanTri352 committed on
Commit
b089ffe
·
verified ·
1 Parent(s): a6b6dbf

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +200 -1
README.md CHANGED
@@ -13,4 +13,203 @@ base_model:
13
  pipeline_tag: question-answering
14
  tags:
15
  - code
16
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  pipeline_tag: question-answering
14
  tags:
15
  - code
16
+ ---
17
+ ```python
18
+ !pip install flask transformers pyngrok --quiet # install library
19
+
20
+ from flask import Flask, request, jsonify
21
+ from transformers import T5Tokenizer, T5ForConditionalGeneration
22
+ import tensorflow
23
+ from pyngrok import ngrok
24
+ import json
25
+ import torch
26
+ import requests
27
+ # format output json
28
def parse_questions(raw_json):
    """Turn the model's raw generated text into structured question dicts.

    The regular expressions below look for text shaped like
    ``Q: <question> A: 1) <opt> 2) <opt> ... Correct: <answer>``, repeated
    once per question. Anything before the first ``Q:`` marker is ignored.

    Args:
        raw_json: Mapping holding the text under ``"generated_text"``.
            ``None``, a missing key, or an empty string produce ``[]``.

    Returns:
        A list of dicts, one per ``Q:`` block, with the keys ``"id"``
        (``"Q1"``, ``"Q2"``, ...), ``"Question"``, ``"options"`` (letters
        ``"A"``, ``"B"``, ... mapped to option text) and
        ``"correct_answer"``. Parts that cannot be found are ``None``
        (or ``{}`` for the options).
    """
    import re

    text = (raw_json or {}).get("generated_text") or ""

    # Compiled once here instead of being rebuilt for every block.
    question_re = re.compile(r"(.+?)\sA:")
    options_re = re.compile(r"A:\s(.+?)\sCorrect:", re.DOTALL)
    correct_re = re.compile(r"Correct:\s(.+)")
    # An option marker is a single digit followed by ")" that is NOT glued
    # to a preceding word character or "(" -- otherwise text such as
    # "(2016)" inside an option would be cut in half.
    marker_re = re.compile(r"(?<![\w(])\d\)")

    questions = []
    # \b keeps words that merely end in "Q:" (e.g. "FAQ: ") from starting a
    # new question. The first chunk is whatever precedes the first marker.
    blocks = re.split(r"\bQ:\s", text)[1:]
    for idx, block in enumerate(blocks, start=1):
        question_match = question_re.search(block)
        options_match = options_re.search(block)
        correct_match = correct_re.search(block)

        question = question_match.group(1).strip() if question_match else None
        options_raw = options_match.group(1).strip() if options_match else None
        correct_answer = correct_match.group(1).strip() if correct_match else None

        options = {}
        if options_raw:
            # Element 0 is the (normally empty) text before the first marker.
            pieces = marker_re.split(options_raw)[1:]
            # chr(65) == "A": the n-th option gets the n-th capital letter.
            options = {
                chr(64 + pos): piece.strip()
                for pos, piece in enumerate(pieces, start=1)
            }

        questions.append({
            "id": f"Q{idx}",
            "Question": question,
            "options": options,
            "correct_answer": correct_answer,
        })

    return questions
59
+
60
import os

# Flask application object that the route decorators below attach to.
app = Flask(__name__)

# Take the ngrok token from the NGROK_AUTH_TOKEN environment variable when it
# is set, so a real secret never has to be pasted into the script. The
# original placeholder string is kept as the fallback.
ngrok.set_auth_token(os.environ.get("NGROK_AUTH_TOKEN", "Ngrok_Auth_Token"))
# Open a tunnel to local port 5000 (the port the Flask server is started on).
public_url = ngrok.connect(5000)
print("Ngrok URL:", public_url)

# Loaded once at import time and shared by every request handler.
model_name = "TranVanTri352/MCQ_Paragraph_AI_Model"
tokenizer = T5Tokenizer.from_pretrained(model_name)
# from_tf=True: the weights are loaded from a TensorFlow checkpoint.
model = T5ForConditionalGeneration.from_pretrained(model_name, from_tf=True)
68
+
69
+
70
@app.route('/status', methods=['GET'])
def model_status():
    """Report whether the model and tokenizer are loaded.

    Returns:
        A ``(json, status)`` pair: 200 with model details when both objects
        exist, 500 with ``'not_ready'`` when either is ``None``, and 500
        with ``'error'`` if building the report raises.
    """
    try:
        # Check whether the model has been loaded. Compare with None
        # explicitly: plain truthiness would defer to any __len__/__bool__
        # the objects define instead of answering "does it exist".
        if model is not None and tokenizer is not None:
            return jsonify({
                'status': 'ready',
                'model_name': model_name,
                'framework': 'transformers',
                # Report where the model actually lives. CUDA merely being
                # available does not mean the model was moved onto it.
                'device': model.device.type,
                'message': 'Model is loaded and ready for inference.'
            }), 200

        return jsonify({
            'status': 'not_ready',
            'message': 'Model or tokenizer is not loaded.'
        }), 500
    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': f'Error occurred while checking model status: {str(e)}'
        }), 500
92
+
93
+
94
+
95
@app.route('/generate', methods=['POST'])
def generate_text():
    """Generate multiple-choice questions for the article in the request.

    Expects a JSON object body with a string field ``"text"``. Five
    generations are sampled from the model, joined with spaces and parsed
    by ``parse_questions``.

    Returns:
        ``({'questions': [...]}, 200)`` on success;
        ``({'error': ...}, 400)`` when the body is not a JSON object with a
        string ``"text"``; ``({'error': ...}, 500)`` if generation or
        parsing raises.
    """
    try:
        # silent=True yields None for a missing or malformed JSON body
        # instead of raising, so bad requests get a 400 rather than a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'text' not in data:
            return jsonify({'error': 'Invalid input, "text" is required'}), 400

        article = data['text']
        if not isinstance(article, str):
            return jsonify({'error': 'Invalid input, "text" must be a string'}), 400

        input_text = "Generate a question and multiple answers based on this article: " + article
        inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512)

        all_outputs = []

        # Sample 5 outputs, one per seed.
        for seed in range(5):
            # Seeds are fixed at 0..4: the five samples differ from each
            # other, but the same article should give the same five outputs
            # on every request. NOTE(review): this also resets torch's
            # global RNG on each call -- confirm that is intended.
            torch.manual_seed(seed)
            outputs = model.generate(
                input_ids=inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_length=128,
                do_sample=True,          # sample instead of greedy decoding
                temperature=0.9,         # soften the distribution slightly
                top_k=30,                # sample only from the 30 most likely tokens
                top_p=0.9,               # nucleus sampling
                repetition_penalty=1.5,  # discourage repeated tokens
            )
            all_outputs.append(tokenizer.decode(outputs[0], skip_special_tokens=True))

        final_output = " ".join(all_outputs)

        # Parse the joined text into structured questions.
        parsed_questions = parse_questions({"generated_text": final_output})

        return jsonify({'questions': parsed_questions}), 200
    except Exception as e:
        return jsonify({'error': str(e)}), 500
135
+
136
@app.route('/health', methods=['GET'])
def health_check():
    """Liveness probe: always answers 200 with a fixed status payload."""
    payload = {'status': 'Service is healthy'}
    return jsonify(payload), 200
139
+
140
print(f"Public URL: {public_url}")

# Start the Flask development server. The port is passed explicitly so it
# visibly matches the local port the ngrok tunnel was opened on (5000).
if __name__ == "__main__":
    app.run(port=5000, debug=False)
145
+ ```
146
+
147
+ # Test Result
148
+ ## Request `/generate`
149
+ ```json
150
+ {
151
+ "text": "Originally from Gangseo District, Seoul, Faker was signed by SKT in 2013, and quickly established himself as one of the league's top players. In his debut year, he achieved both an LCK title and a World Championship victory with SKT. From 2014 to 2017, Faker added five more LCK titles to his name, along with two MSI titles in 2016 and 2017, and two additional World Championships in 2015 and 2016. During this time, he also emerged victorious in the All-Star Paris 2014 and the IEM World Championship in 2016. Between 2019 and 2022, Faker secured four more LCK titles, becoming the first player to reach a total of 10. He also represented the South Korean national team at the 2018 Asian Games, earning a silver medal, and the 2022 Asian Games, earning a gold."
152
+ }
153
+ ```
154
+ ## Response body
155
+ ```json
156
+ {
157
+ "questions": [
158
+ {
159
+ "Question": "From the passage we know _.",
160
+ "correct_answer": "Faker will succeed in the LCK title and the MSI titles",
161
+ "id": "Q1",
162
+ "options": {
163
+ "A": "Faker will succeed in the IEM World Championships",
164
+ "B": "Fakers has won the LCK title and the MSI titles",
165
+ "C": "Faker is a top player of the SKT league",
166
+ "D": "Faker became the first player to reach a total of 10"
167
+ }
168
+ },
169
+ {
170
+ "Question": "Which of the following is true?",
171
+ "correct_answer": "Faker won three MSI titles from 2016 to 2016.",
172
+ "id": "Q2",
173
+ "options": {
174
+ "A": "Faker won two MSI titles in 2016 and 2017 respectively.",
175
+ "B": "Fakers won two MSI titles in 2016 and 2016.",
176
+ "C": "Faker won two MSI titles from 2016 to 2017.",
177
+ "D": "Faker helped the South Korea national team at the 2018 Asian Games."
178
+ }
179
+ },
180
+ {
181
+ "Question": "Faker _.",
182
+ "correct_answer": "was first to win the all-star Paris 2014 and 2016.",
183
+ "id": "Q3",
184
+ "options": {
185
+ "A": "won the All-Star Paris 2014 and IEM World Championship",
186
+ "B": "was first to win the all-star Paris 2014 and 2016",
187
+ "C": "appeared on the IEM World Championships in 2016 and 2016",
188
+ "D": "made his debut at SKT in 2013"
189
+ }
190
+ },
191
+ {
192
+ "Question": "What did Faker gain from 2014 to 2016 in 2015?",
193
+ "correct_answer": "Four MS I titles.",
194
+ "id": "Q4",
195
+ "options": {
196
+ "A": "Two MSI titles.",
197
+ "B": "Five MSI titles.",
198
+ "C": "Four MSi titles.",
199
+ "D": "Six MSI titles."
200
+ }
201
+ },
202
+ {
203
+ "Question": "If you want to win the IEM World Championship, you should go to _.",
204
+ "correct_answer": "the 2018 Asian Games",
205
+ "id": "Q5",
206
+ "options": {
207
+ "A": "Paris 2014 and 2016",
208
+ "B": "the IEM World Championship",
209
+ "C": "the 2018 Asian Games",
210
+ "D": "the 2012 South Korea national team"
211
+ }
212
+ }
213
+ ]
214
+ }
215
+ ```