File size: 5,549 Bytes
a6b6dbf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b089ffe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36bef50
b089ffe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8354046
b089ffe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
---
license: apache-2.0
datasets:
- rajpurkar/squad
- ehovy/race
- mandarjoshi/trivia_qa
language:
- vi
- en
library_name: transformers
base_model:
- google-t5/t5-small
pipeline_tag: question-answering
tags:
- code
---
```python
!pip install flask transformers  pyngrok --quiet   # install library

from flask import Flask, request, jsonify
from transformers import T5Tokenizer, T5ForConditionalGeneration
import tensorflow
from pyngrok import ngrok
import json
import torch
import requests
# format output json
def parse_questions(raw_json):
    """Turn raw generated text into a list of structured question records.

    The text under ``raw_json["generated_text"]`` is cut at every ``"Q: "``
    marker. Each piece after the first marker is expected to look like
    ``<question> A: 1) ... 2) ... Correct: <answer>``.

    Args:
        raw_json: Mapping holding the generated text under "generated_text".

    Returns:
        A list of dicts with the keys "id" ("Q1", "Q2", ...), "Question",
        "options" (letter -> option text, lettered from "A") and
        "correct_answer". Parts that cannot be located are reported as
        ``None`` (or an empty dict for the options).
    """
    import re

    def _first_group(pattern, text, flags=0):
        # Stripped first capture group of the first match, or None if no match.
        found = re.search(pattern, text, flags)
        return found.group(1).strip() if found else None

    parsed = []
    # Whatever precedes the first "Q: " marker is not a question, so drop it.
    segments = re.split(r"Q:\s", raw_json["generated_text"])[1:]
    for idx, segment in enumerate(segments, start=1):
        try:
            stem = _first_group(r"(.+?)\sA:", segment)
            choices_text = _first_group(r"A:\s(.+?)\sCorrect:", segment, re.DOTALL)
            answer = _first_group(r"Correct:\s(.+)", segment)

            choices = {}
            if choices_text:
                # Options are numbered "1) ... 2) ..."; the piece before the
                # first number is discarded and the rest are lettered A, B, C...
                pieces = re.split(r"\d\)", choices_text)[1:]
                choices = {
                    chr(64 + position): piece.strip()
                    for position, piece in enumerate(pieces, start=1)
                }

            parsed.append({
                "id": f"Q{idx}",
                "Question": stem,
                "options": choices,
                "correct_answer": answer,
            })
        except Exception as e:
            print(f"Error parsing block {idx}: {e}")

    return parsed

# Flask application object that the route decorators below register handlers on.
app = Flask(__name__)
# NOTE(review): "Ngrok_Auth_Token" looks like a placeholder — substitute a real
# ngrok auth token before running.
ngrok.set_auth_token("Ngrok_Auth_Token")
# Open an ngrok tunnel to local port 5000.
# NOTE(review): presumably the port app.run() listens on by default — confirm.
public_url = ngrok.connect(5000)
print("Ngrok URL:", public_url)

# Load the tokenizer and model once at module level so the route handlers
# below can reuse them for every request.
model_name = "TranVanTri352/MCQ_Paragraph_AI_Model"
tokenizer = T5Tokenizer.from_pretrained(model_name)
# NOTE(review): from_tf=True presumably loads the weights from a TensorFlow
# checkpoint — confirm against the transformers documentation.
model = T5ForConditionalGeneration.from_pretrained(model_name, from_tf=True)


@app.route('/status', methods=['GET'])
def model_status():
    """Report whether the model and tokenizer are loaded.

    Returns:
        A ``(response, status_code)`` pair:
        * 200 with status "ready", the model name, the framework and the
          device type the model's weights are on;
        * 500 with status "not_ready" when the model or tokenizer is missing;
        * 500 with status "error" if the check itself raises.
    """
    try:
        # Compare against None explicitly: truthiness of a tokenizer goes
        # through __len__, which is not a "is it loaded" check.
        if model is None or tokenizer is None:
            return jsonify({
                'status': 'not_ready',
                'message': 'Model or tokenizer is not loaded.'
            }), 500

        return jsonify({
            'status': 'ready',
            'model_name': model_name,
            'framework': 'transformers',
            # Report where the model actually is ("cpu", "cuda", ...), not
            # merely whether CUDA happens to be available on this machine.
            'device': model.device.type,
            'message': 'Model is loaded and ready for inference.'
        }), 200
    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': f'Error occurred while checking model status: {str(e)}'
        }), 500



@app.route('/generate', methods=['POST'])
def generate_text():
    """Generate multiple-choice questions from the article in the request body.

    Expects a JSON object with a string "text" field. The text is prefixed
    with the generation prompt, sampled five times, and the five decoded
    outputs are joined and parsed by ``parse_questions``.

    Returns:
        A ``(response, status_code)`` pair:
        * 200 with ``{'questions': [...]}`` on success;
        * 400 when the body is not a JSON object carrying a string "text";
        * 500 with ``{'error': <message>}`` on any other failure.
    """
    try:
        # silent=True returns None for a missing, non-JSON or malformed body
        # instead of raising, so bad requests get a 400 rather than a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or not isinstance(data.get('text'), str):
            return jsonify({'error': 'Invalid input, "text" is required'}), 400

        input_text = "Generate a question and multiple answers based on this article: " + data['text']
        # Inputs longer than 512 tokens are truncated.
        inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512)

        all_outputs = []

        # Sample five outputs. The seeds are fixed (0-4): each pass is seeded
        # differently from the others, but a repeated request reuses the
        # same five seeds.
        for i in range(5):
            torch.manual_seed(i)
            outputs = model.generate(
                input_ids=inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_length=128,
                do_sample=True,          # sample instead of greedy decoding
                temperature=0.9,
                top_k=30,                # sample only among the 30 most likely tokens
                top_p=0.9,               # nucleus sampling
                repetition_penalty=1.5,  # limit repetition
            )
            all_outputs.append(tokenizer.decode(outputs[0], skip_special_tokens=True))

        final_output = " ".join(all_outputs)

        # Parse the combined output into formatted questions.
        parsed_questions = parse_questions({"generated_text": final_output})

        return jsonify({'questions': parsed_questions}), 200
    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/health', methods=['GET'])
def health_check():
    """Liveness endpoint: always answers 200 with a fixed status message."""
    payload = {'status': 'Service is healthy'}
    return jsonify(payload), 200

# Print the value returned by ngrok.connect() again, now that the routes are defined.
print(f"Public URL: {public_url}")

# Start the Flask server only when this file is executed as a script;
# debug mode is explicitly turned off.
if __name__ == "__main__":
    app.run(debug=False)
```

# Test Result
# Request /generate
```json
{
    "text": "Originally from Gangseo District, Seoul, Faker was signed by SKT in 2013, and quickly established himself as one of the league's top players. In his debut year, he achieved both an LCK title and a World Championship victory with SKT. From 2014 to 2017, Faker added five more LCK titles to his name, along with two MSI titles in 2016 and 2017, and two additional World Championships in 2015 and 2016. During this time, he also emerged victorious in the All-Star Paris 2014 and the IEM World Championship in 2016. Between 2019 and 2022, Faker secured four more LCK titles, becoming the first player to reach a total of 10. He also represented the South Korean national team at the 2018 Asian Games, earning a silver medal, and the 2022 Asian Games, earning a gold."
}
```