# Source: vimedllm/notebook/An/new/prepare_doc.py
# Uploaded by VuvanAn with huggingface_hub (commit cc37925, verified).
import json
def import_question(qa_dir):
    """Load multiple-choice questions from a JSON Lines file.

    Every non-blank line of the file is parsed as one JSON object that must
    contain the keys ``question``, ``A``, ``B``, ``C``, ``D``, ``E`` and
    ``uuid``.

    Args:
        qa_dir: Path of the ``.jsonl`` file to read (it is passed directly
            to ``open``, so despite the name it is a file, not a directory).

    Returns:
        A tuple ``(questions, options, ids)`` of three parallel lists:
        the question texts, the five answer options ``[A, B, C, D, E]`` of
        each question, and the ``uuid`` of each question.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the required keys.
    """
    questions = []
    options = []
    ids = []
    with open(qa_dir, 'r', encoding='utf-8') as f:
        for line in f:
            # Blank lines (e.g. a stray trailing newline) are not records;
            # feeding them to json.loads would abort the whole load.
            if not line.strip():
                continue
            data = json.loads(line)
            questions.append(data['question'])
            options.append([data['A'], data['B'], data['C'], data['D'], data['E']])
            ids.append(data['uuid'])
    return questions, options, ids
import os

# Location of the MedAB question file. The default is the original author's
# local path; set the MEDAB_QA_PATH environment variable to run the script on
# another machine without editing the source.
qa_dir = os.environ.get(
    "MEDAB_QA_PATH",
    r"C:\Users\vuvan\Desktop\An_Plaza\ViMedLLM\Vietnamese-Medical-LLM\dataset\QA Data\MedAB\MedABv2.jsonl",
)
# Parallel lists indexed by question position; process_question reads them.
questions, options, ids = import_question(qa_dir)
from chatbot import Chatbot
# Single chat client shared by every process_question call below.
# NOTE(review): max_token presumably caps the response length — confirm
# against the project's chatbot module.
cb = Chatbot("llama3.1:8b", max_token=10000)
from prompt import request_retrieve_prompt
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
import threading

# process_question runs on several worker threads at once; this lock keeps
# each JSON line written to query.jsonl intact (no interleaved appends).
_query_file_lock = threading.Lock()


def process_question(i):
    """Generate a retrieval query for question ``i`` and append it to query.jsonl.

    The question text and options at index ``i`` of the module-level
    ``questions`` / ``options`` lists are formatted into
    ``request_retrieve_prompt`` and sent to the shared chatbot ``cb``.
    The response is stored together with the question id as one JSON line
    in ``query.jsonl`` and also printed.

    Args:
        i: Index into the module-level ``questions``, ``options`` and
            ``ids`` lists.

    Returns:
        None.

    Raises:
        IndexError: If ``i`` is outside the range of the loaded questions.
    """
    prompt = request_retrieve_prompt.format(question=questions[i], options=options[i])
    response = cb.chat(prompt)
    result = {
        'id': ids[i],
        'query': response
    }
    # Serialize outside the lock so the critical section is just the file write.
    line = json.dumps(result, ensure_ascii=False) + '\n'
    with _query_file_lock:
        with open('query.jsonl', 'a', encoding='utf-8') as f:
            f.write(line)
    print(result)
# Fan the questions out over two worker threads and show a progress bar.
# process_question returns nothing useful, so the results are only drained
# to drive the work and surface any exception raised in a worker.
with ThreadPoolExecutor(max_workers=2) as executor:
    for _ in tqdm(executor.map(process_question, range(len(questions))), total=len(questions)):
        pass