# Install dependencies: Unsloth, PEFT, bitsandbytes, Accelerate, and Transformers.
import subprocess
import sys

subprocess.check_call([sys.executable, "-m", "pip", "install", "unsloth", "peft", "bitsandbytes", "accelerate", "transformers"])
|
# Unsloth returns both the model and its tokenizer, so no separate AutoTokenizer is needed.
from unsloth import FastLanguageModel
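# 4-bit loading relies on bitsandbytes CUDA kernels, so a GPU is required.
# A small sanity check (our addition, not part of the original snippet;
# torch is installed as a dependency of the packages above):
import torch

assert torch.cuda.is_available(), "A CUDA-capable GPU is required for 4-bit inference."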
|
|
# Prompt template; the question is substituted into the {} placeholder.
medqa_prompt = """You are a medical QA system. Answer the following medical question clearly and in detail with complete sentences.

### Question:
{}

### Answer:
"""
|
|
# Load the fine-tuned model and its tokenizer in 4-bit precision.
model_name = "Vijayendra/Phi4-MedQA"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=2048,
    dtype=None,          # None lets Unsloth auto-detect the best dtype
    load_in_4bit=True,   # 4-bit quantization keeps GPU memory usage low
    device_map="auto",
)
|
|
# Switch the model into Unsloth's optimized inference mode.
FastLanguageModel.for_inference(model)
|
|
# Fill the prompt template with a question and tokenize it for the GPU.
medical_question = "What are the common symptoms of diabetes?"
inputs = tokenizer(
    [medqa_prompt.format(medical_question)],
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=1024,
).to("cuda")
|
|
# Generate up to 512 new tokens; the KV cache (use_cache) speeds up decoding.
outputs = model.generate(
    **inputs,
    max_new_tokens=512,
    use_cache=True,
)
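# Optional: stream tokens to the console as they are produced instead of
# waiting for the full completion. A minimal sketch (our own, uncomment to
# use) with Transformers' TextStreamer; skip_prompt=True hides the echoed prompt.
# from transformers import TextStreamer
# streamer = TextStreamer(tokenizer, skip_prompt=True)
# model.generate(**inputs, streamer=streamer, max_new_tokens=512, use_cache=True)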
|
|
# Decode the full sequence; the prompt is echoed back along with the answer.
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
# Keep only the text after the answer marker, since the decoded output
# still contains the prompt.
if "### Answer:" in response:
    answer_text = response.split("### Answer:")[1].strip()
else:
    answer_text = response.strip()

print(f"Question: {medical_question}")
print(f"Answer: {answer_text}")
|
|