# ClinicalThought-AI-8B / Scripts / Inference_llama.cpp.py
# Raymond-dev-546730's picture
# Upload 2 files
# 44b6aa9 verified
from llama_cpp import Llama
# Insert your medical query here
MEDICAL_QUERY = """
"""

# Path passed to llama.cpp as ``model_path``.
# NOTE(review): llama-cpp-python's ``Llama`` appears to expect the path to a
# single model file (e.g. a ``.gguf``), not a directory -- confirm and point
# this at the actual weights file.
model_path = "./"

# Closing tag after which any further generated text is discarded.
ANSWER_END_TAG = "</answer>"


def build_prompt(medical_query: str) -> str:
    """Wrap *medical_query* in the USER/ASSISTANT prompt template.

    Surrounding whitespace of the query is removed before it is embedded
    between the ``<medical_query>`` tags.
    """
    return (
        f"USER: <medical_query>{medical_query.strip()}</medical_query>"
        "\nASSISTANT:"
    )


def extract_answer(output: dict) -> str:
    """Return the generated text of the first choice, cut after ``</answer>``.

    Args:
        output: Completion response; the text is read from
            ``output["choices"][0]["text"]``.

    Returns:
        The stripped text, truncated right after the first ``</answer>`` tag
        when one is present. An empty string is returned when the response
        has no choices or no text, instead of raising.
    """
    # ``or [{}]`` also covers a present-but-empty ``choices`` list, which the
    # plain ``.get(..., [{}])[0]`` form would turn into an IndexError.
    choices = output.get("choices") or [{}]
    text = (choices[0].get("text") or "").strip()
    # partition() splits on the first occurrence; when the tag is missing,
    # ``tag`` is empty and ``head`` is the whole text.
    head, tag, _ = text.partition(ANSWER_END_TAG)
    return head + tag


def main() -> None:
    """Load the model, run the configured medical query and print the answer.

    Raises:
        SystemExit: If ``MEDICAL_QUERY`` is empty or whitespace only.
    """
    medical_query = MEDICAL_QUERY.strip()
    if not medical_query:
        # Fail before the (expensive) model load rather than sending an
        # empty query to the model.
        raise SystemExit(
            "MEDICAL_QUERY is empty: insert your medical query before running."
        )

    llm = Llama(
        model_path=model_path,
        n_gpu_layers=40,
        n_ctx=10000,
        n_threads=4,
    )

    # NOTE(review): max_tokens (12000) is larger than n_ctx (10000); prompt
    # plus completion cannot exceed the context window -- confirm how the
    # installed llama-cpp-python version handles this (clamp vs. error).
    output = llm(
        build_prompt(medical_query),
        max_tokens=12000,
        temperature=0.3,
        top_p=0.7,
        repeat_penalty=1.05,
    )

    print(extract_answer(output))


if __name__ == "__main__":
    main()