Raymond-dev-546730 committed on
Commit
44b6aa9
·
verified ·
1 Parent(s): 5b83c71

Upload 2 files

Browse files
Scripts/Inference_llama.cpp.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from llama_cpp import Llama

# Insert your medical query here
MEDICAL_QUERY = """

"""

# NOTE(review): llama-cpp-python's Llama() expects the path to a specific
# .gguf model file, not a directory -- "./" will fail at load time. Point
# this at the actual GGUF file (e.g. "./model.Q4_K_M.gguf") before running.
model_path = "./"  # Path to the directory containing your model weight files

llm = Llama(
    model_path=model_path,
    n_gpu_layers=40,   # number of layers to offload to the GPU
    n_ctx=10000,       # context window size in tokens
    n_threads=4
)

medical_query = MEDICAL_QUERY.strip()
prompt = f"USER: <medical_query>{medical_query}</medical_query>\nASSISTANT:"

# Bug fix: the original requested max_tokens=12000, which exceeds the
# n_ctx=10000 context window configured above -- that budget could never be
# honored. max_tokens=-1 tells llama.cpp to generate until EOS or until the
# context window is exhausted, which is the intended behavior here.
output = llm(
    prompt,
    max_tokens=-1,
    temperature=0.3,
    top_p=0.7,
    repeat_penalty=1.05
)

# llama-cpp-python returns an OpenAI-style completion dict; pull the text of
# the first choice, defaulting to "" if the structure is missing.
result = output.get("choices", [{}])[0].get("text", "").strip()

# Truncate anything the model emits after its closing </answer> tag.
if "</answer>" in result:
    end_pos = result.find("</answer>") + len("</answer>")
    result = result[:end_pos]

print(result)
Scripts/Inference_safetensors.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+
4
+ # Insert your medical query here
5
+ MEDICAL_QUERY = """
6
+
7
+ """
8
+
9
def load_model(model_path):
    """Load a causal-LM checkpoint and its tokenizer from *model_path*.

    Weights are loaded in float16 and spread automatically across the
    available devices by ``device_map="auto"``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map="auto",
        torch_dtype=torch.float16,
    )

    return model, tokenizer
19
+
20
def generate_response(model, tokenizer, medical_query):
    """Generate the model's answer to *medical_query*.

    Wraps the stripped query in the ``USER: <medical_query>...`` prompt
    template, samples a completion, and returns the assistant portion of
    the decoded text, truncated at the first closing ``</answer>`` tag
    when one is present.
    """
    prompt = (
        f"USER: <medical_query>{medical_query.strip()}</medical_query>"
        "\nASSISTANT:"
    )

    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)

    generated = model.generate(
        **encoded,
        do_sample=True,
        max_new_tokens=12000,
        temperature=0.3,
        top_p=0.7,
        repetition_penalty=1.05,
    )

    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    # Everything after the final "ASSISTANT:" marker is the model's reply.
    answer = decoded.split("ASSISTANT:")[-1].strip()

    # Drop anything the model emitted after its closing </answer> tag.
    marker = "</answer>"
    if marker in answer:
        return answer[: answer.index(marker) + len(marker)]

    return answer
43
+
44
def run():
    """Entry point: load the model and print the answer to MEDICAL_QUERY."""
    weights_dir = "./"  # Path to the directory containing your model weight files
    model, tokenizer = load_model(weights_dir)
    print(generate_response(model, tokenizer, MEDICAL_QUERY))


if __name__ == "__main__":
    run()