Arsh014 committed on
Commit
de3ddde
·
verified ·
1 Parent(s): d3ee0d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -19
app.py CHANGED
@@ -3,23 +3,14 @@ import torch
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
4
  from peft import PeftModel
5
 
6
- # --- Configuration ---
7
- # 1. Base Model ID: Llama-2-7b-chat-hf is typically used as the base
8
  base_model_id = "NousResearch/Llama-2-7b-chat-hf"
9
 
10
- # 2. LoRA Path: IMPORTANT! Replace this with the path to your fine-tuned model
11
- # This should be the Hugging Face repo ID (e.g., "your-username/llama2-dockerfile-lora")
12
- # or a local directory path where the adapter weights are stored.
13
  lora_path = "Arsh014/lora-llama2-finetuned"
14
 
15
- # Check for CUDA availability
16
- device = 0 if torch.cuda.is_available() else -1
17
 
18
- print(f"Loading tokenizer from: {base_model_id}")
19
  tokenizer = AutoTokenizer.from_pretrained(base_model_id)
20
 
21
- # 3. Load the base model with 8-bit quantization for efficiency
22
- print(f"Loading base model (8-bit) from: {base_model_id}")
23
  model = AutoModelForCausalLM.from_pretrained(
24
  base_model_id,
25
  load_in_8bit=True,
@@ -27,24 +18,19 @@ model = AutoModelForCausalLM.from_pretrained(
27
  device_map="auto"
28
  )
29
 
30
- # 4. Apply the PEFT (LoRA) adapters to the base model
31
- print(f"Applying LoRA adapter from: {lora_path}")
32
  try:
33
  model = PeftModel.from_pretrained(model, lora_path)
34
  model.eval() # Set model to evaluation mode
35
  except Exception as e:
36
  print(f"Error loading LoRA adapter from {lora_path}. Ensure it exists and is correct.")
37
  print(f"Error: {e}")
38
- # The app will likely fail if the LoRA path is incorrect.
39
- # We proceed with the base model, but generation quality will be poor for the task.
40
 
41
  # 5. Create a text-generation pipeline
42
  print("Creating text-generation pipeline.")
43
  pipe = pipeline(
44
  "text-generation",
45
  model=model,
46
- tokenizer=tokenizer,
47
- device=device
48
  )
49
 
50
  def format_prompt(instruction, code):
@@ -73,11 +59,9 @@ def explain_dockerfile(instruction, code):
73
  return_full_text=False # We want only the new tokens generated after the prompt
74
  )
75
 
76
- # The pipeline's output can be complex, extract the text and clean up
77
  generated_text = response[0]["generated_text"].strip()
78
 
79
- # Clean up the output to remove the initial prompt if return_full_text=False
80
- # didn't perfectly handle it (it's good practice to split/strip again)
81
  if "### Response:" in generated_text:
82
  return generated_text.split("### Response:")[-1].strip()
83
 
 
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
4
  from peft import PeftModel
5
 
 
 
6
# --- Configuration ---
# Base model: community mirror of Llama-2-7b-chat-hf (no gated-access token needed).
base_model_id = "NousResearch/Llama-2-7b-chat-hf"

# Fine-tuned LoRA adapter weights: a Hugging Face repo ID or a local directory
# containing the PEFT adapter files.
lora_path = "Arsh014/lora-llama2-finetuned"

tokenizer = AutoTokenizer.from_pretrained(base_model_id)

# Load the base model in 8-bit to reduce GPU memory use; device_map="auto"
# lets accelerate place the layers across available devices.
# NOTE(review): `load_in_8bit=True` is deprecated in recent transformers
# releases in favor of `quantization_config=BitsAndBytesConfig(load_in_8bit=True)`;
# left as-is here to avoid changing runtime behavior — confirm installed version.
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    load_in_8bit=True,
    device_map="auto",
)

# Apply the PEFT (LoRA) adapters on top of the base model.
try:
    model = PeftModel.from_pretrained(model, lora_path)
    model.eval()  # Set model to evaluation mode (disables dropout for inference)
except Exception as e:
    # Best-effort fallback: keep serving with the bare base model so the app
    # still starts, but generation quality for the fine-tuned task will be poor.
    print(f"Error loading LoRA adapter from {lora_path}. Ensure it exists and is correct.")
    print(f"Error: {e}")

# 5. Create a text-generation pipeline.
# Do NOT pass `device=` here: the model was already placed by device_map="auto",
# and supplying a device would make the pipeline try to move it again and fail
# (this is the conflict the commit removed).
print("Creating text-generation pipeline.")
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
35
 
36
  def format_prompt(instruction, code):
 
59
  return_full_text=False # We want only the new tokens generated after the prompt
60
  )
61
 
62
+
63
  generated_text = response[0]["generated_text"].strip()
64
 
 
 
65
  if "### Response:" in generated_text:
66
  return generated_text.split("### Response:")[-1].strip()
67