Update app.py
app.py
CHANGED
@@ -3,23 +3,14 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from peft import PeftModel
 
-# --- Configuration ---
-# 1. Base Model ID: Llama-2-7b-chat-hf is typically used as the base
 base_model_id = "NousResearch/Llama-2-7b-chat-hf"
 
-# 2. LoRA Path: IMPORTANT! Replace this with the path to your fine-tuned model
-# This should be the Hugging Face repo ID (e.g., "your-username/llama2-dockerfile-lora")
-# or a local directory path where the adapter weights are stored.
 lora_path = "Arsh014/lora-llama2-finetuned"
 
-# Check for CUDA availability
-device = 0 if torch.cuda.is_available() else -1
 
-
+
 tokenizer = AutoTokenizer.from_pretrained(base_model_id)
 
-# 3. Load the base model with 8-bit quantization for efficiency
-print(f"Loading base model (8-bit) from: {base_model_id}")
 model = AutoModelForCausalLM.from_pretrained(
     base_model_id,
     load_in_8bit=True,
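The surviving load call still passes `load_in_8bit=True` directly to `from_pretrained`. Newer transformers releases deprecate that keyword in favour of a `BitsAndBytesConfig`; the sketch below shows the same load-then-attach pattern in that style. It is an illustration only, since the library versions pinned by this Space are not visible in the diff.

```python
# Sketch only: the same 8-bit load plus LoRA attach, written with
# BitsAndBytesConfig instead of the deprecated load_in_8bit= keyword.
# Assumes transformers, accelerate, bitsandbytes and peft are installed.
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_model_id = "NousResearch/Llama-2-7b-chat-hf"
lora_path = "Arsh014/lora-llama2-finetuned"

tokenizer = AutoTokenizer.from_pretrained(base_model_id)
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",  # accelerate decides where the layers live
)
model = PeftModel.from_pretrained(model, lora_path)  # apply the adapter weights
model.eval()
```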
@@ -27,24 +18,19 @@ model = AutoModelForCausalLM.from_pretrained(
     device_map="auto"
 )
 
-# 4. Apply the PEFT (LoRA) adapters to the base model
-print(f"Applying LoRA adapter from: {lora_path}")
 try:
     model = PeftModel.from_pretrained(model, lora_path)
     model.eval()  # Set model to evaluation mode
 except Exception as e:
     print(f"Error loading LoRA adapter from {lora_path}. Ensure it exists and is correct.")
     print(f"Error: {e}")
-    # The app will likely fail if the LoRA path is incorrect.
-    # We proceed with the base model, but generation quality will be poor for the task.
 
 # 5. Create a text-generation pipeline
 print("Creating text-generation pipeline.")
 pipe = pipeline(
     "text-generation",
     model=model,
-    tokenizer=tokenizer
-    device=device
+    tokenizer=tokenizer
 )
 
 def format_prompt(instruction, code):
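The functional change in this hunk is dropping `device=device` from the `pipeline(...)` call. With the model loaded under `device_map="auto"`, accelerate has already dispatched its weights, and recent transformers versions reject a separate `device` argument for such models; as rendered in the diff, the old call also lacked a comma after `tokenizer=tokenizer`. Below is a hedged sketch of the corrected construction plus an illustrative call that reuses the objects defined above; the instruction text, Dockerfile snippet and generation parameters are assumptions, not values taken from this Space.

```python
# Sketch: with device_map="auto" the pipeline is not given a device argument;
# accelerate already owns device placement for this model.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Illustrative call only; parameter values are assumed, not from the Space.
prompt = format_prompt("Explain what this Dockerfile does.", "FROM python:3.11-slim")
result = pipe(prompt, max_new_tokens=256, return_full_text=False)
print(result[0]["generated_text"].strip())
```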
@@ -73,11 +59,9 @@ def explain_dockerfile(instruction, code):
         return_full_text=False  # We want only the new tokens generated after the prompt
     )
 
-
+
     generated_text = response[0]["generated_text"].strip()
 
-    # Clean up the output to remove the initial prompt if return_full_text=False
-    # didn't perfectly handle it (it's good practice to split/strip again)
     if "### Response:" in generated_text:
         return generated_text.split("### Response:")[-1].strip()
 
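The post-processing that survives this hunk assumes an Alpaca-style prompt ending in a `### Response:` header, so everything after the last marker is treated as the model's answer. A minimal sketch of that cleanup, written as a standalone helper; the exact template produced by `format_prompt()` is not visible in this diff, so the marker layout is a guess.

```python
# Sketch of the "### Response:" cleanup kept by this hunk; hypothetical helper,
# not part of the Space's app.py.
def extract_response(generated_text: str) -> str:
    """Keep only the text after the last '### Response:' marker, if present."""
    text = generated_text.strip()
    if "### Response:" in text:
        return text.split("### Response:")[-1].strip()
    return text

print(extract_response("### Response:\nThis image installs Python 3.11."))
# -> "This image installs Python 3.11."
```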