saish-shetty committed
Commit 78c1b41 · verified · 1 Parent(s): 858256f

Upload 3 files

Files changed (3)
  1. DockerFile +25 -0
  2. app.py +73 -0
  3. requirements.txt +6 -0
DockerFile ADDED
@@ -0,0 +1,25 @@
+ FROM python:3.10-slim-buster
+
+ # Update package lists
+ RUN apt-get update
+
+ # Install libstdc++ (if needed)
+ RUN apt-get install -y libstdc++6
+
+ # Force install of the bitsandbytes non-CUDA (multi-backend) build
+ RUN pip install --force-reinstall 'https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl'
+
+ # Copy requirements file
+ COPY requirements.txt .
+
+ # Install dependencies
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy app files
+ COPY . .
+
+ EXPOSE 7860
+ ENV GRADIO_SERVER_NAME="0.0.0.0"
+
+ # Command to run the app
+ CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,73 @@
+ import gradio as gr
+ from peft import PeftModel
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+ import torch
+ import os
+ os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Hide all GPUs so inference runs on CPU
+
+ # Model and tokenizer loading
+ model_name = "microsoft/phi-2"  # Replace with your base model name
+ adapter_path = "./checkpoint-500"  # Path to your adapter directory (relative to app.py)
+
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.float16,
+ )
+
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+ tokenizer.pad_token = tokenizer.eos_token
+
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     device_map="auto",
+     quantization_config=bnb_config,
+     trust_remote_code=True,
+     torch_dtype=torch.float16,  # Use float16 for faster inference
+ )
+ model = PeftModel.from_pretrained(model, adapter_path)
+ model.eval()
+
+ # Inference function
+ def generate_text(prompt, max_length=200, temperature=0.7, top_p=0.9):
+     # Tokenize once and reuse both input_ids and attention_mask
+     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+     with torch.no_grad():
+         outputs = model.generate(
+             input_ids=inputs.input_ids,
+             attention_mask=inputs.attention_mask,
+             max_length=max_length,
+             temperature=temperature,
+             top_p=top_p,
+             do_sample=True,
+             pad_token_id=tokenizer.pad_token_id,
+         )
+
+     generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     return generated_text
+
+ # Sample questions
+ sample_questions = [
+     "Write a short story about a dog who becomes a detective.",
+     "What is 2+2?",
+     "Write a Flask app in Python to say 'Hello World!'",
+     "Give me a short 200-word essay on 'monopsony'.",
+ ]
+
+ # Gradio interface
+ iface = gr.Interface(
+     fn=generate_text,
+     inputs=[
+         gr.Textbox(lines=5, label="Prompt"),
+         gr.Slider(minimum=50, maximum=500, value=250, label="Max Length"),
+         gr.Slider(minimum=0.1, maximum=1.0, value=0.1, label="Temperature"),
+         gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top P"),
+     ],
+     outputs=gr.Textbox(label="Generated Text"),
+     title="Phi-2 OASST Fine-Tuning Demo",
+     description="Generate text using a fine-tuned Phi-2 model with PEFT adapters. Click a sample question below to get started!",
+     examples=[[q, 250, 0.1, 0.9] for q in sample_questions],  # Pre-filled example inputs
+ )
+
+ iface.launch()
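
Before wiring up the UI, the inference path can be smoke-tested on its own. A minimal sketch, assuming the base model and the checkpoint-500 adapter load cleanly; the iface.launch() call at the bottom of app.py would need to be commented out first, since importing the module otherwise blocks on the server:

    # Hypothetical smoke test; importing app runs the (slow) model-loading code.
    from app import generate_text
    print(generate_text("What is 2+2?", max_length=64, temperature=0.7, top_p=0.9))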
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ huggingface_hub
+ transformers
+ peft
+ accelerate
+ torch
+ gradio
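
Two notes on this list: bitsandbytes is intentionally absent, because the Dockerfile already force-installs the CPU-capable multi-backend wheel straight from its release URL, a build that a bare package name here would not resolve to; and none of the six packages are version-pinned, so image builds are not reproducible and a future transformers or gradio release could break the app.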