Monimoy committed
Commit 2c962c9 · verified · 1 Parent(s): 92b52b1

Upload 2 files

Files changed (2):
  1. app.py +130 -58
  2. requirements.txt +7 -5
app.py CHANGED
@@ -1,63 +1,135 @@
- import openvino_genai
- from optimum.intel.openvino import OVModelForCausalLM
  import gradio as gr

- print(" Inside application1")
- # Base Phi-2 model name
- #base_model_name = "microsoft/phi-2"
- base_model_name = "Monimoy/openvino_phi2"
- peft_model_path = "./phi2-openassistant-lora-final"
-
-
-
- device = 'CPU'  # GPU can be used as well
- #adapter = openvino_genai.Adapter(peft_model_path)
- #print(" Inside application2")
- #adapter_config = openvino_genai.AdapterConfig(adapter)
- #print(" Inside application3")
- #pipe = openvino_genai.LLMPipeline(model=base_model_name, device=device, adapters=adapter_config)  # register all required adapters here
- #pipe = openvino_genai.LLMPipeline(model=base_model_name)
- # Load model from Hugging Face
- model = OVModelForCausalLM.from_pretrained(base_model_name, export=True)
- print(" Inside application2")
- tokenizer = AutoTokenizer.from_pretrained(base_model_name)
- print(" Inside application3")
-
- # Create a pipeline
- text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
- print(" Inside application4")
-
- print("Generate with LoRA adapter and alpha set to 0.75:")
- #print(pipe.generate(args.prompt, max_new_tokens=100, adapters=openvino_genai.AdapterConfig(adapter, 0.75)))
-
-
- # Define prediction function
- def generate_response(prompt):
-     #inputs = tokenizer(prompt, return_tensors="pt").to(device)
-     #with torch.no_grad():
-     #    output = model.generate(**inputs, max_new_tokens=100, do_sample=True, temperature=0.7)
-     #return tokenizer.decode(output[0], skip_special_tokens=True)
-     #return pipe.generate(prompt, max_new_tokens=100, adapters=openvino_genai.AdapterConfig(adapter, 0.75))
-     return text_generator(prompt, max_length=50)
-
- # Define example prompts
- examples = [
-     ["What is machine learning?"],
-     ["Explain quantum mechanics in simple terms."],
-     ["Write a short story about a robot discovering emotions."],
-     ["Summarize the theory of relativity."]
- ]
-
- # Create Gradio UI
- iface = gr.Interface(
-     fn=generate_response,
-     inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
-     outputs=gr.Textbox(),
-     title="Phi-2 LoRA Model",
-     description="A fine-tuned Phi-2 model with LoRA running on Hugging Face Spaces (CPU optimized).",
-     examples=examples,
  )

- # Launch Gradio app
- iface.launch()

+ # app.py
+ import os
  import gradio as gr
+ import torch
+ from PIL import Image
+ from transformers import AutoTokenizer
+ import timm
+ from torchvision import transforms
+ from llama_cpp import Llama
+ from peft import PeftModel
+
+ # 1. Model Definitions (same as in the training script)
+ class SigLIPImageEncoder(torch.nn.Module):
+     def __init__(self, model_name='resnet50', embed_dim=512, pretrained_path=None):
+         super().__init__()
+         self.model = timm.create_model(model_name, pretrained=False, num_classes=0, global_pool='avg')  # pretrained=False
+         self.embed_dim = embed_dim
+         self.projection = torch.nn.Linear(self.model.num_features, embed_dim)
+
+         if pretrained_path:
+             self.load_state_dict(torch.load(pretrained_path, map_location=torch.device('cpu')))  # load to CPU first
+             print(f"Loaded SigLIP image encoder from {pretrained_path}")
+         else:
+             print("Initialized SigLIP image encoder without pretrained weights.")
+
+     def forward(self, image):
+         features = self.model(image)
+         embedding = self.projection(features)
+         return embedding
+
+ # 2. Load Models and Tokenizer
+ phi3_model_path = "QuantFactory/Phi-3-mini-4k-instruct-GGUF"  # Hub repo of the quantized Phi-3 GGUF model
+ peft_model_path = "./qlora_phi3_model"
+ image_model_name = 'resnet50'
+ image_embed_dim = 512
+ siglip_pretrained_path = "image_encoder.pth"  # path to the pretrained SigLIP image encoder
+
+ device = torch.device("cpu")  # force CPU
+ print(f"Using device: {device}")
+
+ # Load a tokenizer compatible with Phi-3
+ text_tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)
+ text_tokenizer.pad_token = text_tokenizer.eos_token  # Phi-3 has no dedicated pad token
+
+ # Image transformations (ImageNet normalization statistics)
+ image_transform = transforms.Compose([
+     transforms.Resize((224, 224)),
+     transforms.ToTensor(),
+     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+ ])
+
+ # Load SigLIP image encoder
+ image_encoder = SigLIPImageEncoder(model_name=image_model_name, embed_dim=image_embed_dim, pretrained_path=siglip_pretrained_path).to(device)
+ image_encoder.eval()  # set to evaluation mode
+
+ # Load Phi-3 model using llama.cpp
+ #base_model = Llama(
+ #    model_path=phi3_model_path,
+ #    n_gpu_layers=0,  # ensure no GPU usage
+ #    n_ctx=2048,      # adjust context length as needed
+ #    verbose=True,
+ #)
+
+ llm = Llama.from_pretrained(
+     repo_id="QuantFactory/Phi-3-mini-4k-instruct-GGUF",
+     #filename="Phi-3-mini-4k-instruct.Q2_K.gguf",
+     filename="Phi-3-mini-4k-instruct.Q4_K_M.gguf",
+     n_gpu_layers=0,
+     n_ctx=2048,
+     verbose=True
  )

+ # NOTE: a PEFT adapter cannot be attached to a GGUF model loaded through
+ # llama.cpp; the LoRA weights would have to be merged into the Hugging Face
+ # checkpoint before GGUF conversion. The two lines below are therefore
+ # disabled and the base GGUF model is used directly.
+ #model = PeftModel.from_pretrained(base_model, peft_model_path, offload_dir='./offload')
+ #model = model.merge_and_unload()
+ model = llm
+ print("Phi-3 model loaded successfully")
+
+ # 3. Inference Function
+ def predict(image, question):
+     """
+     Takes an image and a question as input and returns an answer.
+     """
+     if image is None or question is None or question == "":
+         return "Please provide both an image and a question."
+
+     try:
+         image = Image.fromarray(image).convert("RGB")
+         image = image_transform(image).unsqueeze(0).to(device)
+
+         # Get image embeddings
+         with torch.no_grad():
+             image_embeddings = image_encoder(image)
+         # Flatten the image embeddings for simplicity
+         image_embeddings = image_embeddings.flatten().tolist()
+
+         # Create the prompt with the image embeddings serialized as text
+         # (crude: 512 floats consume a large share of the 2048-token context)
+         prompt = f"Question: {question}\nImage Embeddings: {image_embeddings}\nAnswer:"
+
+         # Generate answer using llama.cpp
+         output = model(
+             prompt,
+             max_tokens=128,
+             stop=["Q:", "\n"],
+             echo=False,
+         )
+
+         answer = output["choices"][0]["text"].strip()
+
+         return answer
+
+     except Exception as e:
+         return f"An error occurred: {str(e)}"
+
+ # 4. Gradio Interface
+ iface = gr.Interface(
+     fn=predict,
+     inputs=[
+         gr.Image(label="Upload an Image"),
+         gr.Textbox(label="Ask a Question about the Image", placeholder="What is in the image?")
+     ],
+     outputs=gr.Textbox(label="Answer"),
+     title="Image Question Answering with Phi-3 and SigLIP (CPU)",
+     description="Ask questions about an image and get answers powered by Phi-3 (llama.cpp) and SigLIP.",
+     examples=[
+         ["cat_0006.png", "Create an interesting story about this image."],
+         ["bird_0004.png", "Can you describe this image?"],
+         ["truck_0003.png", "Elaborate on the setting of the image."],
+         ["ship_0007.png", "Explain the purpose of the image."]
+     ]
+ )
+
+ # 5. Launch the App
+ if __name__ == "__main__":
+     iface.launch()
requirements.txt CHANGED
@@ -1,6 +1,8 @@
  gradio
- huggingface_hub
- openvino
- openvino-genai
- optimum-intel
- transformers
+ torch
+ torchvision
+ timm
+ Pillow
+ transformers
+ llama-cpp-python
+ peft
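With both files in place, the app can be smoke-tested locally by calling predict directly. This is a hypothetical snippet: it assumes the requirements above are installed and that image_encoder.pth and cat_0006.png sit next to app.py.

# Hypothetical smoke test for the updated Space. Importing app downloads the
# GGUF weights; the __main__ guard keeps the Gradio server from launching.
import numpy as np
from PIL import Image
from app import predict

img = np.array(Image.open("cat_0006.png").convert("RGB"))  # numpy array, as Gradio would pass
print(predict(img, "Can you describe this image?"))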