Monimoy committed
Commit 2c962c9 · verified · 1 Parent(s): 92b52b1

Upload 2 files

Files changed (2):
  1. app.py +130 -58
  2. requirements.txt +7 -5
app.py CHANGED
@@ -1,63 +1,135 @@
- import openvino_genai
- from optimum.intel.openvino import OVModelForCausalLM
  import gradio as gr

- print(" Inside application1")
- # Base Phi-2 model name
- #base_model_name = "microsoft/phi-2"
- base_model_name = "Monimoy/openvino_phi2"
- peft_model_path = "./phi2-openassistant-lora-final"
-
-
-
- device = 'CPU'  # GPU can be used as well
- #adapter = openvino_genai.Adapter(peft_model_path)
- #print(" Inside application2")
- #adapter_config = openvino_genai.AdapterConfig(adapter)
- #print(" Inside application3")
- #pipe = openvino_genai.LLMPipeline(model=base_model_name, device=device, adapters=adapter_config)  # register all required adapters here
- #pipe = openvino_genai.LLMPipeline(model=base_model_name)
- # Load model from Hugging Face
- model = OVModelForCausalLM.from_pretrained(base_model_name, export=True)
- print(" Inside application2")
- tokenizer = AutoTokenizer.from_pretrained(base_model_name)
- print(" Inside application3")
-
- # Create a pipeline
- text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
- print(" Inside application4")
-
- print("Generate with LoRA adapter and alpha set to 0.75:")
- #print(pipe.generate(args.prompt, max_new_tokens=100, adapters=openvino_genai.AdapterConfig(adapter, 0.75)))
-
-
- # Define prediction function
- def generate_response(prompt):
-     #inputs = tokenizer(prompt, return_tensors="pt").to(device)
-     #with torch.no_grad():
-     #    output = model.generate(**inputs, max_new_tokens=100, do_sample=True, temperature=0.7)
-     #return tokenizer.decode(output[0], skip_special_tokens=True)
-     #return pipe.generate(prompt, max_new_tokens=100, adapters=openvino_genai.AdapterConfig(adapter, 0.75))
-     return text_generator(prompt, max_length=50)
-
- # Define example prompts
- examples = [
-     ["What is machine learning?"],
-     ["Explain quantum mechanics in simple terms."],
-     ["Write a short story about a robot discovering emotions."],
-     ["Summarize the theory of relativity."]
- ]
-
- # Create Gradio UI
- iface = gr.Interface(
-     fn=generate_response,
-     inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
-     outputs=gr.Textbox(),
-     title="Phi-2 LoRA Model",
-     description="A fine-tuned Phi-2 model with LoRA running on Hugging Face Spaces (CPU optimized).",
-     examples=examples,
  )

- # Launch Gradio app
- iface.launch()

+ # app.py
+ import os
  import gradio as gr
+ import torch
+ from PIL import Image
+ from transformers import AutoTokenizer
+ import timm
+ from torchvision import transforms
+ from llama_cpp import Llama
+ from peft import PeftModel
+
+ # 1. Model Definitions (same as in the training script)
+ class SigLIPImageEncoder(torch.nn.Module):
+     def __init__(self, model_name='resnet50', embed_dim=512, pretrained_path=None):
+         super().__init__()
+         self.model = timm.create_model(model_name, pretrained=False, num_classes=0, global_pool='avg')  # pretrained=False
+         self.embed_dim = embed_dim
+         self.projection = torch.nn.Linear(self.model.num_features, embed_dim)
+
+         if pretrained_path:
+             self.load_state_dict(torch.load(pretrained_path, map_location=torch.device('cpu')))  # load to CPU first
+             print(f"Loaded SigLIP image encoder from {pretrained_path}")
+         else:
+             print("Initialized SigLIP image encoder without pretrained weights.")
+
+     def forward(self, image):
+         features = self.model(image)
+         embedding = self.projection(features)
+         return embedding
+
+ # 2. Load Models and Tokenizer
+ phi3_model_path = "QuantFactory/Phi-3-mini-4k-instruct-GGUF"  # Hub repo of the quantized Phi-3 GGUF model
+ peft_model_path = "./qlora_phi3_model"
+ image_model_name = 'resnet50'
+ image_embed_dim = 512
+ siglip_pretrained_path = "image_encoder.pth"  # path to the pretrained SigLIP image encoder
+
+ device = torch.device("cpu")  # force CPU
+ print(f"Using device: {device}")
+
+ # Load a tokenizer compatible with Phi-3
+ text_tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)
+ text_tokenizer.pad_token = text_tokenizer.eos_token  # Phi-3 has no dedicated pad token
+
+ # Image transformations (ImageNet normalization statistics)
+ image_transform = transforms.Compose([
+     transforms.Resize((224, 224)),
+     transforms.ToTensor(),
+     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+ ])
+
+ # Load SigLIP image encoder
+ image_encoder = SigLIPImageEncoder(model_name=image_model_name, embed_dim=image_embed_dim, pretrained_path=siglip_pretrained_path).to(device)
+ image_encoder.eval()  # set to evaluation mode
+
+ # Load Phi-3 model using llama.cpp
+ #base_model = Llama(
+ #    model_path=phi3_model_path,
+ #    n_gpu_layers=0,  # ensure no GPU usage
+ #    n_ctx=2048,      # adjust context length as needed
+ #    verbose=True,
+ #)
+
+ llm = Llama.from_pretrained(
+     repo_id="QuantFactory/Phi-3-mini-4k-instruct-GGUF",
+     #filename="Phi-3-mini-4k-instruct.Q2_K.gguf",
+     filename="Phi-3-mini-4k-instruct.Q4_K_M.gguf",
+     n_gpu_layers=0,
+     n_ctx=2048,
+     verbose=True
  )

+ # NOTE: a PEFT adapter cannot be attached to a GGUF model loaded through
+ # llama.cpp; the LoRA weights would have to be merged into the Hugging Face
+ # checkpoint before GGUF conversion. The two lines below are therefore
+ # disabled and the base GGUF model is used directly.
+ #model = PeftModel.from_pretrained(base_model, peft_model_path, offload_dir='./offload')
+ #model = model.merge_and_unload()
+ model = llm
+ print("Phi-3 model loaded successfully")
+
+ # 3. Inference Function
+ def predict(image, question):
+     """
+     Takes an image and a question as input and returns an answer.
+     """
+     if image is None or question is None or question == "":
+         return "Please provide both an image and a question."
+
+     try:
+         image = Image.fromarray(image).convert("RGB")
+         image = image_transform(image).unsqueeze(0).to(device)
+
+         # Get image embeddings
+         with torch.no_grad():
+             image_embeddings = image_encoder(image)
+         # Flatten the image embeddings for simplicity
+         image_embeddings = image_embeddings.flatten().tolist()
+
+         # Create the prompt with the image embeddings serialized as text
+         # (crude: 512 floats consume a large share of the 2048-token context)
+         prompt = f"Question: {question}\nImage Embeddings: {image_embeddings}\nAnswer:"
+
+         # Generate answer using llama.cpp
+         output = model(
+             prompt,
+             max_tokens=128,
+             stop=["Q:", "\n"],
+             echo=False,
+         )
+
+         answer = output["choices"][0]["text"].strip()
+
+         return answer
+
+     except Exception as e:
+         return f"An error occurred: {str(e)}"
+
+ # 4. Gradio Interface
+ iface = gr.Interface(
+     fn=predict,
+     inputs=[
+         gr.Image(label="Upload an Image"),
+         gr.Textbox(label="Ask a Question about the Image", placeholder="What is in the image?")
+     ],
+     outputs=gr.Textbox(label="Answer"),
+     title="Image Question Answering with Phi-3 and SigLIP (CPU)",
+     description="Ask questions about an image and get answers powered by Phi-3 (llama.cpp) and SigLIP.",
+     examples=[
+         ["cat_0006.png", "Create an interesting story about this image."],
+         ["bird_0004.png", "Can you describe this image?"],
+         ["truck_0003.png", "Elaborate on the setting of the image."],
+         ["ship_0007.png", "Explain the purpose of the image."]
+     ]
+ )
+
+ # 5. Launch the App
+ if __name__ == "__main__":
+     iface.launch()
requirements.txt CHANGED
@@ -1,6 +1,8 @@
  gradio
- huggingface_hub
- openvino
- openvino-genai
- optimum-intel
- transformers
+ torch
+ torchvision
+ timm
+ Pillow
+ transformers
+ llama-cpp-python
+ peft
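With both files in place, the app can be smoke-tested locally by calling predict directly. This is a hypothetical snippet: it assumes the requirements above are installed and that image_encoder.pth and cat_0006.png sit next to app.py.

# Hypothetical smoke test for the updated Space. Importing app downloads the
# GGUF weights; the __main__ guard keeps the Gradio server from launching.
import numpy as np
from PIL import Image
from app import predict

img = np.array(Image.open("cat_0006.png").convert("RGB"))  # numpy array, as Gradio would pass
print(predict(img, "Can you describe this image?"))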