Update app.py
app.py CHANGED
@@ -9,6 +9,7 @@ import timm
 from torchvision import transforms
 #from llama_cpp import Llama
 from peft import PeftModel
+import traceback
 
 # 1. Model Definitions (Same as in training script)
 class SigLIPImageEncoder(torch.nn.Module):
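Note that with `from llama_cpp import Llama` commented out, the `llm` object called in the inference hunk below has to be created elsewhere in app.py. For reference, a minimal sketch of how such an object is usually constructed with llama-cpp-python; the model path and context size here are assumptions, not values from this commit:

# Minimal sketch, not part of this diff. Assumes llama-cpp-python is
# installed; the model path and n_ctx value are hypothetical.
from llama_cpp import Llama

llm = Llama(
    model_path="phi-3-mini-4k-instruct.gguf",  # hypothetical local GGUF file
    n_ctx=4096,  # context window; must fit the serialized embedding string
)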
@@ -86,6 +87,7 @@ print("phi-3 model loaded sucessfully")
 # 3. Inference Function
 
 @spaces.GPU
+# 3. Inference Function
 def predict(image, question):
     """
     Takes an image and a question as input and returns an answer.
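`@spaces.GPU` is the ZeroGPU decorator from Hugging Face's `spaces` package: the Space idles on CPU, and a GPU is attached only while the decorated function runs. A minimal sketch of the pattern (the function body is illustrative, not from app.py):

# Minimal sketch of the ZeroGPU pattern; the function is illustrative only.
import spaces
import torch

@spaces.GPU  # a GPU is allocated only while this function executes
def gpu_demo(x):
    return torch.as_tensor(x).to("cuda").sum().item()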
@@ -101,26 +103,27 @@ def predict(image, question):
         with torch.no_grad():
             image_embeddings = image_encoder(image)
         # Flatten the image embeddings for simplicity
-
+        image_embeddings_list = image_embeddings.flatten().tolist()  # Convert to list of floats
+        image_embeddings_str = ' '.join(map(str, image_embeddings_list))  # Convert to space-separated string
 
         # Create the prompt with image embeddings
-        prompt = f"Question: {question}\nImage Embeddings: {
+        prompt = f"Question: {question}\nImage Embeddings: {image_embeddings_str}\nAnswer:"
 
         # Generate answer using llama.cpp
-        output =
+        output = llm(
             prompt,
-            max_tokens=
+            max_tokens=200,
             stop=["Q:", "\n"],
             echo=False,
         )
-
         answer = output["choices"][0]["text"].strip()
 
         return answer
 
     except Exception as e:
+        traceback.print_exc()
         return f"An error occurred: {str(e)}"
-
+
 # 4. Gradio Interface
 iface = gr.Interface(
     fn=predict,
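The substance of this hunk is serializing the SigLIP embedding tensor into plain text so it can be spliced into the llama.cpp prompt. A self-contained sketch of that conversion, using a dummy tensor in place of the real encoder output (the shape and question are illustrative):

# Self-contained sketch of the embedding-to-prompt conversion added above.
# The tensor is a stand-in for image_encoder(image); its shape is illustrative.
import torch

image_embeddings = torch.randn(1, 8)

# Flatten to a 1-D list of floats, then join into one space-separated string.
image_embeddings_list = image_embeddings.flatten().tolist()
image_embeddings_str = ' '.join(map(str, image_embeddings_list))

prompt = f"Question: What is in the image?\nImage Embeddings: {image_embeddings_str}\nAnswer:"
print(prompt)

One caveat on this design: a real SigLIP embedding has hundreds of dimensions, and each float printed at full precision costs several tokens, so the resulting prompt can easily exceed the model's context window unless it is sized with that in mind.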