Monimoy committed on
Commit d3471f5 · verified · 1 Parent(s): 4e42ff8

Update app.py

Files changed (1): app.py (+9 -6)
app.py CHANGED
@@ -9,6 +9,7 @@ import timm
 from torchvision import transforms
 #from llama_cpp import Llama
 from peft import PeftModel
+import traceback
 
 # 1. Model Definitions (Same as in training script)
 class SigLIPImageEncoder(torch.nn.Module):
@@ -86,6 +87,7 @@ print("phi-3 model loaded sucessfully")
 # 3. Inference Function
 
 @spaces.GPU
+# 3. Inference Function
 def predict(image, question):
     """
     Takes an image and a question as input and returns an answer.
@@ -101,26 +103,27 @@ def predict(image, question):
         with torch.no_grad():
             image_embeddings = image_encoder(image)
             # Flatten the image embeddings for simplicity
-            image_embeddings = image_embeddings.flatten().tolist()
+            image_embeddings_list = image_embeddings.flatten().tolist()  # Convert to list of floats
+            image_embeddings_str = ' '.join(map(str, image_embeddings_list))  # Convert to space-separated string
 
         # Create the prompt with image embeddings
-        prompt = f"Question: {question}\nImage Embeddings: {image_embeddings}\nAnswer:"
+        prompt = f"Question: {question}\nImage Embeddings: {image_embeddings_str}\nAnswer:"
 
         # Generate answer using llama.cpp
-        output = model(
+        output = llm(
             prompt,
-            max_tokens=128,
+            max_tokens=200,
             stop=["Q:", "\n"],
             echo=False,
         )
-
         answer = output["choices"][0]["text"].strip()
 
         return answer
 
     except Exception as e:
+        traceback.print_exc()
         return f"An error occurred: {str(e)}"
-
+
 # 4. Gradio Interface
 iface = gr.Interface(
     fn=predict,
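
Note on the generation call: the diff switches the callable from model to llm while the from llama_cpp import Llama import stays commented out, so llm must be defined elsewhere in app.py. For reference, a minimal llama-cpp-python sketch with the same call shape as predict(); the model_path and n_ctx values here are placeholder assumptions, not taken from this commit:

# Minimal llama-cpp-python sketch matching the call shape in predict().
# model_path is a hypothetical placeholder; it is not part of this commit.
from llama_cpp import Llama

llm = Llama(model_path="phi-3-mini.Q4_K_M.gguf", n_ctx=4096)

output = llm(
    "Question: What color is the car?\nImage Embeddings: 0.12 -0.34 0.56\nAnswer:",
    max_tokens=200,        # same generation budget the commit sets
    stop=["Q:", "\n"],     # same stop sequences as predict()
    echo=False,            # do not echo the prompt back
)
print(output["choices"][0]["text"].strip())  # same output parsing as predict()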
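
The diff's last context line cuts off inside gr.Interface(...); a typical completion for an image-plus-question demo looks like the sketch below. The input and output components are assumptions, not shown in this commit, and the stub stands in for the real predict():

# Hedged sketch of the Gradio wiring; component choices are assumptions.
import gradio as gr

def predict(image, question):
    # Stand-in for the predict() defined in app.py.
    return f"(stub) question was: {question}"

iface = gr.Interface(
    fn=predict,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="Question")],
    outputs=gr.Textbox(label="Answer"),
)

iface.launch()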