prithivMLmods committed on
Commit
79a20f7
·
verified ·
1 Parent(s): c715222

update app

Browse files
Files changed (1) hide show
  1. app.py +26 -20
app.py CHANGED
@@ -3,7 +3,6 @@ import os
3
  import shutil
4
  import time
5
  import uuid
6
- import spaces #[zeroGPU Spaces]
7
  import unicodedata
8
  from io import BytesIO
9
  from threading import Timer
@@ -12,6 +11,7 @@ from datetime import datetime
12
 
13
  import gradio as gr
14
  import torch
 
15
  from dotenv import load_dotenv
16
  from e2b_desktop import Sandbox
17
  from gradio_modal import Modal
@@ -39,7 +39,7 @@ load_dotenv(override=True)
39
  # -----------------------------------------------------------------------------
40
 
41
  E2B_API_KEY = os.getenv("E2B_API")
42
- HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API")
43
  if HF_TOKEN:
44
  login(token=HF_TOKEN)
45
 
@@ -58,7 +58,7 @@ if not os.path.exists(TMP_DIR):
58
 
59
  print("Loading Fara Model... This may take a moment.")
60
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
61
- MODEL_ID_F = "microsoft/Fara-7B" # Ensure this ID is accessible or point to local path
62
 
63
  try:
64
  processor_f = AutoProcessor.from_pretrained(MODEL_ID_F, trust_remote_code=True)
@@ -66,19 +66,27 @@ try:
66
  MODEL_ID_F,
67
  trust_remote_code=True,
68
  torch_dtype=torch.bfloat16 if DEVICE == "cuda" else torch.float32,
69
- device_map="auto" if DEVICE == "cuda" else None,
70
  )
71
- if DEVICE == "cpu":
72
- model_f.to(DEVICE)
73
-
74
- model_f.eval()
75
  print(f"Fara Model loaded successfully on {DEVICE}")
76
  except Exception as e:
77
  print(f"Error loading Fara Model: {e}")
78
- print("Please ensure you have access to the model and enough GPU memory.")
79
- # Fallback to prevent crash during import, though app won't work without model
80
- model_f = None
81
- processor_f = None
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  class FaraLocalModel(Model):
84
  """
@@ -98,12 +106,9 @@ class FaraLocalModel(Model):
98
  if self.model is None:
99
  raise ValueError("Fara Model is not loaded.")
100
 
101
- # Convert SmolAgents messages to Qwen/Transformers format
102
- # SmolAgents uses a specific dict structure for content.
103
- # We need to normalize it for process_vision_info / apply_chat_template
104
-
105
  formatted_messages = []
106
 
 
107
  for msg in messages:
108
  role = msg["role"]
109
  content = msg["content"]
@@ -150,7 +155,7 @@ class FaraLocalModel(Model):
150
  **inputs,
151
  max_new_tokens=kwargs.get("max_tokens", 1024),
152
  stop_strings=stop_sequences,
153
- tokenizer=self.processor.tokenizer, # Specific for stop_strings in modern transformers
154
  )
155
 
156
  # Decode
@@ -185,7 +190,7 @@ Action:
185
  click(254, 308)
186
  ```<end_code>
187
 
188
- Akways format your action ('Action:' part) as Python code blocks as shown above.
189
  </action_process>
190
 
191
  <tools>
@@ -220,7 +225,7 @@ In browser, ignore any sign-in popups while they don't interfere with the elemen
220
  </general_guidelines>
221
  """.replace("<<current_date>>", datetime.now().strftime("%A, %d-%B-%Y"))
222
 
223
- @spaces.GPU
224
  def draw_marker_on_image(image_copy, click_coordinates):
225
  x, y = click_coordinates
226
  draw = ImageDraw.Draw(image_copy)
@@ -750,6 +755,7 @@ def initialize_session(interactive_mode, browser_uuid):
750
  return update_html(interactive_mode, browser_uuid), browser_uuid
751
 
752
  class EnrichedGradioUI(GradioUI):
 
753
  def interact_with_agent(
754
  self,
755
  task_input,
@@ -836,7 +842,7 @@ with gr.Blocks(theme=theme, css=custom_css, js=custom_js) as demo:
836
  with gr.Sidebar(position="left"):
837
  with Modal(visible=True) as modal:
838
  gr.Markdown("""### Welcome to Fara CUA Demo 🖥️
839
- This agent uses **microsoft/Fara-7B** (running locally) and **smolagents** to control a remote computer.
840
 
841
  👉 Type a task, click 'Let's go!', and watch the agent work.
842
  """)
 
3
  import shutil
4
  import time
5
  import uuid
 
6
  import unicodedata
7
  from io import BytesIO
8
  from threading import Timer
 
11
 
12
  import gradio as gr
13
  import torch
14
+ import spaces # <--- Added Spaces support
15
  from dotenv import load_dotenv
16
  from e2b_desktop import Sandbox
17
  from gradio_modal import Modal
 
39
  # -----------------------------------------------------------------------------
40
 
41
  E2B_API_KEY = os.getenv("E2B_API")
42
+ HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY")
43
  if HF_TOKEN:
44
  login(token=HF_TOKEN)
45
 
 
58
 
59
  print("Loading Fara Model... This may take a moment.")
60
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
61
+ MODEL_ID_F = "microsoft/Fara-7B" # Ensure this repository exists and you have access
62
 
63
  try:
64
  processor_f = AutoProcessor.from_pretrained(MODEL_ID_F, trust_remote_code=True)
 
66
  MODEL_ID_F,
67
  trust_remote_code=True,
68
  torch_dtype=torch.bfloat16 if DEVICE == "cuda" else torch.float32,
69
+ device_map="auto",
70
  )
 
 
 
 
71
  print(f"Fara Model loaded successfully on {DEVICE}")
72
  except Exception as e:
73
  print(f"Error loading Fara Model: {e}")
74
+ print("Falling back to Qwen/Qwen2.5-VL-7B-Instruct for demonstration if Fara is unavailable...")
75
+ try:
76
+ # Fallback to base Qwen-VL if Fara repo isn't public/accessible
77
+ MODEL_ID_F = "Qwen/Qwen2.5-VL-7B-Instruct"
78
+ processor_f = AutoProcessor.from_pretrained(MODEL_ID_F, trust_remote_code=True)
79
+ model_f = Qwen2_5_VLForConditionalGeneration.from_pretrained(
80
+ MODEL_ID_F,
81
+ trust_remote_code=True,
82
+ torch_dtype=torch.bfloat16 if DEVICE == "cuda" else torch.float32,
83
+ device_map="auto",
84
+ )
85
+ print(f"Fallback Model ({MODEL_ID_F}) loaded successfully.")
86
+ except Exception as inner_e:
87
+ print(f"Critical error loading model: {inner_e}")
88
+ model_f = None
89
+ processor_f = None
90
 
91
  class FaraLocalModel(Model):
92
  """
 
106
  if self.model is None:
107
  raise ValueError("Fara Model is not loaded.")
108
 
 
 
 
 
109
  formatted_messages = []
110
 
111
+ # Convert SmolAgents messages to Qwen/Transformers format
112
  for msg in messages:
113
  role = msg["role"]
114
  content = msg["content"]
 
155
  **inputs,
156
  max_new_tokens=kwargs.get("max_tokens", 1024),
157
  stop_strings=stop_sequences,
158
+ tokenizer=self.processor.tokenizer,
159
  )
160
 
161
  # Decode
 
190
  click(254, 308)
191
  ```<end_code>
192
 
193
+ Always format your action ('Action:' part) as Python code blocks as shown above.
194
  </action_process>
195
 
196
  <tools>
 
225
  </general_guidelines>
226
  """.replace("<<current_date>>", datetime.now().strftime("%A, %d-%B-%Y"))
227
 
228
+
229
  def draw_marker_on_image(image_copy, click_coordinates):
230
  x, y = click_coordinates
231
  draw = ImageDraw.Draw(image_copy)
 
755
  return update_html(interactive_mode, browser_uuid), browser_uuid
756
 
757
  class EnrichedGradioUI(GradioUI):
758
+ @spaces.GPU(duration=180) # Allocate GPU for 3 minutes per interaction cycle
759
  def interact_with_agent(
760
  self,
761
  task_input,
 
842
  with gr.Sidebar(position="left"):
843
  with Modal(visible=True) as modal:
844
  gr.Markdown("""### Welcome to Fara CUA Demo 🖥️
845
+ This agent uses **microsoft/Fara-7B** (running locally via ZeroGPU) and **smolagents** to control a remote computer.
846
 
847
  👉 Type a task, click 'Let's go!', and watch the agent work.
848
  """)