Fred808 committed on
Commit
a566fbc
·
verified ·
1 Parent(s): 4730a85

Upload 7 files

Browse files
app.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ from PIL import Image
4
+ import requests
5
+ import time
6
+ import multiprocessing
7
+ import json
8
+ import sys
9
+ from typing import Tuple, List, Dict, Any
10
+
11
# Expose the bundled Florence-2 sources (the "florence-2-large" directory that
# sits next to this file) to the import system.
florence_path = os.path.join(os.path.dirname(__file__), 'florence-2-large')
sys.path.append(florence_path)

# Best-effort load of the Florence processor. Any failure while importing or
# constructing it is reported on stdout and the script falls back to plain
# statistics; HAVE_PROCESSOR tells the rest of the module which mode is active.
# NOTE(review): the imports below use the package name `florence_2_large`,
# while the directory added to sys.path is `florence-2-large` itself -- confirm
# that a `florence_2_large` package is really importable from there.
try:
    from florence_2_large.processing_florence2 import Florence2Processor
    from florence_2_large.configuration_florence2 import Florence2Config
    import torch
    import torch.nn.functional as F

    # Build the processor purely from the local config.json.
    config = Florence2Config.from_json_file(os.path.join(florence_path, 'config.json'))
    processor = Florence2Processor(config)
except Exception as load_error:
    print(f"Warning: Could not load Florence processor: {load_error}")
    print("Using basic output interpretation")
    HAVE_PROCESSOR = False
else:
    HAVE_PROCESSOR = True
    print("Successfully loaded Florence processor")

# Task prompt handed to the processor's post-processing step
# (detailed image captioning).
TASK = "<MORE_DETAILED_CAPTION>"

# Model identifier.
# NOTE(review): this names the *base* checkpoint while the local directory is
# "florence-2-large", and nothing in the visible code reads it -- confirm.
MODEL_ID = "microsoft/florence-2-base"
36
+
37
+
38
+
39
def load_and_preprocess_image(image_path):
    """Load an image and turn it into the flat payload sent to the servers.

    The image is converted to RGB, resized to 32x32, scaled to [0, 1] and
    flattened in row-major order (height, width, channel).

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        A list of 32 * 32 * 3 = 3072 single-element lists, each holding one
        Python ``float``.

    Raises:
        Whatever ``PIL.Image.open`` raises for a missing or unreadable file.
    """
    # The context manager closes the underlying file as soon as the pixels
    # have been copied out.
    with Image.open(image_path) as img:
        # Force three channels up front: without this an RGBA file yields
        # 4 channels and a palette ("P") file yields colour indices rather
        # than colours. Grayscale input ends up with R == G == B.
        rgb = img.convert("RGB").resize((32, 32))
        img_array = np.asarray(rgb, dtype=np.float32) / 255.0

    # A leading batch axis would not change the flattened order, so the
    # (32, 32, 3) array is flattened directly. tolist() yields plain Python
    # floats, which is what the JSON encoder needs.
    return [[value] for value in img_array.ravel().tolist()]
58
+
59
def run_inference(args: Tuple[str, str, int]) -> dict:
    """Run inference on a specific server with given chunk ID.

    Designed to be used with ``Pool.map``: failures that occur while
    preprocessing the image or talking to the server are captured in the
    returned dictionary instead of being raised.

    Args:
        args: ``(server_url, image_path, chunk_id)``.

    Returns:
        A dict with keys ``server``, ``chunk_id``, ``success`` and ``time``
        (seconds spent on the request, or ``None`` if the request was never
        started), plus ``result`` (decoded JSON body) on success or ``error``
        (message string) on failure.
    """
    server_url, image_path, chunk_id = args
    endpoint = f"{server_url}/compute/{chunk_id}"

    # Stays None until the request is actually started, so the error handler
    # can tell "failed before sending" from "failed while waiting".
    start_time = None

    try:
        print(f"\nProcessing server {server_url} with chunk {chunk_id}...")

        # Load and preprocess image
        input_tensor = load_and_preprocess_image(image_path)

        # Send request with timeout
        print(f"Sending request to {endpoint}")
        # perf_counter is monotonic, so the duration cannot be distorted by
        # wall-clock adjustments.
        start_time = time.perf_counter()
        response = requests.post(
            endpoint,
            json={"inputs": input_tensor},
            headers={"Content-Type": "application/json"},
            timeout=10,
        )
        inference_time = time.perf_counter() - start_time

        if response.status_code == 200:
            return {
                "server": server_url,
                "chunk_id": chunk_id,
                "success": True,
                "time": inference_time,
                "result": response.json(),
            }

        return {
            "server": server_url,
            "chunk_id": chunk_id,
            "success": False,
            "error": f"HTTP {response.status_code}: {response.text}",
            "time": inference_time,
        }

    except Exception as e:
        # Worker boundary: report the failure to the parent process rather
        # than letting one bad server abort the whole pool.map call.
        elapsed = None if start_time is None else time.perf_counter() - start_time
        return {
            "server": server_url,
            "chunk_id": chunk_id,
            "success": False,
            "error": str(e),
            "time": elapsed,
        }
115
+
116
def process_model_outputs(outputs, original_shape=(1, -1, 51289)):
    """Process model outputs using Florence processor for sequence generation.

    Args:
        outputs: Non-empty sequence of indexable items; element 0 of each item
            is taken as one output value.
        original_shape: Shape the flat values are reshaped to before decoding
            (used as ``[batch, seq_len, vocab_size]``).

    Returns:
        When the Florence processor is available and decoding succeeds, a dict
        with ``text``, ``tokens``, ``logits_shape`` and ``distribution``
        (min/max/mean/std). Otherwise a dict of basic statistics with
        ``overall_mean``, ``overall_std``, ``shape`` and ``distribution``
        (min/max/median).

    Raises:
        ValueError: If ``outputs`` is empty.
    """
    # Fail early with a clear message; otherwise the error would surface later
    # from a NumPy reduction over a zero-size array.
    if len(outputs) == 0:
        raise ValueError("outputs is empty; there is nothing to process")

    # Convert outputs to numpy array
    outputs_array = np.array([x[0] for x in outputs])

    if HAVE_PROCESSOR:
        try:
            # Reshape logits to [batch, seq_len, vocab_size]
            logits = outputs_array.reshape(original_shape)

            # logits is always a NumPy array at this point, so the greedy
            # token choice is done with NumPy directly.
            token_ids = np.argmax(logits, axis=-1)

            # Decode tokens to text
            text = processor.batch_decode(token_ids, skip_special_tokens=True)

            # Post-process for the specific task
            # NOTE(review): confirm post_process_generation accepts this
            # argument set; any error here silently selects the fallback.
            processed_text = processor.post_process_generation(
                text[0] if isinstance(text, list) else text,
                task=TASK
            )

            return {
                'text': processed_text,
                'tokens': token_ids.tolist(),
                'logits_shape': logits.shape,
                'distribution': {
                    'min': float(outputs_array.min()),
                    'max': float(outputs_array.max()),
                    'mean': float(outputs_array.mean()),
                    'std': float(outputs_array.std())
                }
            }
        except Exception as e:
            # Deliberate best-effort: report and fall through to statistics.
            print(f"Warning: Error in sequence processing: {e}")

    # Fallback to basic statistics if the processor is unavailable or failed
    return {
        'overall_mean': float(outputs_array.mean()),
        'overall_std': float(outputs_array.std()),
        'shape': outputs_array.shape,
        'distribution': {
            'min': float(outputs_array.min()),
            'max': float(outputs_array.max()),
            'median': float(np.median(outputs_array))
        }
    }
167
+
168
def process_results(results):
    """Process and combine results from all servers.

    Prints an analysis of the concatenated outputs of every successful
    server (ordered by chunk ID), a cross-server consistency check and a
    timing summary.

    Args:
        results: Iterable of dicts as produced by ``run_inference``; each has
            a ``success`` flag and, when successful, ``chunk_id``, ``time``
            and ``result['outputs']``.

    Returns:
        None. Returns early, after printing an error, when no server
        succeeded.
    """
    # Keep only successful results, ordered by chunk ID
    successful_results = sorted(
        (r for r in results if r['success']),
        key=lambda r: r['chunk_id'],
    )
    if not successful_results:
        print("\nError: No servers returned successful results")
        return

    print(f"\nModel Output Analysis ({len(successful_results)}/{len(results)} servers succeeded):")
    print("-" * 80)

    # Concatenate the outputs of all chunks into one sequence
    total_outputs = []
    for result in successful_results:
        total_outputs.extend(result['result']['outputs'])

    # Process the combined sequence
    print("\nProcessing complete sequence...")
    analysis = process_model_outputs(total_outputs, original_shape=(1, -1, 51289))

    if 'text' in analysis:
        print("\nGenerated Description:")
        print("-" * 80)
        print(analysis['text'])

        print("\nSequence Statistics:")
        print(f"- Logits shape: {analysis['logits_shape']}")
        print(f"- Distribution:")
        for key, value in analysis['distribution'].items():
            print(f"  {key}: {value:.4f}")
    else:
        print("\nBasic Analysis (Florence processor not available):")
        print(f"- Sequence length: {len(total_outputs)}")
        print(f"- Overall activation: {analysis['overall_mean']:.4f} ± {analysis['overall_std']:.4f}")
        print("\nValue Distribution:")
        for key, value in analysis['distribution'].items():
            print(f"- {key}: {value:.4f}")

    # Check server consistency
    if len(successful_results) > 1:
        all_outputs = [np.array([x[0] for x in r['result']['outputs']])
                       for r in successful_results]
        reference = all_outputs[0]

        print("\nServer Consistency:")
        if any(tensor.shape != reference.shape for tensor in all_outputs[1:]):
            # Element-wise subtraction of differently sized arrays would raise
            # a broadcasting error, so report the mismatch instead.
            shapes = [tensor.shape for tensor in all_outputs]
            print(f"Cannot compare servers: output sizes differ ({shapes})")
        else:
            max_diff = max(float(np.max(np.abs(reference - tensor)))
                           for tensor in all_outputs[1:])
            if max_diff < 1e-6:
                print("Successful servers provided identical results")
            else:
                print(f"Variations detected between servers (max diff: {max_diff:.6f})")

    # Print timing summary
    successful_times = [r['time'] for r in successful_results]
    print(f"\nProcessing Time Summary:")
    print(f"- Average: {np.mean(successful_times):.2f}s")
    print(f"- Range: {min(successful_times):.2f}s - {max(successful_times):.2f}s")
227
+
228
def main():
    """Send one sample image to every configured server and report the results.

    Each server is queried in its own worker process for its assigned chunk
    ID; the per-server outcome is printed first, followed by the combined
    analysis from ``process_results``.
    """
    # Server configurations with their respective chunk IDs
    servers = [
        ("https://fred808-ilob.hf.space", 0),
        ("https://fred808-tserv.hf.space", 1),
        ("https://fred808-tserve2.hf.space", 2)
    ]

    # Image path - using the same image for all servers
    image_path = "sample_task/test1.png"
    print(f"\nTesting with image: {image_path}")

    # Prepare arguments for each server
    args = [(server_url, image_path, chunk_id) for server_url, chunk_id in servers]

    # One worker per server: the default pool size is the CPU count, which
    # serialises the requests on a single-core host and spawns idle workers
    # on a large one.
    with multiprocessing.Pool(processes=len(servers)) as pool:
        # Run inference in parallel
        print("\nStarting parallel inference across all servers...")
        results = pool.map(run_inference, args)

    # Display individual server results
    print("\nServer Results:")
    print("-" * 80)
    for result in results:
        print(f"\nServer: {result['server']}")
        print(f"Chunk ID: {result['chunk_id']}")
        print(f"Success: {result['success']}")
        # None means the request was never started.
        elapsed = result['time']
        print(f"Time: {elapsed:.4f}s" if elapsed is not None else "Time: N/A")

        if result['success']:
            print(f"Output shape: {len(result['result']['outputs'])} elements")
            print("First few outputs:", result['result']['outputs'][:5])
        else:
            print(f"Error: {result['error']}")
        print("-" * 80)

    # Process and display combined results
    process_results(results)


# The guard keeps worker processes that re-import this module from starting
# the whole run again.
if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
numpy
torch
aiohttp
# NOTE(review): asyncio ships with Python 3; the PyPI package of this name is
# an obsolete backport -- confirm whether this entry can be dropped.
asyncio
transformers
# Imported directly by app.py (PIL.Image and requests.post):
Pillow
requests
sample_task/test1.png ADDED
sample_task/test2.png ADDED
sample_task/test3.png ADDED
sample_task/test4.png ADDED
sample_task/test5.png ADDED