Fred808 committed
Commit c32cd59 · verified · 1 parent: a566fbc

Update app.py

Files changed (1): app.py +268 -268
app.py CHANGED
Only the two Florence import lines differ between the two sides (the +268/-268 count reflects the diff marking every line of the file as rewritten); the change reduces to:

@@ -15,4 +15,4 @@
 try:
-    from florence_2_large.processing_florence2 import Florence2Processor
-    from florence_2_large.configuration_florence2 import Florence2Config
+    from processing_florence2 import Florence2Processor
+    from configuration_florence2 import Florence2Config
     import torch
 
app.py after this commit:

import os
import numpy as np
from PIL import Image
import requests
import time
import multiprocessing
import json
import sys
from typing import Tuple, List, Dict, Any

# Add Florence model path to Python path
florence_path = os.path.join(os.path.dirname(__file__), 'florence-2-large')
sys.path.append(florence_path)

try:
    from processing_florence2 import Florence2Processor
    from configuration_florence2 import Florence2Config
    import torch
    import torch.nn.functional as F

    # Initialize processor with local files
    config = Florence2Config.from_json_file(os.path.join(florence_path, 'config.json'))
    processor = Florence2Processor(config)
    HAVE_PROCESSOR = True
    print("Successfully loaded Florence processor")
except Exception as e:
    print(f"Warning: Could not load Florence processor: {e}")
    print("Using basic output interpretation")
    HAVE_PROCESSOR = False

# Task-specific configuration
TASK = "<MORE_DETAILED_CAPTION>"  # For detailed image captioning

# Model configuration
MODEL_ID = "microsoft/florence-2-base"


def load_and_preprocess_image(image_path):
    # Load image and resize to 32x32
    img = Image.open(image_path)
    img = img.resize((32, 32))

    # Convert to numpy array and normalize to [0,1]
    img_array = np.array(img).astype(np.float32) / 255.0

    # Ensure array has shape (32, 32, 3)
    if len(img_array.shape) == 2:
        img_array = np.stack([img_array] * 3, axis=-1)

    # Add batch dimension
    img_array = img_array[np.newaxis, ...]

    # Convert tensor to list of single-element lists for API
    tensor_data = [[float(x)] for x in img_array.flatten()]

    return tensor_data

def run_inference(args: Tuple[str, str, int]) -> dict:
    """Run inference on a specific server with given chunk ID."""
    server_url, image_path, chunk_id = args

    try:
        print(f"\nProcessing server {server_url} with chunk {chunk_id}...")

        # Load and preprocess image
        input_tensor = load_and_preprocess_image(image_path)

        # Prepare request data
        data = {
            "inputs": input_tensor
        }

        # Send request with timeout
        print(f"Sending request to {server_url}/compute/{chunk_id}")
        start_time = time.time()
        response = requests.post(
            f"{server_url}/compute/{chunk_id}",
            json=data,
            headers={"Content-Type": "application/json"},
            timeout=10
        )

        inference_time = time.time() - start_time

        if response.status_code == 200:
            result = response.json()
            return {
                "server": server_url,
                "chunk_id": chunk_id,
                "success": True,
                "time": inference_time,
                "result": result
            }
        else:
            error_msg = f"HTTP {response.status_code}"
            if hasattr(response, 'text'):
                error_msg += f": {response.text}"
            return {
                "server": server_url,
                "chunk_id": chunk_id,
                "success": False,
                "error": error_msg,
                "time": inference_time
            }

    except Exception as e:
        return {
            "server": server_url,
            "chunk_id": chunk_id,
            "success": False,
            "error": str(e),
            "time": time.time() - start_time if 'start_time' in locals() else None
        }

def process_model_outputs(outputs, original_shape=(1, -1, 51289)):
    """Process model outputs using Florence processor for sequence generation."""
    # Convert outputs to numpy array
    outputs_array = np.array([x[0] for x in outputs])

    if HAVE_PROCESSOR:
        try:
            # Reshape logits to [batch, seq_len, vocab_size]
            logits = outputs_array.reshape(original_shape)

            if torch.is_tensor(logits):
                # Use torch operations if available
                token_ids = torch.argmax(logits, dim=-1)
            else:
                # Fallback to numpy
                token_ids = np.argmax(logits, axis=-1)

            # Decode tokens to text
            text = processor.batch_decode(token_ids, skip_special_tokens=True)

            # Post-process for the specific task
            processed_text = processor.post_process_generation(
                text[0] if isinstance(text, list) else text,
                task=TASK
            )

            return {
                'text': processed_text,
                'tokens': token_ids.tolist() if torch.is_tensor(token_ids) else token_ids.tolist(),
                'logits_shape': logits.shape,
                'distribution': {
                    'min': float(outputs_array.min()),
                    'max': float(outputs_array.max()),
                    'mean': float(outputs_array.mean()),
                    'std': float(outputs_array.std())
                }
            }
        except Exception as e:
            print(f"Warning: Error in sequence processing: {e}")

    # Fallback to basic statistics if processor not available
    return {
        'overall_mean': float(outputs_array.mean()),
        'overall_std': float(outputs_array.std()),
        'shape': outputs_array.shape,
        'distribution': {
            'min': float(outputs_array.min()),
            'max': float(outputs_array.max()),
            'median': float(np.median(outputs_array))
        }
    }

def process_results(results):
    """Process and combine results from all servers."""
    # Filter successful results
    successful_results = [r for r in results if r['success']]
    if not successful_results:
        print("\nError: No servers returned successful results")
        return

    # Sort successful results by chunk ID
    successful_results.sort(key=lambda x: x['chunk_id'])

    print(f"\nModel Output Analysis ({len(successful_results)}/{len(results)} servers succeeded):")
    print("-" * 80)

    # Get total sequence length from all chunks
    total_outputs = []
    for result in successful_results:
        total_outputs.extend(result['result']['outputs'])

    # Process the combined sequence
    print("\nProcessing complete sequence...")
    analysis = process_model_outputs(total_outputs, original_shape=(1, -1, 51289))

    if 'text' in analysis:
        print("\nGenerated Description:")
        print("-" * 80)
        print(analysis['text'])

        print("\nSequence Statistics:")
        print(f"- Logits shape: {analysis['logits_shape']}")
        print(f"- Distribution:")
        for key, value in analysis['distribution'].items():
            print(f"  {key}: {value:.4f}")
    else:
        print("\nBasic Analysis (Florence processor not available):")
        print(f"- Sequence length: {len(total_outputs)}")
        print(f"- Overall activation: {analysis['overall_mean']:.4f} ± {analysis['overall_std']:.4f}")
        print("\nValue Distribution:")
        for key, value in analysis['distribution'].items():
            print(f"- {key}: {value:.4f}")

    # Check server consistency
    if len(successful_results) > 1:
        all_outputs = [np.array([x[0] for x in r['result']['outputs']])
                       for r in successful_results]
        differences = [np.max(np.abs(all_outputs[0] - tensor))
                       for tensor in all_outputs[1:]]

        print("\nServer Consistency:")
        if np.max(differences) < 1e-6:
            print("Successful servers provided identical results")
        else:
            print(f"Variations detected between servers (max diff: {np.max(differences):.6f})")

    # Print timing summary
    successful_times = [r['time'] for r in successful_results]
    print(f"\nProcessing Time Summary:")
    print(f"- Average: {np.mean(successful_times):.2f}s")
    print(f"- Range: {min(successful_times):.2f}s - {max(successful_times):.2f}s")

def main():
    # Server configurations with their respective chunk IDs
    servers = [
        ("https://fred808-ilob.hf.space", 0),
        ("https://fred808-tserv.hf.space", 1),
        ("https://fred808-tserve2.hf.space", 2)
    ]

    # Image path - using the same image for all servers
    image_path = "sample_task/test1.png"
    print(f"\nTesting with image: {image_path}")

    # Create process pool
    with multiprocessing.Pool() as pool:
        # Prepare arguments for each server
        args = [(server_url, image_path, chunk_id) for server_url, chunk_id in servers]

        # Run inference in parallel
        print("\nStarting parallel inference across all servers...")
        results = pool.map(run_inference, args)

        # Display individual server results
        print("\nServer Results:")
        print("-" * 80)
        for result in results:
            print(f"\nServer: {result['server']}")
            print(f"Chunk ID: {result['chunk_id']}")
            print(f"Success: {result['success']}")
            print(f"Time: {result['time']:.4f}s" if result['time'] else "Time: N/A")

            if result['success']:
                print(f"Output shape: {len(result['result']['outputs'])} elements")
                print("First few outputs:", result['result']['outputs'][:5])
            else:
                print(f"Error: {result['error']}")
            print("-" * 80)

        # Process and display combined results
        process_results(results)

if __name__ == "__main__":
    main()
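
A note on the wire format, inferred from the script itself rather than from any server documentation: load_and_preprocess_image() flattens the 32x32 RGB image into 32*32*3 = 3072 single-float lists, and process_model_outputs() expects the concatenated "outputs" from all chunks to flatten into a multiple of 51289 values (one vocab-sized logit row per generated token) so that reshape(1, -1, 51289) succeeds. A minimal sketch of both shapes, with placeholder values:

# Shapes implied by app.py; the numeric values here are made up.
request_body = {"inputs": [[0.5]] * (32 * 32 * 3)}   # what load_and_preprocess_image() produces

response_body = {"outputs": [[0.0]] * 51289}         # one vocab-sized row of logits
flat = [x[0] for x in response_body["outputs"]]      # how process_model_outputs() unpacks it
assert len(flat) % 51289 == 0                        # required for reshape(1, -1, 51289)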
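To exercise the client without the three Hugging Face Spaces, a local stand-in can expose the same POST /compute/{chunk_id} route. This is a hypothetical stub built on the contract sketched above, not the Spaces' actual implementation:

# local_stub.py - hypothetical stand-in server for testing app.py locally.
# Run with: uvicorn local_stub:app --port 8000
# then point the `servers` list in main() at ("http://localhost:8000", 0), etc.
from fastapi import FastAPI

app = FastAPI()

@app.post("/compute/{chunk_id}")
async def compute(chunk_id: int, body: dict):
    # Return one vocab-sized row of zero logits in the shape app.py expects.
    return {"outputs": [[0.0]] * 51289}

Because every chunk then returns identical zeros, the consistency check in process_results() reports matching results across servers.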