GPU Acceleration Guide

Professional and Enterprise Tiers Only

Overview

Bravura provides GPU detection and acceleration support for Professional and Enterprise tier customers. This guide shows how to detect available GPUs, query their capabilities, and add GPU-accelerated processing paths with CPU fallbacks to your applications.

---

🚀 Quick Start

Detect Available GPUs


from bravura.components import GPUDetector

# Create detector
detector = GPUDetector()

# Detect GPU
gpu_info = detector.detect_gpu()

if gpu_info['available']:
    print(f"GPU Found: {gpu_info['name']}")
    print(f"Vendor: {gpu_info['vendor']}")
    print(f"Memory: {gpu_info.get('memory', 'Unknown')} MB")
else:
    print(f"No GPU detected: {gpu_info.get('reason')}")

Check GPU Capabilities


from bravura.components import check_gpu_capabilities

# Get capabilities
caps = check_gpu_capabilities()

print(f"CUDA Available: {caps['cuda_available']}")
print(f"OpenCL Available: {caps['opencl_available']}")
print(f"Metal Available: {caps['metal_available']}")
print(f"DirectX Available: {caps['directx_available']}")
print(f"Compute Capable: {caps['compute_capable']}")

---

🔧 GPU Detection API

Using GPUDetector Class

The GPUDetector class provides the primary interface for GPU detection:


from bravura.components import GPUDetector

detector = GPUDetector()

# Detect GPU (cached result)
gpu_info = detector.detect_gpu()

# Force refresh
gpu_info = detector.detect_gpu(force_refresh=True)

# Get capabilities
capabilities = detector.get_capabilities()

# Check specific capabilities
if detector.is_cuda_available():
    print("CUDA acceleration available!")

if detector.is_compute_capable():
    print("GPU can be used for computation!")

# Get GPU name for display
gpu_name = detector.get_gpu_name()  # Returns "CPU Mode" if no GPU

Using Function-Based API


from bravura.components import detect_gpus, check_gpu_capabilities

# Detect all GPUs
result = detect_gpus()

if result['detected']:
    print(f"Found {len(result['detected'])} GPU(s)")
    print(f"Primary GPU: {result['primary']['name']}")
    print(f"Detection method: {result['method']}")

    # Check capabilities
    caps = check_gpu_capabilities(result['primary'])
else:
    print(f"Detection failed: {result.get('error')}")

---

💻 Implementing GPU Acceleration

Example: GPU-Accelerated Data Processing


from bravura import ToolkitApp
from bravura.components import GPUDetector
import numpy as np

class GPUAcceleratedApp(ToolkitApp):
    def __init__(self):
        super().__init__()
        self.gpu_detector = GPUDetector()

    def on_start(self, inputs):
        """Process data with GPU if available."""
        # Detect GPU
        gpu_info = self.gpu_detector.detect_gpu()
        use_gpu = gpu_info.get('available', False)

        if use_gpu:
            self.log_info(f"Using GPU: {gpu_info['name']}")
            results = self._process_with_gpu(inputs)
        else:
            self.log_info("Using CPU processing")
            results = self._process_with_cpu(inputs)

        return results

    def _process_with_gpu(self, inputs):
        """GPU-accelerated processing."""
        # Check for GPU libraries
        try:
            import cupy as cp  # GPU-accelerated NumPy

            self.log_info("Using CuPy for GPU acceleration")

            # Convert data to GPU arrays
            data = cp.array(inputs['data'])

            # Perform GPU computation
            result = cp.fft.fft(data)
            result = cp.abs(result)

            # Convert back to CPU
            return {'result': cp.asnumpy(result)}

        except ImportError:
            self.log_warning("CuPy not installed, falling back to CPU")
            return self._process_with_cpu(inputs)

    def _process_with_cpu(self, inputs):
        """CPU fallback processing."""
        data = np.array(inputs['data'])
        result = np.fft.fft(data)
        result = np.abs(result)
        return {'result': result}

---

📚 GPU Libraries Integration

NVIDIA CUDA

For NVIDIA GPUs:


pip install cupy-cuda12x  # For CUDA 12.x
# or
pip install cupy-cuda11x  # For CUDA 11.x

import cupy as cp

# Check CUDA availability
cuda_available = cp.is_available()

if cuda_available:
    # Use CuPy like NumPy
    arr = cp.array([1, 2, 3, 4, 5])
    result = cp.sum(arr)

    # GPU memory info
    mempool = cp.get_default_memory_pool()
    print(f"Used GPU memory: {mempool.used_bytes() / 1024**2} MB")

PyTorch

Cross-platform GPU support:


pip install torch torchvision torchaudio

import torch

# Check GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Use GPU tensors
tensor = torch.randn(1000, 1000).to(device)
result = torch.matmul(tensor, tensor)

print(f"Processing on: {device}")

TensorFlow

Machine learning with GPU:


pip install tensorflow

import tensorflow as tf

# Check GPU availability
gpus = tf.config.list_physical_devices('GPU')

if gpus:
    print(f"Found {len(gpus)} GPU(s)")

    # Use GPU acceleration
    with tf.device('/GPU:0'):
        a = tf.constant([[1.0, 2.0], [3.0, 4.0]])
        b = tf.constant([[5.0, 6.0], [7.0, 8.0]])
        c = tf.matmul(a, b)
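
By default TensorFlow reserves most GPU memory at startup; enabling memory growth makes it allocate on demand instead (this uses TensorFlow's documented tf.config API):

```python
import tensorflow as tf

# Allocate GPU memory on demand rather than reserving it all up front
for gpu in tf.config.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)
```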

OpenCL (Cross-platform)

Works with NVIDIA, AMD, and Intel GPUs:


pip install pyopencl

import pyopencl as cl

# Get available platforms
platforms = cl.get_platforms()

for platform in platforms:
    devices = platform.get_devices()
    for device in devices:
        print(f"Device: {device.name}")
        print(f"Type: {cl.device_type.to_string(device.type)}")

---

🎯 Best Practices

1. Always Provide CPU Fallback


def process_data(data, use_gpu=True):
    """Always have a CPU fallback."""
    if use_gpu:
        try:
            return gpu_process(data)
        except Exception as e:
            logger.warning(f"GPU processing failed: {e}")
            return cpu_process(data)
    else:
        return cpu_process(data)
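
If several functions need the same guard, a small decorator keeps the fallback logic in one place (a sketch; gpu_process and cpu_process are the illustrative functions from above):

```python
import functools
import logging

logger = logging.getLogger(__name__)

def with_cpu_fallback(cpu_fn):
    """Wrap a GPU function so any failure falls back to cpu_fn."""
    def decorator(gpu_fn):
        @functools.wraps(gpu_fn)
        def wrapper(*args, **kwargs):
            try:
                return gpu_fn(*args, **kwargs)
            except Exception as e:
                logger.warning(f"GPU path failed ({e}); using CPU")
                return cpu_fn(*args, **kwargs)
        return wrapper
    return decorator
```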

2. Batch Processing for Efficiency


# Good: Process in batches
batch_size = 1000
for i in range(0, len(data), batch_size):
    batch = data[i:i+batch_size]
    result = gpu_process_batch(batch)

# Bad: Process item-by-item
for item in data:  # Too slow, overhead dominates
    result = gpu_process_single(item)

3. Memory Management


import cupy as cp

# Good: Clear GPU memory when done
def process_with_cleanup(large_dataset):
    """Process on the GPU, then release pooled memory."""
    data = cp.array(large_dataset)
    result = cp.fft.fft(data)

    # Clear memory
    del data
    cp.get_default_memory_pool().free_all_blocks()

    return cp.asnumpy(result)
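
For repeated jobs, the same cleanup can live in a context manager so pooled memory is released even if processing raises (a sketch, assuming CuPy's default memory pool):

```python
import contextlib
import cupy as cp

@contextlib.contextmanager
def gpu_memory_scope():
    """Free CuPy's default memory pool on exit, even on error."""
    try:
        yield
    finally:
        cp.get_default_memory_pool().free_all_blocks()

# Usage: host_array is your NumPy input (illustrative name)
with gpu_memory_scope():
    result = cp.asnumpy(cp.abs(cp.fft.fft(cp.array(host_array))))
```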

4. Progress Reporting


class GPUProcessor(ToolkitApp):
    def process_large_dataset(self, data):
        """Process with progress updates."""
        batch_size = 1000
        total_batches = (len(data) + batch_size - 1) // batch_size  # Round up for a partial final batch

        results = []
        for i, batch_start in enumerate(range(0, len(data), batch_size)):
            # Process batch
            batch = data[batch_start:batch_start+batch_size]
            result = self._gpu_process_batch(batch)
            results.append(result)

            # Update progress
            progress = ((i + 1) / total_batches) * 100
            self.update_progress(
                progress,
                f"Processing batch {i+1}/{total_batches} on GPU"
            )

        return results

---

🔍 Platform-Specific Notes

Windows

Install the vendor's GPU drivers (NVIDIA, AMD, or Intel). For CUDA libraries such as CuPy, pick the cupy-cuda* package that matches your installed CUDA Toolkit version.

Linux

Install the NVIDIA driver and CUDA Toolkit, or ROCm for AMD GPUs. Running nvidia-smi (or rocm-smi for AMD) confirms the driver can see the card.

macOS

CUDA is not available on macOS; Apple Silicon GPUs are accessed through Metal, for example via PyTorch's MPS backend:


# macOS Apple Silicon example
import torch

if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using Apple Silicon GPU")
else:
    device = torch.device("cpu")

---

📊 Performance Comparison

Example: FFT Processing


import time
import numpy as np

def benchmark_processing(size=10000):
    """Compare CPU vs GPU performance."""
    data = np.random.randn(size)

    # CPU benchmark
    start = time.time()
    cpu_result = np.fft.fft(data)
    cpu_time = time.time() - start

    # GPU benchmark (if available)
    try:
        import cupy as cp
        gpu_data = cp.array(data)

        # Warm-up run so one-time kernel/plan setup isn't timed
        cp.fft.fft(gpu_data)
        cp.cuda.Stream.null.synchronize()

        start = time.time()
        gpu_result = cp.fft.fft(gpu_data)
        cp.cuda.Stream.null.synchronize()  # Wait for the GPU to finish
        gpu_time = time.time() - start

        speedup = cpu_time / gpu_time
        print(f"CPU time: {cpu_time:.4f}s")
        print(f"GPU time: {gpu_time:.4f}s")
        print(f"Speedup: {speedup:.2f}x")

    except ImportError:
        print("CuPy not available, skipping GPU benchmark")

---

🐛 Troubleshooting

GPU Not Detected

  1. Check drivers are installed:

- NVIDIA: Install CUDA Toolkit and drivers

- AMD: Install ROCm drivers

- Intel: Install compute runtime
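
To confirm the NVIDIA driver is installed, for example, you can shell out to nvidia-smi, which ships with the driver (a minimal sketch; rocm-smi is the AMD equivalent):

```python
import shutil
import subprocess

# nvidia-smi is installed alongside the NVIDIA driver
if shutil.which("nvidia-smi"):
    print(subprocess.run(["nvidia-smi"], capture_output=True, text=True).stdout)
else:
    print("nvidia-smi not found; the NVIDIA driver may be missing")
```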

  2. Verify GPU visibility:

```python
from bravura.components import detect_gpus

result = detect_gpus()
print(f"Detection method: {result.get('method')}")
print(f"Error (if any): {result.get('error')}")
```

  3. Check library installation:

```python
# Check CuPy
try:
    import cupy
    print(f"CuPy version: {cupy.__version__}")
except ImportError:
    print("CuPy not installed")
```

Out of Memory Errors


# Reduce batch size
batch_size = 500  # Instead of 1000

# Or process in chunks (chunks() is a helper sketched below)
for chunk in chunks(data, chunk_size=1000):
    result = process(chunk)
    # Clear memory between chunks
    cp.get_default_memory_pool().free_all_blocks()
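
A minimal sketch of the chunks() helper used above (not part of Bravura):

```python
def chunks(data, chunk_size=1000):
    """Yield successive chunk_size-sized slices of data."""
    for start in range(0, len(data), chunk_size):
        yield data[start:start + chunk_size]
```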

Performance Not Improved


# Bad: Too much transfer
for item in data:
    gpu_item = cp.array(item)  # Transfer
    result = process(gpu_item)
    cpu_result = cp.asnumpy(result)  # Transfer back

# Good: Batch transfer
gpu_data = cp.array(data)  # One transfer
gpu_results = batch_process(gpu_data)
cpu_results = cp.asnumpy(gpu_results)  # One transfer back

---


💡 Example: Complete GPU Application


#!/usr/bin/env python3
"""
GPU-Accelerated Data Processor
Professional Edition Example
"""

from bravura import Toolkit, ToolkitApp
from bravura.components import GPUDetector
import numpy as np

class GPUDataProcessor(ToolkitApp):
    def __init__(self):
        super().__init__()
        self.detector = GPUDetector()

    def on_start(self, inputs):
        """Process data with GPU acceleration."""
        # Detect GPU
        gpu_info = self.detector.detect_gpu()
        self.log_info(f"GPU: {gpu_info.get('name', 'Not detected')}")

        # Generate sample data
        data_size = inputs.get('data_size', 100000)
        self.update_progress(10, "Generating dataset...")
        data = np.random.randn(data_size)

        # Process
        if gpu_info.get('available'):
            results = self._process_gpu(data)
        else:
            results = self._process_cpu(data)

        self.update_progress(100, "Complete!")
        return results

    def _process_gpu(self, data):
        """GPU processing path."""
        try:
            import cupy as cp

            self.update_progress(30, "Transferring to GPU...")
            gpu_data = cp.array(data)

            self.update_progress(50, "Processing on GPU...")
            result = cp.fft.fft(gpu_data)
            result = cp.abs(result)

            self.update_progress(80, "Transferring from GPU...")
            cpu_result = cp.asnumpy(result)

            return {'result': cpu_result, 'mode': 'GPU'}

        except Exception as e:
            self.log_error(f"GPU failed: {e}")
            return self._process_cpu(data)

    def _process_cpu(self, data):
        """CPU fallback."""
        self.update_progress(50, "Processing on CPU...")
        result = np.fft.fft(data)
        result = np.abs(result)
        return {'result': result, 'mode': 'CPU'}

# Run application
if __name__ == "__main__":
    toolkit = Toolkit(app_name="GPU Processor", use_gpu=True)
    root = toolkit.create_main_window(title="GPU Data Processor")

    processor = GPUDataProcessor()
    # Run your processing logic here

    toolkit.run()

---

Bravura - Professional GUI Framework

Version 1.0.0 | Copyright © 2025 Wigley Studios LLC

GPU Detection and Acceleration requires Professional or Enterprise tier license.