oktoblast / examples /python /basic_usage.py
OktoSeek's picture
Upload 20 files
8046910 verified
"""
OktoBLAS - Basic Usage Example
==============================
This example demonstrates basic OktoBLAS operations.
Installation:
pip install oktoblas
"""
import oktoblas as ob
import numpy as np
def main():
    """Walk through the basic OktoBLAS calls and print each result.

    In order: calls ``ob.info()``, multiplies two random FP32 matrices,
    multiplies two random FP16 matrices, runs fused attention on random
    Q/K/V inputs, reports ``ob.is_cuda_available()``, and finally runs a
    short FP16 GEMM benchmark. Any exception raised by the benchmark step
    is printed as "Benchmark skipped" instead of propagating.
    """
    rule = "=" * 60
    print(rule)
    print("OktoBLAS Basic Usage Example")
    print(rule)

    # 1. Library information
    print("\n1. Library Info:")
    ob.info()

    # 2. Single-precision matrix product on random square inputs
    print("\n2. FP32 GEMM:")
    gemm_shape = (1024, 1024)
    lhs32 = np.random.randn(*gemm_shape).astype(np.float32)
    rhs32 = np.random.randn(*gemm_shape).astype(np.float32)
    prod32 = ob.matmul(lhs32, rhs32)
    print(f" A: {lhs32.shape} @ B: {rhs32.shape} = C: {prod32.shape}")
    print(f" Result sample: {prod32[0, 0]:.4f}")

    # 3. Half-precision matrix product (the section the script labels
    #    "Tensor Cores")
    print("\n3. FP16 GEMM (Tensor Cores):")
    lhs16 = np.random.randn(*gemm_shape).astype(np.float16)
    rhs16 = np.random.randn(*gemm_shape).astype(np.float16)
    prod16 = ob.matmul_fp16(lhs16, rhs16)
    print(f" A: {lhs16.shape} @ B: {rhs16.shape} = C: {prod16.shape}")
    print(f" Result sample: {prod16[0, 0]:.4f}")

    # 4. Fused attention over random (batch, seq_len, head_dim) inputs
    print("\n4. Fused Attention:")
    batch, seq_len, head_dim = 4, 256, 64
    qkv_shape = (batch, seq_len, head_dim)
    # Drawn in Q, K, V order, one RNG call each.
    query, key, value = (
        np.random.randn(*qkv_shape).astype(np.float32) for _ in range(3)
    )
    attended = ob.attention(query, key, value)
    print(f" Q: {query.shape}, K: {key.shape}, V: {value.shape}")
    print(f" Output: {attended.shape}")
    print(f" Result sample: {attended[0, 0, 0]:.4f}")

    # 5. CUDA availability as reported by the library
    print("\n5. CUDA Status:")
    cuda_ok = ob.is_cuda_available()
    print(f" CUDA Available: {cuda_ok}")

    # 6. Benchmark; best-effort, so a failure here is reported, not raised
    print("\n6. Benchmark (FP16 GEMM 2048x2048):")
    try:
        stats = ob.benchmark("gemm_fp16", size=2048, iterations=50)
        print(f" OktoBLAS: {stats['oktoblas_tflops']:.1f} TFLOPS")
        # NOTE(review): 'ratio' is assumed to accompany the PyTorch baseline
        # and is therefore only printed when that baseline is present - confirm.
        if 'pytorch_tflops' in stats:
            print(f" PyTorch: {stats['pytorch_tflops']:.1f} TFLOPS")
            print(f" Ratio: {stats['ratio']:.1f}%")
    except Exception as err:
        print(f" Benchmark skipped: {err}")

    print("\n" + rule)
    print("Done!")
    print(rule)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()