--reset

# 2D problems: sweep data-type configs, src/weights layout tags, runtime-dims
# masks, bias data type (undef or f32) and bias mask over a single shape.
--cfg=f32,f16,bf16bf16f32,u8s8s8
--stag=ab,ba --wtag=ab,ba --dtag=ab
--runtime_dims_masks=0,2:1,0:2,2:3,1:0,3:1,1:2,3:3
--bia_dt=undef,f32
--bia_mask=1,2,3
10x30:30x20:10x20

# Same shape, now with common scales on src/wei/dst and a post-op chain that
# starts with sum.
--attr-scales=src:common:0.25*+wei:common:0.5*+dst:common:2*
--attr-post-ops=sum+add:f32+add:u8:per_dim_01+linear:0.5:1.5:2.0+mul:f32:per_dim_0+add:s8:per_oc+add:f32:per_tensor
10x30:30x20:10x20

# Weights scale switched to per_oc; the post-op chain starts with relu
# instead of sum.
--attr-scales=src:common:0.25*+wei:per_oc:0.5*+dst:common:2*
--attr-post-ops=relu+add:f32+add:u8:per_tensor+linear:0.5:1.5:2.0+mul:f32:per_dim_0+add:s8:per_oc+add:f32:per_dim_01
10x30:30x20:10x20

# Empty --attr-scales value (presumably drops the scales set above - confirm).
# Weights and dst have a single column (N = 1); chain starts with sum:2 and
# ends with linear.
--attr-scales=
--attr-post-ops=sum:2+add:f32+add:u8:per_tensor+mul:f32:per_dim_0+add:s8:per_oc+add:f32:per_dim_01+linear:3:-1
10x30:30x1:10x1

# Same N = 1 shape without sum; linear carries a third argument here.
--attr-post-ops=add:f32+add:u8:per_dim_01+mul:f32:per_dim_0+add:s8:per_oc+add:f32:per_tensor+linear:3:-1:2
10x30:30x1:10x1

# test any: adds the `any` tag to the src, weights and dst tag lists; bias data
# type is undef only. The shape has a single src/dst row (M = 1).
--reset
--cfg=f32,f16,f16f16s8,f16f16u8,bf16bf16bf16,s8s8f32,s8s8f16
--bia_dt=undef
--stag=ab,ba,any --wtag=ab,ba,any --dtag=ab,any
1x30:30x20:1x20

# test x8x8x8
# First group: configs starting with u8s8 plus s8s8s8, with common scales,
# common zero points on src/wei/dst and a post-op chain starting with sum+relu.
--reset
--cfg=u8s8s32,u8s8f32,s8s8s8,u8s8f16
--runtime_dims_masks=0:2,1:2
--bia_dt=undef,f32,u8
--stag=ab,ba --wtag=ab --dtag=ab
--attr-scales=src:common:0.25*+wei:common:0.5*+dst:common:2*
--attr-zero-points=src:common:1*+wei:common:-1*+dst:common:2*
--attr-post-ops=sum+relu+add:f32+add:u8:per_tensor+linear:0.5:1.5:2.0+mul:f32:per_dim_0+add:s8:per_oc+add:f32:per_dim_01
100x10:10x10:100x10

# Second group: s8s8* configs only, src in `ba` layout only, different
# zero-point values, and a larger reduction dimension (K = 100).
--cfg=s8s8s8,s8s8s32,s8s8u8
--runtime_dims_masks=2:1,2:3
--bia_dt=undef,u8
--stag=ba --wtag=ab,ba --dtag=ab
--attr-scales=src:common:0.25*+wei:common:0.5*+dst:common:2*
--attr-zero-points=src:common:1*+wei:common:-2*+dst:common:3*
--attr-post-ops=sum+relu+add:f32+add:u8:per_dim_01+linear:0.5:1.5:2.0+mul:f32:per_dim_0+add:s8:per_oc+add:f32:per_tensor
10x100:100x10:10x10

# 3d: three-dimensional tensors with a leading dimension of 3 and a reduction
# dimension of 1 (src 3x30x1, weights 3x1x20). Sweeps layout tags, runtime-dims
# masks, and bias (undef or f32) with bias masks 4 and 6.
--reset
--cfg=f32,f16,f16f16s8,f16f16u8,bf16bf16bf16,bf16bf16f32
--stag=abc,acb --wtag=abc,acb --dtag=abc
--runtime_dims_masks=0,4:2,0:4,4:6,2:0,6:2,2:4,6:6,1:1,5:3,1:5,5:7,3:1,7:3,3:5,7:7
--bia_dt=undef,f32
--bia_mask=4,6
--attr-scales=src:common:0.25*+wei:common:0.5*+dst:common:2*
--attr-post-ops=sum+add:f32+add:u8:per_dim_01+mul:f32:per_dim_0+add:s8:per_tensor+add:f32:per_dim_01+linear:2:-1
3x30x1:3x1x20:3x30x20

# test regressions
# Cases listed in the harness_matmul_regression_f32 batch file are pulled in
# here; the file's contents are not visible from this input.
--batch=harness_matmul_regression_f32

# Regression case: plain f32 matmul with ab tags, src 96x8 times weights 8x512.
# Dims inside each tensor are separated by `x` and tensors by `:`, so the dst
# tensor is written 96x512 (M x N).
--reset
--cfg=f32
--stag=ab --wtag=ab --dtag=ab
96x8:8x512:96x512

# Regression case: small all-bf16 problem (2x4 by 4x3) with a bf16 bias.
--reset
--cfg=bf16bf16bf16
--stag=ab --wtag=ab --dtag=ab
--bia_dt=bf16
2x4:4x3:2x3

# Regression case: large u8s8f32 problem (24576x1024 by 1024x4096) with f32
# bias, `any` tags everywhere, a gelu_tanh eltwise post-op, per_oc weights
# scale and scratchpad mode `user`.
--reset
--cfg=u8s8f32 --bia_dt=f32  --stag=any --wtag=any --dtag=any
--attr-post-ops=eltwise_gelu_tanh:0:0:21.5742
--attr-scales=src:common:0.25*+wei:per_oc:0.5*+dst:common:2*  --attr-scratchpad=user
24576x1024:1024x4096:24576x4096

# runtime_zero_point_wrong_values
# u8s8f32 with common zero points on weights (2) and src (1) and no dst zero
# point; 3D shape with a leading dimension of 1.
--reset
--cfg=u8s8f32 
--attr-zero-points=wei:common:2*+src:common:1* 
1x2x4:1x4x12:1x2x12

# test all features at once: 6D tensors with two-way broadcast, bias, postops, scale
# Two-way broadcast: src has size 1 in dim 0 where weights/dst have 6, and
# weights have size 1 in dim 1 where src/dst have 5.
--reset
--cfg=bf16bf16bf16
--stag=abx --wtag=abx --dtag=abx
--bia_dt=f32 --bia_mask=4
--attr-scales=src:common:0.25*+wei:common:0.5*+dst:common:2*
--attr-post-ops=sum+add:f32+add:u8:per_dim_01+mul:f32:per_dim_0+add:s8:per_tensor+add:f32:per_dim_01+linear:2:-1
1x5x4x3x30x2:6x1x4x3x2x20:6x5x4x3x30x20

# Test layers of some key DL frameworks on GPU; the options and problem
# shapes come from the option_set_fwks_key_gpu batch file.
--reset
--batch=option_set_fwks_key_gpu

# Test tf32 configuration (cases come from a separate batch file)
--reset
--batch=option_set_fwks_key_gpu_tf32
