From 2a29afd4c6bcfa00321018081a9f13525c7a3f85 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Thu, 26 Sep 2024 08:59:42 +0200 Subject: [PATCH] vulkan : argsort barriers must be under uniform control flow (ggml/951) a return before a barrier (that happens only in some threads in a workgroup) leads to UB. While the old code actually works on some devices, it fails on some others (i.e. "smaller" GPUs). BTW, I think it would be better to set specialization constants when the graph is built, in that way the local workgroup could be sized appropriately. But it would take a lot of work. Signed-off-by: Salvatore Mesoraca --- ggml/src/vulkan-shaders/argsort.comp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/ggml/src/vulkan-shaders/argsort.comp b/ggml/src/vulkan-shaders/argsort.comp index e55414b..d4fa45b 100644 --- a/ggml/src/vulkan-shaders/argsort.comp +++ b/ggml/src/vulkan-shaders/argsort.comp @@ -29,20 +29,18 @@ void main() { const int col = int(gl_LocalInvocationID.x); const uint row = gl_WorkGroupID.y; - if (col >= p.ncols_pad) { - return; - } - const uint row_offset = row * p.ncols; // initialize indices - dst_row[col] = col; + if (col < p.ncols_pad) { + dst_row[col] = col; + } barrier(); for (uint k = 2; k <= p.ncols_pad; k *= 2) { for (uint j = k / 2; j > 0; j /= 2) { const uint ixj = col ^ j; - if (ixj > col) { + if (col < p.ncols_pad && ixj > col) { if ((col & k) == 0) { if (dst_row[col] >= p.ncols || (dst_row[ixj] < p.ncols && (p.order == ASC ?