constant int max_size = 256; __kernel void gauss( global float* restrict matrix, int height, int width ) { if ((height > max_size) || (width > max_size)) { return; } size_t pivot_row = -1; size_t pivot_column = -1; bool is_row_relevant[max_size]; for (size_t row = 0; row < height; row++) { is_row_relevant[row] = true; } bool completed = false; while (!completed) { pivot_row = -1; pivot_column = -1; // Find the left-most, non-zero cell in every row. // These cells are candidates for pivots. size_t first_non_zero[max_size]; for (size_t row = 0; row < height; row++) { float row_cache[max_size]; for (size_t column = 0; column < width; column++) { row_cache[column] = matrix[row*width + column]; } first_non_zero[row] = -1; for (size_t column = width; column > 0; column--) { if (row_cache[column-1] != 0) { first_non_zero[row] = column-1; } } } // Find the top-most pivot candidate in a relevant row. // This is going to be our new pivot. for (size_t row = 0; row < height; row++) { if (is_row_relevant[row] && first_non_zero[row] != -1) { pivot_row = row; pivot_column = first_non_zero[row]; is_row_relevant[row] = false; break; } } // If no pivot was found, we're done. Otherwise, we isolate the pivot. if(pivot_row == -1 || pivot_column == -1) { completed = true; } else { float pivot_row_cache[max_size]; for (size_t column = 0; column < width; column++) { pivot_row_cache[column] = matrix[pivot_row * width + column]; } for (size_t row = 0; row < height; row++) { if (row != pivot_row) { float row_cache[max_size]; for (size_t column = 0; column < width; column++) { row_cache[column] = matrix[row * width + column]; } float row_value = matrix[row * width + pivot_column]; float pivot_value = matrix[pivot_row * width + pivot_column]; float factor = (-1) * (row_value / pivot_value); for (size_t column = 0; column < width; column++) { if (column == pivot_column) { matrix[row * width + column] = 0.0; } else { matrix[row * width + column] = row_cache[column] + factor * pivot_row_cache[column]; } } } } } } }