vertical softmax + version bump

TannerLow · TannerLow · commit bb2255e8f389 · 2024-05-15T16:46:06.000-05:00
diff --git a/build.gradle b/build.gradle
@@ -3,7 +3,7 @@ plugins {
 }
 
 group 'com.github.TannerLow'
-version '0.4'
+version '0.5'
 description 'Matrix Library with GPU compatibility.'
 
 repositories {
diff --git a/src/main/java/com/github/TannerLow/JavaMatrixMath/Matrix.java b/src/main/java/com/github/TannerLow/JavaMatrixMath/Matrix.java
@@ -122,7 +122,7 @@ public Matrix vectorizedReluDerivative() {
         return result;
     }
 
-    public Matrix softmax() {
+    public Matrix horizontalSoftmax() {
         Matrix result = new Matrix(rows, cols);
 
         float[] buffer = new float[rows];
@@ -154,6 +154,37 @@ public Matrix softmax() {
         return result;
     }
 
+    /**
+     * Applies softmax to every column independently and returns the result as a new matrix.
+     * Element (row, col) is read from data[col + row * cols]; only the new matrix is written.
+     */
+    public Matrix verticalSoftmax() {
+        Matrix result = new Matrix(rows, cols);
+        for(int col = 0; col < cols; col++) {
+            // largest value in this column; subtracted below so the largest exponent is 0
+            float max = -Float.MAX_VALUE;
+            for(int row = 0; row < rows; row++) {
+                float value = data[col + row * cols];
+                if(value > max) {
+                    max = value;
+                }
+            }
+
+            // denominator: sum of the shifted exponentials of the column
+            float sum = 0;
+            for(int row = 0; row < rows; row++) {
+                sum += Math.exp(data[col + row * cols] - max);
+            }
+
+            // each entry is its shifted exponential divided by the column sum
+            for(int row = 0; row < rows; row++) {
+                int index = col + row * cols;
+                result.data[index] = (float) (Math.exp(data[index] - max) / sum);
+            }
+        }
+        return result;
+    }
+
 //    public Matrix fastBatchSoftmaxDerivative(Matrix output) {
 //        Matrix partialDerivatives = new Matrix(cols, cols);
 //
@@ -187,20 +218,25 @@ public static boolean isCompatibleWithGPU(GPU gpu) {
         return  gpu.isInitialized() &&
                 gpu.getKernel("Matrices::matrixMultiply") != null &&
                 gpu.getKernel("Matrices::addRowToRows") != null &&
-                gpu.getKernel("Matrices::relu") != null;
+                gpu.getKernel("Matrices::addColToCols") != null &&
+                gpu.getKernel("Matrices::relu") != null &&
+                gpu.getKernel("Matrices::horizontalSoftmax") != null &&
+                gpu.getKernel("Matrices::verticalSoftmax") != null;
     }
 
     public Matrix multiply(GPU gpu, Matrix other) {
         if(cols != other.rows) {
-            return null;
+            final int[] dimensionsA = {rows, cols};
+            final int[] dimensionsB = {other.rows, other.cols};
+            throw new DimensionsMismatchException(dimensionsA, dimensionsB);
         }
 
         cl_context context = gpu.getContext();
         cl_command_queue commandQueue = gpu.getCommandQueue();
         cl_kernel kernel = gpu.getKernel("Matrices::matrixMultiply");
 
         if(kernel == null) {
-            return null;
+            throw new NullPointerException("Matrices::matrixMultiply not found to be loaded in GPU");
         }
 
         Matrix result = new Matrix(rows, other.cols);
@@ -369,7 +405,7 @@ public Matrix relu(GPU gpu) {
         cl_kernel kernel = gpu.getKernel("Matrices::relu");
 
         if(kernel == null) {
-            return null;
+            throw new NullPointerException("Matrices::relu not found to be loaded in GPU");
         }
 
         Matrix result = new Matrix(rows, cols);
@@ -409,13 +445,13 @@ public Matrix relu(GPU gpu) {
         return result;
     }
 
-    public Matrix softmax(GPU gpu) {
+    public Matrix horizontalSoftmax(GPU gpu) {
         cl_context context = gpu.getContext();
         cl_command_queue commandQueue = gpu.getCommandQueue();
-        cl_kernel kernel = gpu.getKernel("Matrices::softmax");
+        cl_kernel kernel = gpu.getKernel("Matrices::horizontalSoftmax");
 
         if(kernel == null) {
-            return null;
+            throw new NullPointerException("Matrices::horizontalSoftmax not found to be loaded in GPU");
         }
 
         Matrix result = new Matrix(rows, cols);
@@ -454,4 +490,51 @@ public Matrix softmax(GPU gpu) {
 
         return result;
     }
+
+    /**
+     * Computes the column-wise softmax on the GPU using the "Matrices::verticalSoftmax" kernel.
+     * One work-item is enqueued per column, and the result is fetched with a blocking read.
+     *
+     * @param gpu provides the OpenCL context, the command queue and the kernel lookup
+     * @return a new rows x cols matrix whose data is read back from the kernel's output buffer
+     * @throws NullPointerException if the kernel is not loaded in {@code gpu}
+     */
+    public Matrix verticalSoftmax(GPU gpu) {
+        cl_context context = gpu.getContext();
+        cl_command_queue commandQueue = gpu.getCommandQueue();
+        cl_kernel kernel = gpu.getKernel("Matrices::verticalSoftmax");
+        if(kernel == null) {
+            throw new NullPointerException("Matrices::verticalSoftmax not found to be loaded in GPU");
+        }
+
+        Matrix result = new Matrix(rows, cols);
+        Pointer hostInput = Pointer.to(data);
+        Pointer hostOutput = Pointer.to(result.data);
+
+        // Device buffers: the input is copied from host memory at creation, the output gets no host data
+        cl_mem deviceInput = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+                Sizeof.cl_float * data.length, hostInput, null);
+        cl_mem deviceOutput = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                Sizeof.cl_float * result.data.length, null, null);
+
+        // Kernel arguments in order: output, input, rowSize (= cols), colSize (= rows)
+        clSetKernelArg(kernel, 0, Sizeof.cl_mem, Pointer.to(deviceOutput));
+        clSetKernelArg(kernel, 1, Sizeof.cl_mem, Pointer.to(deviceInput));
+        clSetKernelArg(kernel, 2, Sizeof.cl_uint, Pointer.to(new int[]{cols}));
+        clSetKernelArg(kernel, 3, Sizeof.cl_uint, Pointer.to(new int[]{rows}));
+
+        // One-dimensional launch: a work-item per column, in work-groups of one
+        long[] globalWorkSize = {cols};
+        long[] localWorkSize = {1};
+        clEnqueueNDRangeKernel(commandQueue, kernel, 1, null,
+                globalWorkSize, localWorkSize, 0, null, null);
+
+        // Blocking read (CL_TRUE) of the device output into result.data
+        clEnqueueReadBuffer(commandQueue, deviceOutput, CL_TRUE, 0,
+                result.data.length * Sizeof.cl_float, hostOutput, 0, null, null);
+        clReleaseMemObject(deviceInput);
+        clReleaseMemObject(deviceOutput);
+
+        return result;
+    }
 }
diff --git a/src/main/resources/kernels/Matrices.cl b/src/main/resources/kernels/Matrices.cl
@@ -106,10 +106,10 @@ relu(__global float* output,
     }
 }
 
-// Add row to rows: C = exp(A[i]) for all rows i.
-__kernel void softmax(__global float* output,
-                      __global float* input,
-                      const int rowSize)
+// Softmax each element of each row with all elements of that row
+__kernel void horizontalSoftmax(__global float* output,
+                                __global float* input,
+                                const int rowSize)
 {
     int globalRow = get_global_id(0);
 
@@ -138,3 +138,35 @@ __kernel void softmax(__global float* output,
         output[index] = exp(input[index] - max) / sum;
     }
 }
+
+// Softmax each element of each column with all elements of that column.
+// One work-item processes the column given by get_global_id(0); rowSize is the
+// stride between vertically adjacent elements and colSize the elements per column.
+__kernel void verticalSoftmax(__global float* output,
+                              __global float* input,
+                              const int rowSize,
+                              const int colSize)
+{
+    int globalCol = get_global_id(0);
+
+    // Column maximum, seeded with the lowest finite float so any finite input replaces it
+    float columnMax = -FLT_MAX;
+    for (int i = 0; i < colSize; i++) {
+        float value = input[globalCol + i * rowSize];
+        if(value > columnMax) {
+            columnMax = value;
+        }
+    }
+
+    // Sum of exponentials, shifted by the maximum so the largest exponent is 0
+    float sum = 0.0f;
+    for (int i = 0; i < colSize; i++) {
+        sum += exp(input[globalCol + i * rowSize] - columnMax);
+    }
+
+    // Calculate softmax for each element
+    for (int i = 0; i < colSize; i++) {
+        int index = globalCol + i * rowSize;
+        output[index] = exp(input[index] - columnMax) / sum;
+    }
+}
diff --git a/src/test/java/com/github/TannerLow/JavaMatrixMath/CpuTest.java b/src/test/java/com/github/TannerLow/JavaMatrixMath/CpuTest.java
@@ -10,7 +10,8 @@ public static void testAll() {
         testAddColToCols();
         testRelu();
         testVectorizedReluDerivative();
-        testSoftmax();
+        testHorizontalSoftmax();
+        testVerticalSoftmax();
     }
 
     private static void testMultiply() {
@@ -114,13 +115,32 @@ private static void testVectorizedReluDerivative() {
         }
     }
 
-    private static void testSoftmax() {
+    private static void testHorizontalSoftmax() {
         float[] data = {1.1f,2.2f,0.2f,-1.7f};
         float[] expected = {0.223636f,0.671841f,0.090923f,0.013599f};
 
         Matrix m = new Matrix(1, 4, data);
 
-        Matrix result = m.softmax();
+        Matrix result = m.horizontalSoftmax();
+
+        if(result.rows != m.rows || result.cols != m.cols) {
+            throw new TestFailedException();
+        }
+
+        for(int i = 0; i < result.data.length; i++) {
+            if(!TestMath.withinMariginOfError(expected[i], result.data[i], 0.0005f)) {
+                throw new TestFailedException();
+            }
+        }
+    }
+
+    private static void testVerticalSoftmax() {
+        float[] data = {1.1f,2.2f,0.2f,-1.7f};
+        float[] expected = {0.223636f,0.671841f,0.090923f,0.013599f};
+
+        Matrix m = new Matrix(4, 1, data);
+
+        Matrix result = m.verticalSoftmax();
 
         if(result.rows != m.rows || result.cols != m.cols) {
             throw new TestFailedException();
diff --git a/src/test/java/com/github/TannerLow/JavaMatrixMath/GpuTest.java b/src/test/java/com/github/TannerLow/JavaMatrixMath/GpuTest.java
@@ -19,7 +19,8 @@ public static void testAll() throws IOException {
             testAddRowToRows();
             testAddColToCols();
             testRelu();
-            testSoftmax();
+            testHorizontalSoftmax();
+            testVerticalSoftmax();
         }
     }
 
@@ -35,10 +36,12 @@ private static void setup() throws IOException {
         int programId = gpu.loadProgram(matricesKernelCode);
         gpu.loadKernel(programId, "Matrices", "matrixMultiply");
         gpu.loadKernel(programId, "Matrices", "addRowToRows");
+        gpu.loadKernel(programId, "Matrices", "addColToCols");
         gpu.loadKernel(programId, "Matrices", "relu");
-        gpu.loadKernel(programId, "Matrices", "softmax");
+        gpu.loadKernel(programId, "Matrices", "horizontalSoftmax");
+        gpu.loadKernel(programId, "Matrices", "verticalSoftmax");
 
-        if(!gpu.isInitialized()) {
+        if(!gpu.isInitialized() || !Matrix.isCompatibleWithGPU(gpu)) {
             throw new IllegalStateException("GPU in unexpected state.");
         }
     }
@@ -93,7 +96,7 @@ private static void testAddColToCols() {
         Matrix a = new Matrix(3,2, aData);
         Matrix b = new Matrix(3,1, bData);
 
-        Matrix result = a.addColToCols(b);
+        Matrix result = a.addColToCols(gpu, b);
 
         if(result.rows != a.rows || result.cols != a.cols) {
             throw new TestFailedException();
@@ -125,13 +128,33 @@ private static void testRelu() {
         }
     }
 
-    private static void testSoftmax() {
+    private static void testHorizontalSoftmax() {
         float[] data = {1.1f,2.2f,0.2f,-1.7f};
         float[] expected = {0.223636f,0.671841f,0.090923f,0.013599f};
 
         Matrix m = new Matrix(1, 4, data);
 
-        Matrix result = m.softmax(gpu);
+        Matrix result = m.horizontalSoftmax(gpu);
+
+        if(result.rows != m.rows || result.cols != m.cols) {
+            throw new TestFailedException();
+        }
+
+        for(int i = 0; i < result.data.length; i++) {
+            if(!TestMath.withinMariginOfError(expected[i], result.data[i], 0.0005f)) {
+                System.out.println(expected[i] + " vs. " + result.data[i]);
+                throw new TestFailedException();
+            }
+        }
+    }
+
+    private static void testVerticalSoftmax() {
+        float[] data = {1.1f,2.2f,0.2f,-1.7f};
+        float[] expected = {0.223636f,0.671841f,0.090923f,0.013599f};
+
+        Matrix m = new Matrix(4, 1, data);
+
+        Matrix result = m.verticalSoftmax(gpu);
 
         if(result.rows != m.rows || result.cols != m.cols) {
             throw new TestFailedException();

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -3,7 +3,7 @@ plugins {` |
| `3` | `3` | `}` |
| `4` | `4` | |
| `5` | `5` | `group 'com.github.TannerLow'` |
| `6` | | `-version '0.4'` |
| | `6` | `+version '0.5'` |
| `7` | `7` | `description 'Matrix Library with GPU compatibility.'` |
| `8` | `8` | |
| `9` | `9` | `repositories {` |