diff --git a/hat/backends/ffi/opencl/cpp/opencl_backend.cpp b/hat/backends/ffi/opencl/cpp/opencl_backend.cpp index e29974f77e0..199d4abd60a 100644 --- a/hat/backends/ffi/opencl/cpp/opencl_backend.cpp +++ b/hat/backends/ffi/opencl/cpp/opencl_backend.cpp @@ -66,7 +66,7 @@ OpenCLBackend::OpenCLConfig::OpenCLConfig(int mode): } OpenCLBackend::OpenCLQueue::OpenCLQueue() - : eventMax(256), events(new cl_event[eventMax]), eventc(0){ + : eventMax(10000), events(new cl_event[eventMax]), eventc(0){ } cl_event *OpenCLBackend::OpenCLQueue::eventListPtr(){ diff --git a/hat/backends/ffi/opencl/src/main/java/hat/backend/ffi/OpenCLBackend.java b/hat/backends/ffi/opencl/src/main/java/hat/backend/ffi/OpenCLBackend.java index b1980ab6e68..7738c9445a8 100644 --- a/hat/backends/ffi/opencl/src/main/java/hat/backend/ffi/OpenCLBackend.java +++ b/hat/backends/ffi/opencl/src/main/java/hat/backend/ffi/OpenCLBackend.java @@ -50,15 +50,17 @@ public record Mode(int bits) { private static final int SHOW_COMPUTE_MODEL_BIT = 1 <<8; private static final int INFO_BIT = 1 <<9; private static final int TRACE_COPIES_BIT = 1 << 10; + + public static Mode of() { - List<Mode> modes = new ArrayList<>(); + // List<Mode> modes = new ArrayList<>(); if (( ((System.getenv("HAT") instanceof String e)?e:"")+ ((System.getProperty("HAT") instanceof String p)?p:"")) instanceof String opts) { - Arrays.stream(opts.split(",")).forEach(opt -> - modes.add(of(opt)) - ); + // Arrays.stream(opts.split(",")).forEach(opt -> + return of(opts); + // ); } - return of(modes); + return of(); } public static Mode of(int bits) { @@ -93,8 +95,16 @@ public static Mode of(String name) { case "PROFILE" -> PROFILE(); case "INFO" -> INFO(); default -> { - System.out.println("Unexpected opt '"+name+"'"); - yield Mode.of(0); + if (name.contains(",")) { + List<Mode> modes = new ArrayList<>(); + Arrays.stream(name.split(",")).forEach(opt -> + modes.add(of(opt)) + ); + yield of(modes); + } else { + System.out.println("Unexpected opt '" + name + "'"); + yield Mode.of(0); + } } }; } @@ -244,6 +254,9 @@ public long getBackend(int mode, int platform, int device, int unused) { } return backendHandle; } + public OpenCLBackend(String spec) { + this(Mode.of(spec)); + } public OpenCLBackend(Mode mode) { super("opencl_backend"); this.mode = mode; @@ -255,6 +268,7 @@ public OpenCLBackend(Mode mode) { } } + public OpenCLBackend() { this(Mode.of().or(Mode.GPU())); } diff --git a/hat/backends/ffi/shared/include/shared.h b/hat/backends/ffi/shared/include/shared.h index 1c89e062167..8030a7be741 100644 --- a/hat/backends/ffi/shared/include/shared.h +++ b/hat/backends/ffi/shared/include/shared.h @@ -146,11 +146,16 @@ extern void hexdump(void *ptr, int buflen); void setBits(int bitBits) { bits|=bitBits; } - void resetBits(int bitsToReset) { + void xorBits(int bitsToReset) { // say bits = 0b0111 (7) and bitz = 0b0100 (4) int xored = bits^bitsToReset; // xored = 0b0011 (3) bits = xored; } + void resetBits(int bitsToReset) { + // say bits = 0b0111 (7) and bitz = 0b0100 (4) + bits = bits&~bitsToReset; // xored = 0b0011 (3) + //bits = xored; + } int getBits() { return bits; } diff --git a/hat/bld b/hat/bld index 83468bbac22..90f0cfa6d78 100644 --- a/hat/bld +++ b/hat/bld @@ -72,9 +72,7 @@ void main(String[] args) { * */ - var dir = DirEntry.current(); - - // var dir = DirEntry.current(); + var dir = DirEntry.current(); var hatCoreDir = dir.existingDir("hat"); var backends = dir.existingDir("backends"); var examples = dir.existingDir("examples"); @@ -100,9 +98,31 @@ void main(String[] args) { out.println(); var verbose = false; + var wrapJar= buildDir.jarFile("wrap.jar"); + var clWrapJar= buildDir.jarFile("clwrap.jar"); + var glWrapJar= buildDir.jarFile("glwrap.jar"); + var cuWrapJar= buildDir.jarFile("cuwrap.jar"); + var hatJar = buildDir.jarFile("hat-1.0.jar"); + + var hatJavacOpts = javacBuilder($ -> $ + .enable_preview() + .add_modules("jdk.incubator.code") + .add_exports_to_all_unnamed("java.base", "jdk.internal", "jdk.internal.vm.annotation") + .current_source() + ); - var wrap= jar(jar -> jar - .jarFile(buildDir.jarFile("wrap.jar")) + var hatJarOptions = jarBuilder($ -> $ + .verbose(verbose) + ); + jar(hatJarOptions, jar -> jar + .jarFile(hatJar) + .maven_style_root(hatCoreDir) + .javac(hatJavacOpts, javac -> { + }) + ); + + jar(jar -> jar + .jarFile(wrapJar) .maven_style_root(wrapsDir.dir("wrap")) .javac(javac -> javac.current_source()) ); @@ -124,9 +144,9 @@ void main(String[] args) { out.println("Using existing extracted "+openclCapability.jarFile(buildDir).fileName()); } jar(jar -> jar - .jarFile(buildDir.jarFile("clwrap.jar")) + .jarFile(clWrapJar) .maven_style_root(wrapsDir.dir("clwrap")) - .javac(javac -> javac.current_source().class_path(wrap, openclCapability.jarFile(buildDir))) + .javac(javac -> javac.current_source().class_path(wrapJar,hatJar, openclCapability.jarFile(buildDir))) ); } else { out.println("This platform does not have OpenCL"); @@ -147,13 +167,13 @@ void main(String[] args) { out.println("Using existing extracted "+openglCapability.jarFile(buildDir).fileName()); } jar(jar -> jar - .jarFile(buildDir.jarFile("glwrap.jar")) + .jarFile(glWrapJar) .maven_style_root(wrapsDir.dir("glwrap")) .javac(javac -> javac .current_source() .exclude(javaSrc -> javaSrc.matches("^.*/wrap/glwrap/GLCallbackEventHandler\\.java$")) //.exclude(javaSrc -> javaSrc.matches("^.*/wrap/glwrap/GLFuncEventHandler\\.java$")) - .class_path(wrap, openglCapability.jarFile(buildDir)) + .class_path(wrapJar, openglCapability.jarFile(buildDir)) ) ); } else { @@ -167,37 +187,19 @@ void main(String[] args) { out.println("This platform does not have CUDA"); } } - var hatJavacOpts = javacBuilder($ -> $ - .enable_preview() - .add_modules("jdk.incubator.code") - .add_exports_to_all_unnamed("java.base", "jdk.internal", "jdk.internal.vm.annotation") - .current_source() - ); - - var hatJarOptions = jarBuilder($ -> $ - .verbose(verbose) - ); - var hatJarFile = buildDir.jarFile("hat-1.0.jar"); - jar(hatJarOptions, jar -> jar - .jarFile(hatJarFile) - .maven_style_root(hatCoreDir) - .javac(hatJavacOpts, javac -> { - }) - ); - + // Here we create all ffi-backend jars. var ffiBackends = backends.existingDir("ffi"); - ffiBackends - .subDirs() + ffiBackends.subDirs() .filter(backend -> backend.failsToMatch("^.*(spirv|hip|shared|target|.idea)$")) .forEach(backend -> { - var jarFile = buildDir.jarFile("hat-backend-ffi-" + backend.fileName() + "-1.0.jar"); - out.println(jarFile.fileName()); + var ffiBackendJarFile = buildDir.jarFile("hat-backend-ffi-" + backend.fileName() + "-1.0.jar"); + out.println(ffiBackendJarFile.fileName()); jar(hatJarOptions, jar -> jar - .jarFile(jarFile) + .jarFile(ffiBackendJarFile) .maven_style_root(backend) - .javac(hatJavacOpts, javac -> javac.class_path(hatJarFile)) + .javac(hatJavacOpts, javac -> javac.class_path(hatJar)) ); }); @@ -208,19 +210,19 @@ void main(String[] args) { var jextractedBackendSharedResult = jar(hatJarOptions, jar -> jar .jarFile(buildDir.jarFile("hat-backend-jextracted-shared-1.0.jar")) .maven_style_root(jextractedBackendSharedDir) - .javac(hatJavacOpts, javac -> javac.verbose(true) - .class_path(hatJarFile) + .javac(hatJavacOpts, javac -> javac.verbose(verbose) + .class_path(hatJar) ) ); if (openclCapability.available()){ var jextractedBackendOpenCLDir = jextractedBackends.dir("opencl"); out.println("OpenCL jextracted "+jextractedBackendOpenCLDir.path()); - var jextractedOpenCLBackendOpenCLResult = jar(hatJarOptions, jar -> jar + jar(hatJarOptions, jar -> jar .jarFile(buildDir.jarFile("hat-backend-jextracted-opencl-1.0.jar")) .maven_style_root(jextractedBackendOpenCLDir) - .javac(hatJavacOpts, javac -> javac.verbose(true) - .class_path(hatJarFile, openclCapability.jarFile(buildDir), jextractedBackendSharedResult ) + .javac(hatJavacOpts, javac -> javac.verbose(verbose) + .class_path(hatJar, openclCapability.jarFile(buildDir), jextractedBackendSharedResult ) ) ); } @@ -231,10 +233,10 @@ void main(String[] args) { .subDirs() .filter(backend -> backend.failsToMatch("^.*(target|.idea)$")) .forEach(backend -> { - var jarFile = buildDir.jarFile("hat-backend-java-" + backend.fileName() + "-1.0.jar"); - out.println(jarFile.fileName()); + var backendJarFile = buildDir.jarFile("hat-backend-java-" + backend.fileName() + "-1.0.jar"); + out.println(backendJarFile.fileName()); jar(hatJarOptions, jar -> jar - .jarFile(jarFile) + .jarFile(backendJarFile) .dir_list(backend.dir("src/main/resources")) ); }); @@ -242,36 +244,43 @@ void main(String[] args) { // here we create the example jars examples.subDirs() - .filter(example -> example.failsToMatch("^.*(experiments|nbody|target|.idea)$")) + .filter(example -> example.failsToMatch("^.*(experiments|nbody|life|target|.idea)$")) .forEach(example -> { - var jarFile = buildDir.jarFile("hat-example-" + example.fileName() + "-1.0.jar"); - out.println(jarFile.fileName()); + var exampleJarFile = buildDir.jarFile("hat-example-" + example.fileName() + "-1.0.jar"); + out.println(exampleJarFile.fileName()); jar(hatJarOptions, jar -> jar - .jarFile(jarFile) + .jarFile(exampleJarFile) .maven_style_root(example) - .javac(hatJavacOpts, javac -> javac.class_path(hatJarFile, openclCapability.jarFile(buildDir), - buildDir.jarFile("wrap.jar"), - buildDir.jarFile("clwrap.jar"))) - .manifest(manifest -> manifest - .main_class(example.fileName() + ".Main") - ) + .javac(hatJavacOpts, javac -> javac.class_path(hatJar)) + .manifest(manifest -> manifest .main_class(example.fileName() + ".Main")) ); }); + if (jextractCapability.available() && openclCapability.available()) { + var example = examples.dir("life"); + var exampleJarFile = buildDir.jarFile("hat-example-" + example.fileName() + "-1.0.jar"); + out.println(exampleJarFile.fileName()); + jar(hatJarOptions, jar -> jar + .jarFile(exampleJarFile) + .maven_style_root(example) + .javac(hatJavacOpts, javac -> javac + .class_path(hatJar, wrapJar, clWrapJar, openclCapability.jarFile(buildDir), buildDir.jarFile("hat-backend-ffi-opencl-1.0.jar")) + ) + ); + } + if (jextractCapability.available() && openclCapability.available() && openglCapability.available()) { var example = examples.dir("nbody"); - var jarFile = buildDir.jarFile("hat-example-" + example.fileName() + "-1.0.jar"); - out.println(jarFile.fileName()); + var exampleJarFile = buildDir.jarFile("hat-example-" + example.fileName() + "-1.0.jar"); + out.println(exampleJarFile.fileName()); jar(hatJarOptions, jar -> jar - .jarFile(jarFile) + .jarFile(exampleJarFile) .maven_style_root(example) - .javac(hatJavacOpts, javac -> javac.class_path(hatJarFile, - buildDir.jarFile("wrap.jar"), - buildDir.jarFile("clwrap.jar"), - buildDir.jarFile("glwrap.jar"), - openclCapability.jarFile(buildDir), openglCapability.jarFile(buildDir))) + .javac(hatJavacOpts, javac -> javac + .class_path( hatJar, wrapJar, clWrapJar, glWrapJar, openclCapability.jarFile(buildDir), openglCapability.jarFile(buildDir),buildDir.jarFile("hat-backend-ffi-opencl-1.0.jar")) + ) ); } diff --git a/hat/examples/life/src/main/java/life/Main.java b/hat/examples/life/src/main/java/life/Main.java index 400a5249ac5..6a10202dc18 100644 --- a/hat/examples/life/src/main/java/life/Main.java +++ b/hat/examples/life/src/main/java/life/Main.java @@ -27,36 +27,41 @@ import hat.Accelerator; import hat.ComputeContext; import hat.KernelContext; -import static hat.ifacemapper.MappableIface.*; - +import hat.backend.ffi.OpenCLBackend; import hat.buffer.Buffer; import hat.ifacemapper.Schema; +import hat.ifacemapper.SegmentMapper; import io.github.robertograham.rleparser.RleParser; import io.github.robertograham.rleparser.domain.PatternData; import jdk.incubator.code.CodeReflection; -import wrap.Scalar; -import wrap.Sequence; import wrap.clwrap.CLPlatform; import wrap.clwrap.CLWrapComputeContext; import java.lang.foreign.Arena; -import java.lang.foreign.MemoryLayout; import java.lang.foreign.MemorySegment; import java.lang.foreign.ValueLayout; +import java.lang.invoke.MethodHandles; import java.util.List; import java.util.stream.IntStream; +import static hat.ifacemapper.MappableIface.RO; +import static hat.ifacemapper.MappableIface.RW; import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static java.lang.foreign.ValueLayout.JAVA_INT; -import static wrap.LayoutBuilder.structOf; public class Main { - final static int ZeroBase = 0; public final static byte ALIVE = (byte) 0xff; public final static byte DEAD = 0x00; public interface CellGrid extends Buffer { + /* + * struct CellGrid{ + * int width; + * int height; + * byte[width*height*2] cellArray; + * } + */ int width(); int height(); @@ -76,14 +81,25 @@ static CellGrid create(Accelerator accelerator, int width, int height) { ValueLayout valueLayout = JAVA_BYTE; long headerOffset = JAVA_INT.byteOffset() * 2; - default CellGrid copySliceTo(byte[] bytes, int to) { + default void copySliceTo(byte[] bytes, int to) { long offset = headerOffset + to * valueLayout.byteOffset(); MemorySegment.copy(Buffer.getMemorySegment(this), valueLayout, offset, bytes, 0, width() * height()); - return this; + + } + + default int wxh() { + return width() * height(); } } public interface Control extends Buffer { + /* + * struct Control{ + * int from; + * int to; + * long generation + * } + */ int from(); void from(int from); @@ -92,351 +108,262 @@ public interface Control extends Buffer { void to(int to); - Schema<Control> schema = Schema.of(Control.class, lifeSupport -> lifeSupport.fields("from", "to")); + long generation(); - static Control create(Accelerator accelerator, CellGrid CLWrapCellGrid) { - var instance = schema.allocate(accelerator); - instance.to(CLWrapCellGrid.width() * CLWrapCellGrid.height()); - instance.from(0); - return instance; - } - } + void generation(long generation); + void requiredFrameRate(long requiredFrameRate); + long requiredFrameRate(); + void maxGenerations(long maxGenerations); + long maxGenerations(); - public static class Compute { - @CodeReflection - public static int val(@RO CellGrid grid, int from, int w, int x, int y) { - return grid.cell( ((long) y * w) + x +from)&1; - } + Schema<Control> schema = Schema.of( + Control.class, control -> + control.fields("from", "to", "generation", "requiredFrameRate", "maxGenerations")); - @CodeReflection - public static void life(@RO KernelContext kc, @RO Control control, @RW CellGrid cellGrid) { - if (kc.x < kc.maxX) { - int w = cellGrid.width(); - int h = cellGrid.height(); - int from = control.from(); - int to = control.to(); - int x = kc.x % w; - int y = kc.x / w; - byte cell = cellGrid.cell(kc.x + from); - if (x>0 && x<(w-1) && y>0 && y<(h-1)) { // passports please - int count = - val(cellGrid,from,w,x-1,y-1) - +val(cellGrid,from,w,x-1,y+0) - +val(cellGrid,from,w,x-1,y+1) - +val(cellGrid,from,w,x+0,y-1) - +val(cellGrid,from,w,x+0,y+1) - +val(cellGrid,from,w,x+1,y+0) - +val(cellGrid,from,w,x+1,y-1) - +val(cellGrid,from,w,x+1,y+1); - cell = ((count == 3) || ((count == 2) && (cell == ALIVE))) ? ALIVE : DEAD;// B3/S23. - } - cellGrid.cell(kc.x + to, cell); - } + static Control create(Accelerator accelerator, CellGrid cellGrid) { + var instance = schema.allocate(accelerator); + instance.from(cellGrid.width() * cellGrid.height()); + instance.to(0); + instance.generation(0); + return instance; } - @CodeReflection - static public void compute(final ComputeContext cc, Viewer viewer, Control ctrl, CellGrid grid) { - // while (viewer.isVisible()) { - cc.dispatchKernel( - grid.width() * grid.height(), - kc -> Compute.life(kc, ctrl, grid) - ); - int to = ctrl.from(); ctrl.from(ctrl.to()); ctrl.to(to); //swap from/to - - - // if (start==0L) { - // start = System.currentTimeMillis(); - // }else { - // this.controls.generation.setText(String.format("%8d", ++generationCounter)); - // this.controls.generationsPerSecond.setText( - // String.format("%5.2f", (generationCounter * 1000f) / (System.currentTimeMillis() - start)) - // ); - viewer.mainPanel.repaint(); - // } - - // if (viewer.isReadyForUpdate()) { - // viewer.update(grid, to); - // } - // } - } } - public static class CLWrapCellGrid { - /* - * struct CellGrid{ - * int width; - * int height; - * byte[width*height*2] cellArray; - * } - */ - final MemoryLayout layout; - final MemorySegment segment; - final Scalar width; - final Scalar height; - final Sequence cellArray; - - - final private int w; - final private int h; - final private int wxh; - - CLWrapCellGrid(Arena arena, int w, int h) { - this.w = w; - this.h = h; - this.wxh = w * h; - this.layout = structOf("CLWrapCellGrid", $ -> $ - .i32("width") - .i32("height") - .i8Seq("cellArray", (long) wxh * 2) - ); - this.segment = arena.allocate(layout); - this.width = Scalar.of(segment, layout, "width", this.w); - this.height = Scalar.of(segment, layout, "height", this.h); - this.cellArray = Sequence.of(segment, layout, "cellArray"); - } - - int width() { - return w;//width.i32(); - } - - int height() { - return h;//height.i32(); - } - byte cell(int idx) { - return cellArray.i8(idx); - } + public static class Compute { + public static final String codeHeader= """ + #define ALIVE -1 + #define DEAD 0 + typedef struct control_s{ + int from; + int to; + long generation; + }control_t; - void cell(int idx, byte v) { - cellArray.set(idx, v);// - } + typedef struct cellGrid_s{ + int width; + int height; + signed char cellArray[0]; + }cellGrid_t; - CLWrapCellGrid copySliceTo(byte[] bytes, int to) { - MemorySegment.copy(segment, JAVA_BYTE, - JAVA_INT.byteSize() + JAVA_INT.byteSize() + to * JAVA_BYTE.byteSize(), - bytes, 0, wxh); - return this; - } + """; - public int wxh() { - return wxh; - } - } + final static String codeVal = """ + inline int val(__global cellGrid_t *CLWrapCellGrid, int from, int w, int x, int y) { + return CLWrapCellGrid->cellArray[((y * w) + x + from)] & 1; + } + """; - public static class CLWrapControl { - final MemorySegment segment; - final MemoryLayout layout; - final Scalar from; - final Scalar to; - final Scalar generation; - - - CLWrapControl(Arena arena, CLWrapCellGrid CLWrapCellGrid) { - this.layout = structOf("CLWrapControl", $ -> $ - .i32("from") - .i32("to") - .i64("generation") - ); - this.segment = arena.allocate(this.layout); - this.from = Scalar.of(this.segment, this.layout, "from", CLWrapCellGrid.width() * CLWrapCellGrid.height()); - this.to = Scalar.of(this.segment, this.layout, "to", 0); - this.generation = Scalar.of(this.segment, this.layout, "generation", 0); + @CodeReflection + public static int val(@RO CellGrid grid, int from, int w, int x, int y) { + return grid.cell(((long) y * w) + x + from) & 1; } - int from() { - return this.from.i32(); + final static String codeLifePerIdx = """ + __kernel void life( __global cellGrid_t *CLWrapCellGrid ,__global control_t *CLWrapControl ){ + int kcx = get_global_id(0); + int w = CLWrapCellGrid->width; + int h = CLWrapCellGrid->height; + int from = CLWrapControl->from; + int to = CLWrapControl->to; + int x = kcx % w; + int y = kcx / w; + signed char cell = CLWrapCellGrid->cellArray[kcx + from]; + if (x > 0 && x < (w - 1) && y > 0 && y < (h - 1)) { // passports please + int count = + val(CLWrapCellGrid, from, w, x - 1, y - 1) + + val(CLWrapCellGrid, from, w, x - 1, y + 0) + + val(CLWrapCellGrid, from, w, x - 1, y + 1) + + val(CLWrapCellGrid, from, w, x + 0, y - 1) + + val(CLWrapCellGrid, from, w, x + 0, y + 1) + + val(CLWrapCellGrid, from, w, x + 1, y + 0) + + val(CLWrapCellGrid, from, w, x + 1, y - 1) + + val(CLWrapCellGrid, from, w, x + 1, y + 1); + cell = ((count == 3) || ((count == 2) && (cell == ALIVE))) ? ALIVE : DEAD;// B3/S23. + } + CLWrapCellGrid->cellArray[kcx + to]= cell; } + """; - int to() { - return this.to.i32(); + @CodeReflection + public static void lifePerIdx(int idx, @RO Control control, @RW CellGrid cellGrid) { + int w = cellGrid.width(); + int h = cellGrid.height(); + int from = control.from(); + int to = control.to(); + int x = idx % w; + int y = idx / w; + byte cell = cellGrid.cell(idx + from); + if (x > 0 && x < (w - 1) && y > 0 && y < (h - 1)) { // passports please + int count = + val(cellGrid, from, w, x - 1, y - 1) + + val(cellGrid, from, w, x - 1, y + 0) + + val(cellGrid, from, w, x - 1, y + 1) + + val(cellGrid, from, w, x + 0, y - 1) + + val(cellGrid, from, w, x + 0, y + 1) + + val(cellGrid, from, w, x + 1, y + 0) + + val(cellGrid, from, w, x + 1, y - 1) + + val(cellGrid, from, w, x + 1, y + 1); + cell = ((count == 3) || ((count == 2) && (cell == ALIVE))) ? ALIVE : DEAD;// B3/S23. + } + cellGrid.cell(idx + to, cell); } - void generation(long generation) { - this.generation.set(generation); - } - void swap() { - int from = from(); - int to = to(); - this.to.set(from); - this.from.set(to); + @CodeReflection + public static void life(@RO KernelContext kc, @RO Control control, @RW CellGrid cellGrid) { + if (kc.x < kc.maxX) { + Compute.lifePerIdx(kc.x, control, cellGrid); + } } - } - - public static int val(CLWrapCellGrid grid, int from, int w, int x, int y) { - return grid.cell((y * w) + x + from) & 1; - } - public static void life(int kcx, CLWrapControl CLWrapControl, CLWrapCellGrid CLWrapCellGrid) { - - int w = CLWrapCellGrid.width(); - int h = CLWrapCellGrid.height(); - int from = CLWrapControl.from(); - int to = CLWrapControl.to(); - int x = kcx % w; - int y = kcx / w; - byte cell = CLWrapCellGrid.cell(kcx + from); - if (x > 0 && x < (w - 1) && y > 0 && y < (h - 1)) { // passports please - int count = - val(CLWrapCellGrid, from, w, x - 1, y - 1) - + val(CLWrapCellGrid, from, w, x - 1, y + 0) - + val(CLWrapCellGrid, from, w, x - 1, y + 1) - + val(CLWrapCellGrid, from, w, x + 0, y - 1) - + val(CLWrapCellGrid, from, w, x + 0, y + 1) - + val(CLWrapCellGrid, from, w, x + 1, y + 0) - + val(CLWrapCellGrid, from, w, x + 1, y - 1) - + val(CLWrapCellGrid, from, w, x + 1, y + 1); - cell = ((count == 3) || ((count == 2) && (cell == ALIVE))) ? ALIVE : DEAD;// B3/S23. + @CodeReflection + static public void compute(final ComputeContext cc, Viewer viewer, Control ctrl, CellGrid cellGrid) { + long framesSinceLastChange = 0; + long lastFrame = System.currentTimeMillis(); + while (ctrl.generation() < ctrl.maxGenerations()) { + final long now = System.currentTimeMillis(); + cc.dispatchKernel( + cellGrid.width() * cellGrid.height(), + kc -> Compute.life(kc, ctrl, cellGrid) + ); + int to = ctrl.from(); + ctrl.from(ctrl.to()); + ctrl.to(to); + boolean displayThisGeneration = + viewer.mainPanel.state.equals(Viewer.MainPanel.State.Done) + && (now - lastFrame >= ( 1000 / ctrl.requiredFrameRate())); + if (displayThisGeneration) { + lastFrame = now; + viewer.controls.updateGenerationCounter(ctrl.generation(), framesSinceLastChange, + (1000 / ctrl.requiredFrameRate())); + + cellGrid.copySliceTo(viewer.mainPanel.rasterData, ctrl.from()); + viewer.mainPanel.state = Viewer.MainPanel.State.Scheduled; + viewer.mainPanel.repaint(); + } + framesSinceLastChange++; + ctrl.generation(ctrl.generation()+1); + } } - CLWrapCellGrid.cell(kcx + to, cell); - // } } public static void main(String[] args) { + Accelerator accelerator = new Accelerator(MethodHandles.lookup(), new OpenCLBackend("GPU,MINIMIZE_COPIES")); + Arena arena = Arena.global(); PatternData patternData = RleParser.readPatternData( Main.class.getClassLoader().getResourceAsStream("orig.rle") ); // We oversize the grid by adding 1 to n,e,w and s - CLWrapCellGrid CLWrapCellGrid = new CLWrapCellGrid( - Arena.global(), + + CellGrid cellGrid = CellGrid.create(accelerator, patternData.getMetaData().getWidth() + 2, - patternData.getMetaData().getHeight() + 2 - ); + patternData.getMetaData().getHeight() + 2); // By shifting all cells +1,+1 so we only need to scan 1..width-1, 1..height-1 // we don't worry about possibly finding cells in 0,n width,n or n,0 height,n - patternData.getLiveCells().getCoordinates().stream().forEach(c -> { - CLWrapCellGrid.cell((1 + c.getX()) + (1 + c.getY()) * CLWrapCellGrid.width(), ALIVE); - // CLWrapCellGrid.cell(CLWrapCellGrid.wxh + (1 + c.getX()) + (1 + c.getY()) * CLWrapCellGrid.width(), ALIVE); - } + patternData.getLiveCells().getCoordinates().stream().forEach(c -> + cellGrid.cell((1 + c.getX()) + (1 + c.getY()) * cellGrid.width(), ALIVE) ); - CLWrapControl CLWrapControl = new CLWrapControl(arena, CLWrapCellGrid); - Viewer viewer = new Viewer("Life", CLWrapCellGrid); + Control control = Control.create(accelerator, cellGrid); - CLWrapComputeContext CLWrapComputeContext = new CLWrapComputeContext(arena, 20); + CLWrapComputeContext clWrapComputeContext = new CLWrapComputeContext(arena, 20); List<CLPlatform> platforms = CLPlatform.platforms(arena); - System.out.println("platforms " + platforms.size()); + // System.out.println("platforms " + platforms.size()); CLPlatform platform = platforms.get(0); - platform.devices.forEach(device -> { - System.out.println(" Compute Units " + device.computeUnits()); - System.out.println(" Device Name " + device.deviceName()); - System.out.println(" Device Vendor " + device.deviceVendor()); - System.out.println(" Built In Kernels " + device.builtInKernels()); - }); CLPlatform.CLDevice device = platform.devices.get(0); - System.out.println(" Compute Units " + device.computeUnits()); - System.out.println(" Device Name " + device.deviceName()); - System.out.println(" Device Vendor " + device.deviceVendor()); - - System.out.println(" Built In Kernels " + device.builtInKernels()); + // System.out.println(" Compute Units " + device.computeUnits()); + // System.out.println(" Device Name " + device.deviceName()); + // System.out.println(" Device Vendor " + device.deviceVendor()); + // System.out.println(" Built In Kernels " + device.builtInKernels()); CLPlatform.CLDevice.CLContext context = device.createContext(); - var code = """ - #define ALIVE -1 - #define DEAD 0 - typedef struct control_s{ - int from; - int to; - long generation; - }control_t; - - typedef struct cellGrid_s{ - int width; - int height; - signed char cellArray[0]; - }cellGrid_t; - - inline int val(__global cellGrid_t *CLWrapCellGrid, int from, int w, int x, int y) { - return CLWrapCellGrid->cellArray[((y * w) + x + from)] & 1; - } - __kernel void life( __global cellGrid_t *CLWrapCellGrid ,__global control_t *CLWrapControl ){ - int kcx = get_global_id(0); - int w = CLWrapCellGrid->width; - int h = CLWrapCellGrid->height; - int from = CLWrapControl->from; - int to = CLWrapControl->to; - int x = kcx % w; - int y = kcx / w; - signed char cell = CLWrapCellGrid->cellArray[kcx + from]; - if (x > 0 && x < (w - 1) && y > 0 && y < (h - 1)) { // passports please - int count = - val(CLWrapCellGrid, from, w, x - 1, y - 1) - + val(CLWrapCellGrid, from, w, x - 1, y + 0) - + val(CLWrapCellGrid, from, w, x - 1, y + 1) - + val(CLWrapCellGrid, from, w, x + 0, y - 1) - + val(CLWrapCellGrid, from, w, x + 0, y + 1) - + val(CLWrapCellGrid, from, w, x + 1, y + 0) - + val(CLWrapCellGrid, from, w, x + 1, y - 1) - + val(CLWrapCellGrid, from, w, x + 1, y + 1); - cell = ((count == 3) || ((count == 2) && (cell == ALIVE))) ? ALIVE : DEAD;// B3/S23. - } - CLWrapCellGrid->cellArray[kcx + to]= cell; - } - """; - var program = context.buildProgram(code); + var program = context.buildProgram(Compute.codeHeader +Compute.codeVal + Compute.codeLifePerIdx); CLPlatform.CLDevice.CLContext.CLProgram.CLKernel kernel = program.getKernel("life"); - CLWrapComputeContext.MemorySegmentState cellGridState = CLWrapComputeContext.register(CLWrapCellGrid.segment); - CLWrapComputeContext.MemorySegmentState controlState = CLWrapComputeContext.register(CLWrapControl.segment); - - - CLWrapCellGrid.copySliceTo(viewer.mainPanel.rasterData, CLWrapControl.to()); - CLWrapControl.swap(); + boolean useHat = true; + boolean useBufferBitz = false; + Viewer viewer = new Viewer("Life", cellGrid, useHat); + cellGrid.copySliceTo(viewer.mainPanel.rasterData, control.to()); + var tempFrom = control.from(); + control.from(control.to()); + control.to(tempFrom); viewer.mainPanel.repaint(); + control.requiredFrameRate(10); + control.maxGenerations(1000000); viewer.waitForStart(); - - long start = System.currentTimeMillis(); - long generationCounter = 0; - - long requiredFrameRate = 10; - long generations = 1000000; - long generationsSinceLastChange = 0; - long framesSinceLastChange = 0; - - long msPerFrame = 1000 / requiredFrameRate; - long lastFrame = start; - controlState.copyToDevice = true; - controlState.copyFromDevice = true; - cellGridState.copyToDevice = true; - viewer.mainPanel.state = Viewer.MainPanel.State.Done; - while (generationCounter < generations) { - boolean alwaysCopy = viewer.controls.alwaysCopy(); - long now = System.currentTimeMillis(); - boolean displayThisGeneration = - viewer.mainPanel.state.equals(Viewer.MainPanel.State.Done) - && (now - lastFrame >= msPerFrame); - - if (viewer.controls.useGPU()) { - cellGridState.copyToDevice = alwaysCopy || generationCounter == 0; // only first - cellGridState.copyFromDevice = alwaysCopy || displayThisGeneration; - kernel.run(CLWrapComputeContext, CLWrapCellGrid.wxh, cellGridState, controlState); - } else { - IntStream.range(0, CLWrapCellGrid.wxh()).parallel().forEach(kcx -> - life(kcx, CLWrapControl, CLWrapCellGrid) - ); + if (useHat){ + accelerator.compute(cc->Compute.compute(cc, viewer, control, cellGrid )); + }else { + CLWrapComputeContext.MemorySegmentState cellGridState = useBufferBitz?null:clWrapComputeContext.register(Buffer.getMemorySegment((CellGrid) cellGrid)); + CLWrapComputeContext.MemorySegmentState controlState = useBufferBitz?null:clWrapComputeContext.register(Buffer.getMemorySegment(control)); + + long start = System.currentTimeMillis(); + long generationsSinceLastChange = 0; + long framesSinceLastChange = 0; + long lastFrame = start; + if (!useBufferBitz) { + controlState.copyToDevice = true; + controlState.copyFromDevice = true; + cellGridState.copyToDevice = true; + }else{ + // System.out.println("At start control"+SegmentMapper.BufferState.of(control).setHostDirty(true)); + + // System.out.println("At start cellgrid "+SegmentMapper.BufferState.of(cellGrid).setHostDirty(true).setDeviceDirty(true)); } - CLWrapControl.generation(generationCounter); - CLWrapControl.swap(); - ++generationCounter; - ++generationsSinceLastChange; - if (displayThisGeneration) { - if (viewer.controls.updated) { - // When the user changes something we have to update FPS - generationsSinceLastChange = 0; - framesSinceLastChange = 0; - viewer.controls.updated = false; + viewer.mainPanel.state = Viewer.MainPanel.State.Done; + while (control.generation() < control.maxGenerations()) { + boolean alwaysCopy = !viewer.controls.minimizeCopies(); + final long now = System.currentTimeMillis(); + boolean displayThisGeneration = + viewer.mainPanel.state.equals(Viewer.MainPanel.State.Done) + && (now - lastFrame >= ( 1000 / control.requiredFrameRate())); + + + if (viewer.controls.useGPU()) { + if (useBufferBitz){ + SegmentMapper.BufferState bufferState = SegmentMapper.BufferState.of(cellGrid); + bufferState.setHostDirty(alwaysCopy || (control.generation() == 0)); // only first + bufferState.setDeviceDirty(alwaysCopy || displayThisGeneration); + // System.out.println("displayThisGeneration: "+displayThisGeneration + " useBufferBitz == true so "+bufferState); + kernel.run(clWrapComputeContext, cellGrid.wxh(), cellGrid, control); + }else { + cellGridState.copyToDevice = alwaysCopy || control.generation() == 0; // only first + cellGridState.copyFromDevice = alwaysCopy || displayThisGeneration; + kernel.run(clWrapComputeContext, cellGrid.wxh(), cellGridState, controlState); + } + } else { + IntStream.range(0, cellGrid.wxh()).parallel().forEach(kcx -> + Compute.lifePerIdx(kcx, control, cellGrid) + ); + } + tempFrom = control.from(); + control.from(control.to()); + control.to(tempFrom); + control.generation(control.generation() + 1); + + ++generationsSinceLastChange; + if (displayThisGeneration) { + if (viewer.controls.updated) { + // When the user changes something we have to update FPS + generationsSinceLastChange = 0; + framesSinceLastChange = 0; + viewer.controls.updated = false; + } + viewer.controls.updateGenerationCounter(generationsSinceLastChange, framesSinceLastChange, + ( 1000 / control.requiredFrameRate())); + cellGrid.copySliceTo(viewer.mainPanel.rasterData, control.from()); + viewer.mainPanel.state = Viewer.MainPanel.State.Scheduled; + viewer.mainPanel.repaint(); + lastFrame = now; + framesSinceLastChange++; } - viewer.controls.updateGenerationCounter(generationsSinceLastChange, framesSinceLastChange, msPerFrame); - CLWrapCellGrid.copySliceTo(viewer.mainPanel.rasterData, CLWrapControl.from()); - viewer.mainPanel.state = Viewer.MainPanel.State.Scheduled; - viewer.mainPanel.repaint(); - lastFrame = now; - framesSinceLastChange++; } } } diff --git a/hat/examples/life/src/main/java/life/Main.java.no b/hat/examples/life/src/main/java/life/Main.java.no deleted file mode 100644 index fa696439076..00000000000 --- a/hat/examples/life/src/main/java/life/Main.java.no +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ -package life; - -import hat.Accelerator; -import hat.ComputeContext; -import hat.KernelContext; -import hat.backend.Backend; -import hat.buffer.Buffer; -import hat.ifacemapper.MappableIface.*; -import hat.ifacemapper.Schema; -import io.github.robertograham.rleparser.RleParser; -import io.github.robertograham.rleparser.domain.PatternData; - -import java.lang.foreign.MemorySegment; -import java.lang.foreign.ValueLayout; -import java.lang.invoke.MethodHandles; -import jdk.incubator.code.CodeReflection; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_INT; - -public class Main { - - public interface CellGrid extends Buffer { - int width(); - - int height(); - - byte cell(long idx); - - void cell(long idx, byte b); - - Schema<CellGrid> schema = Schema.of(CellGrid.class, lifeData -> lifeData - .arrayLen("width", "height").stride(2).array("cell") - ); - - static CellGrid create(Accelerator accelerator, int width, int height) { - return schema.allocate(accelerator, width, height); - } - - ValueLayout valueLayout = JAVA_BYTE; - long headerOffset = JAVA_INT.byteOffset() * 2; - - default CellGrid copySliceTo(byte[] bytes, int to) { - long offset = headerOffset + to * valueLayout.byteOffset(); - MemorySegment.copy(Buffer.getMemorySegment(this), valueLayout, offset, bytes, 0, width() * height()); - return this; - } - } - - public interface Control extends Buffer { - int from(); - - void from(int from); - - int to(); - - void to(int to); - - Schema<Control> schema = Schema.of(Control.class, lifeSupport -> lifeSupport.fields("from", "to")); - - static Control create(Accelerator accelerator, CellGrid CLWrapCellGrid) { - var instance = schema.allocate(accelerator); - instance.to(CLWrapCellGrid.width() * CLWrapCellGrid.height()); - instance.from(0); - return instance; - } - } - - - public final static byte ALIVE = (byte) 0xff; - public final static byte DEAD = 0x00; - - public static class Compute { - @CodeReflection - public static int val(@RO CellGrid grid, int from, int w, int x, int y) { - return grid.cell( ((long) y * w) + x +from)&1; - } - - @CodeReflection - public static void life(@RO KernelContext kc, @RO Control CLWrapControl, @RW CellGrid CLWrapCellGrid) { - if (kc.x < kc.maxX) { - int w = CLWrapCellGrid.width(); - int h = CLWrapCellGrid.height(); - int from = CLWrapControl.from(); - int to = CLWrapControl.to(); - int x = kc.x % w; - int y = kc.x / w; - byte cell = CLWrapCellGrid.cell(kc.x + from); - if (x>0 && x<(w-1) && y>0 && y<(h-1)) { // passports please - int count = - val(CLWrapCellGrid,from,w,x-1,y-1) - +val(CLWrapCellGrid,from,w,x-1,y+0) - +val(CLWrapCellGrid,from,w,x-1,y+1) - +val(CLWrapCellGrid,from,w,x+0,y-1) - +val(CLWrapCellGrid,from,w,x+0,y+1) - +val(CLWrapCellGrid,from,w,x+1,y+0) - +val(CLWrapCellGrid,from,w,x+1,y-1) - +val(CLWrapCellGrid,from,w,x+1,y+1); - cell = ((count == 3) || ((count == 2) && (cell == ALIVE))) ? ALIVE : DEAD;// B3/S23. - } - CLWrapCellGrid.cell(kc.x + to, cell); - } - } - - - @CodeReflection - static public void compute(final ComputeContext cc, Viewer viewer, Control ctrl, CellGrid grid) { - // while (viewer.isVisible()) { - cc.dispatchKernel( - grid.width() * grid.height(), - kc -> Compute.life(kc, ctrl, grid) - ); - int to = ctrl.from(); ctrl.from(ctrl.to()); ctrl.to(to); //swap from/to - if (viewer.isReadyForUpdate()) { - viewer.update(grid, to); - } - // } - } - } - - - public static void main(String[] args) { - boolean headless = Boolean.getBoolean("headless") || (args.length > 0 && args[0].equals("--headless")); - - Accelerator accelerator = new Accelerator(MethodHandles.lookup(), /*Backend.JAVA_MULTITHREADED);//*/Backend.FIRST); - - PatternData patternData = RleParser.readPatternData( - Main.class.getClassLoader().getResourceAsStream("orig.rle") - ); - CellGrid CLWrapCellGrid = CellGrid.create(accelerator, - patternData.getMetaData().getWidth() + 2, - patternData.getMetaData().getHeight() + 2 - - ); - patternData.getLiveCells().getCoordinates().stream().forEach(c -> - CLWrapCellGrid.cell((1 + c.getX()) + (1 + c.getY()) * CLWrapCellGrid.width(), ALIVE) - ); - - Control CLWrapControl = Control.create(accelerator, CLWrapCellGrid); - final Viewer viewer = new Viewer("Life", CLWrapControl, CLWrapCellGrid); - viewer.update(CLWrapCellGrid, 0); - viewer.waitForStart(); - while (viewer.isVisible()) { - accelerator.compute(cc -> Compute.compute(cc, viewer, CLWrapControl, CLWrapCellGrid)); - } - - } -} diff --git a/hat/examples/life/src/main/java/life/Viewer.java b/hat/examples/life/src/main/java/life/Viewer.java index 654aae0715c..4a882fa93cd 100644 --- a/hat/examples/life/src/main/java/life/Viewer.java +++ b/hat/examples/life/src/main/java/life/Viewer.java @@ -49,14 +49,14 @@ import java.awt.image.DataBufferByte; public class Viewer extends JFrame { - + boolean useHat = false; private final Object doorBell = new Object(); final Controls controls; final MainPanel mainPanel; volatile private boolean started=false; static final public class MainPanel extends JComponent { - enum State {Scheduled, Done}; + public enum State {Scheduled, Done}; public volatile State state = State.Done; final double IN = 1.1; @@ -67,20 +67,13 @@ enum State {Scheduled, Done}; private double zoomFactor; private double prevZoomFactor; private boolean zooming; - private boolean released; + private boolean mouseReleased; private double xOffset = 0; private double yOffset = 0; private Point startPoint; - class Drag{ - public int xDiff; - public int yDiff; - Drag(int xDiff, int yDiff) { - this.xDiff = xDiff; - this.yDiff = yDiff; - } - } + record Drag(int xDiff, int yDiff){ } Drag drag = null; @Override @@ -115,13 +108,13 @@ public void mouseDragged(MouseEvent e) { addMouseListener(new MouseAdapter() { @Override public void mousePressed(MouseEvent e) { - released = false; + mouseReleased = false; startPoint = MouseInfo.getPointerInfo().getLocation(); + repaint(); } - @Override public void mouseReleased(MouseEvent e) { - released = true; + mouseReleased = true; repaint(); } }); @@ -131,50 +124,55 @@ public void mouseReleased(MouseEvent e) { public void paint(Graphics g) { super.paint(g); Graphics2D g2 = (Graphics2D) g; - AffineTransform at = new AffineTransform(); + AffineTransform affineTransform = new AffineTransform(); if (zooming) { double xRel = MouseInfo.getPointerInfo().getLocation().getX() - getLocationOnScreen().getX(); double yRel = MouseInfo.getPointerInfo().getLocation().getY() - getLocationOnScreen().getY(); double zoomDiv = zoomFactor / prevZoomFactor; xOffset = (zoomDiv) * (xOffset) + (1 - zoomDiv) * xRel; yOffset = (zoomDiv) * (yOffset) + (1 - zoomDiv) * yRel; - at.translate(xOffset, yOffset); + affineTransform.translate(xOffset, yOffset); prevZoomFactor = zoomFactor; zooming = false; } else if (drag!= null) { - at.translate(xOffset +drag.xDiff, yOffset + drag.yDiff); - if (released) { + affineTransform.translate(xOffset +drag.xDiff, yOffset + drag.yDiff); + if (mouseReleased) { xOffset += drag.xDiff; yOffset += drag.yDiff; drag = null; } } else{ - at.translate(xOffset, yOffset); + affineTransform.translate(xOffset, yOffset); } - at.scale(zoomFactor, zoomFactor); - g2.transform(at); - g2.setColor(Color.BLACK); - g2.fillRect(0-5000, 0-5000, image.getWidth()+10000, image.getHeight()+10000); + affineTransform.scale(zoomFactor, zoomFactor); + g2.transform(affineTransform); + g2.setColor(Color.DARK_GRAY); + g2.fillRect(-image.getWidth(),-image.getHeight(), image.getWidth()*3, image.getHeight()*3); g2.drawImage(image, 0,0, image.getWidth(), image.getHeight(), 0, 0, image.getWidth(), image.getHeight(), this); state = State.Done; } } public static class Controls{ + private boolean useHat; private JTextField generationTextField; - private JTextField generationsPerSecondTextField; - private JButton startButton; - private JToggleButton useGPUToggleButton; - private JToggleButton alwaysCopyToggleButton; - private JComboBox<String> generationsPerFrameComboBox; + private JTextField generationsPerSecondTextField; + private JButton startButton; + private JToggleButton useGPUToggleButton; + private JToggleButton minimizeCopiesToggleButton; + private JComboBox<String> generationsPerFrameComboBox; public volatile boolean updated = false; - Controls(JMenuBar menuBar){ + Controls(JMenuBar menuBar, boolean useHat){ + this.useHat = useHat; ((JButton) menuBar.add(new JButton("Exit"))).addActionListener(_ -> System.exit(0)); this.startButton = (JButton) menuBar.add(new JButton("Start")); - this.useGPUToggleButton =addToggle(menuBar, "Java", "GPU"); - this.alwaysCopyToggleButton = addToggle(menuBar,"Minimize Moves","Always Copy"); - // this.generationsPerFrameComboBox = (JComboBox<String>) menuBar.add(new JComboBox<String>( - // new String[]{"1", "10", "20"}) - // ); + if (!useHat) { + this.useGPUToggleButton = addToggle(menuBar, "Java", "GPU"); + this.minimizeCopiesToggleButton = addToggle(menuBar, "Always Copy", "Minimize Moves"); + this.minimizeCopiesToggleButton.setEnabled(false); + useGPUToggleButton.addChangeListener(event->{ + this.minimizeCopiesToggleButton.setEnabled(useGPUToggleButton.isSelected()); + }); + } generationTextField = addLabelledTextField(menuBar,"Gen"); generationsPerSecondTextField = addLabelledTextField(menuBar,"Gen/Sec"); } @@ -200,8 +198,8 @@ JTextField addLabelledTextField(JMenuBar menuBar, String name){ return textField; } - public boolean alwaysCopy() { - return alwaysCopyToggleButton.isSelected(); + public boolean minimizeCopies() { + return minimizeCopiesToggleButton.isSelected(); } public boolean useGPU() { @@ -220,11 +218,12 @@ public void updateGenerationCounter(long generationCounter, long frameCounter, l } } - Viewer(String title, Main.CLWrapCellGrid CLWrapCellGrid) { + Viewer(String title, Main.CellGrid cellGrid, boolean useHat) { super(title); - this.mainPanel = new MainPanel(new BufferedImage(CLWrapCellGrid.width(), CLWrapCellGrid.height(), BufferedImage.TYPE_BYTE_GRAY)); + this.useHat = useHat; + this.mainPanel = new MainPanel(new BufferedImage(cellGrid.width(), cellGrid.height(), BufferedImage.TYPE_BYTE_GRAY)); JMenuBar menuBar = new JMenuBar(); - this.controls = new Controls(menuBar); + this.controls = new Controls(menuBar, useHat); setJMenuBar(menuBar); controls.startButton.addActionListener(_ -> {started=true;synchronized (doorBell) {doorBell.notify();}}); this.getContentPane().add(this.mainPanel); diff --git a/hat/examples/life/src/main/java/life/Viewer.java.no b/hat/examples/life/src/main/java/life/Viewer.java.no deleted file mode 100644 index 9da2ae08bf1..00000000000 --- a/hat/examples/life/src/main/java/life/Viewer.java.no +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ -package life; - -import javax.swing.Box; -import javax.swing.JButton; -import javax.swing.JComponent; -import javax.swing.JFrame; -import javax.swing.JLabel; -import javax.swing.JMenuBar; -import javax.swing.JTextField; -import javax.swing.WindowConstants; -import java.awt.Color; -import java.awt.Dimension; -import java.awt.Graphics; -import java.awt.Graphics2D; -import java.awt.GraphicsEnvironment; -import java.awt.MouseInfo; -import java.awt.Point; -import java.awt.Rectangle; -import java.awt.event.MouseAdapter; -import java.awt.event.MouseEvent; -import java.awt.event.MouseMotionAdapter; -import java.awt.geom.AffineTransform; -import java.awt.image.BufferedImage; -import java.awt.image.DataBufferByte; - -public class Viewer extends JFrame { - - - private final Object doorBell = new Object(); - final Controls controls; - final MainPanel mainPanel; - volatile private boolean started=false; - - static final public class MainPanel extends JComponent { - final double IN = 1.1; - final double OUT = 1/IN; - private final BufferedImage image; - final byte[] rasterData; - private final double initialZoomFactor; - private double zoomFactor; - private double prevZoomFactor; - private boolean zooming; - private boolean released; - private double xOffset = 0; - private double yOffset = 0; - private Point startPoint; - - - class Drag{ - public int xDiff; - public int yDiff; - Drag(int xDiff, int yDiff) { - this.xDiff = xDiff; - this.yDiff = yDiff; - } - } - Drag drag = null; - - @Override - public Dimension getPreferredSize() { - return new Dimension((int)(image.getWidth()*zoomFactor), (int)(image.getHeight()*zoomFactor)); - } - public MainPanel(BufferedImage image) { - this.image = image; - Rectangle bounds = GraphicsEnvironment.getLocalGraphicsEnvironment().getMaximumWindowBounds(); - this.initialZoomFactor = Math.min((bounds.width-20)/(float)image.getWidth(), - (bounds.height-20)/(float)image.getHeight()); - this.rasterData = ((DataBufferByte) image.getRaster().getDataBuffer()).getData(); - this.prevZoomFactor =initialZoomFactor; - this.zoomFactor = initialZoomFactor; - addMouseWheelListener(e -> { - zooming = true; - zoomFactor = zoomFactor * ((e.getWheelRotation() < 0)?IN:OUT); - if (zoomFactor < initialZoomFactor ){ - zoomFactor = initialZoomFactor; - prevZoomFactor = zoomFactor; - } - repaint(); - }); - addMouseMotionListener(new MouseMotionAdapter() { - @Override - public void mouseDragged(MouseEvent e) { - Point curPoint = e.getLocationOnScreen(); - drag = new Drag(curPoint.x - startPoint.x, curPoint.y - startPoint.y); - repaint(); - } - }); - addMouseListener(new MouseAdapter() { - @Override - public void mousePressed(MouseEvent e) { - released = false; - startPoint = MouseInfo.getPointerInfo().getLocation(); - } - - @Override - public void mouseReleased(MouseEvent e) { - released = true; - repaint(); - } - }); - } - - @Override - public void paint(Graphics g) { - super.paint(g); - Graphics2D g2 = (Graphics2D) g; - AffineTransform at = new AffineTransform(); - if (zooming) { - double xRel = MouseInfo.getPointerInfo().getLocation().getX() - getLocationOnScreen().getX(); - double yRel = MouseInfo.getPointerInfo().getLocation().getY() - getLocationOnScreen().getY(); - double zoomDiv = zoomFactor / prevZoomFactor; - xOffset = (zoomDiv) * (xOffset) + (1 - zoomDiv) * xRel; - yOffset = (zoomDiv) * (yOffset) + (1 - zoomDiv) * yRel; - at.translate(xOffset, yOffset); - prevZoomFactor = zoomFactor; - zooming = false; - } else if (drag!= null) { - at.translate(xOffset +drag.xDiff, yOffset + drag.yDiff); - if (released) { - xOffset += drag.xDiff; - yOffset += drag.yDiff; - drag = null; - } - } else{ - at.translate(xOffset, yOffset); - } - at.scale(zoomFactor, zoomFactor); - g2.transform(at); - g2.setColor(Color.BLACK); - g2.fillRect(0-5000, 0-5000, image.getWidth()+10000, image.getHeight()+10000); - g2.drawImage(image, 0,0, image.getWidth(), image.getHeight(), 0, 0, image.getWidth(), image.getHeight(), this); - } - } - public static class Controls{ - JTextField generation; - JTextField generationsPerSecond; - - JButton start; - JMenuBar menuBar; - Controls(){ - menuBar = new JMenuBar(); - ((JButton) menuBar.add(new JButton("Exit"))).addActionListener(_ -> System.exit(0)); - this.start = (JButton) menuBar.add(new JButton("Start")); - menuBar.add(Box.createHorizontalStrut(40)); - generation = create ("Gen"); - generationsPerSecond = create ("Gen/Sec"); - } - JTextField create (String name){ - menuBar.add(new JLabel(name)); - JTextField textField = (JTextField) menuBar.add(new JTextField("",5)); - textField.setEditable(false); - return textField; - } - } - - Viewer(String title, Main.Control CLWrapControl,Main.CellGrid CLWrapCellGrid) { - super(title); - this.mainPanel = new MainPanel(new BufferedImage(CLWrapCellGrid.width(), CLWrapCellGrid.height(), BufferedImage.TYPE_BYTE_GRAY)); - this.controls = new Controls(); - setJMenuBar(controls.menuBar); - controls.start.addActionListener(_ -> {started=true;synchronized (doorBell) {doorBell.notify();}}); - this.getContentPane().add(this.mainPanel); - this.setLocationRelativeTo(null); - this.pack(); - this.setVisible(true); - this.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE); - } - - public void waitForStart() { - while (!started) { - synchronized (doorBell) { - try { - doorBell.wait(); - } catch (final InterruptedException ie) { - ie.getStackTrace(); - } - } - } - } - long start=0L; - int generationCounter=0; - public boolean isVisible(){ - return true; - } - public boolean isReadyForUpdate(){ - if (start==0L) { - start = System.currentTimeMillis(); - }else { - this.controls.generation.setText(String.format("%8d", ++generationCounter)); - this.controls.generationsPerSecond.setText( - String.format("%5.2f", (generationCounter * 1000f) / (System.currentTimeMillis() - start)) - ); - mainPanel.repaint(); - } - return true; - } - - public void update(Main.CellGrid CLWrapCellGrid, int to) { - CLWrapCellGrid.copySliceTo(mainPanel.rasterData, to); - mainPanel.repaint(); - } -} diff --git a/hat/examples/nbody/src/main/java/nbody/CLWrap.java b/hat/examples/nbody/src/main/java/nbody/CLWrap.java deleted file mode 100644 index 53f802be260..00000000000 --- a/hat/examples/nbody/src/main/java/nbody/CLWrap.java +++ /dev/null @@ -1,473 +0,0 @@ -package nbody; -/* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * - Neither the name of Oracle nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -import opencl.opencl_h; - -import java.io.IOException; -import java.lang.foreign.Arena; -import java.lang.foreign.MemorySegment; -import java.lang.foreign.ValueLayout; -import java.util.ArrayList; -import java.util.List; - -//import static java.lang.foreign.ValueLayout.JAVA_INT; -import static opencl.opencl_h.CL_DEVICE_TYPE_ALL; -import static opencl.opencl_h.CL_MEM_READ_WRITE; -import static opencl.opencl_h.CL_MEM_USE_HOST_PTR; -import static opencl.opencl_h.CL_QUEUE_PROFILING_ENABLE; - -public class CLWrap { - public static MemorySegment NULL = MemorySegment.NULL; - - // https://streamhpc.com/blog/2013-04-28/opencl-error-codes/ - static class Platform { - static class Device { - final Platform platform; - final MemorySegment deviceId; - - int intDeviceInfo(int query) { - var value = 0; - if ((opencl_h.clGetDeviceInfo(deviceId, query, opencl_h.C_INT.byteSize(), platform.intValuePtr, NULL)) != opencl_h.CL_SUCCESS()) { - System.out.println("Failed to get query " + query); - } else { - value = platform.intValuePtr.get(opencl_h.C_INT, 0); - } - return value; - } - - String strDeviceInfo(int query) { - String value = null; - if ((opencl_h.clGetDeviceInfo(deviceId, query, 2048, platform.byte2048ValuePtr, platform.intValuePtr)) != opencl_h.CL_SUCCESS()) { - System.out.println("Failed to get query " + query); - } else { - int len = platform.intValuePtr.get(opencl_h.C_INT, 0); - byte[] bytes = platform.byte2048ValuePtr.toArray(ValueLayout.JAVA_BYTE); - value = new String(bytes).substring(0, len - 1); - } - return value; - } - - int computeUnits() { - return intDeviceInfo(opencl_h.CL_DEVICE_MAX_COMPUTE_UNITS()); - } - - String deviceName() { - return strDeviceInfo(opencl_h.CL_DEVICE_NAME()); - } - - String builtInKernels() { - return strDeviceInfo(opencl_h.CL_DEVICE_BUILT_IN_KERNELS()); - } - - Device(Platform platform, MemorySegment deviceId) { - this.platform = platform; - this.deviceId = deviceId; - } - - public static class Context { - Device device; - MemorySegment context; - MemorySegment queue; - - Context(Device device, MemorySegment context) { - this.device = device; - this.context = context; - var statusPtr = device.platform.openCL.arena.allocateFrom(opencl_h.C_INT, 1); - - var queue_props = CL_QUEUE_PROFILING_ENABLE(); - if ((this.queue = opencl_h.clCreateCommandQueue(context, device.deviceId, queue_props, statusPtr)) == NULL) { - int status = statusPtr.get(opencl_h.C_INT, 0); - opencl_h.clReleaseContext(context); - // delete[] platforms; - // delete[] device_ids; - return; - } - - } - - static public class Program { - Context context; - String source; - MemorySegment program; - String log; - - Program(Context context, String source) { - this.context = context; - this.source = source; - MemorySegment sourcePtr = context.device.platform.openCL.arena.allocateFrom(source); - var sourcePtrPtr = context.device.platform.openCL.arena.allocateFrom(opencl_h.C_POINTER, sourcePtr); - // sourcePtrPtr.set(opencl_h.C_POINTER, 0, sourcePtr); - var sourceLenPtr = context.device.platform.openCL.arena.allocateFrom(opencl_h.C_LONG, source.length()); - // sourceLenPtr.set(opencl_h.C_LONG, 0, source.length()); - var statusPtr = context.device.platform.openCL.arena.allocateFrom(opencl_h.C_INT, 0); - if ((program = opencl_h.clCreateProgramWithSource(context.context, 1, sourcePtrPtr, sourceLenPtr, statusPtr)) == NULL) { - int status = statusPtr.get(opencl_h.C_INT, 0); - if (status != opencl_h.CL_SUCCESS()) { - System.out.println("failed to createProgram " + status); - } - System.out.println("failed to createProgram"); - } else { - int status = statusPtr.get(opencl_h.C_INT, 0); - if (status != opencl_h.CL_SUCCESS()) { - System.out.println("failed to create program " + status); - } - var deviceIdPtr = context.device.platform.openCL.arena.allocateFrom(opencl_h.C_POINTER, context.device.deviceId); - // deviceIdPtr.set(opencl_h.C_POINTER, 0, context.device.deviceId); - if ((status = opencl_h.clBuildProgram(program, 1, deviceIdPtr, NULL, NULL, NULL)) != opencl_h.CL_SUCCESS()) { - System.out.println("failed to build" + status); - // dont return we may still be able to get log! - } - - var logLenPtr = context.device.platform.openCL.arena.allocate(opencl_h.C_LONG, 1); - - if ((status = opencl_h.clGetProgramBuildInfo(program, context.device.deviceId, opencl_h.CL_PROGRAM_BUILD_LOG(), 0, NULL, logLenPtr)) != opencl_h.CL_SUCCESS()) { - System.out.println("failed to get log build " + status); - } else { - long logLen = logLenPtr.get(opencl_h.C_LONG, 0); - var logPtr = context.device.platform.openCL.arena.allocate(opencl_h.C_CHAR, 1 + logLen); - if ((status = opencl_h.clGetProgramBuildInfo(program, context.device.deviceId, opencl_h.CL_PROGRAM_BUILD_LOG(), logLen, logPtr, logLenPtr)) != opencl_h.CL_SUCCESS()) { - System.out.println("clGetBuildInfo (getting log) failed"); - } else { - byte[] bytes = logPtr.toArray(ValueLayout.JAVA_BYTE); - log = new String(bytes).substring(0, (int) logLen); - } - } - } - } - - public static class Kernel { - Program program; - MemorySegment kernel; - String kernelName; - - public Kernel(Program program, String kernelName) { - this.program = program; - this.kernelName = kernelName; - var statusPtr = program.context.device.platform.openCL.arena.allocateFrom(opencl_h.C_INT, opencl_h.CL_SUCCESS()); - MemorySegment kernelNamePtr = program.context.device.platform.openCL.arena.allocateFrom(kernelName); - kernel = opencl_h.clCreateKernel(program.program, kernelNamePtr, statusPtr); - int status = statusPtr.get(opencl_h.C_INT, 0); - if (status != opencl_h.CL_SUCCESS()) { - System.out.println("failed to create kernel " + status); - } - } - - public void run(int range, Object... args) { - var bufPtr = program.context.device.platform.openCL.arena.allocate(opencl_h.cl_mem, args.length); - var statusPtr = program.context.device.platform.openCL.arena.allocateFrom(opencl_h.C_INT, opencl_h.CL_SUCCESS()); - int status; - var eventMax = args.length * 4 + 1; - int eventc = 0; - var eventsPtr = program.context.device.platform.openCL.arena.allocate(opencl_h.cl_event, eventMax); - boolean block = false;// true; - for (int i = 0; i < args.length; i++) { - if (args[i] instanceof MemorySegment memorySegment) { - MemorySegment clMem = opencl_h.clCreateBuffer(program.context.context, - CL_MEM_USE_HOST_PTR() | CL_MEM_READ_WRITE(), - memorySegment.byteSize(), - memorySegment, - statusPtr); - status = statusPtr.get(opencl_h.C_INT, 0); - if (status != opencl_h.CL_SUCCESS()) { - System.out.println("failed to create memory buffer " + status); - } - bufPtr.set(opencl_h.cl_mem, i * opencl_h.cl_mem.byteSize(), clMem); - status = opencl_h.clEnqueueWriteBuffer(program.context.queue, - clMem, - block ? opencl_h.CL_TRUE() : opencl_h.CL_FALSE(), //block? - 0, - memorySegment.byteSize(), - memorySegment, - block ? 0 : eventc, - block ? NULL : ((eventc == 0) ? NULL : eventsPtr), - block ? NULL : eventsPtr.asSlice(eventc * opencl_h.cl_event.byteSize(), opencl_h.cl_event) - ); - if (status != opencl_h.CL_SUCCESS()) { - System.out.println("failed to enqueue write " + status); - } - if (!block) { - eventc++; - } - var clMemPtr = program.context.device.platform.openCL.arena.allocateFrom(opencl_h.C_POINTER, clMem); - - status = opencl_h.clSetKernelArg(kernel, i, opencl_h.C_POINTER.byteSize(), clMemPtr); - if (status != opencl_h.CL_SUCCESS()) { - System.out.println("failed to set arg " + status); - } - } else { - bufPtr.set(opencl_h.cl_mem, i * opencl_h.cl_mem.byteSize(), NULL); - switch (args[i]){ - case Integer intArg->{ - var intPtr = program.context.device.platform.openCL.arena.allocateFrom(opencl_h.C_INT, intArg); - status = opencl_h.clSetKernelArg(kernel, i, opencl_h.C_INT.byteSize(), intPtr); - if (status != opencl_h.CL_SUCCESS()) { - System.out.println("failed to set arg " + status); - } - } - case Float floatArg->{ - var floatPtr = program.context.device.platform.openCL.arena.allocateFrom(opencl_h.C_FLOAT, floatArg); - status = opencl_h.clSetKernelArg(kernel, i, opencl_h.C_FLOAT.byteSize(), floatPtr); - if (status != opencl_h.CL_SUCCESS()) { - System.out.println("failed to set arg " + status); - } - } - default -> throw new IllegalStateException("Unexpected value: " + args[i]); - } - } - } - - // We need to store x,y,z sizes so this is a kind of int3 - var globalSizePtr = program.context.device.platform.openCL.arena.allocate(opencl_h.C_INT, 3); - globalSizePtr.set(opencl_h.C_INT, 0, range); - globalSizePtr.set(opencl_h.C_INT, 1*opencl_h.C_INT.byteSize(), 0); - globalSizePtr.set(opencl_h.C_INT, 2*opencl_h.C_INT.byteSize(), 0); - status = opencl_h.clEnqueueNDRangeKernel( - program.context.queue, - kernel, - 1, // this must match the # of dims we are using in this case 1 of 3 - NULL, - globalSizePtr, - NULL, - block ? 0 : eventc, - block ? NULL : ((eventc == 0) ? NULL : eventsPtr), - block ? NULL : eventsPtr.asSlice(eventc * opencl_h.cl_event.byteSize(), opencl_h.cl_event - ) - ); - if (status != opencl_h.CL_SUCCESS()) { - System.out.println("failed to enqueue NDRange " + status); - } - - if (block) { - opencl_h.clFlush(program.context.queue); - } else { - eventc++; - status = opencl_h.clWaitForEvents(eventc, eventsPtr); - if (status != opencl_h.CL_SUCCESS()) { - System.out.println("failed to wait for ndrange events " + status); - } - } - - for (int i = 0; i < args.length; i++) { - if (args[i] instanceof MemorySegment memorySegment) { - MemorySegment clMem = bufPtr.get(opencl_h.cl_mem, (long) i * opencl_h.cl_mem.byteSize()); - status = opencl_h.clEnqueueReadBuffer(program.context.queue, - clMem, - block ? opencl_h.CL_TRUE() : opencl_h.CL_FALSE(), - 0, - memorySegment.byteSize(), - memorySegment, - block ? 0 : eventc, - block ? NULL : ((eventc == 0) ? NULL : eventsPtr), - block ? NULL : eventsPtr.asSlice(eventc * opencl_h.cl_event.byteSize(), opencl_h.cl_event)// block?NULL:readEventPtr - ); - if (status != opencl_h.CL_SUCCESS()) { - System.out.println("failed to enqueue read " + status); - } - if (!block) { - eventc++; - } - } - } - if (!block) { - status = opencl_h.clWaitForEvents(eventc, eventsPtr); - if (status != opencl_h.CL_SUCCESS()) { - System.out.println("failed to wait for events " + status); - } - } - for (int i = 0; i < args.length; i++) { - if (args[i] instanceof MemorySegment memorySegment) { - MemorySegment clMem = bufPtr.get(opencl_h.cl_mem, (long) i * opencl_h.cl_mem.byteSize()); - status = opencl_h.clReleaseMemObject(clMem); - if (status != opencl_h.CL_SUCCESS()) { - System.out.println("failed to release memObject " + status); - } - } - } - } - } - - public Kernel getKernel(String kernelName) { - return new Kernel(this, kernelName); - } - } - - public Program buildProgram(String source) { - var program = new Program(this, source); - return program; - } - } - - public Context createContext() { - - var statusPtr = platform.openCL.arena.allocateFrom(opencl_h.C_INT, 0); - MemorySegment context; - var deviceIds = platform.openCL.arena.allocateFrom(opencl_h.C_POINTER, this.deviceId); - if ((context = opencl_h.clCreateContext(NULL, 1, deviceIds, NULL, NULL, statusPtr)) == NULL) { - int status = statusPtr.get(opencl_h.C_INT, 0); - System.out.println("Failed to get context "); - return null; - } else { - int status = statusPtr.get(opencl_h.C_INT, 0); - if (status != opencl_h.CL_SUCCESS()) { - System.out.println("failed to get context " + status); - } - return new Context(this, context); - } - } - } - - int intPlatformInfo(int query) { - var value = 0; - if ((opencl_h.clGetPlatformInfo(platformId, query, opencl_h.C_INT.byteSize(), intValuePtr, NULL)) != opencl_h.CL_SUCCESS()) { - System.out.println("Failed to get query " + query); - } else { - value = intValuePtr.get(opencl_h.C_INT, 0); - } - return value; - } - - String strPlatformInfo(int query) { - String value = null; - int status; - if ((status = opencl_h.clGetPlatformInfo(platformId, query, 2048, byte2048ValuePtr, intValuePtr)) != opencl_h.CL_SUCCESS()) { - System.err.println("Failed to get query " + query); - } else { - int len = intValuePtr.get(opencl_h.C_INT, 0); - byte[] bytes = byte2048ValuePtr.toArray(ValueLayout.JAVA_BYTE); - value = new String(bytes).substring(0, len - 1); - } - return value; - } - - CLWrap openCL; - MemorySegment platformId; - List<Device> devices = new ArrayList<>(); - final MemorySegment intValuePtr; - final MemorySegment byte2048ValuePtr; - - String platformName() { - return strPlatformInfo(opencl_h.CL_PLATFORM_NAME()); - } - - String vendorName() { - return strPlatformInfo(opencl_h.CL_PLATFORM_VENDOR()); - } - - String version() { - return strPlatformInfo(opencl_h.CL_PLATFORM_VERSION()); - } - - public Platform(CLWrap openCL, MemorySegment platformId) { - this.openCL = openCL; - this.platformId = platformId; - this.intValuePtr = openCL.arena.allocateFrom(opencl_h.C_INT, 0); - this.byte2048ValuePtr = openCL.arena.allocate(opencl_h.C_CHAR, 2048); - var devicecPtr = openCL.arena.allocateFrom(opencl_h.C_INT, 0); - int status; - if ((status = opencl_h.clGetDeviceIDs(platformId, CL_DEVICE_TYPE_ALL(), 0, NULL, devicecPtr)) != opencl_h.CL_SUCCESS()) { - System.err.println("Failed getting devicec for platform 0 "); - } else { - int devicec = devicecPtr.get(opencl_h.C_INT, 0); - // System.out.println("platform 0 has " + devicec + " device" + ((devicec > 1) ? "s" : "")); - var deviceIdsPtr = openCL.arena.allocate(opencl_h.C_POINTER, devicec); - if ((status = opencl_h.clGetDeviceIDs(platformId, CL_DEVICE_TYPE_ALL(), devicec, deviceIdsPtr, devicecPtr)) != opencl_h.CL_SUCCESS()) { - System.err.println("Failed getting deviceids for platform 0 "); - } else { - // System.out.println("We have "+devicec+" device ids"); - for (int i = 0; i < devicec; i++) { - devices.add(new Device(this, deviceIdsPtr.get(opencl_h.C_POINTER, i * opencl_h.C_POINTER.byteSize()))); - } - } - } - } - } - - List<Platform> platforms = new ArrayList<>(); - - Arena arena; - - CLWrap(Arena arena) { - this.arena = arena; - var platformcPtr = arena.allocateFrom(opencl_h.C_INT, 0); - - if ((opencl_h.clGetPlatformIDs(0, NULL, platformcPtr)) != opencl_h.CL_SUCCESS()) { - System.out.println("Failed to get opencl platforms"); - } else { - int platformc = platformcPtr.get(opencl_h.C_INT, 0); - // System.out.println("There are "+platformc+" platforms"); - var platformIdsPtr = arena.allocate(opencl_h.C_POINTER, platformc); - if ((opencl_h.clGetPlatformIDs(platformc, platformIdsPtr, platformcPtr)) != opencl_h.CL_SUCCESS()) { - System.out.println("Failed getting platform ids"); - } else { - for (int i = 0; i < platformc; i++) { - // System.out.println("We should have the ids"); - platforms.add(new Platform(this, platformIdsPtr.get(opencl_h.C_POINTER, i))); - } - } - } - } - - - public static void main(String[] args) throws IOException { - try (var arena = Arena.ofConfined()) { - CLWrap openCL = new CLWrap(arena); - - Platform.Device[] selectedDevice = new Platform.Device[1]; - openCL.platforms.forEach(platform -> { - System.out.println("Platform Name " + platform.platformName()); - platform.devices.forEach(device -> { - System.out.println(" Compute Units " + device.computeUnits()); - System.out.println(" Device Name " + device.deviceName()); - System.out.println(" Built In Kernels " + device.builtInKernels()); - selectedDevice[0] = device; - }); - }); - var context = selectedDevice[0].createContext(); - var program = context.buildProgram(""" - __kernel void squares(__global int* in,__global int* out ){ - int gid = get_global_id(0); - out[gid] = in[gid]*in[gid]; - } - """); - var kernel = program.getKernel("squares"); - var in = arena.allocate(opencl_h.C_INT, 512); - var out = arena.allocate(opencl_h.C_INT, 512); - for (int i = 0; i < 512; i++) { - in.set(opencl_h.C_INT, (int) i * opencl_h.C_INT.byteSize(), i); - } - kernel.run(512, in, out); - for (int i = 0; i < 512; i++) { - System.out.println(i + " " + out.get(opencl_h.C_INT, (int) i * opencl_h.C_INT.byteSize())); - } - } - } -} diff --git a/hat/examples/nbody/src/main/java/nbody/GLWrap.java b/hat/examples/nbody/src/main/java/nbody/GLWrap.java deleted file mode 100644 index 6bb042c5b41..00000000000 --- a/hat/examples/nbody/src/main/java/nbody/GLWrap.java +++ /dev/null @@ -1,262 +0,0 @@ -package nbody; -/* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * - Neither the name of Oracle nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS - * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -import opengl.glutDisplayFunc$func; -import opengl.glutIdleFunc$func; - -import javax.imageio.ImageIO; -import java.awt.image.BufferedImage; -import java.awt.image.DataBufferByte; -import java.io.IOException; -import java.io.InputStream; -import java.lang.foreign.Arena; -import java.lang.foreign.MemorySegment; -import java.lang.foreign.ValueLayout; -import java.util.Arrays; - -import static java.lang.foreign.ValueLayout.JAVA_INT; -import static opengl.opengl_h.C_CHAR; -import static opengl.opengl_h.C_FLOAT; -import static opengl.opengl_h.C_INT; -import static opengl.opengl_h.GLUT_DEPTH; -import static opengl.opengl_h.GLUT_DOUBLE; -import static opengl.opengl_h.GLUT_RGB; -import static opengl.opengl_h.GL_AMBIENT; -import static opengl.opengl_h.GL_COLOR_BUFFER_BIT; -import static opengl.opengl_h.GL_COLOR_MATERIAL; -import static opengl.opengl_h.GL_DEPTH_BUFFER_BIT; -import static opengl.opengl_h.GL_DEPTH_TEST; -import static opengl.opengl_h.GL_DIFFUSE; -import static opengl.opengl_h.GL_FRONT; -import static opengl.opengl_h.GL_LIGHT0; -import static opengl.opengl_h.GL_LIGHTING; -import static opengl.opengl_h.GL_LINEAR; -import static opengl.opengl_h.GL_NEAREST; -import static opengl.opengl_h.GL_ONE; -import static opengl.opengl_h.GL_POSITION; -import static opengl.opengl_h.GL_QUADS; -import static opengl.opengl_h.GL_SHININESS; -import static opengl.opengl_h.GL_SMOOTH; -import static opengl.opengl_h.GL_SPECULAR; -import static opengl.opengl_h.GL_SRC_ALPHA; -import static opengl.opengl_h.GL_TEXTURE_2D; -import static opengl.opengl_h.GL_TEXTURE_MAG_FILTER; -import static opengl.opengl_h.GL_TEXTURE_MIN_FILTER; -import static opengl.opengl_h.GL_UNSIGNED_BYTE; -import static opengl.opengl_h.glActiveTexture; -import static opengl.opengl_h.glBegin; -import static opengl.opengl_h.glBindTexture; -import static opengl.opengl_h.glBlendFunc; -import static opengl.opengl_h.glClear; -import static opengl.opengl_h.glClearColor; -import static opengl.opengl_h.glColor3f; -import static opengl.opengl_h.glDisable; -import static opengl.opengl_h.glEnable; -import static opengl.opengl_h.glEnd; -import static opengl.opengl_h.glGenTextures; -import static opengl.opengl_h.glLightfv; -import static opengl.opengl_h.glLoadIdentity; -import static opengl.opengl_h.glMaterialfv; -import static opengl.opengl_h.glPopMatrix; -import static opengl.opengl_h.glPushMatrix; -import static opengl.opengl_h.glRotatef; -import static opengl.opengl_h.glScalef; -import static opengl.opengl_h.glShadeModel; -import static opengl.opengl_h.glTexCoord2f; -import static opengl.opengl_h.glTexImage2D; -import static opengl.opengl_h.glTexParameteri; -import static opengl.opengl_h.glVertex3f; -import static opengl.opengl_h.glutCreateWindow; -import static opengl.opengl_h.glutDisplayFunc; -import static opengl.opengl_h.glutIdleFunc; -import static opengl.opengl_h.glutInit; -import static opengl.opengl_h.glutInitDisplayMode; -import static opengl.opengl_h.glutInitWindowSize; -import static opengl.opengl_h.glutMainLoop; -import static opengl.opengl_h.glutPostRedisplay; -import static opengl.opengl_h.glutSolidTeapot; -import static opengl.opengl_h.glutSwapBuffers; -import static opengl.opengl_h_2.GL_BLEND; -import static opengl.opengl_h_2.GL_RGBA; - -public class GLWrap { - public static class GLTexture { - final Arena arena; - final MemorySegment data; - final int width; - final int height; - int idx; - GLTexture(Arena arena, InputStream textureStream) { - this.arena = arena; - BufferedImage img = null; - try { - img = ImageIO.read(textureStream); - this.width = img.getWidth(); - this.height = img.getHeight(); - BufferedImage image = new BufferedImage(width,height, BufferedImage.TYPE_4BYTE_ABGR_PRE); - image.getGraphics().drawImage(img, 0, 0, null); - var raster = image.getRaster(); - var dataBuffer = raster.getDataBuffer(); - data = arena.allocateFrom(C_CHAR, ((DataBufferByte) dataBuffer).getData()); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - } - - public static class GLWindow { - Arena arena; - int width; - int height; - String name; - GLTexture[] textures; - MemorySegment textureBuf; - GLWindow(Arena arena, int width, int height, String name, GLTexture... textures) { - this.arena = arena; - this.width = width; - this.height = height; - this.name = name; - this.textures = textures; - var argc = arena.allocateFrom(C_INT, 0); - glutInit(argc, argc); - glutInitDisplayMode(GLUT_DOUBLE() | GLUT_RGB() | GLUT_DEPTH()); - glutInitWindowSize(width, height); - glutCreateWindow(arena.allocateFrom("NBODY!")); - - glClearColor(0f, 0f, 0f, 0f); - // Setup Lighting see https://www.khronos.org/opengl/wiki/How_lighting_works - glShadeModel(GL_SMOOTH()); - glEnable(GL_BLEND()); - glBlendFunc(GL_SRC_ALPHA(), GL_ONE()); - glEnable(GL_TEXTURE_2D()); - textureBuf = arena.allocate(C_INT, textures.length*C_INT.byteSize()); - glGenTextures(textures.length, textureBuf); - int[] count = {0}; - Arrays.stream(textures).forEach(texture -> { - texture.idx=count[0]++; - glBindTexture(GL_TEXTURE_2D(), textureBuf.get(JAVA_INT, texture.idx * JAVA_INT.byteSize())); - glTexImage2D(GL_TEXTURE_2D(), 0, GL_RGBA(), texture.width, - texture.height, 0, GL_RGBA(), GL_UNSIGNED_BYTE(), texture.data); - glTexParameteri(GL_TEXTURE_2D(), GL_TEXTURE_MAG_FILTER(), GL_LINEAR()); - glTexParameteri(GL_TEXTURE_2D(), GL_TEXTURE_MIN_FILTER(), GL_NEAREST()); - }); - var useLighting = false; - if (useLighting) { - glEnable(GL_LIGHTING()); - - var light = GL_LIGHT0(); // .... GL_LIGHT_0 .. -> 7 - - var pos = arena.allocateFrom(C_FLOAT, new float[]{0.0f, 15.0f, -15.0f, 0}); - - glLightfv(light, GL_POSITION(), pos); - - var red_ambient_light = arena.allocateFrom(C_FLOAT, new float[]{1f, 0.0f, 0.0f, 0.0f}); - - var grey_diffuse_light = arena.allocateFrom(C_FLOAT, new float[]{1f, 1f, 1f, 0.0f}); - - var yellow_specular_light = arena.allocateFrom(C_FLOAT, new float[]{1.0f, 1.0f, 0.0f, 0.0f}); - glLightfv(light, GL_AMBIENT(), red_ambient_light); - glLightfv(light, GL_DIFFUSE(), grey_diffuse_light); - glLightfv(light, GL_SPECULAR(), yellow_specular_light); - - var shini = arena.allocate(C_FLOAT, 113); - glMaterialfv(GL_FRONT(), GL_SHININESS(), shini); - - var useColorMaterials = false; - if (useColorMaterials) { - glEnable(GL_COLOR_MATERIAL()); - } else { - glDisable(GL_COLOR_MATERIAL()); - } - glEnable(light); - glEnable(GL_DEPTH_TEST()); - } else { - glDisable(GL_LIGHTING()); - } - glutDisplayFunc(glutDisplayFunc$func.allocate(this::display, arena)); - glutIdleFunc(glutIdleFunc$func.allocate(this::onIdle, arena)); - } - void display() { - glClear(GL_COLOR_BUFFER_BIT() | GL_DEPTH_BUFFER_BIT()); - glPushMatrix(); - glLoadIdentity(); - glRotatef(0f, 0f,0f, 0f); - //glRotatef(rot, 0f, 1f, 0f); - // glTranslatef(0f, 0f, trans); - glScalef(.1f, .1f, 1); - - glActiveTexture(textureBuf.get(ValueLayout.JAVA_INT, 0)); - glBindTexture(GL_TEXTURE_2D(), textureBuf.get(ValueLayout.JAVA_INT, 0)); - glColor3f(1f, 1f, 1f); - glBegin(GL_QUADS()); - { - float dx = -.5f; - float dy = -.5f; - float dz = -.5f; - float x = 0f; - float y= 0f; - float z = 0f; - glTexCoord2f(0, 1); - glVertex3f(x + dx, y + dy + 1, z + dz); - glTexCoord2f(0, 0); - glVertex3f(x + dx, y + dy, z + dz); - glTexCoord2f(1, 0); - glVertex3f(x + dx + 1, y + dy, z + dz); - glTexCoord2f(1, 1); - glVertex3f(x + dx + 1, y + dy + 1, z + dz); - } - glEnd(); - glColor3f(0.8f, 0.1f, 0.1f); - glutSolidTeapot(1d); - glPopMatrix(); - glutSwapBuffers(); - } - - void onIdle() { - glutPostRedisplay(); - } - - public void mainLoop() { - glutMainLoop(); - } - } - - - public void main(String[] args) throws IOException { - try (var arena = Arena.ofConfined()) { - new GLWindow(arena, 800,800,"name", - new GLTexture(arena, GLWrap.class.getResourceAsStream("/particle.png")) - ).mainLoop(); - } - } -} - diff --git a/hat/examples/nbody/src/main/java/nbody/Main.java b/hat/examples/nbody/src/main/java/nbody/Main.java index d2cbe98d09a..fcbbb118b27 100644 --- a/hat/examples/nbody/src/main/java/nbody/Main.java +++ b/hat/examples/nbody/src/main/java/nbody/Main.java @@ -34,7 +34,7 @@ public class Main { public static void main(String[] args) throws IOException { int particleCount = args.length > 2 ? Integer.parseInt(args[2]) : 32768; - NBodyGLWindow.Mode mode = NBodyGLWindow.Mode.of(args.length > 3 ? args[3] : NBodyGLWindow.Mode.OpenCL.toString()); + NBodyGLWindow.Mode mode = NBodyGLWindow.Mode.of(args.length > 3 ? args[3] : NBodyGLWindow.Mode.HAT.toString()); System.out.println("mode" + mode); try (var arena = mode.equals(NBodyGLWindow.Mode.JavaMT4) || mode.equals(NBodyGLWindow.Mode.JavaMT) ? Arena.ofShared() : Arena.ofConfined()) { var particleTexture = new GLTexture(arena, NBody.class.getResourceAsStream("/particle.png")); diff --git a/hat/examples/nbody/src/main/java/nbody/Main.java.no b/hat/examples/nbody/src/main/java/nbody/Main.java.no deleted file mode 100644 index 22081db7a50..00000000000 --- a/hat/examples/nbody/src/main/java/nbody/Main.java.no +++ /dev/null @@ -1,520 +0,0 @@ -package nbody; -/* - * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - - -import hat.Accelerator; -import hat.ComputeContext; -import hat.KernelContext; -import hat.backend.Backend; -import hat.buffer.Buffer; -import static hat.ifacemapper.MappableIface.*; -import hat.ifacemapper.Schema; -import jdk.incubator.code.CodeReflection; - -import java.io.IOException; -import java.lang.foreign.Arena; -import java.lang.foreign.MemorySegment; -import java.lang.invoke.MethodHandles; -import java.util.stream.IntStream; - -import static java.lang.foreign.ValueLayout.JAVA_FLOAT; -import static java.lang.foreign.ValueLayout.JAVA_INT; -import static opengl.opengl_h.GL_COLOR_BUFFER_BIT; -import static opengl.opengl_h.GL_DEPTH_BUFFER_BIT; -import static opengl.opengl_h.GL_QUADS; -import static opengl.opengl_h.GL_TEXTURE_2D; -import static opengl.opengl_h.glBegin; -import static opengl.opengl_h.glBindTexture; -import static opengl.opengl_h.glClear; -import static opengl.opengl_h.glColor3f; -import static opengl.opengl_h.glEnd; -import static opengl.opengl_h.glLoadIdentity; -import static opengl.opengl_h.glPopMatrix; -import static opengl.opengl_h.glPushMatrix; -import static opengl.opengl_h.glRotatef; -import static opengl.opengl_h.glScalef; -import static opengl.opengl_h.glTexCoord2f; -import static opengl.opengl_h.glVertex3f; -import static opengl.opengl_h.glutSwapBuffers; -import static opengl.opengl_h_3.C_FLOAT; - - -public class Main { - public interface Universe extends Buffer { - int length(); - - interface Body extends Struct { - float x(); - - float y(); - - float z(); - - float vx(); - - float vy(); - - float vz(); - - void x(float x); - - void y(float y); - - void z(float z); - - void vx(float vx); - - void vy(float vy); - - void vz(float vz); - } - - Body body(long idx); - - Schema<Universe> schema = Schema.of(Universe.class, resultTable -> resultTable - - .arrayLen("length").array("body", array -> array - .fields("x", "y", "z", "vx", "vy", "vz") - ) - ); - - static Universe create(Accelerator accelerator, int length) { - return schema.allocate(accelerator, length); - } - - } - - public static class NBody extends GLWrap.GLWindow { - - protected final static float delT = .1f; - - protected final static float espSqr = 0.1f; - - protected final static float mass = .5f; - - @CodeReflection - static public void nbodyKernel(@RO KernelContext kc, @RW Universe universe, float mass, float delT, float espSqr) { - float accx = 0.0f; - float accy = 0.0f; - float accz = 0.0f; - Universe.Body me = universe.body(kc.x); - - for (int i = 0; i < kc.maxX; i++) { - Universe.Body body = universe.body(i); - float dx = body.x() - me.x(); - float dy = body.y() - me.y(); - float dz = body.z() - me.z(); - float invDist = (float) (1.0f / Math.sqrt(((dx * dx) + (dy * dy) + (dz * dz) + espSqr))); - float s = mass * invDist * invDist * invDist; - accx = accx + (s * dx); - accy = accy + (s * dy); - accz = accz + (s * dz); - } - accx = accx * delT; - accy = accy * delT; - accz = accz * delT; - me.x(me.x() + (me.vx() * delT + accx * .5f * delT)); - me.y(me.y() + (me.vy() * delT + accy * .5f * delT)); - me.z(me.z() + (me.vz() * delT + accz * .5f * delT)); - me.vx(me.vx() + accx); - me.vy(me.vy() + accy); - me.vz(me.vz() + accz); - } - - @CodeReflection - public static void nbodyCompute(@RO ComputeContext cc, @RW Universe universe, float mass, float delT, float espSqr) { - float cmass = mass; - float cdelT = delT; - float cespSqr= espSqr; - - cc.dispatchKernel(universe.length(), kc -> nbodyKernel(kc, universe, cmass, cdelT, cespSqr)); - } - - - private static int STRIDE = 4; - private static int Xidx = 0; - private static int Yidx = 1; - private static int Zidx = 2; - - final float[] xyzPos; - final float[] xyzVel; - - final GLWrap.GLTexture particle; - final MemorySegment xyzPosSeg; - final MemorySegment xyzVelSeg; - final Universe universe; - final Accelerator accelerator; - final CLWrap.Platform.Device.Context.Program.Kernel kernel; - - int count; - int frames = 0; - long startTime = 0l; - - public enum Mode { - HAT(), - OpenCL(""" - __kernel void nbody( __global float *xyzPos ,__global float* xyzVel, float mass, float delT, float espSqr ){ - int body = get_global_id(0); - int STRIDE=4; - int Xidx=0; - int Yidx=1; - int Zidx=2; - int bodyStride = body*STRIDE; - int bodyStrideX = bodyStride+Xidx; - int bodyStrideY = bodyStride+Yidx; - int bodyStrideZ = bodyStride+Zidx; - - float accx = 0.0; - float accy = 0.0; - float accz = 0.0; - float myPosx = xyzPos[bodyStrideX]; - float myPosy = xyzPos[bodyStrideY]; - float myPosz = xyzPos[bodyStrideZ]; - for (int i = 0; i < get_global_size(0); i++) { - int iStride = i*STRIDE; - float dx = xyzPos[iStride+Xidx] - myPosx; - float dy = xyzPos[iStride+Yidx] - myPosy; - float dz = xyzPos[iStride+Zidx] - myPosz; - float invDist = (float) 1.0/sqrt((float)((dx * dx) + (dy * dy) + (dz * dz) + espSqr)); - float s = mass * invDist * invDist * invDist; - accx = accx + (s * dx); - accy = accy + (s * dy); - accz = accz + (s * dz); - } - accx = accx * delT; - accy = accy * delT; - accz = accz * delT; - xyzPos[bodyStrideX] = myPosx + (xyzVel[bodyStrideX] * delT) + (accx * 0.5 * delT); - xyzPos[bodyStrideY] = myPosy + (xyzVel[bodyStrideY] * delT) + (accy * 0.5 * delT); - xyzPos[bodyStrideZ] = myPosz + (xyzVel[bodyStrideZ] * delT) + (accz * 0.5 * delT); - - xyzVel[bodyStrideX] = xyzVel[bodyStrideX] + accx; - xyzVel[bodyStrideY] = xyzVel[bodyStrideY] + accy; - xyzVel[bodyStrideZ] = xyzVel[bodyStrideZ] + accz; - - } - """), - OpenCL4(""" - __kernel void nbody( __global float4 *xyzPos ,__global float4* xyzVel, float mass, float delT, float espSqr ){ - float4 acc = (0.0,0.0,0.0,0.0); - float4 myPos = xyzPos[get_global_id(0)]; - float4 myVel = xyzVel[get_global_id(0)]; - for (int i = 0; i < get_global_size(0); i++) { - float4 delta = xyzPos[i] - myPos; - float invDist = (float) 1.0/sqrt((float)((delta.x * delta.x) + (delta.y * delta.y) + (delta.z * delta.z) + espSqr)); - float s = mass * invDist * invDist * invDist; - acc= acc + (s * delta); - } - acc = acc*delT; - myPos = myPos + (myVel * delT) + (acc * delT)/2; - myVel = myVel + acc; - xyzPos[get_global_id(0)] = myPos; - xyzVel[get_global_id(0)] = myVel; - - } - """), - JavaSeq(false), - JavaMT(true); - final public boolean hat; - final public String code; - final public boolean isOpenCL; - final public boolean isJava; - final public boolean isMultiThreaded; - - Mode() { - this.hat = true; - this.code = null; - this.isOpenCL = false; - this.isJava = false; - this.isMultiThreaded = false; - } - - Mode(String code) { - this.hat = true; - this.code = code; - this.isOpenCL = true; - this.isJava = false; - this.isMultiThreaded = false; - } - - Mode(boolean isMultiThreaded) { - this.hat = true; - this.code = null; - this.isOpenCL = false; - this.isJava = true; - this.isMultiThreaded = isMultiThreaded; - } - - public static Mode of(String name, Mode defaultMode) { - return switch (name) { - case "HAT" -> NBody.Mode.HAT; - case "OpenCL" -> NBody.Mode.OpenCL; - case "JavaSeq" -> NBody.Mode.JavaSeq; - case "JavaMT" -> NBody.Mode.JavaMT; - case "OpenCL4" -> NBody.Mode.OpenCL4; - default -> defaultMode; - }; - } - } - - final Mode mode; - - public NBody(Arena arena, int width, int height, GLWrap.GLTexture particle, int count, Mode mode) { - super(arena, width, height, "nbody", particle); - this.particle = particle; - this.count = count; - this.xyzPos = new float[count * STRIDE]; - this.xyzVel = new float[count * STRIDE]; - this.mode = mode; - final float maxDist = 80f; - - System.out.println(count + " particles"); - - switch (mode) { - case OpenCL, OpenCL4, JavaMT, JavaSeq -> { - for (int body = 0; body < count; body++) { - final float theta = (float) (Math.random() * Math.PI * 2); - final float phi = (float) (Math.random() * Math.PI * 2); - final float radius = (float) (Math.random() * maxDist); - - // get random 3D coordinates in sphere - xyzPos[(body * STRIDE) + Xidx] = (float) (radius * Math.cos(theta) * Math.sin(phi)); - xyzPos[(body * STRIDE) + Yidx] = (float) (radius * Math.sin(theta) * Math.sin(phi)); - xyzPos[(body * STRIDE) + Zidx] = (float) (radius * Math.cos(phi)); - } - } - default -> { - } - - } - switch (mode){ - case OpenCL,OpenCL4->{ - xyzPosSeg = arena.allocateFrom(JAVA_FLOAT, xyzPos); - xyzVelSeg = arena.allocateFrom(JAVA_FLOAT, xyzVel); - CLWrap openCL = new CLWrap(arena); - - CLWrap.Platform.Device[] selectedDevice = new CLWrap.Platform.Device[1]; - openCL.platforms.forEach(platform -> { - System.out.println("Platform Name " + platform.platformName()); - platform.devices.forEach(device -> { - System.out.println(" Compute Units " + device.computeUnits()); - System.out.println(" Device Name " + device.deviceName()); - System.out.println(" Built In Kernels " + device.builtInKernels()); - selectedDevice[0] = device; - }); - }); - var context = selectedDevice[0].createContext(); - var program = context.buildProgram(mode.code); - kernel = program.getKernel("nbody"); - accelerator = null; - universe = null; - } - case JavaMT,JavaSeq->{ - kernel = null; - xyzPosSeg = null; - xyzVelSeg = null; - accelerator = null; - universe = null; - } - case HAT->{ - kernel = null; - xyzPosSeg = null; - xyzVelSeg = null; - accelerator = new Accelerator(MethodHandles.lookup(), - Backend.FIRST - ); - universe = Universe.create(accelerator, count); - for (int body = 0; body < count; body++) { - Universe.Body b = universe.body(body); - final float theta = (float) (Math.random() * Math.PI * 2); - final float phi = (float) (Math.random() * Math.PI * 2); - final float radius = (float) (Math.random() * maxDist); - - // get random 3D coordinates in sphere - b.x((float) (radius * Math.cos(theta) * Math.sin(phi))); - b.y((float) (radius * Math.sin(theta) * Math.sin(phi))); - b.z((float) (radius * Math.cos(phi))); - } - } - default -> { - kernel = null; - xyzPosSeg = null; - xyzVelSeg = null; - accelerator = null; - universe = null; - } - } - } - - - float rot = 0f; - - public static void run(int body, int size, float[] xyzPos, float[] xyzVel, float mass, float delT, float espSqr) { - float accx = 0.f; - float accy = 0.f; - float accz = 0.f; - int bodyStride = body * STRIDE; - int bodyStrideX = bodyStride + Xidx; - int bodyStrideY = bodyStride + Yidx; - int bodyStrideZ = bodyStride + Zidx; - - final float myPosx = xyzPos[bodyStrideX]; - final float myPosy = xyzPos[bodyStrideY]; - final float myPosz = xyzPos[bodyStrideZ]; - - for (int i = 0; i < size; i++) { - int iStride = i * STRIDE; - int iStrideX = iStride + Xidx; - int iStrideY = iStride + Yidx; - int iStrideZ = iStride + Zidx; - final float dx = xyzPos[iStrideX] - myPosx; - final float dy = xyzPos[iStrideY] - myPosy; - final float dz = xyzPos[iStrideZ] - myPosz; - final float invDist = 1 / (float) Math.sqrt((dx * dx) + (dy * dy) + (dz * dz) + espSqr); - final float s = mass * invDist * invDist * invDist; - accx = accx + (s * dx); - accy = accy + (s * dy); - accz = accz + (s * dz); - } - accx = accx * delT; - accy = accy * delT; - accz = accz * delT; - xyzPos[bodyStrideX] = myPosx + (xyzVel[bodyStrideX] * delT) + (accx * .5f * delT); - xyzPos[bodyStrideY] = myPosy + (xyzVel[bodyStrideY] * delT) + (accy * .5f * delT); - xyzPos[bodyStrideZ] = myPosz + (xyzVel[bodyStrideZ] * delT) + (accz * .5f * delT); - - xyzVel[bodyStrideX] = xyzVel[bodyStrideX] + accx; - xyzVel[bodyStrideY] = xyzVel[bodyStrideY] + accy; - xyzVel[bodyStrideZ] = xyzVel[bodyStrideZ] + accz; - } - - void display() { - if (startTime == 0) { - startTime = System.currentTimeMillis(); - } - glClear(GL_COLOR_BUFFER_BIT() | GL_DEPTH_BUFFER_BIT()); - glPushMatrix(); - glLoadIdentity(); - glRotatef(-rot / 2f, 0f, 0f, 1f); - //glRotatef(rot, 0f, 1f, 0f); - // glTranslatef(0f, 0f, trans); - glScalef(.01f, .01f, .01f); - glColor3f(1f, 1f, 1f); - - switch (mode){ - case JavaMT,JavaSeq ->{ - if (mode.isMultiThreaded) { - IntStream.range(0, count).parallel().forEach( - i -> run(i, count, xyzPos, xyzVel, mass, delT, espSqr) - ); - } else { - IntStream.range(0, count).forEach( - i -> run(i, count, xyzPos, xyzVel, mass, delT, espSqr) - ); - } - } - case OpenCL,OpenCL4->{ - kernel.run(count, xyzPosSeg, xyzVelSeg, mass, delT, espSqr); - } - case HAT->{ - float cmass = mass; - float cdelT = delT; - float cespSqr = espSqr; - Universe cuniverse = universe; - accelerator.compute(cc -> nbodyCompute(cc, cuniverse, cmass, cdelT, cespSqr)); - } - } - - glBegin(GL_QUADS()); - { - glBindTexture(GL_TEXTURE_2D(), textureBuf.get(JAVA_INT, particle.idx * JAVA_INT.byteSize())); - float dx = -.5f; - float dy = -.5f; - float dz = -.5f; - - for (int i = 0; i < count; i++) { - float x=0,y=0,z=0; - switch (mode){ - case OpenCL4 ,OpenCL -> { - x = xyzPosSeg.get(C_FLOAT, (i * STRIDE * C_FLOAT.byteSize()) + (Xidx * C_FLOAT.byteSize())); - y = xyzPosSeg.get(C_FLOAT, (i * STRIDE * C_FLOAT.byteSize()) + (Yidx * C_FLOAT.byteSize())); - z = xyzPosSeg.get(C_FLOAT, (i * STRIDE * C_FLOAT.byteSize()) + (Zidx * C_FLOAT.byteSize())); - } - case JavaMT, JavaSeq -> { - x = xyzPos[(i * STRIDE) + Xidx]; - y = xyzPos[(i * STRIDE) + Yidx]; - z = xyzPos[(i * STRIDE) + Zidx]; - } - case HAT ->{ - Universe.Body body = universe.body(i); - x=body.x(); - y=body.y(); - z=body.z(); - } - } - final int LEFT = 0; - final int RIGHT = 1; - final int TOP = 0; - final int BOTTOM = 1; - glTexCoord2f(LEFT, BOTTOM); - glVertex3f(x + dx + LEFT, y + dy + BOTTOM, z + dz); - glTexCoord2f(LEFT, TOP); - glVertex3f(x + dx + LEFT, y + dy + TOP, z + dz); - glTexCoord2f(RIGHT, TOP); - glVertex3f(x + dx + RIGHT, y + dy + TOP, z + dz); - glTexCoord2f(RIGHT, BOTTOM); - glVertex3f(x + dx + RIGHT, y + dy + BOTTOM, z + dz); - } - } - glEnd(); - glColor3f(0.8f, 0.1f, 0.1f); - glPopMatrix(); - glutSwapBuffers(); - frames++; - long elapsed = System.currentTimeMillis() - startTime; - if (elapsed > 200 || (frames % 100) == 0) { - float secs = elapsed / 1000f; - // System.out.println((frames / secs) + "fps"); - } - } - - void onIdle() { - rot += 1f; - super.onIdle(); - } - } - - public void main(String[] args) { - int particleCount = 32768; - NBody.Mode mode = NBody.Mode.HAT;//NBody.Mode.OpenCL4;//NBody.Mode.of("HAT", NBody.Mode.OpenCL); - System.out.println("mode" + mode); - try (var arena = Arena.ofConfined()) { - var particleTexture = new GLWrap.GLTexture(arena, NBody.class.getResourceAsStream("/particle.png")); - new NBody(arena, 1000, 1000, particleTexture, particleCount, mode).mainLoop(); - } - } -} - diff --git a/hat/examples/nbody/src/main/java/nbody/NBodyGLWindow.java b/hat/examples/nbody/src/main/java/nbody/NBodyGLWindow.java index 192e15f0108..7391c9cfd96 100644 --- a/hat/examples/nbody/src/main/java/nbody/NBodyGLWindow.java +++ b/hat/examples/nbody/src/main/java/nbody/NBodyGLWindow.java @@ -62,13 +62,13 @@ public class NBodyGLWindow extends GLWindow { protected final float mass = .5f; - final GLTexture particle; + protected final GLTexture particle; protected final Wrap.Float4Arr xyzPosFloatArr; protected final Wrap.Float4Arr xyzVelFloatArr; protected int bodyCount; protected int frameCount = 0; - final long startTime = System.currentTimeMillis(); + protected final long startTime = System.currentTimeMillis(); protected final Mode mode; @@ -170,13 +170,13 @@ protected void moveBodies() { } } - static final float WEST = 0; - static final float EAST = 1; - static final float NORTH = 0; - static final float SOUTH = 1; - static float dx = -.5f; - static float dy = -.5f; - static float dz = -.5f; + protected static final float WEST = 0; + protected static final float EAST = 1; + protected static final float NORTH = 0; + protected static final float SOUTH = 1; + protected static float dx = -.5f; + protected static float dy = -.5f; + protected static float dz = -.5f; @@ -193,9 +193,8 @@ public void display() { glScalef(.01f, .01f, .01f); glColor3f(1f, 1f, 1f); glQuads(() -> { - - for (int bodyIdx = 0; bodyIdx < bodyCount; bodyIdx++) { - var bodyf4 = xyzPosFloatArr.get(bodyIdx); + for (int bodyIdx = 0; bodyIdx < bodyCount; bodyIdx++) { + var bodyf4 = xyzPosFloatArr.get(bodyIdx); /* * Textures are mapped to a quad by defining the vertices in @@ -210,15 +209,16 @@ public void display() { * Ideally we need to rotate this to point to the camera (see billboarding) */ - glTexCoord2f(WEST, SOUTH); - glVertex3f(bodyf4.x() + WEST + dx, bodyf4.y() + SOUTH + dy, bodyf4.z() + dz); - glTexCoord2f(WEST, NORTH); - glVertex3f(bodyf4.x() + WEST + dx, bodyf4.y() + NORTH + dy, bodyf4.z() + dz); - glTexCoord2f(EAST, NORTH); - glVertex3f(bodyf4.x() + EAST + dx, bodyf4.y() + NORTH + dy, bodyf4.z() + dz); - glTexCoord2f(EAST, SOUTH); - glVertex3f(bodyf4.x() + EAST + dx, bodyf4.y() + SOUTH + dy, bodyf4.z() + dz); - } + glTexCoord2f(WEST, SOUTH); + glVertex3f(bodyf4.x() + WEST + dx, bodyf4.y() + SOUTH + dy, bodyf4.z() + dz); + glTexCoord2f(WEST, NORTH); + glVertex3f(bodyf4.x() + WEST + dx, bodyf4.y() + NORTH + dy, bodyf4.z() + dz); + glTexCoord2f(EAST, NORTH); + glVertex3f(bodyf4.x() + EAST + dx, bodyf4.y() + NORTH + dy, bodyf4.z() + dz); + glTexCoord2f(EAST, SOUTH); + glVertex3f(bodyf4.x() + EAST + dx, bodyf4.y() + SOUTH + dy, bodyf4.z() + dz); + +} }); }); @@ -254,10 +254,11 @@ public void onIdle() { public enum Mode { - OpenCL, Cuda, OpenCL4, Cuda4, JavaSeq, JavaMT, JavaSeq4, JavaMT4; + HAT,OpenCL, Cuda, OpenCL4, Cuda4, JavaSeq, JavaMT, JavaSeq4, JavaMT4; public static Mode of(String s) { return switch (s) { + case "HAT" -> Mode.HAT; case "OpenCL" -> Mode.OpenCL; case "Cuda" -> Mode.Cuda; case "JavaSeq" -> Mode.JavaSeq; @@ -272,7 +273,7 @@ public static Mode of(String s) { } public static void main(String[] args) throws IOException { int particleCount = args.length > 2 ? Integer.parseInt(args[2]) : 32768/2/2; - Mode mode = Mode.of(args.length>3?args[3]: Mode.JavaMT.toString()); + Mode mode = Mode.of(args.length>3?args[3]: Mode.HAT.toString()); System.out.println("mode" + mode); try (var arena = mode.equals(Mode.JavaMT)||mode.equals(Mode.JavaMT4) ? Arena.ofShared() : Arena.ofConfined()) { var particleTexture = new GLTexture(arena, NBodyGLWindow.class.getResourceAsStream("/particle.png")); diff --git a/hat/examples/nbody/src/main/java/nbody/opencl/NBody.java b/hat/examples/nbody/src/main/java/nbody/opencl/NBody.java index 7bad130181b..12fa7012d0c 100644 --- a/hat/examples/nbody/src/main/java/nbody/opencl/NBody.java +++ b/hat/examples/nbody/src/main/java/nbody/opencl/NBody.java @@ -25,6 +25,34 @@ package nbody.opencl; +import hat.Accelerator; +import hat.ComputeContext; +import hat.backend.ffi.OpenCLBackend; +import hat.buffer.Buffer; +import hat.KernelContext; +import hat.ifacemapper.MappableIface; +import static hat.ifacemapper.MappableIface.*; +import static opengl.opengl_h.glMatrixMode; +import static opengl.opengl_h.glRasterPos2f; +import static opengl.opengl_h.glScalef; +import static opengl.opengl_h.glTexCoord2f; +import static opengl.opengl_h.glVertex3f; +import static opengl.opengl_h.glutBitmapCharacter; +import static opengl.opengl_h.glutBitmapTimesRoman24$segment; +import static opengl.opengl_h.glutSwapBuffers; +import static opengl.opengl_h_1.glBindTexture; +import static opengl.opengl_h_1.glClear; +import static opengl.opengl_h_1.glClearColor; +import static opengl.opengl_h_1.glColor3f; +import static opengl.opengl_h_1.glDisable; +import static opengl.opengl_h_1.glEnable; +import static opengl.opengl_h_2.GL_COLOR_BUFFER_BIT; +import static opengl.opengl_h_2.GL_DEPTH_BUFFER_BIT; +import static opengl.opengl_h_2.GL_MODELVIEW; +import static opengl.opengl_h_2.GL_TEXTURE_2D; + +import hat.ifacemapper.Schema; +import jdk.incubator.code.CodeReflection; import nbody.NBodyGLWindow; import wrap.clwrap.CLPlatform; import wrap.clwrap.CLWrapComputeContext; @@ -32,109 +60,325 @@ import java.io.IOException; import java.lang.foreign.Arena; +import java.lang.invoke.MethodHandles; public class NBody { public static class CLNBodyGLWindow extends NBodyGLWindow { + public interface Universe extends Buffer { + int length(); + + interface Body extends Struct { + float x(); + + float y(); + + float z(); + + float vx(); + + float vy(); + + float vz(); + + void x(float x); + + void y(float y); + + void z(float z); + + void vx(float vx); + + void vy(float vy); + + void vz(float vz); + } + + Body body(long idx); + /* + typedef Body_s{ + float x; + float y; + float vx; + float vy; + } Body_t; + + typedef Universe_s{ + int length; + Body body[1]; + }Universe_t; + + */ + Schema<Universe> schema = Schema.of(Universe.class, resultTable -> resultTable + + .arrayLen("length").array("body", array -> array + .fields("x", "y", "z", "vx", "vy", "vz") + ) + ); + + static Universe create(Accelerator accelerator, int length) { + return schema.allocate(accelerator, length); + } + + } + + + @CodeReflection + static public void nbodyKernel(@RO KernelContext kc, @RW Universe universe, float mass, float delT, float espSqr) { + float accx = 0.0f; + float accy = 0.0f; + float accz = 0.0f; + Universe.Body me = universe.body(kc.x); + + for (int i = 0; i < kc.maxX; i++) { + Universe.Body body = universe.body(i); + float dx = body.x() - me.x(); + float dy = body.y() - me.y(); + float dz = body.z() - me.z(); + float invDist = (float) (1.0f / Math.sqrt(((dx * dx) + (dy * dy) + (dz * dz) + espSqr))); + float s = mass * invDist * invDist * invDist; + accx = accx + (s * dx); + accy = accy + (s * dy); + accz = accz + (s * dz); + } + accx = accx * delT; + accy = accy * delT; + accz = accz * delT; + me.x(me.x() + (me.vx() * delT + accx * .5f * delT)); + me.y(me.y() + (me.vy() * delT + accy * .5f * delT)); + me.z(me.z() + (me.vz() * delT + accz * .5f * delT)); + me.vx(me.vx() + accx); + me.vy(me.vy() + accy); + me.vz(me.vz() + accz); + } + + @CodeReflection + public static void nbodyCompute(@RO ComputeContext cc, @RW Universe universe, float mass, float delT, float espSqr) { + float cmass = mass; + float cdelT = delT; + float cespSqr= espSqr; + + cc.dispatchKernel(universe.length(), kc -> nbodyKernel(kc, universe, cmass, cdelT, cespSqr)); + } + + final CLPlatform.CLDevice.CLContext.CLProgram.CLKernel kernel; - final CLWrapComputeContext CLWrapComputeContext; + final CLWrapComputeContext clWrapComputeContext; final CLWrapComputeContext.MemorySegmentState vel; final CLWrapComputeContext.MemorySegmentState pos; - - +final Accelerator accelerator; +final Universe universe; public CLNBodyGLWindow( Arena arena, int width, int height, GLTexture particle, int bodyCount, Mode mode) { super( arena, width, height, particle, bodyCount, mode); - this.CLWrapComputeContext = new CLWrapComputeContext(arena, 20); - this.vel = CLWrapComputeContext.register(xyzVelFloatArr.ptr()); - this.pos = CLWrapComputeContext.register(xyzPosFloatArr.ptr()); - - var platforms = CLPlatform.platforms(arena); - System.out.println("platforms " + platforms.size()); - var platform = platforms.get(0); - platform.devices.forEach(device -> { + final float maxDist = 80f; + if (mode.equals(Mode.HAT)){ + kernel = null; + clWrapComputeContext = null; + vel=null; + pos=null; + accelerator = new Accelerator(MethodHandles.lookup(), + new OpenCLBackend(OpenCLBackend.Mode.of("GPU")) + ); + universe = Universe.create(accelerator, bodyCount); + for (int body = 0; body < bodyCount; body++) { + Universe.Body b = universe.body(body); + final float theta = (float) (Math.random() * Math.PI * 2); + final float phi = (float) (Math.random() * Math.PI * 2); + final float radius = (float) (Math.random() * maxDist); + + // get random 3D coordinates in sphere + b.x((float) (radius * Math.cos(theta) * Math.sin(phi))); + b.y((float) (radius * Math.sin(theta) * Math.sin(phi))); + b.z((float) (radius * Math.cos(phi))); + } + + + }else { + this.universe = null; + this.accelerator=null; + this.clWrapComputeContext = new CLWrapComputeContext(arena, 20); + this.vel = clWrapComputeContext.register(xyzVelFloatArr.ptr()); + this.pos = clWrapComputeContext.register(xyzPosFloatArr.ptr()); + + var platforms = CLPlatform.platforms(arena); + System.out.println("platforms " + platforms.size()); + var platform = platforms.get(0); + platform.devices.forEach(device -> { + System.out.println(" Compute Units " + device.computeUnits()); + System.out.println(" Device Name " + device.deviceName()); + System.out.println(" Device Vendor " + device.deviceVendor()); + System.out.println(" Built In Kernels " + device.builtInKernels()); + }); + var device = platform.devices.get(0); System.out.println(" Compute Units " + device.computeUnits()); System.out.println(" Device Name " + device.deviceName()); System.out.println(" Device Vendor " + device.deviceVendor()); + System.out.println(" Built In Kernels " + device.builtInKernels()); - }); - var device = platform.devices.get(0); - System.out.println(" Compute Units " + device.computeUnits()); - System.out.println(" Device Name " + device.deviceName()); - System.out.println(" Device Vendor " + device.deviceVendor()); - - System.out.println(" Built In Kernels " + device.builtInKernels()); - var context = device.createContext(); - String code = switch (mode) { - case Mode.OpenCL -> """ - __kernel void nbody( __global float *xyzPos ,__global float* xyzVel, float mass, float delT, float espSqr ){ - int body = get_global_id(0); - int STRIDE=4; - int Xidx=0; - int Yidx=1; - int Zidx=2; - int bodyStride = body*STRIDE; - int bodyStrideX = bodyStride+Xidx; - int bodyStrideY = bodyStride+Yidx; - int bodyStrideZ = bodyStride+Zidx; - - float accx = 0.0; - float accy = 0.0; - float accz = 0.0; - float myPosx = xyzPos[bodyStrideX]; - float myPosy = xyzPos[bodyStrideY]; - float myPosz = xyzPos[bodyStrideZ]; - for (int i = 0; i < get_global_size(0); i++) { - int iStride = i*STRIDE; - float dx = xyzPos[iStride+Xidx] - myPosx; - float dy = xyzPos[iStride+Yidx] - myPosy; - float dz = xyzPos[iStride+Zidx] - myPosz; - float invDist = (float) 1.0/sqrt((float)((dx * dx) + (dy * dy) + (dz * dz) + espSqr)); - float s = mass * invDist * invDist * invDist; - accx = accx + (s * dx); - accy = accy + (s * dy); - accz = accz + (s * dz); + var context = device.createContext(); + String code = switch (mode) { + case Mode.OpenCL -> """ + __kernel void nbody( __global float *xyzPos ,__global float* xyzVel, float mass, float delT, float espSqr ){ + int body = get_global_id(0); + int STRIDE=4; + int Xidx=0; + int Yidx=1; + int Zidx=2; + int bodyStride = body*STRIDE; + int bodyStrideX = bodyStride+Xidx; + int bodyStrideY = bodyStride+Yidx; + int bodyStrideZ = bodyStride+Zidx; + + float accx = 0.0; + float accy = 0.0; + float accz = 0.0; + float myPosx = xyzPos[bodyStrideX]; + float myPosy = xyzPos[bodyStrideY]; + float myPosz = xyzPos[bodyStrideZ]; + for (int i = 0; i < get_global_size(0); i++) { + int iStride = i*STRIDE; + float dx = xyzPos[iStride+Xidx] - myPosx; + float dy = xyzPos[iStride+Yidx] - myPosy; + float dz = xyzPos[iStride+Zidx] - myPosz; + float invDist = (float) 1.0/sqrt((float)((dx * dx) + (dy * dy) + (dz * dz) + espSqr)); + float s = mass * invDist * invDist * invDist; + accx = accx + (s * dx); + accy = accy + (s * dy); + accz = accz + (s * dz); + } + accx = accx * delT; + accy = accy * delT; + accz = accz * delT; + xyzPos[bodyStrideX] = myPosx + (xyzVel[bodyStrideX] * delT) + (accx * 0.5 * delT); + xyzPos[bodyStrideY] = myPosy + (xyzVel[bodyStrideY] * delT) + (accy * 0.5 * delT); + xyzPos[bodyStrideZ] = myPosz + (xyzVel[bodyStrideZ] * delT) + (accz * 0.5 * delT); + + xyzVel[bodyStrideX] = xyzVel[bodyStrideX] + accx; + xyzVel[bodyStrideY] = xyzVel[bodyStrideY] + accy; + xyzVel[bodyStrideZ] = xyzVel[bodyStrideZ] + accz; + } - accx = accx * delT; - accy = accy * delT; - accz = accz * delT; - xyzPos[bodyStrideX] = myPosx + (xyzVel[bodyStrideX] * delT) + (accx * 0.5 * delT); - xyzPos[bodyStrideY] = myPosy + (xyzVel[bodyStrideY] * delT) + (accy * 0.5 * delT); - xyzPos[bodyStrideZ] = myPosz + (xyzVel[bodyStrideZ] * delT) + (accz * 0.5 * delT); + """; + /* case Mode.OpenCL4 -> """ + __kernel void nbody( __global float4 *xyzPos ,__global float4* xyzVel, float mass, float delT, float espSqr ){ + float4 acc = (0.0,0.0,0.0,0.0); + float4 myPos = xyzPos[get_global_id(0)]; + float4 myVel = xyzVel[get_global_id(0)]; + for (int i = 0; i < get_global_size(0); i++) { + float4 delta = xyzPos[i] - myPos; + float invDist = (float) 1.0/sqrt((float)((delta.x * delta.x) + (delta.y * delta.y) + (delta.z * delta.z) + espSqr)); + float s = mass * invDist * invDist * invDist; + acc= acc + (s * delta); + } + acc = acc*delT; + myPos = myPos + (myVel * delT) + (acc * delT)/2; + myVel = myVel + acc; + xyzPos[get_global_id(0)] = myPos; + xyzVel[get_global_id(0)] = myVel; - xyzVel[bodyStrideX] = xyzVel[bodyStrideX] + accx; - xyzVel[bodyStrideY] = xyzVel[bodyStrideY] + accy; - xyzVel[bodyStrideZ] = xyzVel[bodyStrideZ] + accz; + } + """;*/ + case Mode.OpenCL4 -> """ + __kernel void nbody( __global float4 *xyzPos ,__global float4* xyzVel, float mass, float delT, float espSqr ){ + float4 acc = (0.0,0.0,0.0,0.0); + float4 myPos = xyzPos[get_global_id(0)]; + float4 myVel = xyzVel[get_global_id(0)]; + for (int i = 0; i < get_global_size(0); i++) { + float4 delta = xyzPos[i] - myPos; + float invDist = (float) 1.0/sqrt((float)((delta.x * delta.x) + (delta.y * delta.y) + (delta.z * delta.z) + espSqr)); + float s = mass * invDist * invDist * invDist; + acc= acc + (s * delta); + } + acc = acc*delT; + myPos = myPos + (myVel * delT) + (acc * delT)/2; + myVel = myVel + acc; + xyzPos[get_global_id(0)] = myPos; + xyzVel[get_global_id(0)] = myVel; - } - """; - case Mode.OpenCL4 -> """ - __kernel void nbody( __global float4 *xyzPos ,__global float4* xyzVel, float mass, float delT, float espSqr ){ - float4 acc = (0.0,0.0,0.0,0.0); - float4 myPos = xyzPos[get_global_id(0)]; - float4 myVel = xyzVel[get_global_id(0)]; - for (int i = 0; i < get_global_size(0); i++) { - float4 delta = xyzPos[i] - myPos; - float invDist = (float) 1.0/sqrt((float)((delta.x * delta.x) + (delta.y * delta.y) + (delta.z * delta.z) + espSqr)); - float s = mass * invDist * invDist * invDist; - acc= acc + (s * delta); } - acc = acc*delT; - myPos = myPos + (myVel * delT) + (acc * delT)/2; - myVel = myVel + acc; - xyzPos[get_global_id(0)] = myPos; - xyzVel[get_global_id(0)] = myVel; + """; + default -> throw new IllegalStateException(); + }; + var program = context.buildProgram(code); + kernel = program.getKernel("nbody"); + } + } + @Override + public void display() { + if (mode.equals(Mode.HAT)) { + moveBodies(); + glClearColor(0f, 0f, 0f, 0f); + glClear(GL_COLOR_BUFFER_BIT() | GL_DEPTH_BUFFER_BIT()); + glEnable(GL_TEXTURE_2D()); // Annoyingly important, + glBindTexture(GL_TEXTURE_2D(), textureBuf.get(particle.idx)); + + glPushMatrix1(() -> { + glScalef(.01f, .01f, .01f); + glColor3f(1f, 1f, 1f); + glQuads(() -> { + for (int bodyIdx = 0; bodyIdx < bodyCount; bodyIdx++) { + var bodyf4 = universe.body(bodyIdx);//xyzPosFloatArr.get(bodyIdx); + + /* + * Textures are mapped to a quad by defining the vertices in + * the order SW,NW,NE,SE + & + * 2--->3 + * ^ | + * | v + * 1 4 + * + * Here we are describing the 'texture plane' for the body. + * Ideally we need to rotate this to point to the camera (see billboarding) + */ + + glTexCoord2f(WEST, SOUTH); + glVertex3f(bodyf4.x() + WEST + dx, bodyf4.y() + SOUTH + dy, bodyf4.z() + dz); + glTexCoord2f(WEST, NORTH); + glVertex3f(bodyf4.x() + WEST + dx, bodyf4.y() + NORTH + dy, bodyf4.z() + dz); + glTexCoord2f(EAST, NORTH); + glVertex3f(bodyf4.x() + EAST + dx, bodyf4.y() + NORTH + dy, bodyf4.z() + dz); + glTexCoord2f(EAST, SOUTH); + glVertex3f(bodyf4.x() + EAST + dx, bodyf4.y() + SOUTH + dy, bodyf4.z() + dz); } - """; - default -> throw new IllegalStateException(); - }; - var program = context.buildProgram(code); - kernel = program.getKernel("nbody"); + }); + }); + + glDisable(GL_TEXTURE_2D()); // Annoyingly important .. took two days to work that out + //glUseProgram(0); + glMatrixMode(GL_MODELVIEW()); + glPushMatrix1(() -> { + glColor3f(0.0f, 1.0f, 0.0f); + var font = glutBitmapTimesRoman24$segment(); + long elapsed = System.currentTimeMillis() - startTime; + float secs = elapsed / 1000f; + var FPS = "Mode: "+mode.toString()+" Bodies "+bodyCount+" FPS: "+((frameCount / secs)); + // System.out.print(" gw "+glutGet(GLUT_SCREEN_WIDTH())+" gh "+glutGet(GLUT_SCREEN_HEIGHT())); + // System.out.print(" a "+aspect+",s "+size); + // System.out.println(" w "+width+" h"+height); + + glRasterPos2f(-.8f, .7f); + for (int c : FPS.getBytes()) { + glutBitmapCharacter(font, c); + } + }); + glutSwapBuffers(); + frameCount++; + }else{ + super.display(); + } } @Override protected void moveBodies() { - if (mode.equals(Mode.OpenCL4) || mode.equals(Mode.OpenCL)) { + if (mode.equals(Mode.HAT)) { + float cmass = mass; + float cdelT = delT; + float cespSqr = espSqr; + Universe cuniverse = universe; + accelerator.compute(cc -> nbodyCompute(cc, cuniverse, cmass, cdelT, cespSqr)); + }else if (mode.equals(Mode.OpenCL4) || mode.equals(Mode.OpenCL)) { if (frameCount == 0) { vel.copyToDevice = true; pos.copyToDevice = true; @@ -145,7 +389,7 @@ protected void moveBodies() { vel.copyFromDevice = false; pos.copyFromDevice = true; - kernel.run(CLWrapComputeContext, bodyCount, pos, vel, mass, delT, espSqr); + kernel.run(clWrapComputeContext, bodyCount, pos, vel, mass, delT, espSqr); } else { super.moveBodies(); } @@ -154,7 +398,7 @@ protected void moveBodies() { public static void main(String[] args) throws IOException { int particleCount = args.length > 2 ? Integer.parseInt(args[2]) : 32768; - NBodyGLWindow.Mode mode = NBodyGLWindow.Mode.of(args.length > 3 ? args[3] : NBodyGLWindow.Mode.OpenCL.toString()); + NBodyGLWindow.Mode mode = NBodyGLWindow.Mode.of(args.length > 3 ? args[3] : NBodyGLWindow.Mode.OpenCL4.toString()); System.out.println("mode" + mode); try (var arena = mode.equals(NBodyGLWindow.Mode.JavaMT4) || mode.equals(NBodyGLWindow.Mode.JavaMT) ? Arena.ofShared() : Arena.ofConfined()) { var particleTexture = new GLTexture(arena, NBody.class.getResourceAsStream("/particle.png")); diff --git a/hat/hat/src/main/java/hat/buffer/Buffer.java b/hat/hat/src/main/java/hat/buffer/Buffer.java index dddefb832ee..07fe1c81432 100644 --- a/hat/hat/src/main/java/hat/buffer/Buffer.java +++ b/hat/hat/src/main/java/hat/buffer/Buffer.java @@ -49,7 +49,7 @@ default void clearDeviceDirty(){ } default void setHostDirty(){ - SegmentMapper.BufferState.of(this).setHostDirty(); + SegmentMapper.BufferState.of(this).setHostDirty(true); } interface Union extends MappableIface { diff --git a/hat/hat/src/main/java/hat/ifacemapper/SegmentMapper.java b/hat/hat/src/main/java/hat/ifacemapper/SegmentMapper.java index 532e86dbce8..a233051e642 100644 --- a/hat/hat/src/main/java/hat/ifacemapper/SegmentMapper.java +++ b/hat/hat/src/main/java/hat/ifacemapper/SegmentMapper.java @@ -402,6 +402,10 @@ static long byteSize(){ MemoryLayout.PathElement.groupElement("magic2") ); + static final VarHandle vendorPtr = stateMemoryLayout.varHandle( + MemoryLayout.PathElement.groupElement("vendorPtr") + ); + public static long getLayoutSizeAfterPadding(GroupLayout layout) { return layout.byteSize() + ((layout.byteSize() % BufferState.alignment) == 0 ? 0 : BufferState.alignment - (layout.byteSize() % BufferState.alignment)); @@ -424,50 +428,79 @@ public BufferState assignBits(int bits) { BufferState.bits.set(segment, paddedSize, bits); return this; } - public BufferState orBits(int bits) { - BufferState.bits.set(segment, paddedSize, getBits()|bits); + public BufferState and(int bitz) { + BufferState.bits.set(segment, paddedSize, getBits()&bitz); + return this; + } + public BufferState or(int bitz) { + BufferState.bits.set(segment, paddedSize, getBits()|bitz); return this; } - public BufferState resetBits(int bits) { - int bitz = getBits(); // say bits = 0b0111 (7) and bitz = 0b0100 (4) - int xored = bits^bitz; // xored = 0b0011 (3) - BufferState.bits.set(segment, paddedSize, xored); + + public BufferState xor(int bitz) { + // if getBits() = 0b0111 (7) and bitz = 0b0100 (4) xored = 0x0011 3 + // if getBits() = 0b0011 (3) and bitz = 0b0100 (4) xored = 0x0111 7 + BufferState.bits.set(segment, paddedSize, getBits()^bitz); return this; } + public BufferState andNot(int bitz) { + // if getBits() = 0b0111 (7) and bitz = 0b0100 (4) andNot = 0b0111 & 0b1011 = 0x0011 3 + // if getBits() = 0b0011 (3) and bitz = 0b0100 (4) andNot = 0b0011 & 0b1011 = 0x0011 3 + BufferState.bits.set(segment, paddedSize, getBits()&~bitz); + return this; + } + + public int getBits() { return (Integer) BufferState.bits.get(segment, paddedSize); } - public boolean testAllBitsAreSet(int bits) { - return (getBits()&bits)==bits; + public MemorySegment getVendorPtr(){return (MemorySegment) BufferState.vendorPtr.get(segment, paddedSize);} + public void setVendorPtr(MemorySegment vendorPtr){BufferState.vendorPtr.set(segment, paddedSize,vendorPtr);} + public boolean all(int bitz) { + return (getBits()&bitz)==bitz; } - public boolean testAnyBitsAreSet(int bits) { - return (getBits()&bits)!=0; + public boolean any(int bitz) { + return (getBits()&bitz)!=0; + } + public BufferState setHostDirty(boolean dirty) { + if (dirty){ + or(BIT_HOST_DIRTY); + }else{ + andNot(BIT_HOST_DIRTY); + } + return this; + } + public BufferState setDeviceDirty(boolean dirty) { + if (dirty){ + or(BIT_DEVICE_DIRTY); + }else{ + andNot(BIT_DEVICE_DIRTY); // this is wrong we want bits&=!BIT_DEVICE_DIRTY + } + return this; } - public boolean isHostNew() { - return testAllBitsAreSet(BIT_HOST_NEW); + return all(BIT_HOST_NEW); } public boolean isHostDirty() { - return testAllBitsAreSet(BIT_HOST_DIRTY); + return all(BIT_HOST_DIRTY); } public boolean isHostNewOrDirty() { - return testAllBitsAreSet(BIT_HOST_NEW|BIT_HOST_DIRTY); + return all(BIT_HOST_NEW|BIT_HOST_DIRTY); } public boolean isDeviceDirty() { - return testAllBitsAreSet(BIT_DEVICE_DIRTY); + return all(BIT_DEVICE_DIRTY); } public BufferState clearDeviceDirty() { - return resetBits(BIT_DEVICE_DIRTY); + return xor(BIT_DEVICE_DIRTY); } public BufferState resetHostDirty() { - return resetBits(BIT_HOST_DIRTY); + return xor(BIT_HOST_DIRTY); } public BufferState resetHostNew() { - return resetBits(BIT_HOST_NEW); + return xor(BIT_HOST_NEW); } - public long magic1() { return (Long) BufferState.magic1.get(segment, paddedSize); } @@ -492,15 +525,17 @@ public String toString() { if (ok()){ builder.append("State:ok").append("\n"); builder.append("State:Bits:").append(paddedString(getBits())); - if (testAllBitsAreSet(BIT_HOST_DIRTY)){ + if (all(BIT_HOST_DIRTY)){ builder.append(",").append("HOST_DIRTY"); } - if (testAllBitsAreSet(BIT_DEVICE_DIRTY)){ + if (all(BIT_DEVICE_DIRTY)){ builder.append(",").append("DEVICE_DIRTY"); } - if (testAllBitsAreSet(BIT_HOST_NEW)){ + if (all(BIT_HOST_NEW)){ builder.append(",").append("HOST_NEW"); } + var vendorPtr = getVendorPtr(); + builder.append(",").append("VENDOR_PTR:").append(Long.toHexString(vendorPtr.address())); builder.append("\n"); @@ -510,8 +545,7 @@ public String toString() { return builder.toString(); } - public void setHostDirty() { - } + } default T allocate(Arena arena, BoundSchema<?> boundSchema) { diff --git a/hat/intellij/.idea/misc.xml b/hat/intellij/.idea/misc.xml index ab873a552b4..d71f747b7ca 100644 --- a/hat/intellij/.idea/misc.xml +++ b/hat/intellij/.idea/misc.xml @@ -1,6 +1,6 @@ <?xml version="1.0" encoding="UTF-8"?> <project version="4"> - <component name="ProjectRootManager" version="2" languageLevel="JDK_X" project-jdk-name="24-ea" project-jdk-type="JavaSDK"> + <component name="ProjectRootManager" version="2" languageLevel="JDK_X" project-jdk-name="24-ea (2)" project-jdk-type="JavaSDK"> <output url="file://$PROJECT_DIR$/out" /> </component> </project> \ No newline at end of file diff --git a/hat/intellij/clwrap.iml b/hat/intellij/clwrap.iml index c1c0d6000d8..c29873b296f 100644 --- a/hat/intellij/clwrap.iml +++ b/hat/intellij/clwrap.iml @@ -18,5 +18,6 @@ <SOURCES /> </library> </orderEntry> + <orderEntry type="module" module-name="hat" /> </component> </module> \ No newline at end of file diff --git a/hat/intellij/life.iml b/hat/intellij/life.iml index df6f3db7360..9780d9b96fa 100644 --- a/hat/intellij/life.iml +++ b/hat/intellij/life.iml @@ -14,5 +14,6 @@ <orderEntry type="module" module-name="backend_spirv" /> <orderEntry type="module" module-name="wrap" /> <orderEntry type="module" module-name="clwrap" /> + <orderEntry type="module" module-name="backend_ffi_opencl" /> </component> </module> \ No newline at end of file diff --git a/hat/wrap/clwrap/src/main/java/wrap/clwrap/CLPlatform.java b/hat/wrap/clwrap/src/main/java/wrap/clwrap/CLPlatform.java index 4a7eb1f2b96..b4d3cfda0cb 100644 --- a/hat/wrap/clwrap/src/main/java/wrap/clwrap/CLPlatform.java +++ b/hat/wrap/clwrap/src/main/java/wrap/clwrap/CLPlatform.java @@ -24,6 +24,8 @@ */ package wrap.clwrap; +import hat.buffer.Buffer; +import hat.ifacemapper.SegmentMapper; import opencl.opencl_h; import wrap.ArenaHolder; import wrap.Wrap; @@ -34,18 +36,27 @@ import java.util.List; import static java.lang.foreign.MemorySegment.NULL; -import static opencl.opencl_h.*; +import static opencl.opencl_h.CL_DEVICE_BUILT_IN_KERNELS; +import static opencl.opencl_h.CL_DEVICE_MAX_COMPUTE_UNITS; +import static opencl.opencl_h.CL_DEVICE_NAME; +import static opencl.opencl_h.CL_DEVICE_TYPE_ALL; +import static opencl.opencl_h.CL_DEVICE_VENDOR; +import static opencl.opencl_h.CL_MEM_READ_WRITE; +import static opencl.opencl_h.CL_MEM_USE_HOST_PTR; +import static opencl.opencl_h.CL_PROGRAM_BUILD_LOG; +import static opencl.opencl_h.CL_QUEUE_PROFILING_ENABLE; +import static opencl.opencl_h.CL_SUCCESS; // https://streamhpc.com/blog/2013-04-28/opencl-error-codes/ public class CLPlatform implements ArenaHolder { public static List<CLPlatform> platforms(Arena arena) { var arenaWrapper = ArenaHolder.wrap(arena); List<CLPlatform> platforms = new ArrayList<>(); - var platformc = arenaWrapper.intPtr( 0); + var platformc = arenaWrapper.intPtr(0); if ((opencl_h.clGetPlatformIDs(0, NULL, platformc.ptr())) != CL_SUCCESS()) { System.out.println("Failed to get opencl platforms"); } else { - var platformIds = arenaWrapper.ptrArr( platformc.get()); + var platformIds = arenaWrapper.ptrArr(platformc.get()); if ((opencl_h.clGetPlatformIDs(platformc.get(), platformIds.ptr(), NULL)) != CL_SUCCESS()) { System.out.println("Failed getting platform ids"); } else { @@ -89,6 +100,7 @@ public int computeUnits() { public String deviceName() { return strDeviceInfo(CL_DEVICE_NAME()); } + public String deviceVendor() { return strDeviceInfo(CL_DEVICE_VENDOR()); } @@ -194,7 +206,7 @@ public void run(CLWrapComputeContext clWrapComputeContext, int range, Object... for (int i = 0; i < args.length; i++) { if (args[i] instanceof CLWrapComputeContext.MemorySegmentState memorySegmentState) { if (memorySegmentState.clMemPtr == null) { - memorySegmentState.clMemPtr = CLWrapComputeContext.ClMemPtr.of(arena(),opencl_h.clCreateBuffer(program.context.context, + memorySegmentState.clMemPtr = CLWrapComputeContext.ClMemPtr.of(arena(), opencl_h.clCreateBuffer(program.context.context, CL_MEM_USE_HOST_PTR() | CL_MEM_READ_WRITE(), memorySegmentState.memorySegment.byteSize(), memorySegmentState.memorySegment, @@ -202,7 +214,7 @@ public void run(CLWrapComputeContext clWrapComputeContext, int range, Object... if (!status.isOK()) { throw new RuntimeException("failed to create memory buffer " + status.get()); } - } + } if (memorySegmentState.copyToDevice) { status.set(opencl_h.clEnqueueWriteBuffer(program.context.queue, memorySegmentState.clMemPtr.get(), @@ -223,6 +235,46 @@ public void run(CLWrapComputeContext clWrapComputeContext, int range, Object... if (!status.isOK()) { System.out.println("failed to set arg " + status); } + } else if (args[i] instanceof Buffer buffer) { + // System.out.println("Arg "+i+" is a buffer so checking if we need to write"); + SegmentMapper.BufferState bufferState = SegmentMapper.BufferState.of(buffer); + + //System.out.println("Before possible write"+ bufferState); + MemorySegment memorySegment = Buffer.getMemorySegment(buffer); + + CLWrapComputeContext.ClMemPtr clmem = clWrapComputeContext.clMemMap.computeIfAbsent(memorySegment, k -> + CLWrapComputeContext.ClMemPtr.of(arena(), opencl_h.clCreateBuffer(program.context.context, + CL_MEM_USE_HOST_PTR() | CL_MEM_READ_WRITE(), + memorySegment.byteSize(), + memorySegment, + status.ptr())) + ); + if (bufferState.isHostDirty()) { + + //System.out.println("arg " + args[i] + " isHostDirty copying in"); + status.set(opencl_h.clEnqueueWriteBuffer(program.context.queue, + clmem.get(), + clWrapComputeContext.blockInt(), + 0, + memorySegment.byteSize(), + memorySegment, + clWrapComputeContext.eventc(), + clWrapComputeContext.eventsPtr(), + clWrapComputeContext.nextEventPtrSlot() + )); + if (!status.isOK()) { + System.out.println("failed to enqueue write " + status); + } + } else { + + // System.out.println("arg "+args[i]+" is not HostDirty not copying in"); + } + // System.out.println("After possible write "+ bufferState); + status.set(opencl_h.clSetKernelArg(kernel, i, clmem.sizeof(), clmem.ptr())); + if (!status.isOK()) { + System.out.println("failed to set arg " + status); + } + } else { Wrap.Ptr ptr = switch (args[i]) { case Integer intArg -> intPtr(intArg); @@ -246,9 +298,9 @@ public void run(CLWrapComputeContext clWrapComputeContext, int range, Object... NULL, globalSize.ptr(), NULL, - clWrapComputeContext.eventc(), - clWrapComputeContext.eventsPtr(), - clWrapComputeContext.nextEventPtrSlot() + clWrapComputeContext.eventc(), + clWrapComputeContext.eventsPtr(), + clWrapComputeContext.nextEventPtrSlot() ) ); if (!status.isOK()) { @@ -276,6 +328,31 @@ public void run(CLWrapComputeContext clWrapComputeContext, int range, Object... System.out.println("failed to enqueue read " + status); } } + } else if (args[i] instanceof Buffer buffer) { + // System.out.println("Arg "+i+" is a buffer so checking if we need to read"); + SegmentMapper.BufferState bufferState = SegmentMapper.BufferState.of(buffer); + MemorySegment memorySegment = Buffer.getMemorySegment(buffer); + CLWrapComputeContext.ClMemPtr clmem = clWrapComputeContext.clMemMap.get(memorySegment); + // System.out.println("Before possible read "+ bufferState); + if (bufferState.isDeviceDirty()) { + // System.out.println("arg " + args[i] + " isDeviceDirty copying out"); + status.set(opencl_h.clEnqueueReadBuffer(program.context.queue, + clmem.get(), + clWrapComputeContext.blockInt(), + 0, + memorySegment.byteSize(), + memorySegment, + clWrapComputeContext.eventc(), + clWrapComputeContext.eventsPtr(), + clWrapComputeContext.nextEventPtrSlot() + )); + if (!status.isOK()) { + System.out.println("failed to enqueue read " + status); + } + } else { + // System.out.println("arg "+args[i]+" isnot DeviceDirty not copying out"); + } + } } // if (!computeContext.alwaysBlock) { @@ -355,7 +432,7 @@ public CLPlatform(Arena arena, MemorySegment platformId) { this.secretarena = arena; this.platformId = platformId; this.status = CLStatusPtr.of(arena()); - var devicec = intPtr( 0); + var devicec = intPtr(0); if ((status.set(opencl_h.clGetDeviceIDs(platformId, CL_DEVICE_TYPE_ALL(), 0, NULL, devicec.ptr()))) != opencl_h.CL_SUCCESS()) { System.err.println("Failed getting devicec for platform 0 "); } else { diff --git a/hat/wrap/clwrap/src/main/java/wrap/clwrap/CLWrapComputeContext.java b/hat/wrap/clwrap/src/main/java/wrap/clwrap/CLWrapComputeContext.java index 8937c137f3d..13f5c4e8394 100644 --- a/hat/wrap/clwrap/src/main/java/wrap/clwrap/CLWrapComputeContext.java +++ b/hat/wrap/clwrap/src/main/java/wrap/clwrap/CLWrapComputeContext.java @@ -92,6 +92,7 @@ public record ClMemPtr(MemorySegment ptr) implements Wrap.Ptr { public static ClMemPtr of(Arena arena, MemorySegment clmem){ return new ClMemPtr(arena.allocateFrom(AddressLayout.ADDRESS,clmem)); } + MemorySegment get(){ return ptr.get(ValueLayout.ADDRESS,0); } @@ -128,4 +129,6 @@ public CLWrapComputeContext(Arena arena, int maxEvents) { } private final Map<MemorySegment, MemorySegmentState> memorySegmentToStateMap = new HashMap<>(); + public final Map<MemorySegment, CLWrapComputeContext.ClMemPtr> clMemMap = new HashMap<>(); + } diff --git a/hat/wrap/wrap/src/main/java/wrap/Sequence.java b/hat/wrap/wrap/src/main/java/wrap/Sequence.java index 2deaea9dbbd..16a69b71faa 100644 --- a/hat/wrap/wrap/src/main/java/wrap/Sequence.java +++ b/hat/wrap/wrap/src/main/java/wrap/Sequence.java @@ -51,36 +51,36 @@ public static Sequence of(MemorySegment memorySegment, MemoryLayout memoryLayout } - public Object get(int idx) { - return varHandle.get(memorySegment, 0, (long) idx); + public Object get(long idx) { + return varHandle.get(memorySegment, 0, idx); } - public byte i8(int idx) { + public byte i8(long idx) { return (byte) get(idx); } - public short i16(int idx) { + public short i16(long idx ) { return (short) get(idx); } - public int i32(int idx) { + public int i32(long idx ) { return (int) get(idx); } - public long i64(int idx) { + public long i64(long idx ) { return (long) get(idx); } - public float f32(int idx) { + public float f32(long idx ) { return (float) get(idx); } - public double f64(int idx) { + public double f64(long idx ) { return (double) get(idx); } - public Sequence set(int idx, byte v) { - varHandle.set(memorySegment, 0, (long) idx, v); + public Sequence set(long idx , byte v) { + varHandle.set(memorySegment, 0, idx, v); return this; } }