diff --git a/hat/backends/ffi/opencl/cpp/opencl_backend.cpp b/hat/backends/ffi/opencl/cpp/opencl_backend.cpp index 1aa36ab05c3..0816c7bacbf 100644 --- a/hat/backends/ffi/opencl/cpp/opencl_backend.cpp +++ b/hat/backends/ffi/opencl/cpp/opencl_backend.cpp @@ -22,6 +22,7 @@ * or visit www.oracle.com if you need additional information or have any * questions. */ +#define opencl_backend_cpp #include "opencl_backend.h" OpenCLBackend::OpenCLConfig::OpenCLConfig(int mode): @@ -29,8 +30,11 @@ OpenCLBackend::OpenCLConfig::OpenCLConfig(int mode): gpu((mode&GPU_BIT)==GPU_BIT), cpu((mode&CPU_BIT)==CPU_BIT), minimizeCopies((mode&MINIMIZE_COPIES_BIT)==MINIMIZE_COPIES_BIT), + alwaysCopy(!minimizeCopies), trace((mode&TRACE_BIT)==TRACE_BIT), traceCopies((mode&TRACE_COPIES_BIT)==TRACE_COPIES_BIT), + traceEnqueues((mode&TRACE_ENQUEUES_BIT)==TRACE_ENQUEUES_BIT), + traceCalls((mode&TRACE_CALLS_BIT)==TRACE_CALLS_BIT), traceSkippedCopies((mode&TRACE_SKIPPED_COPIES_BIT)==TRACE_SKIPPED_COPIES_BIT), info((mode&INFO_BIT)==INFO_BIT), showCode((mode&SHOW_CODE_BIT)==SHOW_CODE_BIT), @@ -41,9 +45,12 @@ OpenCLBackend::OpenCLConfig::OpenCLConfig(int mode): std::cout << "native gpu " << gpu<<std::endl; std::cout << "native cpu " << cpu<<std::endl; std::cout << "native minimizeCopies " << minimizeCopies<<std::endl; + std::cout << "native alwaysCopy " << alwaysCopy<<std::endl; std::cout << "native trace " << trace<<std::endl; std::cout << "native traceSkippedCopies " << traceSkippedCopies<<std::endl; + std::cout << "native traceCalls " << traceCalls<<std::endl; std::cout << "native traceCopies " << traceCopies<<std::endl; + std::cout << "native traceEnqueues " << traceEnqueues<<std::endl; std::cout << "native profile " << profile<<std::endl; } } @@ -180,6 +187,9 @@ bool OpenCLBackend::getBufferFromDeviceIfDirty(void *memorySegment, long memoryS std::cout << "from getBufferFromDeviceIfDirty Buffer is device dirty so attempting to get buffer from device from OpenCLBackend "<<std::endl; // we use static 
cast because the ptr type is void* static_cast<OpenCLProgram::OpenCLKernel::OpenCLBuffer *>(bufferState->vendorPtr)->copyFromDevice(); + if (openclConfig.traceEnqueues | openclConfig.traceCopies){ + std::cout << "copying buffer from device (from java access) "<< std::endl; + } // if (openclConfig.traceCopies){ // std::cout << "copying buffer from device "<< std::endl; // bufferState->dump("After copy from device"); diff --git a/hat/backends/ffi/opencl/cpp/opencl_backend_kernel_dispatch.cpp b/hat/backends/ffi/opencl/cpp/opencl_backend_kernel_dispatch.cpp index ff71974b43a..e69889e66a6 100644 --- a/hat/backends/ffi/opencl/cpp/opencl_backend_kernel_dispatch.cpp +++ b/hat/backends/ffi/opencl/cpp/opencl_backend_kernel_dispatch.cpp @@ -27,7 +27,7 @@ void dispatchKernel(Kernel kernel, KernelContext kc, Arg ... args) { for (int argn = 0; argn<args.length; argn++){ Arg arg = args[argn]; - if (!minimizingBuffers || (((arg.flags &JavaDirty)==JavaDirty) && kernel.readsFrom(arg))) { + if (alwaysCopyBuffers || (((arg.flags &JavaDirty)==JavaDirty) && kernel.readsFrom(arg))) { enqueueCopyToDevice(arg); } } @@ -36,7 +36,7 @@ void dispatchKernel(Kernel kernel, KernelContext kc, Arg ... args) { for (int argn = 0; argn<args.length; argn++){ Arg arg = args[argn]; - if (!minimizingBuffers){ + if (alwaysCopyBuffers){ enqueueCopyFromDevice(arg); arg.flags = 0; }else{ @@ -50,24 +50,42 @@ void dispatchKernel(Kernel kernel, KernelContext kc, Arg ... 
args) { } */ -bool shouldCopyToDevice(BufferState_s *bufferState, Arg_s *arg ){ +bool shouldCopyToDevice(BufferState_s *bufferState, Arg_s *arg, bool alwaysCopy, bool showWhy){ bool kernelReadsFromThisArg = (arg->value.buffer.access==RW_BYTE) || (arg->value.buffer.access==RO_BYTE); bool isHostDirtyOrNew = bufferState->isHostDirty() | bufferState->isHostNew(); bool result= (kernelReadsFromThisArg & isHostDirtyOrNew); - if (result && bufferState->isDeviceDirty()){ - std::cout << "already still on GPU!"<<std::endl; - result= false; - } - return result; + + if (showWhy){ + std::cout<< + "alwaysCopy="<<alwaysCopy + << " | argRW="<<(arg->value.buffer.access==RW_BYTE) + << " | argRO="<<(arg->value.buffer.access==RO_BYTE) + << " | kernelNeedsToRead="<< kernelReadsFromThisArg + << " | hostDirty="<< bufferState->isHostDirty() + << " | hostNew="<< bufferState->isHostNew() + << " | deviceDirty="<< bufferState->isDeviceDirty() + <<" so " + ; + } + if (result && bufferState->isDeviceDirty()){ + result= false; + } + return alwaysCopy |result; } -bool shouldCopyFromDevice( BufferState_s *bufferState, Arg_s *arg ){ +bool shouldCopyFromDevice( BufferState_s *bufferState, Arg_s *arg, bool alwaysCopy, bool showWhy ){ bool kernelWroteToThisArg = (arg->value.buffer.access==WO_BYTE) | (arg->value.buffer.access==RW_BYTE); bool result = kernelWroteToThisArg; - //if (!result){ - // std::cout << "shouldCopyFromDevice false"<<std::endl; - // } - return result; + if (showWhy){ + std::cout<< + "alwaysCopy="<<alwaysCopy + << " | argWO="<<(arg->value.buffer.access==WO_BYTE) + << " | argRW="<<(arg->value.buffer.access==RW_BYTE) + << " | kernelWroteToThisArg="<< kernelWroteToThisArg + <<" so " + ; + } + return alwaysCopy; } long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) { @@ -78,6 +96,9 @@ long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) { // std::cout << "Kernel name '"<< (dynamic_cast<Backend::Program::Kernel*>(this))->name<<"'"<<std::endl; 
openclBackend->openclQueue.marker(openclBackend->openclQueue.EnterKernelDispatchBits, (dynamic_cast<Backend::Program::Kernel*>(this))->name); + if (openclBackend->openclConfig.traceCalls){ + std::cout << "ndrange(\"" << (dynamic_cast<Backend::Program::Kernel*>(this))->name<< "\"){"<<std::endl; + } if (openclBackend->openclConfig.trace){ Sled::show(std::cout, argArray); } @@ -113,20 +134,23 @@ long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) { } openclBuffer= static_cast<OpenCLBuffer*>(bufferState->vendorPtr); } - if (!openclBackend->openclConfig.minimizeCopies - || shouldCopyToDevice(bufferState, arg)){ + if (shouldCopyToDevice(bufferState, arg,openclBackend->openclConfig.alwaysCopy, + (openclBackend->openclConfig.traceCopies|openclBackend->openclConfig.traceEnqueues))){ if (openclBackend->openclConfig.traceCopies){ - // std::cout << "We are not minimising copies OR (HOST is JAVA dirty and the kernel is READS this arg) so copying arg " << arg->idx <<" to device "<< std::endl; + std::cout << "We are always cloying OR (HOST is JAVA dirty and the kernel is READS this arg) so copying arg " << arg->idx <<" to device "<< std::endl; } bufferState->clearHostDirty(); + if (openclBackend->openclConfig.traceEnqueues){ + std::cout << "copying arg " << arg->idx <<" to device "<< std::endl; + } openclBuffer->copyToDevice(); }else{ if (openclBackend->openclConfig.traceSkippedCopies){ - std::cout << "NOT copying arg " << arg->idx <<" to device "<< std::endl; + std::cout << "NOT copying arg " << arg->idx <<" to device "<< std::endl; // bufferState->dump("After copy from device"); - } + } } cl_int status = clSetKernelArg(kernel, arg->idx, sizeof(cl_mem), &openclBuffer->clMem); @@ -184,8 +208,9 @@ long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) { std::cerr << OpenCLBackend::errorMsg(status) << std::endl; exit(1); } - if (openclBackend->openclConfig.trace){ - std::cout << "enqueued kernel dispatch globalSize=" << globalSize << 
std::endl; + if (openclBackend->openclConfig.trace | openclBackend->openclConfig.traceEnqueues){ + std::cout << "enqueued kernel dispatch \"" << (dynamic_cast<Backend::Program::Kernel*>(this))->name << + "\" globalSize=" << globalSize << std::endl; } @@ -193,12 +218,17 @@ long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) { Arg_s *arg = argSled.arg(i); if (arg->variant == '&') { BufferState_s * bufferState = BufferState_s::of(arg ); - if (!openclBackend->openclConfig.minimizeCopies || shouldCopyFromDevice(bufferState,arg)){ + if (shouldCopyFromDevice(bufferState,arg, + openclBackend->openclConfig.alwaysCopy, + openclBackend->openclConfig.traceEnqueues)){ static_cast<OpenCLBuffer *>(bufferState->vendorPtr)->copyFromDevice(); //if (openclBackend->openclConfig.traceCopies){ //std::cout << "copying arg " << arg->idx <<" from device "<< std::endl; // bufferState->dump("After copy from device"); //} + if (openclBackend->openclConfig.traceEnqueues){ + std::cout << "copying arg " << arg->idx <<" from device "<< std::endl; + } bufferState->setDeviceDirty(); }else{ if (openclBackend->openclConfig.traceSkippedCopies){ @@ -216,5 +246,8 @@ long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) { ); openclBackend->openclQueue.wait(); openclBackend->openclQueue.release(); + if (openclBackend->openclConfig.traceCalls){ + std::cout << "\"" << (dynamic_cast<Backend::Program::Kernel*>(this))->name<< "\"}"<<std::endl; + } return 0; } diff --git a/hat/backends/ffi/opencl/include/opencl_backend.h b/hat/backends/ffi/opencl/include/opencl_backend.h index 8f6f5da2f75..cea91e9a918 100644 --- a/hat/backends/ffi/opencl/include/opencl_backend.h +++ b/hat/backends/ffi/opencl/include/opencl_backend.h @@ -45,33 +45,46 @@ class OpenCLBackend : public Backend { public: class OpenCLConfig{ public: - const static int GPU_BIT =1<<1; - const static int CPU_BIT =1<<2; - const static int MINIMIZE_COPIES_BIT =1<<3; - const static int TRACE_BIT =1<<4; - const static 
int PROFILE_BIT =1<<5; - const static int SHOW_CODE_BIT = 1 << 6; - const static int SHOW_KERNEL_MODEL_BIT = 1 << 7; - const static int SHOW_COMPUTE_MODEL_BIT = 1 <<8; - const static int INFO_BIT = 1 <<9; - const static int TRACE_COPIES_BIT = 1 <<10; - const static int TRACE_SKIPPED_COPIES_BIT = 1 <<11; + // These must sync with hat/backends/ffi/opencl/src/main/java/hat/backend/ffi/Config.java + // Bits 0-3 select platform id 0..15 + // Bits 4-7 select device id 0..15 + const static int START_BIT_IDX = 16; + const static int GPU_BIT =1<<START_BIT_IDX; + const static int CPU_BIT =1<<17; + const static int MINIMIZE_COPIES_BIT =1<<18; + const static int TRACE_BIT =1<<19; + const static int PROFILE_BIT =1<<20; + const static int SHOW_CODE_BIT = 1 << 21; + const static int SHOW_KERNEL_MODEL_BIT = 1 << 22; + const static int SHOW_COMPUTE_MODEL_BIT = 1 <<23; + const static int INFO_BIT = 1<<24; + const static int TRACE_COPIES_BIT = 1 <<25; + const static int TRACE_SKIPPED_COPIES_BIT = 1 <<26; + const static int TRACE_ENQUEUES_BIT = 1 <<27; + const static int TRACE_CALLS_BIT = 1 <<28; + const static int END_BIT_IDX = 29; + + const static char *bitNames[]; // See below for out of line definition int mode; bool gpu; bool cpu; bool minimizeCopies; + bool alwaysCopy; bool trace; bool profile; bool showCode; bool info; bool traceCopies; - bool traceSkippedCopies; + bool traceSkippedCopies; + bool traceEnqueues; + bool traceCalls; OpenCLConfig(int mode); virtual ~OpenCLConfig(); }; class OpenCLQueue { public: - static const int CopyToDeviceBits= 1<<20; + const static int START_BIT_IDX =20; + static const int CopyToDeviceBits= 1<<START_BIT_IDX; static const int CopyFromDeviceBits= 1<<21; static const int NDRangeBits =1<<22; static const int StartComputeBits= 1<<23; @@ -80,6 +93,7 @@ class OpenCLBackend : public Backend { static const int LeaveKernelDispatchBits= 1<<26; static const int HasConstCharPtrArgBits = 1<<27; static const int hasIntArgBits = 1<<28; + const static int END_BIT_IDX = 27; OpenCLBackend 
*openclBackend; size_t eventMax; cl_event *events; @@ -166,4 +180,21 @@ class OpenCLBackend : public Backend { public: static const char *errorMsg(cl_int status); }; -extern "C" long getOpenCLBackend(int mode, int platform, int device, int unused); \ No newline at end of file +extern "C" long getOpenCLBackend(int mode, int platform, int device, int unused); +#ifdef opencl_backend_cpp +const char *OpenCLBackend::OpenCLConfig::bitNames[] = { + "GPU", + "CPU", + "MINIMIZE_COPIES", + "TRACE", + "PROFILE", + "SHOW_CODE", + "SHOW_KERNEL_MODEL", + "SHOW_COMPUTE_MODEL", + "INFO", + "TRACE_COPIES", + "TRACE_SKIPPED_COPIES", + "TRACE_ENQUEUES", + "TRACE_CALLS" + }; +#endif \ No newline at end of file diff --git a/hat/backends/ffi/opencl/src/main/java/hat/backend/ffi/Config.java b/hat/backends/ffi/opencl/src/main/java/hat/backend/ffi/Config.java new file mode 100644 index 00000000000..c2c05003cfb --- /dev/null +++ b/hat/backends/ffi/opencl/src/main/java/hat/backend/ffi/Config.java @@ -0,0 +1,273 @@ +package hat.backend.ffi; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public record Config(int bits) { + record Bit(int index, String name){}; + // These must sync with hat/backends/ffi/opencl/include/opencl_backend.h + // Bits 0-3 select platform id 0..15 + // Bits 4-7 select device id 0..15 + private static final int START_BIT_IDX = 16; + private static final int GPU_BIT = 1 << START_BIT_IDX; + private static final int CPU_BIT = 1 << 17; + private static final int MINIMIZE_COPIES_BIT = 1 << 18; + private static final int TRACE_BIT = 1 << 19; + private static final int PROFILE_BIT = 1 << 20; + private static final int SHOW_CODE_BIT = 1 << 21; + private static final int SHOW_KERNEL_MODEL_BIT = 1 << 22; + private static final int SHOW_COMPUTE_MODEL_BIT = 1 << 23; + private static final int INFO_BIT = 1 << 24; + private static final int TRACE_COPIES_BIT = 1 << 25; + private static final int TRACE_SKIPPED_COPIES_BIT = 1 << 26; + private static 
final int TRACE_ENQUEUES_BIT = 1 << 27; + private static final int TRACE_CALLS_BIT = 1 << 28; + private static final int END_BIT_IDX = 29; + + private static String[] bitNames = { + "GPU", + "CPU", + "MINIMIZE_COPIES", + "TRACE", + "PROFILE", + "SHOW_CODE", + "SHOW_KERNEL_MODEL", + "SHOW_COMPUTE_MODEL", + "INFO", + "TRACE_COPIES", + "TRACE_SKIPPED_COPIES", + "TRACE_ENQUEUES", + "TRACE_CALLS" + }; + public static Config of() { + if ((((System.getenv("HAT") instanceof String e) ? e : "") + + ((System.getProperty("HAT") instanceof String p) ? p : "")) instanceof String opts) { + return of(opts); + } + return of(); + } + + public static Config of(int bits) { + return new Config(bits); + } + + public static Config of(List<Config> configs) { + int allBits = 0; + for (Config config : configs) { + allBits |= config.bits; + } + return new Config(allBits); + } + + public static Config of(Config... configs) { + return of(List.of(configs)); + } + + public Config and(Config... configs) { + return Config.of(Config.of(List.of(configs)).bits & bits); + } + + public Config or(Config... 
configs) { + return Config.of(Config.of(List.of(configs)).bits | bits); + } + + public static Config of(String name) { + for (int i = 0; i < bitNames.length; i++) { + if (bitNames[i].equals(name)) { + return new Config(1<<(i+START_BIT_IDX)); + } + } + + if (name.contains(",")) { + List<Config> configs = new ArrayList<>(); + Arrays.stream(name.split(",")).forEach(opt -> + configs.add(of(opt)) + ); + return of(configs); + } else { + System.out.println("Unexpected opt '" + name + "'"); + return Config.of(0); + } + } + + public static Config TRACE_COPIES() { + return new Config(TRACE_COPIES_BIT); + } + public boolean isTRACE_COPIES() { + return (bits & TRACE_COPIES_BIT) == TRACE_COPIES_BIT; + } + public static Config TRACE_CALLS() { + return new Config(TRACE_CALLS_BIT); + } + public boolean isTRACE_CALLS() { + return (bits & TRACE_CALLS_BIT) == TRACE_CALLS_BIT; + } + public static Config TRACE_ENQUEUES() { + return new Config(TRACE_ENQUEUES_BIT); + } + public boolean isTRACE_ENQUEUES() { + return (bits & TRACE_ENQUEUES_BIT) == TRACE_ENQUEUES_BIT; + } + + + public static Config TRACE_SKIPPED_COPIES() { + return new Config(TRACE_SKIPPED_COPIES_BIT); + } + public boolean isTRACE_SKIPPED_COPIES() { + return (bits & TRACE_SKIPPED_COPIES_BIT) == TRACE_SKIPPED_COPIES_BIT; + } + + public static Config INFO() { + return new Config(INFO_BIT); + } + public boolean isINFO() { + return (bits & INFO_BIT) == INFO_BIT; + } + + public static Config CPU() { + return new Config(CPU_BIT); + } + public boolean isCPU() { + return (bits & CPU_BIT) == CPU_BIT; + } + + public static Config GPU() { + return new Config(GPU_BIT); + } + public boolean isGPU() { + return (bits & GPU_BIT) == GPU_BIT; + } + + public static Config PROFILE() { + return new Config(PROFILE_BIT); + } + public boolean isPROFILE() { + return (bits & PROFILE_BIT) == PROFILE_BIT; + } + + public static Config TRACE() { + return new Config(TRACE_BIT); + } + public boolean isTRACE() { + return (bits & TRACE_BIT) == TRACE_BIT; + 
} + + public static Config MINIMIZE_COPIES() { + return new Config(MINIMIZE_COPIES_BIT); + } + public boolean isMINIMIZE_COPIES() { + String hex = Integer.toHexString(bits); + return (bits & MINIMIZE_COPIES_BIT) == MINIMIZE_COPIES_BIT; + } + + public static Config SHOW_CODE() { + return new Config(SHOW_CODE_BIT); + } + public boolean isSHOW_CODE() { + return (bits & SHOW_CODE_BIT) == SHOW_CODE_BIT; + } + + public static Config SHOW_KERNEL_MODEL() { + return new Config(SHOW_KERNEL_MODEL_BIT); + } + public boolean isSHOW_KERNEL_MODEL() { + return (bits & SHOW_KERNEL_MODEL_BIT) == SHOW_KERNEL_MODEL_BIT; + } + + public static Config SHOW_COMPUTE_MODEL() { + return new Config(SHOW_COMPUTE_MODEL_BIT); + } + public boolean isSHOW_COMPUTE_MODEL() { + return (bits & SHOW_COMPUTE_MODEL_BIT) == SHOW_COMPUTE_MODEL_BIT; + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + for (int bitIdx = START_BIT_IDX; bitIdx < END_BIT_IDX; bitIdx++) { + if ((bits&(1<<bitIdx))==(1<<bitIdx)) { + if (!builder.isEmpty()) { + builder.append("|"); + } + builder.append(bitNames[bitIdx-START_BIT_IDX]); + + } + } + /* + if (isTRACE_COPIES()) { + if (!builder.isEmpty()) { + builder.append("|"); + } + builder.append("TRACE_COPIES"); + } + if (isTRACE_SKIPPED_COPIES()) { + if (!builder.isEmpty()) { + builder.append("|"); + } + builder.append("TRACE_SKIPPED_COPIES"); + } + if (isINFO()) { + if (!builder.isEmpty()) { + builder.append("|"); + } + builder.append("INFO"); + } + if (isCPU()) { + if (!builder.isEmpty()) { + builder.append("|"); + } + builder.append("CPU"); + } + if (isGPU()) { + if (!builder.isEmpty()) { + builder.append("|"); + } + builder.append("GPU"); + } + if (isTRACE()) { + if (!builder.isEmpty()) { + builder.append("|"); + } + builder.append("TRACE"); + } + if (isPROFILE()) { + if (!builder.isEmpty()) { + builder.append("|"); + } + builder.append("PROFILE"); + } + if (isMINIMIZE_COPIES()) { + if (!builder.isEmpty()) { + builder.append("|"); + } 
+ builder.append("MINIMIZE_COPIES"); + } + if (isSHOW_CODE()) { + if (!builder.isEmpty()) { + builder.append("|"); + } + builder.append("SHOW_CODE"); + } + if (isSHOW_COMPUTE_MODEL()) { + if (!builder.isEmpty()) { + builder.append("|"); + } + builder.append("SHOW_COMPUTE_MODEL"); + } + if (isSHOW_KERNEL_MODEL()) { + if (!builder.isEmpty()) { + builder.append("|"); + } + builder.append("SHOW_KERNEL_MODEL"); + } + if (isMINIMIZE_COPIES()) { + if (!builder.isEmpty()) { + builder.append("|"); + } + builder.append("MINIMIZE_COPIES"); + } */ + + return builder.toString(); + } +} diff --git a/hat/backends/ffi/opencl/src/main/java/hat/backend/ffi/OpenCLBackend.java b/hat/backends/ffi/opencl/src/main/java/hat/backend/ffi/OpenCLBackend.java index 65bb520a435..2d5815a26e8 100644 --- a/hat/backends/ffi/opencl/src/main/java/hat/backend/ffi/OpenCLBackend.java +++ b/hat/backends/ffi/opencl/src/main/java/hat/backend/ffi/OpenCLBackend.java @@ -32,233 +32,12 @@ import hat.callgraph.KernelCallGraph; import java.lang.invoke.MethodHandle; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; import static java.lang.foreign.ValueLayout.JAVA_INT; public class OpenCLBackend extends C99FFIBackend implements BufferTracker { - public record Mode(int bits) { - private static final int GPU_BIT = 1 << 1; - private static final int CPU_BIT = 1 << 2; - private static final int MINIMIZE_COPIES_BIT = 1 << 3; - private static final int TRACE_BIT = 1 << 4; - private static final int PROFILE_BIT = 1 << 5; - private static final int SHOW_CODE_BIT = 1 << 6; - private static final int SHOW_KERNEL_MODEL_BIT = 1 << 7; - private static final int SHOW_COMPUTE_MODEL_BIT = 1 <<8; - private static final int INFO_BIT = 1 <<9; - private static final int TRACE_COPIES_BIT = 1 << 10; - private static final int TRACE_SKIPPED_COPIES_BIT = 1 << 11; - - public static Mode of() { - // List<Mode> modes = new ArrayList<>(); - if (( ((System.getenv("HAT") instanceof String e)?e:"")+ - 
((System.getProperty("HAT") instanceof String p)?p:"")) instanceof String opts) { - // Arrays.stream(opts.split(",")).forEach(opt -> - return of(opts); - // ); - } - return of(); - } - public static Mode of(int bits) { - - return new Mode(bits); - } - public static Mode of(List<Mode> modes) { - int allBits = 0; - for (Mode mode : modes) { - allBits |= mode.bits; - } - return new Mode(allBits); - } - public static Mode of(Mode ...modes) { - return of(List.of(modes)); - } - public Mode and(Mode ...modes) { - return Mode.of(Mode.of(List.of(modes)).bits&bits); - } - public Mode or(Mode ...modes) { - return Mode.of(Mode.of(List.of(modes)).bits|bits); - } - public static Mode of(String name) { - return switch (name){ - case "GPU" -> GPU(); - case "CPU" -> CPU(); - case "MINIMIZE_COPIES" -> MINIMIZE_COPIES(); - case "TRACE" -> TRACE(); - case "TRACE_COPIES" -> TRACE_COPIES(); - case "TRACE_SKIPPED_COPIES" -> TRACE_SKIPPED_COPIES(); - case "SHOW_CODE" -> SHOW_CODE(); - case "SHOW_KERNEL_MODEL" -> SHOW_KERNEL_MODEL(); - case "SHOW_COMPUTE_MODEL" -> SHOW_COMPUTE_MODEL(); - case "PROFILE" -> PROFILE(); - case "INFO" -> INFO(); - default -> { - if (name.contains(",")) { - List<Mode> modes = new ArrayList<>(); - Arrays.stream(name.split(",")).forEach(opt -> - modes.add(of(opt)) - ); - yield of(modes); - } else { - System.out.println("Unexpected opt '" + name + "'"); - yield Mode.of(0); - } - } - }; - } - public static Mode TRACE_COPIES() { - return new Mode(TRACE_COPIES_BIT); - } - public boolean isTRACE_COPIES() { - return (bits&TRACE_COPIES_BIT)==TRACE_COPIES_BIT; - } - public static Mode TRACE_SKIPPED_COPIES() { - return new Mode(TRACE_SKIPPED_COPIES_BIT); - } - public boolean isTRACE_SKIPPED_COPIES() { - return (bits&TRACE_SKIPPED_COPIES_BIT)==TRACE_SKIPPED_COPIES_BIT; - } - public static Mode INFO() { - return new Mode(INFO_BIT); - } - public boolean isINFO() { - return (bits&INFO_BIT)==INFO_BIT; - } - public static Mode CPU() { - return new Mode(CPU_BIT); - } - public 
boolean isCPU() { - return (bits&CPU_BIT)==CPU_BIT; - } - public static Mode GPU() { - return new Mode(GPU_BIT); - } - public boolean isGPU() { - return (bits&GPU_BIT)==GPU_BIT; - } - public static Mode PROFILE() { - return new Mode(PROFILE_BIT); - } - public boolean isPROFILE() { - return (bits&PROFILE_BIT)==PROFILE_BIT; - } - public static Mode TRACE() { - return new Mode(TRACE_BIT); - } - public boolean isTRACE() { - return (bits&TRACE_BIT)==TRACE_BIT; - } - public static Mode MINIMIZE_COPIES() { - return new Mode(MINIMIZE_COPIES_BIT); - } - public boolean isMINIMIZE_COPIES() { - String hex = Integer.toHexString(bits); - return (bits&MINIMIZE_COPIES_BIT)==MINIMIZE_COPIES_BIT; - } - public static Mode SHOW_CODE() { - return new Mode(SHOW_CODE_BIT); - } - public boolean isSHOW_CODE() { - return (bits&SHOW_CODE_BIT)==SHOW_CODE_BIT; - } - public static Mode SHOW_KERNEL_MODEL() { - return new Mode(SHOW_KERNEL_MODEL_BIT); - } - public boolean isSHOW_KERNEL_MODEL() { - return (bits&SHOW_KERNEL_MODEL_BIT)==SHOW_KERNEL_MODEL_BIT; - } - public static Mode SHOW_COMPUTE_MODEL() { - return new Mode(SHOW_COMPUTE_MODEL_BIT); - } - public boolean isSHOW_COMPUTE_MODEL() { - return (bits&SHOW_COMPUTE_MODEL_BIT)==SHOW_COMPUTE_MODEL_BIT; - } - - @Override - public String toString() { - StringBuilder builder = new StringBuilder(); - if (isTRACE_COPIES()) { - if (!builder.isEmpty()){ - builder.append("|"); - } - builder.append("TRACE_COPIES"); - } - if (isTRACE_SKIPPED_COPIES()) { - if (!builder.isEmpty()){ - builder.append("|"); - } - builder.append("TRACE_SKIPPED_COPIES"); - } - if (isINFO()) { - if (!builder.isEmpty()){ - builder.append("|"); - } - builder.append("INFO"); - } - if (isCPU()) { - if (!builder.isEmpty()){ - builder.append("|"); - } - builder.append("CPU"); - } - if (isGPU()) { - if (!builder.isEmpty()){ - builder.append("|"); - } - builder.append("GPU"); - } - if (isTRACE()) { - if (!builder.isEmpty()){ - builder.append("|"); - } - builder.append("TRACE"); - } - if 
(isPROFILE()) { - if (!builder.isEmpty()){ - builder.append("|"); - } - builder.append("PROFILE"); - } - if (isMINIMIZE_COPIES()) { - if (!builder.isEmpty()){ - builder.append("|"); - } - builder.append("MINIMIZE_COPIES"); - } - if (isSHOW_CODE()) { - if (!builder.isEmpty()){ - builder.append("|"); - } - builder.append("SHOW_CODE"); - } - if (isSHOW_COMPUTE_MODEL()) { - if (!builder.isEmpty()){ - builder.append("|"); - } - builder.append("SHOW_COMPUTE_MODEL"); - } - if (isSHOW_KERNEL_MODEL()) { - if (!builder.isEmpty()){ - builder.append("|"); - } - builder.append("SHOW_KERNEL_MODEL"); - } - if (isMINIMIZE_COPIES()) { - if (!builder.isEmpty()){ - builder.append("|"); - } - builder.append("MINIMIZE_COPIES"); - } - - return builder.toString(); - } - } - - final Mode mode; + final Config config; final MethodHandle getBackend_MH; public long getBackend(int mode, int platform, int device, int unused) { @@ -269,38 +48,38 @@ public long getBackend(int mode, int platform, int device, int unused) { } return backendHandle; } - public OpenCLBackend(String spec) { - this(Mode.of(spec)); + public OpenCLBackend(String configSpec) { + this(Config.of(configSpec)); } - public OpenCLBackend(Mode mode) { + public OpenCLBackend(Config config) { super("opencl_backend"); - this.mode = mode; + this.config = config; getBackend_MH = nativeLibrary.longFunc("getOpenCLBackend",JAVA_INT,JAVA_INT, JAVA_INT, JAVA_INT); - getBackend(mode.bits,0, 0, 0 ); - if (mode.isINFO()) { - System.out.println(mode); + getBackend(config.bits(),0, 0, 0 ); + if (config.isINFO()) { + System.out.println("CONFIG = "+config); info(); } } public OpenCLBackend() { - this(Mode.of().or(Mode.GPU())); + this(Config.of().or(Config.GPU())); } @Override public void computeContextHandoff(ComputeContext computeContext) { //System.out.println("OpenCL backend received computeContext"); - injectBufferTracking(computeContext.computeCallGraph.entrypoint, mode.isSHOW_COMPUTE_MODEL()); + 
injectBufferTracking(computeContext.computeCallGraph.entrypoint, config.isSHOW_COMPUTE_MODEL()); } @Override public void dispatchKernel(KernelCallGraph kernelCallGraph, NDRange ndRange, Object... args) { //System.out.println("OpenCL backend dispatching kernel " + kernelCallGraph.entrypoint.method); CompiledKernel compiledKernel = kernelCallGraphCompiledCodeMap.computeIfAbsent(kernelCallGraph, (_) -> { - String code = createCode(kernelCallGraph, new OpenCLHatKernelBuilder(), args, mode.isSHOW_KERNEL_MODEL()); - if (mode.isSHOW_CODE()) { + String code = createCode(kernelCallGraph, new OpenCLHatKernelBuilder(), args, config.isSHOW_KERNEL_MODEL()); + if (config.isSHOW_CODE()) { System.out.println(code); } long programHandle = compileProgram(code); @@ -317,7 +96,7 @@ public void dispatchKernel(KernelCallGraph kernelCallGraph, NDRange ndRange, Obj @Override public void preMutate(Buffer b) { - if (mode.isMINIMIZE_COPIES()) { + if (config.isMINIMIZE_COPIES()) { if (b.isDeviceDirty()) { if (!b.isHostChecked()) { getBufferFromDeviceIfDirty(b);// calls through FFI and might block when fetching from device @@ -330,14 +109,14 @@ public void preMutate(Buffer b) { @Override public void postMutate(Buffer b) { - if (mode.isMINIMIZE_COPIES()) { + if (config.isMINIMIZE_COPIES()) { b.setHostDirty(); } } @Override public void preAccess(Buffer b) { - if (mode.isMINIMIZE_COPIES()) { + if (config.isMINIMIZE_COPIES()) { if (b.isDeviceDirty() && !b.isHostChecked()) { getBufferFromDeviceIfDirty(b); // calls through FFI and might block when fetching from device // We don't call clearDeviceDirty() if we did then 'just reading on the host' would force copy in next dispatch @@ -354,7 +133,7 @@ public void postAccess(Buffer b) { @Override public void preEscape(Buffer b) { - if (mode.isMINIMIZE_COPIES()) { + if (config.isMINIMIZE_COPIES()) { if (b.isDeviceDirty()) { if (!b.isHostChecked()) { getBufferFromDeviceIfDirty(b); @@ -367,7 +146,7 @@ public void preEscape(Buffer b) { @Override public void 
postEscape(Buffer b) { - if (mode.isMINIMIZE_COPIES()) { + if (config.isMINIMIZE_COPIES()) { b.setHostDirty(); // We have no choice but to assume escapee was modified by the call } } diff --git a/hat/examples/experiments/src/main/java/experiments/Mesh.java b/hat/examples/experiments/src/main/java/experiments/Mesh.java index 22347ed746b..ecde0c9c825 100644 --- a/hat/examples/experiments/src/main/java/experiments/Mesh.java +++ b/hat/examples/experiments/src/main/java/experiments/Mesh.java @@ -28,12 +28,10 @@ import hat.ComputeContext; import hat.KernelContext; import hat.backend.ffi.OpenCLBackend; -import static hat.backend.ffi.OpenCLBackend.Mode.*; +import static hat.backend.ffi.Config.*; import hat.ifacemapper.BoundSchema; import hat.ifacemapper.Schema; -import hat.backend.DebugBackend; import hat.buffer.Buffer; -import hat.buffer.BufferAllocator; import java.lang.foreign.GroupLayout; import java.lang.foreign.MemoryLayout; diff --git a/hat/examples/experiments/src/main/java/experiments/MinBufferTest.java b/hat/examples/experiments/src/main/java/experiments/MinBufferTest.java index c6b256f1a8b..31de41465a2 100644 --- a/hat/examples/experiments/src/main/java/experiments/MinBufferTest.java +++ b/hat/examples/experiments/src/main/java/experiments/MinBufferTest.java @@ -28,14 +28,13 @@ import hat.ComputeContext; import hat.KernelContext; import hat.backend.ffi.OpenCLBackend; -import hat.buffer.Buffer; import hat.buffer.S32Array; import static hat.ifacemapper.MappableIface.*; import jdk.incubator.code.CodeReflection; import java.lang.invoke.MethodHandles; -import static hat.backend.ffi.OpenCLBackend.Mode.*; +import static hat.backend.ffi.Config.*; public class MinBufferTest { diff --git a/hat/examples/life/src/main/java/life/Viewer.java b/hat/examples/life/src/main/java/life/Viewer.java index 76c3eaad533..ef875c66f1b 100644 --- a/hat/examples/life/src/main/java/life/Viewer.java +++ b/hat/examples/life/src/main/java/life/Viewer.java @@ -184,8 +184,8 @@ public void 
paint(Graphics g) { } public static class Controls { -public final JMenuBar menuBar; - private final JButton startButton; + public final JMenuBar menuBar; + private final JButton startButton; private JToggleButton useGPUToggleButton; private JToggleButton minimizeCopiesToggleButton; private SevenSegmentDisplay generationsPerSecondSevenSegment; @@ -197,7 +197,7 @@ public static class Controls { this.state = state; this.menuBar = new JMenuBar(); JPanel panel = new JPanel(); - panel.setLayout(new BoxLayout(panel, BoxLayout.X_AXIS)); + panel.setLayout(new BoxLayout(panel, BoxLayout.X_AXIS)); ((JButton) panel.add(new JButton("Exit"))).addActionListener(_ -> System.exit(0)); this.startButton = (JButton) panel.add(new JButton("Start")); diff --git a/hat/examples/mandel/src/main/java/mandel/Main.java b/hat/examples/mandel/src/main/java/mandel/Main.java index 17b7938310a..63b02345e8a 100644 --- a/hat/examples/mandel/src/main/java/mandel/Main.java +++ b/hat/examples/mandel/src/main/java/mandel/Main.java @@ -137,9 +137,12 @@ public static void main(String[] args) { final float fy = y - sign * zoomPoint.y / zoomFrames; accelerator.compute(cc -> Main.compute(cc, pallette, s32Array2D, fx, fy, fscale)); viewer.imageViewer.syncWithRGB(s32Array2D); + } } - System.out.println("FPS = " + ((zoomFrames * 2 * 1000) / (System.currentTimeMillis() - startMillis))); + var fps = ((zoomFrames * 2 * 1000) / (System.currentTimeMillis() - startMillis)); + viewer.framesSecondSevenSegment.set((int)fps); + // System.out.println("FPS = " +fps); } } } diff --git a/hat/examples/mandel/src/main/java/mandel/Viewer.java b/hat/examples/mandel/src/main/java/mandel/Viewer.java index e36727c31b1..9bc427c6b8c 100644 --- a/hat/examples/mandel/src/main/java/mandel/Viewer.java +++ b/hat/examples/mandel/src/main/java/mandel/Viewer.java @@ -26,9 +26,16 @@ import hat.buffer.Buffer; import hat.buffer.S32Array2D; +import hat.util.ui.SevenSegmentDisplay; +import javax.swing.Box; +import javax.swing.BoxLayout; +import 
javax.swing.JButton; import javax.swing.JComponent; import javax.swing.JFrame; +import javax.swing.JLabel; +import javax.swing.JMenuBar; +import javax.swing.JPanel; import javax.swing.WindowConstants; import java.awt.Dimension; import java.awt.Graphics; @@ -45,6 +52,8 @@ public class Viewer extends JFrame { + public final SevenSegmentDisplay framesSecondSevenSegment; + public static class PointF32 { public final float x; public final float y; @@ -124,6 +133,16 @@ public Viewer(String title, S32Array2D s32Array2D) { super(title); this.imageViewer = new ImageViewer(new BufferedImage(s32Array2D.width(), s32Array2D.height(), BufferedImage.TYPE_INT_RGB)); + var menuBar = new JMenuBar(); + JPanel panel = new JPanel(); + panel.setLayout(new BoxLayout(panel, BoxLayout.X_AXIS)); + ((JButton) panel.add(new JButton("Exit"))).addActionListener(_ -> System.exit(0)); + panel.add(new JLabel("FPS")); + this.framesSecondSevenSegment = (SevenSegmentDisplay) + panel.add(new SevenSegmentDisplay(3,30,panel.getForeground(),panel.getBackground())); + panel.add(Box.createHorizontalStrut(400)); + menuBar.add(panel); + this.setJMenuBar(menuBar); this.getContentPane().add(this.imageViewer); this.pack(); this.setLocationRelativeTo(null); diff --git a/hat/examples/nbody/src/main/java/nbody/opencl/OpenCLNBodyGLWindow.java b/hat/examples/nbody/src/main/java/nbody/opencl/OpenCLNBodyGLWindow.java index ae08b4208d2..b6e29b33581 100644 --- a/hat/examples/nbody/src/main/java/nbody/opencl/OpenCLNBodyGLWindow.java +++ b/hat/examples/nbody/src/main/java/nbody/opencl/OpenCLNBodyGLWindow.java @@ -28,6 +28,7 @@ import hat.Accelerator; import hat.ComputeContext; import hat.KernelContext; +import hat.backend.ffi.Config; import hat.backend.ffi.OpenCLBackend; import hat.ifacemapper.SegmentMapper; import jdk.incubator.code.CodeReflection; @@ -116,7 +117,7 @@ public OpenCLNBodyGLWindow(Arena arena, int width, int height, GLTexture particl super(arena, width, height, particle, bodyCount, mode); final float 
maxDist = 80f; accelerator = new Accelerator(MethodHandles.lookup(), - new OpenCLBackend(OpenCLBackend.Mode.of("GPU")) + new OpenCLBackend(Config.of("GPU")) ); universe = Universe.create(accelerator, bodyCount); for (int body = 0; body < bodyCount; body++) {