Skip to content

Commit 14b3744

Browse files
author
Gary Frost
committedFeb 14, 2025
Hat mark buffers 1
1 parent ca6bbc2 commit 14b3744

File tree

14 files changed

+317
-287
lines changed

14 files changed

+317
-287
lines changed
 

‎hat/backends/ffi/mock/cpp/mock_backend.cpp

+10-3
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@ class MockBackend : public Backend {
3030
public :
3131
};
3232

33+
class MockQueue : public Backend::Queue {
34+
public :
35+
MockQueue():Backend::Queue(){}
36+
virtual ~MockQueue(){}
37+
};
38+
3339
class MockProgram : public Backend::Program {
3440
class MockKernel : public Backend::Program::Kernel {
3541
public:
@@ -65,8 +71,8 @@ class MockBackend : public Backend {
6571

6672
public:
6773

68-
MockBackend(MockConfig *mockConfig, int mockConfigSchemeLen, char *mockBackendSchema)
69-
: Backend(mockConfig, mockConfigSchemeLen, mockBackendSchema) {
74+
MockBackend(MockConfig *mockConfig, int mockConfigSchemeLen, char *mockBackendSchema, MockQueue *mockQueue)
75+
: Backend(mockConfig, mockConfigSchemeLen, mockBackendSchema, mockQueue) {
7076
if (mockConfig == nullptr) {
7177
std::cout << "mockConfig == null" << std::endl;
7278
} else {
@@ -99,5 +105,6 @@ class MockBackend : public Backend {
99105

100106
long getBackend(void *config, int configSchemaLen, char *configSchema) {
101107
MockBackend::MockConfig *mockConfig = (MockBackend::MockConfig *) config;
102-
return (long) new MockBackend(mockConfig, configSchemaLen, configSchema);
108+
MockBackend::MockQueue *mockQueue = (MockBackend::MockQueue *) new MockBackend::MockQueue();
109+
return (long) new MockBackend(mockConfig, configSchemaLen, configSchema, mockQueue);
103110
}

‎hat/backends/ffi/opencl/cpp/opencl_backend.cpp

+70-69
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,18 @@ OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::OpenCLBuffer(Backend::
4343
std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
4444
exit(1);
4545
}
46-
arg->value.buffer.vendorPtr = static_cast<void *>(this);
46+
47+
BufferState_s * bufferState = BufferState_s::of(
48+
arg->value.buffer.memorySegment,
49+
arg->value.buffer.sizeInBytes
50+
);
51+
if (INFO){
52+
bufferState->dump("on allocation before assign");
53+
}
54+
bufferState->vendorPtr = static_cast<void *>(this);
55+
if (INFO){
56+
bufferState->dump("after assign ");
57+
}
4758
if (INFO){
4859
std::cout << "created buffer " << std::endl;
4960
}
@@ -58,15 +69,15 @@ void OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::copyToDevice() {
5869
*/
5970
auto openclKernel = dynamic_cast<OpenCLKernel *>(kernel);
6071
auto openclBackend = dynamic_cast<OpenCLBackend *>(openclKernel->program->backend);
61-
cl_int status = clEnqueueWriteBuffer(openclBackend->command_queue,
72+
cl_int status = clEnqueueWriteBuffer( dynamic_cast<OpenCLQueue *>(openclKernel->program->backend->queue)->command_queue,
6273
clMem,
6374
CL_FALSE,
6475
0,
6576
arg->value.buffer.sizeInBytes,
6677
arg->value.buffer.memorySegment,
67-
openclKernel->eventc,
68-
((openclKernel->eventc == 0) ? NULL : openclKernel->events),
69-
&(openclKernel->events[openclKernel->eventc]));
78+
dynamic_cast<OpenCLQueue *>(openclKernel->program->backend->queue)->eventc,
79+
((dynamic_cast<OpenCLQueue *>(openclKernel->program->backend->queue)->eventc == 0) ? NULL : dynamic_cast<OpenCLQueue *>(openclKernel->program->backend->queue)->events),
80+
&(dynamic_cast<OpenCLQueue *>(openclKernel->program->backend->queue)->events[dynamic_cast<OpenCLQueue *>(openclKernel->program->backend->queue)->eventc]));
7081

7182

7283

@@ -76,7 +87,7 @@ void OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::copyToDevice() {
7687
std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
7788
exit(1);
7889
}
79-
openclKernel->eventc++;
90+
dynamic_cast<OpenCLQueue *>(openclKernel->program->backend->queue)->eventc++;
8091
if (INFO){
8192
std::cout << "enqueued buffer copyToDevice " << std::endl;
8293
}
@@ -85,21 +96,21 @@ void OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::copyToDevice() {
8596
void OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::copyFromDevice() {
8697
auto openclKernel = dynamic_cast<OpenCLKernel *>(kernel);
8798
auto openclBackend = dynamic_cast<OpenCLBackend *>(openclKernel->program->backend);
88-
cl_int status = clEnqueueReadBuffer(openclBackend->command_queue,
99+
cl_int status = clEnqueueReadBuffer( dynamic_cast<OpenCLQueue *>(openclKernel->program->backend->queue)->command_queue,
89100
clMem,
90101
CL_FALSE,
91102
0,
92103
arg->value.buffer.sizeInBytes,
93104
arg->value.buffer.memorySegment,
94-
openclKernel->eventc,
95-
((openclKernel->eventc == 0) ? NULL : openclKernel->events),
96-
&(openclKernel->events[openclKernel->eventc]));
105+
dynamic_cast<OpenCLQueue *>(openclKernel->program->backend->queue)->eventc,
106+
((dynamic_cast<OpenCLQueue *>(openclKernel->program->backend->queue)->eventc == 0) ? NULL : dynamic_cast<OpenCLQueue *>(openclKernel->program->backend->queue)->events),
107+
&(dynamic_cast<OpenCLQueue *>(openclKernel->program->backend->queue)->events[dynamic_cast<OpenCLQueue *>(openclKernel->program->backend->queue)->eventc]));
97108

98109
if (status != CL_SUCCESS) {
99110
std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
100111
exit(1);
101112
}
102-
openclKernel->eventc++;
113+
dynamic_cast<OpenCLQueue *>(openclKernel->program->backend->queue)->eventc++;
103114
if (INFO){
104115
std::cout << "enqueued buffer copyFromDevice " << std::endl;
105116
}
@@ -110,8 +121,7 @@ OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::~OpenCLBuffer() {
110121
}
111122

112123
OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLKernel(Backend::Program *program, char* name, cl_kernel kernel)
113-
: Backend::Program::Kernel(program, name), kernel(kernel), eventMax(0), events(nullptr),
114-
eventc(0) {
124+
: Backend::Program::Kernel(program, name), kernel(kernel){
115125
}
116126

117127
OpenCLBackend::OpenCLProgram::OpenCLKernel::~OpenCLKernel() {
@@ -124,12 +134,13 @@ long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) {
124134
if (INFO){
125135
Sled::show(std::cout, argArray);
126136
}
127-
if (events != nullptr || eventc != 0) {
128-
std::cerr << "opencl issue, we might have leaked events!" << std::endl;
129-
}
130-
eventMax = argSled.argc() * 4 + 1;
131-
eventc = 0;
132-
events = new cl_event[eventMax];
137+
// if (events != nullptr || eventc != 0) {
138+
// std::cerr << "opencl issue, we might have leaked events!" << std::endl;
139+
//}
140+
// eventMax = argSled.argc() * 4 + 1;
141+
//eventc = 0;
142+
// events = new cl_event[eventMax];
143+
OpenCLQueue *openclQueue = dynamic_cast<OpenCLQueue *>(program->backend->queue);
133144
NDRange *ndrange = nullptr;
134145
for (int i = 0; i < argSled.argc(); i++) {
135146
Arg_s *arg = argSled.arg(i);
@@ -138,33 +149,6 @@ long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) {
138149
auto openclBuffer = new OpenCLBuffer(this, arg);
139150
if (arg->idx == 0){
140151
ndrange = static_cast<NDRange *>(arg->value.buffer.memorySegment);
141-
}else{
142-
IfaceBufferBits_s *ifacebufferbitz = IfaceBufferBits_s::of(
143-
arg->value.buffer.memorySegment,
144-
arg->value.buffer.sizeInBytes
145-
);
146-
if (ifacebufferbitz->ok()){
147-
if (INFO){
148-
if (ifacebufferbitz->isJavaDirty()){
149-
printf(" java dirty (javaDirty:%08x)\n", ifacebufferbitz->payload.javaDirty);
150-
}else{
151-
printf(" NOT java dirty (javaDirty:%08x)\n", ifacebufferbitz->payload.javaDirty);
152-
}
153-
if (ifacebufferbitz->isGpuDirty()){
154-
printf(" gpu dirty (gpuDirty:%08x)\n", ifacebufferbitz->payload.gpuDirty);
155-
}else{
156-
printf(" NOT gpu dirty (gpuDirty:%08x)\n", ifacebufferbitz->payload.gpuDirty);
157-
}
158-
}
159-
}else{
160-
printf("bad magic \n");
161-
printf("(magic1:%016lx,", ifacebufferbitz->magic1);
162-
printf("javaDirty:%08x,", ifacebufferbitz->payload.javaDirty);
163-
printf("gpuDirty:%08x,", ifacebufferbitz->payload.gpuDirty);
164-
printf("unused[0]:%08x,", ifacebufferbitz->payload.unused[0]);
165-
printf("unused[1]:%08x,", ifacebufferbitz->payload.unused[1]);
166-
printf("magic2:%016lx)\n", ifacebufferbitz->magic2);
167-
}
168152
}
169153
openclBuffer->copyToDevice();
170154
cl_int status = clSetKernelArg(kernel, arg->idx, sizeof(cl_mem), &openclBuffer->clMem);
@@ -214,7 +198,7 @@ long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) {
214198
break;
215199
}
216200
default: {
217-
std::cerr << "unexpected variant " << (char) arg->variant << std::endl;
201+
std::cerr << "unexpected variant (ndrange) " << (char) arg->variant << std::endl;
218202
exit(1);
219203
}
220204
}
@@ -226,15 +210,15 @@ long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) {
226210
}
227211
size_t dims = 1;
228212
cl_int status = clEnqueueNDRangeKernel(
229-
dynamic_cast<OpenCLBackend *>(program->backend)->command_queue,
213+
openclQueue->command_queue,
230214
kernel,
231215
dims,
232216
nullptr,
233217
&globalSize,
234218
nullptr,
235-
eventc,
236-
((eventc == 0) ? nullptr : events),
237-
&(events[eventc]));
219+
openclQueue->eventc,
220+
(openclQueue->eventc == 0) ? nullptr : openclQueue->events,
221+
&(openclQueue->events[openclQueue->eventc]));
238222
if (status != CL_SUCCESS) {
239223
std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
240224
exit(1);
@@ -244,34 +228,49 @@ long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) {
244228
std::cout << " globalSize=" << globalSize << " " << std::endl;
245229
}
246230

247-
eventc++;
231+
openclQueue->eventc++;
248232
for (int i = 0; i < argSled.argc(); i++) {
249233
Arg_s *arg = argSled.arg(i);
250234
if (arg->variant == '&') {
251-
static_cast<OpenCLBuffer *>(arg->value.buffer.vendorPtr)->copyFromDevice();
235+
BufferState_s * bufferState = BufferState_s::of(
236+
arg->value.buffer.memorySegment,
237+
arg->value.buffer.sizeInBytes
238+
);
239+
static_cast<OpenCLBuffer *>(bufferState->vendorPtr)->copyFromDevice();
240+
if (INFO){
241+
bufferState->dump("After copy from device");
242+
}
243+
252244
}
253245
}
254-
status = clWaitForEvents(eventc, events);
246+
status = clWaitForEvents(openclQueue->eventc, openclQueue->events);
255247
if (status != CL_SUCCESS) {
256248
std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
257249
exit(1);
258250
}
259-
for (int i = 0; i < eventc; i++) {
260-
status = clReleaseEvent(events[i]);
251+
for (int i = 0; i < openclQueue->eventc; i++) {
252+
status = clReleaseEvent(openclQueue->events[i]);
261253
if (status != CL_SUCCESS) {
262254
std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
263255
exit(1);
264256
}
265257
}
266-
delete[] events;
267-
eventMax = 0;
268-
eventc = 0;
269-
events = nullptr;
258+
// delete[] events;
259+
//eventMax = 0;
260+
openclQueue->eventc = 0;
261+
//events = nullptr;
270262
for (int i = 0; i < argSled.argc(); i++) {
271263
Arg_s *arg = argSled.arg(i);
272264
if (arg->variant == '&') {
273-
delete static_cast<OpenCLBuffer *>(arg->value.buffer.vendorPtr);
274-
arg->value.buffer.vendorPtr = nullptr;
265+
BufferState_s * bufferState = BufferState_s::of(
266+
arg->value.buffer.memorySegment,
267+
arg->value.buffer.sizeInBytes
268+
);
269+
delete static_cast<OpenCLBuffer *>(bufferState->vendorPtr);
270+
bufferState->vendorPtr = nullptr;
271+
if (INFO){
272+
bufferState->dump("After deleting buffer ");
273+
}
275274
}
276275
}
277276
return 0;
@@ -297,7 +296,7 @@ bool OpenCLBackend::OpenCLProgram::programOK() {
297296
}
298297

299298
OpenCLBackend::OpenCLBackend(OpenCLBackend::OpenCLConfig *openclConfig, int configSchemaLen, char *configSchema)
300-
: Backend((Backend::Config *) openclConfig, configSchemaLen, configSchema) {
299+
: Backend((Backend::Config *) openclConfig, configSchemaLen, configSchema, (Backend::Queue *) new OpenCLQueue()) {
301300

302301
if (INFO){
303302
if (openclConfig == nullptr) {
@@ -346,13 +345,14 @@ OpenCLBackend::OpenCLBackend(OpenCLBackend::OpenCLConfig *openclConfig, int conf
346345

347346
cl_command_queue_properties queue_props = CL_QUEUE_PROFILING_ENABLE;
348347

349-
if ((command_queue = clCreateCommandQueue(context, device_ids[0], queue_props, &status)) == NULL ||
348+
if ((dynamic_cast<OpenCLQueue *>(queue)->command_queue = clCreateCommandQueue(context, device_ids[0], queue_props, &status)) == NULL ||
350349
status != CL_SUCCESS) {
351350
clReleaseContext(context);
352351
delete[] platforms;
353352
delete[] device_ids;
354353
return;
355354
}
355+
356356
device_id = device_ids[0];
357357
delete[] device_ids;
358358
delete[] platforms;
@@ -365,15 +365,16 @@ OpenCLBackend::OpenCLBackend()
365365

366366
OpenCLBackend::~OpenCLBackend() {
367367
clReleaseContext(context);
368-
clReleaseCommandQueue(command_queue);
368+
clReleaseCommandQueue(dynamic_cast<OpenCLQueue *>(queue)->command_queue);
369369
}
370370

371371
void OpenCLBackend::OpenCLProgram::OpenCLKernel::showEvents(int width) {
372-
cl_ulong *samples = new cl_ulong[4 * eventc]; // queued, submit, start, end
372+
OpenCLQueue * openclQueue = dynamic_cast<OpenCLQueue *>(program->backend->queue);
373+
cl_ulong *samples = new cl_ulong[4 * openclQueue->eventc]; // queued, submit, start, end
373374
int sample = 0;
374375
cl_ulong min;
375376
cl_ulong max;
376-
for (int event = 0; event < eventc; event++) {
377+
for (int event = 0; event < openclQueue->eventc; event++) {
377378
for (int type = 0; type < 4; type++) {
378379
cl_profiling_info info;
379380
switch (type) {
@@ -391,7 +392,7 @@ void OpenCLBackend::OpenCLProgram::OpenCLKernel::showEvents(int width) {
391392
break;
392393
}
393394

394-
if ((clGetEventProfilingInfo(events[event], info, sizeof(samples[sample]), &samples[sample], NULL)) !=
395+
if ((clGetEventProfilingInfo(openclQueue->events[event], info, sizeof(samples[sample]), &samples[sample], NULL)) !=
395396
CL_SUCCESS) {
396397
std::cerr << "failed to get profile info " << info << std::endl;
397398
}
@@ -414,7 +415,7 @@ void OpenCLBackend::OpenCLProgram::OpenCLKernel::showEvents(int width) {
414415
std::cout << "Range: " << range << "(ns)" << std::endl;
415416
std::cout << "Scale: " << scale << " range (ns) per char" << std::endl;
416417

417-
for (int event = 0; event < eventc; event++) {
418+
for (int event = 0; event < openclQueue->eventc; event++) {
418419
cl_ulong queue = (samples[sample++] - min) / scale;
419420
cl_ulong submit = (samples[sample++] - min) / scale;
420421
cl_ulong start = (samples[sample++] - min) / scale;

‎hat/backends/ffi/opencl/include/opencl_backend.h

+10-14
Original file line numberDiff line numberDiff line change
@@ -62,28 +62,31 @@ class OpenCLBackend : public Backend {
6262
public:
6363
boolean gpu;
6464
};
65+
class OpenCLQueue : public Backend::Queue {
66+
public:
67+
size_t eventMax;
68+
cl_event *events;
69+
size_t eventc;
70+
cl_command_queue command_queue;
71+
OpenCLQueue():Backend::Queue(), eventMax(256), events(new cl_event[eventMax]), eventc(0){
72+
}
73+
virtual ~OpenCLQueue(){}
74+
};
6575

6676
class OpenCLProgram : public Backend::Program {
6777
class OpenCLKernel : public Backend::Program::Kernel {
6878

6979
class OpenCLBuffer : public Backend::Program::Kernel::Buffer {
7080
public:
7181
cl_mem clMem;
72-
7382
void copyToDevice();
74-
7583
void copyFromDevice();
76-
7784
OpenCLBuffer(Backend::Program::Kernel *kernel, Arg_s *arg);
78-
7985
virtual ~OpenCLBuffer();
8086
};
8187

8288
private:
8389
cl_kernel kernel;
84-
size_t eventMax;
85-
cl_event *events;
86-
size_t eventc;
8790
protected:
8891
void showEvents(int width);
8992
public:
@@ -110,18 +113,11 @@ class OpenCLBackend : public Backend {
110113
public:
111114
cl_platform_id platform_id;
112115
cl_context context;
113-
cl_command_queue command_queue;
114116
cl_device_id device_id;
115-
116-
117117
OpenCLBackend();
118-
119118
OpenCLBackend(OpenCLConfig *config, int configSchemaLen, char *configSchema);
120-
121119
~OpenCLBackend();
122-
123120
int getMaxComputeUnits();
124-
125121
void info();
126122
void dumpSled(std::ostream &out,void *argArray);
127123
char *dumpSchema(std::ostream &out,int depth, char *ptr, void *data);

‎hat/backends/ffi/shared/cpp/shared.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ void Sled::show(std::ostream &out, void *argArray) {
109109
break;
110110
}
111111
default: {
112-
std::cerr << "unexpected variant '" << (char) arg->variant << "'" << std::endl;
112+
std::cerr << "unexpected variant (shared.cpp) '" << (char) arg->variant << "'" << std::endl;
113113
exit(1);
114114
}
115115
}

0 commit comments

Comments
 (0)
Please sign in to comment.