@@ -43,7 +43,18 @@ OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::OpenCLBuffer(Backend::
43
43
std::cerr << OpenCLBackend::errorMsg (status) << std::endl;
44
44
exit (1 );
45
45
}
46
- arg->value .buffer .vendorPtr = static_cast <void *>(this );
46
+
47
+ BufferState_s * bufferState = BufferState_s::of (
48
+ arg->value .buffer .memorySegment ,
49
+ arg->value .buffer .sizeInBytes
50
+ );
51
+ if (INFO){
52
+ bufferState->dump (" on allocation before assign" );
53
+ }
54
+ bufferState->vendorPtr = static_cast <void *>(this );
55
+ if (INFO){
56
+ bufferState->dump (" after assign " );
57
+ }
47
58
if (INFO){
48
59
std::cout << " created buffer " << std::endl;
49
60
}
@@ -58,15 +69,15 @@ void OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::copyToDevice() {
58
69
*/
59
70
auto openclKernel = dynamic_cast <OpenCLKernel *>(kernel);
60
71
auto openclBackend = dynamic_cast <OpenCLBackend *>(openclKernel->program ->backend );
61
- cl_int status = clEnqueueWriteBuffer (openclBackend ->command_queue ,
72
+ cl_int status = clEnqueueWriteBuffer ( dynamic_cast <OpenCLQueue *>(openclKernel-> program -> backend -> queue ) ->command_queue ,
62
73
clMem,
63
74
CL_FALSE,
64
75
0 ,
65
76
arg->value .buffer .sizeInBytes ,
66
77
arg->value .buffer .memorySegment ,
67
- openclKernel->eventc ,
68
- ((openclKernel->eventc == 0 ) ? NULL : openclKernel->events ),
69
- &(openclKernel->events [openclKernel->eventc ]));
78
+ dynamic_cast <OpenCLQueue *>( openclKernel-> program -> backend -> queue ) ->eventc ,
79
+ ((dynamic_cast <OpenCLQueue *>( openclKernel->program -> backend -> queue )-> eventc == 0 ) ? NULL : dynamic_cast <OpenCLQueue *>( openclKernel-> program -> backend -> queue ) ->events ),
80
+ &(dynamic_cast <OpenCLQueue *>( openclKernel->program -> backend -> queue )-> events [dynamic_cast <OpenCLQueue *>( openclKernel-> program -> backend -> queue ) ->eventc ]));
70
81
71
82
72
83
@@ -76,7 +87,7 @@ void OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::copyToDevice() {
76
87
std::cerr << OpenCLBackend::errorMsg (status) << std::endl;
77
88
exit (1 );
78
89
}
79
- openclKernel->eventc ++;
90
+ dynamic_cast <OpenCLQueue *>( openclKernel-> program -> backend -> queue ) ->eventc ++;
80
91
if (INFO){
81
92
std::cout << " enqueued buffer copyToDevice " << std::endl;
82
93
}
@@ -85,21 +96,21 @@ void OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::copyToDevice() {
85
96
void OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::copyFromDevice () {
86
97
auto openclKernel = dynamic_cast <OpenCLKernel *>(kernel);
87
98
auto openclBackend = dynamic_cast <OpenCLBackend *>(openclKernel->program ->backend );
88
- cl_int status = clEnqueueReadBuffer (openclBackend ->command_queue ,
99
+ cl_int status = clEnqueueReadBuffer ( dynamic_cast <OpenCLQueue *>(openclKernel-> program -> backend -> queue ) ->command_queue ,
89
100
clMem,
90
101
CL_FALSE,
91
102
0 ,
92
103
arg->value .buffer .sizeInBytes ,
93
104
arg->value .buffer .memorySegment ,
94
- openclKernel->eventc ,
95
- ((openclKernel->eventc == 0 ) ? NULL : openclKernel->events ),
96
- &(openclKernel->events [openclKernel->eventc ]));
105
+ dynamic_cast <OpenCLQueue *>( openclKernel-> program -> backend -> queue ) ->eventc ,
106
+ ((dynamic_cast <OpenCLQueue *>( openclKernel->program -> backend -> queue )-> eventc == 0 ) ? NULL : dynamic_cast <OpenCLQueue *>( openclKernel-> program -> backend -> queue ) ->events ),
107
+ &(dynamic_cast <OpenCLQueue *>( openclKernel->program -> backend -> queue )-> events [dynamic_cast <OpenCLQueue *>( openclKernel-> program -> backend -> queue ) ->eventc ]));
97
108
98
109
if (status != CL_SUCCESS) {
99
110
std::cerr << OpenCLBackend::errorMsg (status) << std::endl;
100
111
exit (1 );
101
112
}
102
- openclKernel->eventc ++;
113
+ dynamic_cast <OpenCLQueue *>( openclKernel-> program -> backend -> queue ) ->eventc ++;
103
114
if (INFO){
104
115
std::cout << " enqueued buffer copyFromDevice " << std::endl;
105
116
}
@@ -110,8 +121,7 @@ OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::~OpenCLBuffer() {
110
121
}
111
122
112
123
/**
 * Builds the OpenCL kernel wrapper.
 *
 * @param program the owning program, forwarded to the Backend::Program::Kernel base
 * @param name    the kernel name, forwarded to the Backend::Program::Kernel base
 * @param kernel  the cl_kernel handle stored on this object
 */
OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLKernel(Backend::Program *program, char *name, cl_kernel kernel)
        : Backend::Program::Kernel(program, name),
          kernel(kernel) {
    // All state is set by the initializer list; there is no further work to do.
}
116
126
117
127
OpenCLBackend::OpenCLProgram::OpenCLKernel::~OpenCLKernel () {
@@ -124,12 +134,13 @@ long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) {
124
134
if (INFO){
125
135
Sled::show (std::cout, argArray);
126
136
}
127
- if (events != nullptr || eventc != 0 ) {
128
- std::cerr << " opencl issue, we might have leaked events!" << std::endl;
129
- }
130
- eventMax = argSled.argc () * 4 + 1 ;
131
- eventc = 0 ;
132
- events = new cl_event[eventMax];
137
+ // if (events != nullptr || eventc != 0) {
138
+ // std::cerr << "opencl issue, we might have leaked events!" << std::endl;
139
+ // }
140
+ // eventMax = argSled.argc() * 4 + 1;
141
+ // eventc = 0;
142
+ // events = new cl_event[eventMax];
143
+ OpenCLQueue *openclQueue = dynamic_cast <OpenCLQueue *>(program->backend ->queue );
133
144
NDRange *ndrange = nullptr ;
134
145
for (int i = 0 ; i < argSled.argc (); i++) {
135
146
Arg_s *arg = argSled.arg (i);
@@ -138,33 +149,6 @@ long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) {
138
149
auto openclBuffer = new OpenCLBuffer (this , arg);
139
150
if (arg->idx == 0 ){
140
151
ndrange = static_cast <NDRange *>(arg->value .buffer .memorySegment );
141
- }else {
142
- IfaceBufferBits_s *ifacebufferbitz = IfaceBufferBits_s::of (
143
- arg->value .buffer .memorySegment ,
144
- arg->value .buffer .sizeInBytes
145
- );
146
- if (ifacebufferbitz->ok ()){
147
- if (INFO){
148
- if (ifacebufferbitz->isJavaDirty ()){
149
- printf (" java dirty (javaDirty:%08x)\n " , ifacebufferbitz->payload .javaDirty );
150
- }else {
151
- printf (" NOT java dirty (javaDirty:%08x)\n " , ifacebufferbitz->payload .javaDirty );
152
- }
153
- if (ifacebufferbitz->isGpuDirty ()){
154
- printf (" gpu dirty (gpuDirty:%08x)\n " , ifacebufferbitz->payload .gpuDirty );
155
- }else {
156
- printf (" NOT gpu dirty (gpuDirty:%08x)\n " , ifacebufferbitz->payload .gpuDirty );
157
- }
158
- }
159
- }else {
160
- printf (" bad magic \n " );
161
- printf (" (magic1:%016lx," , ifacebufferbitz->magic1 );
162
- printf (" javaDirty:%08x," , ifacebufferbitz->payload .javaDirty );
163
- printf (" gpuDirty:%08x," , ifacebufferbitz->payload .gpuDirty );
164
- printf (" unused[0]:%08x," , ifacebufferbitz->payload .unused [0 ]);
165
- printf (" unused[1]:%08x," , ifacebufferbitz->payload .unused [1 ]);
166
- printf (" magic2:%016lx)\n " , ifacebufferbitz->magic2 );
167
- }
168
152
}
169
153
openclBuffer->copyToDevice ();
170
154
cl_int status = clSetKernelArg (kernel, arg->idx , sizeof (cl_mem), &openclBuffer->clMem );
@@ -214,7 +198,7 @@ long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) {
214
198
break ;
215
199
}
216
200
default : {
217
- std::cerr << " unexpected variant " << (char ) arg->variant << std::endl;
201
+ std::cerr << " unexpected variant (ndrange) " << (char ) arg->variant << std::endl;
218
202
exit (1 );
219
203
}
220
204
}
@@ -226,15 +210,15 @@ long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) {
226
210
}
227
211
size_t dims = 1 ;
228
212
cl_int status = clEnqueueNDRangeKernel (
229
- dynamic_cast <OpenCLBackend *>(program-> backend ) ->command_queue ,
213
+ openclQueue ->command_queue ,
230
214
kernel,
231
215
dims,
232
216
nullptr ,
233
217
&globalSize,
234
218
nullptr ,
235
- eventc,
236
- (( eventc == 0 ) ? nullptr : events) ,
237
- &(events[eventc]));
219
+ openclQueue-> eventc ,
220
+ (openclQueue-> eventc == 0 ) ? nullptr : openclQueue-> events ,
221
+ &(openclQueue-> events [openclQueue-> eventc ]));
238
222
if (status != CL_SUCCESS) {
239
223
std::cerr << OpenCLBackend::errorMsg (status) << std::endl;
240
224
exit (1 );
@@ -244,34 +228,49 @@ long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) {
244
228
std::cout << " globalSize=" << globalSize << " " << std::endl;
245
229
}
246
230
247
- eventc++;
231
+ openclQueue-> eventc ++;
248
232
for (int i = 0 ; i < argSled.argc (); i++) {
249
233
Arg_s *arg = argSled.arg (i);
250
234
if (arg->variant == ' &' ) {
251
- static_cast <OpenCLBuffer *>(arg->value .buffer .vendorPtr )->copyFromDevice ();
235
+ BufferState_s * bufferState = BufferState_s::of (
236
+ arg->value .buffer .memorySegment ,
237
+ arg->value .buffer .sizeInBytes
238
+ );
239
+ static_cast <OpenCLBuffer *>(bufferState->vendorPtr )->copyFromDevice ();
240
+ if (INFO){
241
+ bufferState->dump (" After copy from device" );
242
+ }
243
+
252
244
}
253
245
}
254
- status = clWaitForEvents (eventc, events);
246
+ status = clWaitForEvents (openclQueue-> eventc , openclQueue-> events );
255
247
if (status != CL_SUCCESS) {
256
248
std::cerr << OpenCLBackend::errorMsg (status) << std::endl;
257
249
exit (1 );
258
250
}
259
- for (int i = 0 ; i < eventc; i++) {
260
- status = clReleaseEvent (events[i]);
251
+ for (int i = 0 ; i < openclQueue-> eventc ; i++) {
252
+ status = clReleaseEvent (openclQueue-> events [i]);
261
253
if (status != CL_SUCCESS) {
262
254
std::cerr << OpenCLBackend::errorMsg (status) << std::endl;
263
255
exit (1 );
264
256
}
265
257
}
266
- delete[] events;
267
- eventMax = 0 ;
268
- eventc = 0 ;
269
- events = nullptr ;
258
+ // delete[] events;
259
+ // eventMax = 0;
260
+ openclQueue-> eventc = 0 ;
261
+ // events = nullptr;
270
262
for (int i = 0 ; i < argSled.argc (); i++) {
271
263
Arg_s *arg = argSled.arg (i);
272
264
if (arg->variant == ' &' ) {
273
- delete static_cast <OpenCLBuffer *>(arg->value .buffer .vendorPtr );
274
- arg->value .buffer .vendorPtr = nullptr ;
265
+ BufferState_s * bufferState = BufferState_s::of (
266
+ arg->value .buffer .memorySegment ,
267
+ arg->value .buffer .sizeInBytes
268
+ );
269
+ delete static_cast <OpenCLBuffer *>(bufferState->vendorPtr );
270
+ bufferState->vendorPtr = nullptr ;
271
+ if (INFO){
272
+ bufferState->dump (" After deleting buffer " );
273
+ }
275
274
}
276
275
}
277
276
return 0 ;
@@ -297,7 +296,7 @@ bool OpenCLBackend::OpenCLProgram::programOK() {
297
296
}
298
297
299
298
OpenCLBackend::OpenCLBackend (OpenCLBackend::OpenCLConfig *openclConfig, int configSchemaLen, char *configSchema)
300
- : Backend((Backend::Config *) openclConfig, configSchemaLen, configSchema) {
299
+ : Backend((Backend::Config *) openclConfig, configSchemaLen, configSchema, (Backend::Queue *) new OpenCLQueue() ) {
301
300
302
301
if (INFO){
303
302
if (openclConfig == nullptr ) {
@@ -346,13 +345,14 @@ OpenCLBackend::OpenCLBackend(OpenCLBackend::OpenCLConfig *openclConfig, int conf
346
345
347
346
cl_command_queue_properties queue_props = CL_QUEUE_PROFILING_ENABLE;
348
347
349
- if ((command_queue = clCreateCommandQueue (context, device_ids[0 ], queue_props, &status)) == NULL ||
348
+ if ((dynamic_cast <OpenCLQueue *>(queue)-> command_queue = clCreateCommandQueue (context, device_ids[0 ], queue_props, &status)) == NULL ||
350
349
status != CL_SUCCESS) {
351
350
clReleaseContext (context);
352
351
delete[] platforms;
353
352
delete[] device_ids;
354
353
return ;
355
354
}
355
+
356
356
device_id = device_ids[0 ];
357
357
delete[] device_ids;
358
358
delete[] platforms;
@@ -365,15 +365,16 @@ OpenCLBackend::OpenCLBackend()
365
365
366
366
/**
 * Releases the OpenCL handles held by this backend: the command queue stored on
 * the backend's queue object, then the context.
 *
 * NOTE(review): the constructor's command-queue failure path also calls
 * clReleaseContext(context) before returning early; confirm the context cannot
 * end up being released twice when such a backend is later destroyed.
 */
OpenCLBackend::~OpenCLBackend() {
    // dynamic_cast yields nullptr when `queue` is null or is not an OpenCLQueue,
    // so check before touching the handle instead of dereferencing blindly.
    auto *openclQueue = dynamic_cast<OpenCLQueue *>(queue);
    if (openclQueue != nullptr && openclQueue->command_queue != nullptr) {
        // Release the queue before the context it was created on.
        clReleaseCommandQueue(openclQueue->command_queue);
        // Clear the handle so it cannot be released a second time via the queue object.
        openclQueue->command_queue = nullptr;
    }
    clReleaseContext(context);
}
370
370
371
371
void OpenCLBackend::OpenCLProgram::OpenCLKernel::showEvents (int width) {
372
- cl_ulong *samples = new cl_ulong[4 * eventc]; // queued, submit, start, end
372
+ OpenCLQueue * openclQueue = dynamic_cast <OpenCLQueue *>(program->backend ->queue );
373
+ cl_ulong *samples = new cl_ulong[4 * openclQueue->eventc ]; // queued, submit, start, end
373
374
int sample = 0 ;
374
375
cl_ulong min;
375
376
cl_ulong max;
376
- for (int event = 0 ; event < eventc; event++) {
377
+ for (int event = 0 ; event < openclQueue-> eventc ; event++) {
377
378
for (int type = 0 ; type < 4 ; type++) {
378
379
cl_profiling_info info;
379
380
switch (type) {
@@ -391,7 +392,7 @@ void OpenCLBackend::OpenCLProgram::OpenCLKernel::showEvents(int width) {
391
392
break ;
392
393
}
393
394
394
- if ((clGetEventProfilingInfo (events[event], info, sizeof (samples[sample]), &samples[sample], NULL )) !=
395
+ if ((clGetEventProfilingInfo (openclQueue-> events [event], info, sizeof (samples[sample]), &samples[sample], NULL )) !=
395
396
CL_SUCCESS) {
396
397
std::cerr << " failed to get profile info " << info << std::endl;
397
398
}
@@ -414,7 +415,7 @@ void OpenCLBackend::OpenCLProgram::OpenCLKernel::showEvents(int width) {
414
415
std::cout << " Range: " << range << " (ns)" << std::endl;
415
416
std::cout << " Scale: " << scale << " range (ns) per char" << std::endl;
416
417
417
- for (int event = 0 ; event < eventc; event++) {
418
+ for (int event = 0 ; event < openclQueue-> eventc ; event++) {
418
419
cl_ulong queue = (samples[sample++] - min) / scale;
419
420
cl_ulong submit = (samples[sample++] - min) / scale;
420
421
cl_ulong start = (samples[sample++] - min) / scale;
0 commit comments