As a follow-up to my setup issues, I think this is the relevant subtopic to post in. Here is the code I ran and the full error stack.
from vgg16 import Vgg16  # assuming the usual course import; path and batch_size are set earlier in the notebook
vgg = Vgg16()
# Grab a few images at a time for training and validation.
# NB: They must be in subdirectories named based on their category
batches = vgg.get_batches(path+'train', batch_size=batch_size)
val_batches = vgg.get_batches(path+'valid', batch_size=batch_size*2)
vgg.finetune(batches)
vgg.fit(batches, val_batches, nb_epoch=1)
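For context before the error: the comment above refers to the Keras-style directory layout, where each class is its own subdirectory under train/ and valid/. A minimal sanity check of that layout (a sketch only; the class folder names are whatever the dataset uses, and path is defined earlier in the notebook):

import os
# List the class subdirectories that get_batches treats as categories.
# This should print two folder names per split, matching the
# "2 classes" message in the output below.
for split in ('train', 'valid'):
    print(split, sorted(os.listdir(os.path.join(path, split))))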
### Output & Error message
Found 40 images belonging to 2 classes.
['nvcc', '-shared', '-O3', '-Xlinker', '-rpath,/usr/local/cuda/lib64', '-arch=sm_61', '-m64', '-Xcompiler', '-fno-math-errno,-Wno-unused-label,-Wno-unused-variable,-Wno-write-strings,-DCUDA_NDARRAY_CUH=c72d035fdf91890f3b36710688069b2e,-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION,-fPIC,-fvisibility=hidden', '-Xlinker', '-rpath,/home/ra/.theano/compiledir_Linux-4.8--generic-x86_64-with-debian-stretch-sid-x86_64-2.7.13-64/cuda_ndarray', '-I/home/ra/.theano/compiledir_Linux-4.8--generic-x86_64-with-debian-stretch-sid-x86_64-2.7.13-64/cuda_ndarray', '-I/usr/local/cuda/include', '-I/opt/anaconda/lib/python2.7/site-packages/theano/sandbox/cuda', '-I/opt/anaconda/lib/python2.7/site-packages/numpy/core/include', '-I/opt/anaconda/include/python2.7', '-I/opt/anaconda/lib/python2.7/site-packages/theano/gof', '-L/home/ra/.theano/compiledir_Linux-4.8--generic-x86_64-with-debian-stretch-sid-x86_64-2.7.13-64/cuda_ndarray', '-L/opt/anaconda/lib', '-o', '/home/ra/.theano/compiledir_Linux-4.8--generic-x86_64-with-debian-stretch-sid-x86_64-2.7.13-64/tmpbDHDIA/ea4e203b6529466794536f8a1bfa77ae.so', 'mod.cu', '-lcudart', '-lcublas', '-lcuda_ndarray', '-lcudnn', '-lpython2.7']
1 #include <Python.h>
2 #include <iostream>
3 #include "theano_mod_helper.h"
4 #include "cuda_ndarray.cuh"
5 #include <math.h>
6 #include <numpy/arrayobject.h>
7 #include <numpy/arrayscalars.h>
8 #include "cudnn.h"
9 #include "cudnn_helper.h"
10 //////////////////////
11 //// Support Code
12 //////////////////////
13
14 void _capsule_destructor(PyObject *o) {
15 void *d = PyCapsule_GetContext(o);
16 void *p = PyCapsule_GetPointer(o, NULL);
17 void (*f)(void *) = (void (*)(void *))d;
18 if (f != NULL) f(p);
19 }
20
21
22 static cudnnHandle_t _handle = NULL;
23
24
25 static int
26 c_set_tensorNd(CudaNdarray *var, cudnnTensorDescriptor_t desc) {
27
28 int dim = CudaNdarray_NDIM(var);
29 int *strides = (int *)malloc(dim * sizeof(int));
30 int default_str = 1;
31 int return_value = 0;
32
33 if (strides != NULL) {
34 for (int i = dim-1; i >= 0; i--)
35 {
36 if (CudaNdarray_HOST_STRIDES(var)[i])
37 strides[i] = CudaNdarray_HOST_STRIDES(var)[i];
38 else
39 strides[i] = default_str;
40 default_str *= CudaNdarray_HOST_DIMS(var)[i];
41 }
42
43 cudnnStatus_t err = cudnnSetTensorNdDescriptor(desc, CUDNN_DATA_FLOAT, dim,
44 CudaNdarray_HOST_DIMS(var),
45 strides);
46
47
48 if (err != CUDNN_STATUS_SUCCESS) {
49 PyErr_Format(PyExc_RuntimeError,
50 "Could not set tensorNd descriptor: %s"
51 "dim=%d",
52 cudnnGetErrorString(err), dim);
53
54 return_value = -1;
55 }
56 } else {
57 PyErr_Format(PyExc_MemoryError,
58 "Could not allocate memory for strides array of size %d.",
59 dim);
60
61 return_value = -1;
62 }
63
64 free(strides);
65 return return_value;
66 }
67
68
69 static int
70 c_set_filterNd(CudaNdarray *var, cudnnFilterDescriptor_t desc) {
71 if (!CudaNdarray_is_c_contiguous(var)) {
72 PyErr_SetString(PyExc_ValueError,
73 "Only contiguous filters (kernels) are supported.");
74 return -1;
75 }
76 int dim = CudaNdarray_NDIM(var);
77 cudnnStatus_t err = cudnnSetFilterNdDescriptor_v4(desc,
78 CUDNN_DATA_FLOAT,
79 CUDNN_TENSOR_NCHW,
80 dim,
81 CudaNdarray_HOST_DIMS(var));
82 if (err != CUDNN_STATUS_SUCCESS) {
83 PyErr_Format(PyExc_RuntimeError,
84 "Could not set filter descriptor: %s."
85 " dims= %d",
86 cudnnGetErrorString(err), dim);
87 return -1;
88 }
89 return 0;
90 }
91
92
93
94 namespace {
95 struct __struct_compiled_op_ea4e203b6529466794536f8a1bfa77ae {
96 PyObject* __ERROR;
97
98 PyObject* storage_V3;
99 PyObject* storage_V5;
100 PyObject* storage_V7;
101 PyObject* storage_V9;
102 PyObject* storage_V11;
103 PyObject* storage_V13;
104 PyObject* storage_V1;
105
106 #define DTYPE_INPUT_0 npy_float32
107 #define TYPENUM_INPUT_0 11
108 #define ITEMSIZE_INPUT_0 4
109 #define DTYPE_INPUT_1 npy_float32
110 #define TYPENUM_INPUT_1 11
111 #define ITEMSIZE_INPUT_1 4
112 #define DTYPE_INPUT_2 npy_float32
113 #define TYPENUM_INPUT_2 11
114 #define ITEMSIZE_INPUT_2 4
115 #define DTYPE_INPUT_4 npy_float32
116 #define TYPENUM_INPUT_4 11
117 #define ITEMSIZE_INPUT_4 4
118 #define DTYPE_INPUT_5 npy_float32
119 #define TYPENUM_INPUT_5 11
120 #define ITEMSIZE_INPUT_5 4
121 #define DTYPE_OUTPUT_0 npy_float32
122 #define TYPENUM_OUTPUT_0 11
123 #define ITEMSIZE_OUTPUT_0 4
124 #define APPLY_SPECIFIC(str) str##_node_ea4e203b6529466794536f8a1bfa77ae_0
125 #define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
126 #define CHOOSE_ALGO 0
127 #define CHOOSE_ALGO_ONCE 0
128 #define CHOOSE_ALGO_TIME 0
129 #define CONV_INPLACE 1
130
131 cudnnTensorDescriptor_t APPLY_SPECIFIC(input);
132 cudnnTensorDescriptor_t APPLY_SPECIFIC(output);
133 cudnnFilterDescriptor_t APPLY_SPECIFIC(kerns);
134
135 /* Keep track, from one execution to another, of the dimension of the data
136 and the algorithms, if any, that were selected according to these dimensions
137 and according to the amount of memory available at that time.
138
139 Note : Implementation selection for backward convolution only exists starting
140 at V3.
141 */
142 int APPLY_SPECIFIC(previous_input_shape)[5];
143 int APPLY_SPECIFIC(previous_kerns_shape)[5];
144 int APPLY_SPECIFIC(previous_output_shape)[5];
145 bool APPLY_SPECIFIC(previous_algo_set);
146 cudnnConvolutionFwdAlgo_t APPLY_SPECIFIC(previous_algo);
147 cudnnConvolutionBwdFilterAlgo_t APPLY_SPECIFIC(previous_bwd_f_algo);
148 cudnnConvolutionBwdDataAlgo_t APPLY_SPECIFIC(previous_bwd_d_algo);
149
150
151
152 int
153 APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
154 CudaNdarray *om, cudnnConvolutionDescriptor_t desc,
155 float alpha, float beta, CudaNdarray **output) {
156
157 cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
158 if (CudaNdarray_HOST_DIMS(input)[1] != CudaNdarray_HOST_DIMS(kerns)[1]) {
159 PyErr_SetString(PyExc_ValueError,
160 "GpuDnnConv images and kernel must have the same stack size\n");
161 return 1;
162 }
163
164 int nb_dim = CudaNdarray_NDIM(input);
165
166 #ifdef CONV_INPLACE
167 Py_XDECREF(*output);
168 *output = om;
169 Py_INCREF(*output);
170 #else
171 if (CudaNdarray_prep_output(output, nb_dim, CudaNdarray_HOST_DIMS(om)) != 0)
172 return 1;
173 if (beta != 0.0 && CudaNdarray_CopyFromCudaNdarray(*output, om))
174 return 1;
175 #endif
176
177 if (CudaNdarray_DIMS(input)[0] == 0 || CudaNdarray_DIMS(kerns)[0] == 0 || CudaNdarray_DIMS(kerns)[1] == 0) {
178 cudaError_t err2 = cudaMemset((*output)->devdata, 0,
179 CudaNdarray_SIZE(*output) * sizeof(real));
180 if (err2 != cudaSuccess) {
181 PyErr_Format(PyExc_RuntimeError,
182 "GpuDnnConv could not fill the output with zeros: %s",
183 cudaGetErrorString(err2));
184 return 1;
185 }
186 return 0;
187 }
188
189 if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
190 return 1;
191 if (c_set_filterNd(kerns, APPLY_SPECIFIC(kerns)) == -1)
192 return 1;
193 if (c_set_tensorNd(*output, APPLY_SPECIFIC(output)) == -1)
194 return 1;
195
196 {
197 size_t worksize;
198 void *workspace;
199 cudnnConvolutionFwdAlgo_t chosen_algo;
200
201
202 if (CHOOSE_ALGO)
203 {
204
205 // A new convolution implementation should be selected, based either on
206 // timing or heuristics if in one of the two following cases :
207 // - The implementation should only be chosen during the first execution
208 // of an apply node and this is the first execution of the apply node.
209 // - The implementation should be chosen as often as necessary and the
210 // shapes of the inputs differ from the last time an implementation
211 // was chosen.
212 bool reuse_previous_algo;
213 if (CHOOSE_ALGO_ONCE)
214 {
215 // Only choose a new implementation of none has been chosen before.
216 reuse_previous_algo = APPLY_SPECIFIC(previous_algo_set);
217 }
218 else
219 {
220 // Reuse the previous implementation if the inputs and the kernels
221 // have the same shapes as they had when the previous implementation
222 // was selected
223 bool same_shapes = true;
224 for (int i = 0; (i < nb_dim) && same_shapes; i++)
225 {
226 same_shapes &= (CudaNdarray_HOST_DIMS(input)[i] ==
227 APPLY_SPECIFIC(previous_input_shape)[i]);
228 same_shapes &= (CudaNdarray_HOST_DIMS(kerns)[i] ==
229 APPLY_SPECIFIC(previous_kerns_shape)[i]);
230 }
231 reuse_previous_algo = same_shapes;
232 }
233
234 // If the previously choosen implementation can't be reused, select a
235 // new one based on the shapes of the current inputs
236 if (!reuse_previous_algo)
237 {
238
239 // Obtain a convolution algorithm appropriate for the input and kernel
240 // shapes. Either by choosing one according to heuristics or by making
241 // cuDNN time every implementation and choose the best one.
242 if (CHOOSE_ALGO_TIME)
243 {
244 // Time the different implementations to choose the best one
245 int requestedCount = 1;
246 int count;
247 cudnnConvolutionFwdAlgoPerf_t choosen_algo_perf;
248 err = cudnnFindConvolutionForwardAlgorithm(_handle,
249 APPLY_SPECIFIC(input),
250 APPLY_SPECIFIC(kerns),
251 desc,
252 APPLY_SPECIFIC(output),
253 requestedCount,
254 &count,
255 &choosen_algo_perf);
256 if (err != CUDNN_STATUS_SUCCESS) {
257 PyErr_Format(PyExc_RuntimeError,
258 "GpuDnnConv: error selecting convolution algo: %s",
259 cudnnGetErrorString(err));
260 return 1;
261 }
262
263 chosen_algo = choosen_algo_perf.algo;
264 }
265 else
266 {
267 // The implementation should be chosen using heuristics based on the
268 // input shapes and the amount of memory available.
269
270 // Get the amount of available memory
271 size_t free = 0, total = 0;
272 cudaError_t err2 = cudaMemGetInfo(&free, &total);
273 if (err2 != cudaSuccess){
274 cudaGetLastError();
275 fprintf(stderr,
276 "Error when trying to find the memory information"
277 " on the GPU: %s\n", cudaGetErrorString(err2));
278 return 1;
279 }
280
281 // Use heuristics to choose the implementation
282 err = cudnnGetConvolutionForwardAlgorithm(_handle,
283 APPLY_SPECIFIC(input),
284 APPLY_SPECIFIC(kerns),
285 desc,
286 APPLY_SPECIFIC(output),
287 CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
288 free,
289 &chosen_algo);
290
291 if (err != CUDNN_STATUS_SUCCESS) {
292 PyErr_Format(PyExc_RuntimeError,
293 "GpuDnnConv: error selecting convolution algo: %s",
294 cudnnGetErrorString(err));
295 return 1;
296 }
297 }
298
299 // Store the shapes of the inputs and kernels as well as the chosen
300 // algorithm for future use.
301 APPLY_SPECIFIC(previous_algo) = chosen_algo;
302 APPLY_SPECIFIC(previous_algo_set) = true;
303 for (int i = 0; i < nb_dim; i++)
304 {
305 APPLY_SPECIFIC(previous_input_shape)[i] =
306 CudaNdarray_HOST_DIMS(input)[i];
307 APPLY_SPECIFIC(previous_kerns_shape)[i] =
308 CudaNdarray_HOST_DIMS(kerns)[i];
309 }
310 }
311 else
312 {
313 // Reuse the previously chosen convolution implementation
314 chosen_algo = APPLY_SPECIFIC(previous_algo);
315 }
316 }
317 else
318 {
319 chosen_algo = CONV_ALGO;
320 }
321
322 if (0){
323 char * a;
324 switch(chosen_algo){
325 case CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM:
326 a = "implicit gemm (0)";
327 break;
328 case CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM:
329 a = "precomp gemm (1)";
330 break;
331 case CUDNN_CONVOLUTION_FWD_ALGO_GEMM:
332 a = "gemm (2)";
333 break;
334 case CUDNN_CONVOLUTION_FWD_ALGO_DIRECT:
335 a = "direct (3)";
336 break;
337 case CUDNN_CONVOLUTION_FWD_ALGO_FFT:
338 a = "fft (4)";
339 break;
340 case CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING:
341 a = "fft tiling (5)";
342 break;
343 #if CUDNN_VERSION > 5000
344 case CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD:
345 a = "winograd (6)";
346 break;
347 #endif
348 }
349 printf("GpuDNNConv: algo %s\n", a);
350 }
351
352 // The FFT implementation (only in V3 and onward) does not support strides,
353 // 1x1 filters or inputs with a spatial dimension larger than 1024.
354 // The tiled-FFT implementation (only in V4 onward) does not support
355 // strides.
356 // If the chosen implementation is FFT or tiled-FFT, validate that it can
357 // be used on the current data and default on a safe implementation if it
358 // can't.
359 // Following code is 2d-specific, but it is fine as FFT and tiled-FFT are
360 // defined only for 2d-filters
361 if ((chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT ||
362 chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) && nb_dim == 4)
363 {
364
365 // Extract the properties of the convolution descriptor
366 int nd;
367 int pad[2];
368 int stride[2];
369 int upscale[2];
370 cudnnConvolutionMode_t mode;
371 cudnnDataType_t data_type;
372 err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
373 upscale, &mode, &data_type);
374
375 if (err != CUDNN_STATUS_SUCCESS) {
376 PyErr_Format(PyExc_RuntimeError,
377 "GpuDnnConv: error getting convolution properties: %s",
378 cudnnGetErrorString(err));
379 return 1;
380 }
381
382 // Extract the spatial size of the filters
383 int filter_h = CudaNdarray_HOST_DIMS(kerns)[2];
384 int filter_w = CudaNdarray_HOST_DIMS(kerns)[3];
385
386 // Extract the spatial size of the input
387 int input_h = CudaNdarray_HOST_DIMS(input)[2];
388 int input_w = CudaNdarray_HOST_DIMS(input)[3];
389
390 // Ensure that the selected implementation supports the requested
391 // convolution. Fall back to a safe implementation otherwise.
392 if (chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT)
393 {
394 if (stride[0] != 1 || stride[1] != 1 || input_h > 1024 ||
395 input_w > 1024 || (filter_h == 1 && filter_w == 1))
396 {
397 chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
398 }
399 }
400 else
401 {
402 // chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING
403 if (stride[0] != 1 || stride[1] != 1)
404 {
405 chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
406 }
407 }
408 }
409
410 err = cudnnGetConvolutionForwardWorkspaceSize(_handle,
411 APPLY_SPECIFIC(input),
412 APPLY_SPECIFIC(kerns),
413 desc,
414 APPLY_SPECIFIC(output),
415 chosen_algo,
416 &worksize);
417 if (err == CUDNN_STATUS_NOT_SUPPORTED) {
418 // Fallback to none algo if not supported
419 // TODO: Print a warning
420 chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
421
422 err = cudnnGetConvolutionForwardWorkspaceSize(_handle,
423 APPLY_SPECIFIC(input),
424 APPLY_SPECIFIC(kerns),
425 desc,
426 APPLY_SPECIFIC(output),
427 chosen_algo,
428 &worksize);
429 }
430 if (err != CUDNN_STATUS_SUCCESS) {
431 PyErr_Format(PyExc_RuntimeError,
432 "GpuDnnConv: error getting worksize: %s",
433 cudnnGetErrorString(err));
434 return 1;
435 }
436 workspace = get_work_mem(worksize);
437 if (workspace == NULL && worksize != 0)
438 return 1;
439
440 err = cudnnConvolutionForward(
441 _handle,
442 (void *)&alpha,
443 APPLY_SPECIFIC(input), CudaNdarray_DEV_DATA(input),
444 APPLY_SPECIFIC(kerns), CudaNdarray_DEV_DATA(kerns),
445 desc,
446 chosen_algo,
447 workspace, worksize,
448 (void *)&beta,
449 APPLY_SPECIFIC(output), CudaNdarray_DEV_DATA(*output));
450 }
451 if (err != CUDNN_STATUS_SUCCESS) {
452 PyErr_Format(PyExc_RuntimeError, "GpuDnnConv: error doing operation: %s",
453 cudnnGetErrorString(err));
454 return 1;
455 }
456 return 0;
457 }
458
459 #undef DTYPE_INPUT_0
460 #undef TYPENUM_INPUT_0
461 #undef ITEMSIZE_INPUT_0
462 #undef DTYPE_INPUT_1
463 #undef TYPENUM_INPUT_1
464 #undef ITEMSIZE_INPUT_1
465 #undef DTYPE_INPUT_2
466 #undef TYPENUM_INPUT_2
467 #undef ITEMSIZE_INPUT_2
468 #undef DTYPE_INPUT_4
469 #undef TYPENUM_INPUT_4
470 #undef ITEMSIZE_INPUT_4
471 #undef DTYPE_INPUT_5
472 #undef TYPENUM_INPUT_5
473 #undef ITEMSIZE_INPUT_5
474 #undef DTYPE_OUTPUT_0
475 #undef TYPENUM_OUTPUT_0
476 #undef ITEMSIZE_OUTPUT_0
477 #undef APPLY_SPECIFIC
478 #undef CONV_ALGO
479 #undef CHOOSE_ALGO
480 #undef CHOOSE_ALGO_ONCE
481 #undef CHOOSE_ALGO_TIME
482 #undef CONV_INPLACE
483
484 __struct_compiled_op_ea4e203b6529466794536f8a1bfa77ae() {
485 // This is only somewhat safe because we:
486 // 1) Are not a virtual class
487 // 2) Do not use any virtual classes in the members
488 // 3) Deal with mostly POD and pointers
489
490 // If this changes, we would have to revise this, but for
491 // now I am tired of chasing segfaults because
492 // initialization code had an error and some pointer has
493 // a junk value.
494 memset(this, 0, sizeof(*this));
495 }
496 ~__struct_compiled_op_ea4e203b6529466794536f8a1bfa77ae(void) {
497 cleanup();
498 }
499
500 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V9, PyObject* storage_V11, PyObject* storage_V13, PyObject* storage_V1) {
501 Py_XINCREF(storage_V3);
502 Py_XINCREF(storage_V5);
503 Py_XINCREF(storage_V7);
504 Py_XINCREF(storage_V9);
505 Py_XINCREF(storage_V11);
506 Py_XINCREF(storage_V13);
507 Py_XINCREF(storage_V1);
508 this->storage_V3 = storage_V3;
509 this->storage_V5 = storage_V5;
510 this->storage_V7 = storage_V7;
511 this->storage_V9 = storage_V9;
512 this->storage_V11 = storage_V11;
513 this->storage_V13 = storage_V13;
514 this->storage_V1 = storage_V1;
515
516
517
518
519
520
521
522
523
524 #define DTYPE_INPUT_0 npy_float32
525 #define TYPENUM_INPUT_0 11
526 #define ITEMSIZE_INPUT_0 4
527 #define DTYPE_INPUT_1 npy_float32
528 #define TYPENUM_INPUT_1 11
529 #define ITEMSIZE_INPUT_1 4
530 #define DTYPE_INPUT_2 npy_float32
531 #define TYPENUM_INPUT_2 11
532 #define ITEMSIZE_INPUT_2 4
533 #define DTYPE_INPUT_4 npy_float32
534 #define TYPENUM_INPUT_4 11
535 #define ITEMSIZE_INPUT_4 4
536 #define DTYPE_INPUT_5 npy_float32
537 #define TYPENUM_INPUT_5 11
538 #define ITEMSIZE_INPUT_5 4
539 #define DTYPE_OUTPUT_0 npy_float32
540 #define TYPENUM_OUTPUT_0 11
541 #define ITEMSIZE_OUTPUT_0 4
542 #define APPLY_SPECIFIC(str) str##_node_ea4e203b6529466794536f8a1bfa77ae_0
543 #define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
544 #define CHOOSE_ALGO 0
545 #define CHOOSE_ALGO_ONCE 0
546 #define CHOOSE_ALGO_TIME 0
547 #define CONV_INPLACE 1
548 #define FAIL { \
549 if (!PyErr_Occurred()) { \
550 PyErr_SetString(PyExc_RuntimeError, \
551 "Unexpected error in an Op's C code. " \
552 "No Python exception was set."); \
553 } \
554 return 15; \
555
/opt/anaconda/lib/python2.7/site-packages/theano/gof/cmodule.pyc in module_from_key(self, key, lnk, keep_lock)
1157 try:
1158 location = dlimport_workdir(self.dirname)
-> 1159 module = lnk.compile_cmodule(location)
1160 name = module.__file__
1161 assert name.startswith(location)
/opt/anaconda/lib/python2.7/site-packages/theano/gof/cc.pyc in compile_cmodule(self, location)
1487 lib_dirs=self.lib_dirs(),
1488 libs=libs,
-> 1489 preargs=preargs)
1490 except Exception as e:
1491 e.args += (str(self.fgraph),)
/opt/anaconda/lib/python2.7/site-packages/theano/sandbox/cuda/nvcc_compiler.pyc in compile_str(module_name, src_code, location, include_dirs, lib_dirs, libs, preargs, rpaths, py_module, hide_symbols)
403 print(cmd)
404 raise Exception('nvcc return status', p.returncode,
--> 405 'for cmd', ' '.join(cmd))
406 elif config.cmodule.compilation_warning and nvcc_stdout:
407 print(nvcc_stdout)
Exception: ('The following error happened while compiling the node', GpuDnnConv{algo='small', inplace=True}(GpuContiguous.0, GpuContiguous.0, GpuAllocEmpty.0, GpuDnnConvDesc{border_mode='valid', subsample=(1, 1), conv_mode='conv', precision='float32'}.0, Constant{1.0}, Constant{0.0}), '\n', 'nvcc return status', 2, 'for cmd', 'nvcc -shared -O3 -Xlinker -rpath,/usr/local/cuda/lib64 -arch=sm_61 -m64 -Xcompiler -fno-math-errno,-Wno-unused-label,-Wno-unused-variable,-Wno-write-strings,-DCUDA_NDARRAY_CUH=c72d035fdf91890f3b36710688069b2e,-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION,-fPIC,-fvisibility=hidden -Xlinker -rpath,/home/ra/.theano/compiledir_Linux-4.8--generic-x86_64-with-debian-stretch-sid-x86_64-2.7.13-64/cuda_ndarray -I/home/ra/.theano/compiledir_Linux-4.8--generic-x86_64-with-debian-stretch-sid-x86_64-2.7.13-64/cuda_ndarray -I/usr/local/cuda/include -I/opt/anaconda/lib/python2.7/site-packages/theano/sandbox/cuda -I/opt/anaconda/lib/python2.7/site-packages/numpy/core/include -I/opt/anaconda/include/python2.7 -I/opt/anaconda/lib/python2.7/site-packages/theano/gof -L/home/ra/.theano/compiledir_Linux-4.8--generic-x86_64-with-debian-stretch-sid-x86_64-2.7.13-64/cuda_ndarray -L/opt/anaconda/lib -o /home/ra/.theano/compiledir_Linux-4.8--generic-x86_64-with-debian-stretch-sid-x86_64-2.7.13-64/tmpbDHDIA/ea4e203b6529466794536f8a1bfa77ae.so mod.cu -lcudart -lcublas -lcuda_ndarray -lcudnn -lpython2.7', "[GpuDnnConv{algo='small', inplace=True}(<CudaNdarrayType(float32, 4D)>, <CudaNdarrayType(float32, 4D)>, <CudaNdarrayType(float32, 4D)>, <CDataType{cudnnConvolutionDescriptor_t}>, Constant{1.0}, Constant{0.0})]")
End of Error/Exception
This is with Python 2.7 and Keras 1.1.2.
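In case the exact Theano/cuDNN combination matters, this is how I can print the versions Theano itself sees (a minimal sketch; I believe the old theano.sandbox.cuda backend exposes dnn.version(), but please correct me if that's wrong):

import theano
print('Theano:', theano.__version__)
# dnn.version() reports the cuDNN version Theano detected;
# it raises an exception if cuDNN is not usable at all.
from theano.sandbox.cuda import dnn
print('cuDNN:', dnn.version())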
I don't know how to fix this, and I'm sorry for the repeated posts. I need help. @jeremy