Lesson 11 Discussion & Wiki (2019)

It’s the same but a little faster!

import torch
data = torch.randn(300)
grad_data = torch.randn(300)
lr = 1e-3

d1 = data.add(-lr, grad_data)  # fused data + (-lr)*grad_data, no temporary tensor; timeit -> 3.16 µs ± 105 ns per loop
d2 = data.add(-lr*grad_data)   # materializes -lr*grad_data first; timeit -> 9.52 µs ± 172 ns per loop
test_eq(d1, d2)  # doesn't raise: the results are identical
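
For reference (my own note, not from the lecture): newer PyTorch versions deprecate the `add(scalar, tensor)` overload in favour of an explicit `alpha` keyword, which performs the same fused operation:

    # Same fused update, current PyTorch spelling
    d3 = data.add(grad_data, alpha=-lr)  # data + (-lr)*grad_data, still no temporary
    test_eq(d1, d3)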
3 Likes

In 08_data_block.ipynb, the line

x,y = next(iter(train_dl))

throws a BrokenPipeError: [Errno 32] Broken pipe

with the trace shown below.

Does anyone know how to deal with this error?

Thanks!


BrokenPipeError                           Traceback (most recent call last)
<ipython-input> in <module>
----> 1 x,y = next(iter(train_dl))

~\Anaconda3\envs\fastai\lib\site-packages\torch\utils\data\dataloader.py in __iter__(self)
    160
    161     def __iter__(self):
--> 162         return _DataLoaderIter(self)
    163
    164     def __len__(self):

~\Anaconda3\envs\fastai\lib\site-packages\torch\utils\data\dataloader.py in __init__(self, loader)
    436             #     before it starts, and __del__ tries to join but will get:
    437             #     AssertionError: can only join a started process.
--> 438             w.start()
    439             self.index_queues.append(index_queue)
    440             self.workers.append(w)

~\Anaconda3\envs\fastai\lib\multiprocessing\process.py in start(self)
    110                'daemonic processes are not allowed to have children'
    111         _cleanup()
--> 112         self._popen = self._Popen(self)
    113         self._sentinel = self._popen.sentinel
    114         # Avoid a refcycle if the target function holds an indirect

~\Anaconda3\envs\fastai\lib\multiprocessing\context.py in _Popen(process_obj)
    221     @staticmethod
    222     def _Popen(process_obj):
--> 223         return _default_context.get_context().Process._Popen(process_obj)
    224
    225 class DefaultContext(BaseContext):

~\Anaconda3\envs\fastai\lib\multiprocessing\context.py in _Popen(process_obj)
    320         def _Popen(process_obj):
    321             from .popen_spawn_win32 import Popen
--> 322             return Popen(process_obj)
    323
    324 class SpawnContext(BaseContext):

~\Anaconda3\envs\fastai\lib\multiprocessing\popen_spawn_win32.py in __init__(self, process_obj)
     63             try:
     64                 reduction.dump(prep_data, to_child)
---> 65                 reduction.dump(process_obj, to_child)
     66             finally:
     67                 set_spawning_popen(None)

~\Anaconda3\envs\fastai\lib\multiprocessing\reduction.py in dump(obj, file, protocol)
     58 def dump(obj, file, protocol=None):
     59     '''Replacement for pickle.dump() using ForkingPickler.'''
---> 60     ForkingPickler(file, protocol).dump(obj)
     61
     62 #

BrokenPipeError: [Errno 32] Broken pipe

Set num_workers=0 rather than 4 and you should be good. I had to do that on Windows too.
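
A minimal sketch of the workaround (assuming the DataLoader is built as in 08_data_block.ipynb; train_ds stands in for whatever dataset the notebook constructs):

    # On Windows, DataLoader workers are spawned rather than forked, so everything
    # they touch must be picklable; num_workers=0 loads in the main process instead.
    from torch.utils.data import DataLoader
    train_dl = DataLoader(train_ds, batch_size=64, shuffle=True, num_workers=0)
    x, y = next(iter(train_dl))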

2 Likes

Thanks Brian, that worked.

@jcatanza @brismith sounds like something isn’t picklable. If you find out what it is, and find a workaround for Windows users so that multiprocessing works, let us know!
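
A hedged way to hunt for the culprit (illustrative only, not from the notebooks): try pickling the pieces a spawned worker would receive and see which one fails:

    import pickle

    # Attempt to pickle each component the worker processes need
    for name, obj in [('dataset', train_dl.dataset), ('collate_fn', train_dl.collate_fn)]:
        try:
            pickle.dumps(obj)
        except Exception as e:
            print(f'{name} is not picklable: {e}')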

1 Like

I’m trying to wrap my head around 07a_lsuv.ipynb:

    while mdl(xb) is not None and abs(h.mean)  > 1e-3: m.bias -= h.mean
    while mdl(xb) is not None and abs(h.std-1) > 1e-3: m.weight.data /= h.std

the nb annotation says:

Note that the mean doesn't exactly stay at 0, since we change the standard deviation afterwards by scaling the weight.

It wasn't helping me understand why this was so, since dividing by something normally shouldn't affect the mean.

But we have a ReLU in between: adjusting the std rescales the conv2d weights, which changes the layer's output, so the adjustment affects what the ReLU lets through, and thus the mean ends up different.

But wait: let's flip the two lines, so that the bias (the shift applied after the ReLU) is adjusted only after changing the std:

    while mdl(xb) is not None and abs(h.std-1) > 1e-3: m.weight.data /= h.std
    while mdl(xb) is not None and abs(h.mean)  > 1e-3: m.bias -= h.mean

and voilà, now we get mean=0, std=1:

for m in mods: print(lsuv_module(m, xb))
(2.9802322387695312e-08, 1.0000479221343994)
(6.99442237461767e-09, 0.9994299411773682)
(-4.6566128730773926e-09, 0.9994862079620361)
(-2.7939677238464355e-08, 0.9999381303787231)
(-1.1175870895385742e-08, 1.00041663646698)

The change works because the sub shift happens after the ReLU:

class GeneralRelu(nn.Module):
    [...]
    def forward(self, x):
        x = F.relu(x)  # removed leaky_relu since it isn't used in this nb
        if self.sub is not None: x.sub_(self.sub)
        return x

P.S. Should those while loops check that they don't end up as infinite loops?
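
A hedged sketch of what such a guard could look like (assuming the setup from 07a_lsuv.ipynb; max_attempts is my own choice): replace each while loop with one capped at a fixed number of passes:

    # Cap the LSUV adjustment loops so they can never run forever
    max_attempts = 10
    for _ in range(max_attempts):
        if mdl(xb) is None or abs(h.std - 1) <= 1e-3: break
        m.weight.data /= h.std
    for _ in range(max_attempts):
        if mdl(xb) is None or abs(h.mean) <= 1e-3: break
        m.bias -= h.mean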

9 Likes

The 10_transforms.ipynb notebook throws a ValueError: empty range for randrange()

at the line

show_batch(next(iter(dl)))

in the “Random crop” section (please see below).

And the same error occurs again in the cell under “Perspective warping” with

x = next(iter(dl))
show_batch(x)

Anyone know how to fix this? I am running Windows 10 64-bit.
Thanks!

ValueError                                Traceback (most recent call last)
<ipython-input> in <module>
----> 1 show_batch(next(iter(dl)))

~\Anaconda3\envs\fastai\lib\site-packages\torch\utils\data\dataloader.py in __next__(self)
    527         if self.num_workers == 0:  # same-process loading
    528             indices = next(self.sample_iter)  # may raise StopIteration
--> 529             batch = self.collate_fn([self.dataset[i] for i in indices])
    530             if self.pin_memory:
    531                 batch = _utils.pin_memory.pin_memory_batch(batch)

~\Anaconda3\envs\fastai\lib\site-packages\torch\utils\data\dataloader.py in <listcomp>(.0)
    527         if self.num_workers == 0:  # same-process loading
    528             indices = next(self.sample_iter)  # may raise StopIteration
--> 529             batch = self.collate_fn([self.dataset[i] for i in indices])
    530             if self.pin_memory:
    531                 batch = _utils.pin_memory.pin_memory_batch(batch)

~\fastai\fastai_docs\dev_course\dl2_jcat\exp\nb_08.py in __getitem__(self, idx)
     54         res = super().__getitem__(idx)
     55         if isinstance(res,list): return [self._get(o) for o in res]
---> 56         return self._get(res)
     57
     58 class ImageList(ItemList):

~\fastai\fastai_docs\dev_course\dl2_jcat\exp\nb_08.py in _get(self, i)
     49
     50     def get(self, i): return i
---> 51     def _get(self, i): return compose(self.get(i), self.tfms)
     52
     53     def __getitem__(self, idx):

~\fastai\fastai_docs\dev_course\dl2_jcat\exp\nb_08.py in compose(x, funcs, *args, order_key, **kwargs)
     37 def compose(x, funcs, *args, order_key='_order', **kwargs):
     38     key = lambda o: getattr(o, order_key, 0)
---> 39     for f in sorted(listify(funcs), key=key): x = f(x, **kwargs)
     40     return x
     41

<ipython-input> in __call__(self, x)
     15     def __call__(self, x):
     16         csize = default_crop_size(*x.size) if self.crop_size is None else self.crop_size
---> 17         return x.transform(self.size, PIL.Image.EXTENT, self.get_corners(*x.size, *csize), resample=self.resample)
     18
     19     def get_corners(self, w, h): return (0,0,w,h)

<ipython-input> in get_corners(self, w, h, wc, hc)
     25 class RandomCrop(GeneralCrop):
     26     def get_corners(self, w, h, wc, hc):
---> 27         left,top = randint(0,w-wc),randint(0,h-hc)
     28         return (left, top, left+wc, top+hc)

~\Anaconda3\envs\fastai\lib\random.py in randint(self, a, b)
    220         """
    221
--> 222         return self.randrange(a, b+1)
    223
    224     def _randbelow(self, n, int=int, maxsize=1<<BPF, type=type,

~\Anaconda3\envs\fastai\lib\random.py in randrange(self, start, stop, step, _int)
    198                 return istart + self._randbelow(width)
    199             if step == 1:
--> 200                 raise ValueError("empty range for randrange() (%d,%d, %d)" % (istart, istop, width))
    201
    202         # Non-unit step argument supplied.

ValueError: empty range for randrange() (0,-5, -5)

I will let you know if I find a solution.

I wondered about that too; I didn't have time to think through whether it's possible.

1 Like

I think I saw something similar: the notebook uses a hard-coded size of 320, which fails if the chosen image is smaller than 320 pixels. I just used a different image than the 0th and it worked, but you could also check the dimensions and use a smaller resize, as in the sketch below.
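
A hedged sketch of that dimension-check idea (SafeRandomCrop is my own name; GeneralCrop and randint are as defined in 10_transforms.ipynb):

    # Clamp the randint bounds at zero so images smaller than the crop size
    # no longer trigger "empty range for randrange()"
    class SafeRandomCrop(GeneralCrop):
        def get_corners(self, w, h, wc, hc):
            left, top = randint(0, max(0, w-wc)), randint(0, max(0, h-hc))
            return (left, top, left+wc, top+hc)

Note this only avoids the exception; a crop larger than the image still extends past its border, so resizing small images first is the more principled fix.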

I don't have an intuition, but this PyTorch LSUV implementation aborts after n attempts.

Perhaps it's more likely to happen if one doesn't use a good weight init in the first place?

I found this, which suggests it only happens with a custom collate_fn. I'll see if I can narrow it down.

The model ends up learning that a middle-aged person with a smile must be a fish.

Anyway, the idea of a middle-aged person has certainly changed quite a bit over the years.

2 Likes

A post was merged into an existing topic: Lesson 10 Discussion & Wiki (2019)

In the notebook 09c_add_progress_bar.ipynb, the line

learn.fit(2)
throws AttributeError: 'NBMasterBar' object has no attribute 'update'

Anyone know how to fix this? I'm running Windows 10 64-bit. Thanks!

Details below:

Total time: 00:00

epoch train_loss train_accuracy valid_loss valid_accuracy time


AttributeError                            Traceback (most recent call last)
<ipython-input> in <module>
----> 1 learn.fit(2)

~\fastai\fastai_docs\dev_course\dl2_jcat\exp\nb_09b.py in fit(self, epochs)
     60             self.do_begin_fit(epochs)
     61             for epoch in range(epochs):
---> 62                 self.do_begin_epoch(epoch)
     63                 if not self('begin_epoch'): self.all_batches()
     64

~\fastai\fastai_docs\dev_course\dl2_jcat\exp\nb_09b.py in do_begin_epoch(self, epoch)
     54     def do_begin_epoch(self, epoch):
     55         self.epoch,self.dl = epoch,self.data.train_dl
---> 56         return self('begin_epoch')
     57
     58     def fit(self, epochs):

~\fastai\fastai_docs\dev_course\dl2_jcat\exp\nb_09b.py in __call__(self, cb_name)
     79         res = False
     80         assert cb_name in self.ALL_CBS
---> 81         for cb in sorted(self.cbs, key=lambda x: x._order): res = cb(cb_name) and res
     82         return res
     83

~\fastai\fastai_docs\dev_course\dl2_jcat\exp\nb_05b.py in __call__(self, cb_name)
     19     def __call__(self, cb_name):
     20         f = getattr(self, cb_name, None)
---> 21         if f and f(): return True
     22         return False
     23

<ipython-input> in begin_epoch(self)
      9     def after_fit(self): self.mbar.on_iter_end()
     10     def after_batch(self): self.pb.update(self.iter)
---> 11     def begin_epoch (self): self.set_pb()
     12     def begin_validate(self): self.set_pb()
     13

<ipython-input> in set_pb(self)
     14     def set_pb(self):
     15         self.pb = progress_bar(self.dl, parent=self.mbar, auto_update=False)
---> 16         self.mbar.update(self.epoch)

AttributeError: 'NBMasterBar' object has no attribute 'update'

See the top post re fastprogress.

D’oh! Thanks Jeremy!

I shall not smile again. It makes me look like a fish :frowning:

2 Likes

Relevant question, because we do normalize at inference time. One could argue that running inference on data that hasn't been normalized by the mean and std of the training data is a waste of time.
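
A minimal sketch of that point (variable names are illustrative, in the spirit of the course's normalize helper): validation and inference data get the training set's statistics, never their own:

    import torch

    def normalize(x, m, s): return (x - m) / s

    train_mean, train_std = x_train.mean(), x_train.std()  # stats from training data only
    x_train = normalize(x_train, train_mean, train_std)
    x_valid = normalize(x_valid, train_mean, train_std)    # same stats at inference time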