Is there any way of preventing train from computing the losses?
This could be useful with models that return losses at training time but not at inference.
Not sure what you mean here. At inference (e.g. predict) there will be no 'loss', since there is no target?
Yijin
I have a model that returns the losses already computed, so I don't want one_batch to execute
self.loss = self.loss_func(self.pred, *self.yb); self('after_loss')
because the model returns losses in training mode and predictions in validation mode.
I am trying to redefine the learner as follows:
class Mask_RCNN_Learner(Learner):
def __init__(self, dls, model, loss_func=None, opt_func=Adam, lr=defaults.lr, splitter=trainable_params, cbs=None,
metrics=None, path=None, model_dir='models', wd=None, wd_bn_bias=False, train_bn=True,
moms=(0.95,0.85,0.95)):
super().__init__(dls, model, loss_func, opt_func, lr, splitter, cbs,
metrics, path, model_dir, wd, wd_bn_bias, train_bn,
moms)
def _split(self, b):
i = getattr(self.dls, 'n_inp', 1 if len(b)==1 else len(b)-1)
self.xb,self.yb = b[:i],b[i:]
def _do_epoch_train(self):
try:
self.dl = self.dls.train;
# Modification
self.n_iter = len(self.dl)
for o in enumerate(self.dl):
i, b = *o
self.iter = i
try:
self._split(b)
loss_dict = self.model(*self.xb,*self.yb)
if len(self.yb) == 0: return
self.loss = sum(loss for loss in loss_dict.values())
if not self.training: return
self.loss.backward()
self.opt.step()
self.opt.zero_grad()
except CancelBatchException as e:
raise e
except CancelTrainException as e:
raise e
def _do_epoch_validate(self, ds_idx=1, dl=None):
if dl is None: dl = self.dls[ds_idx]
try:
self.dl = dl;
with torch.no_grad():
# Modification
self.n_iter = len(self.dl)
for o in enumerate(self.dl):
i, b = *o
self.iter = i
try:
self._split(b)
detection = self.model(*self.xb);
self.loss = self.loss_func(detection, *self.yb)
# COMPUTING METRICS
if not self.training: return
except CancelBatchException as e:
raise e
except CancelValidException as e:
raise e
@log_args(but='cbs')
def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):
with self.added_cbs(cbs):
if reset_opt or not self.opt: self.create_opt()
if wd is None: wd = self.wd
if wd is not None: self.opt.set_hypers(wd=wd)
self.opt.set_hypers(lr=self.lr if lr is None else lr)
try:
self._do_begin_fit(n_epoch)
for epoch in range(n_epoch):
try:
self.epoch=epoch
self._do_epoch_train()
self._do_epoch_validate()
except CancelEpochException as e:
raise e
except CancelFitException as e:
raise e
However, I am getting:
File "<ipython-input-39-0a0657f193c1>", line 23
self._split(b)
^
SyntaxError: can't use starred expression here
The error message points to your use of *, in the line i, b = *o. This SO page explains it, I think? You should try changing that line to i, b = o, or delete that line and just change the line above to for i, b in enumerate(self.dl):.
Not sure how all this relates to your question about not computing losses; I did not read through your code, and don't know what's happening in it!
Good luck.
Yijin
I have solved it, though I don't fully understand how.
class Mask_RCNN_Learner(Learner):
def __init__(self, dls, model, loss_func=None, opt_func=Adam, lr=defaults.lr, splitter=trainable_params, cbs=None,
metrics=None, path=None, model_dir='models', wd=None, wd_bn_bias=False, train_bn=True,
moms=(0.95,0.85,0.95)):
super().__init__(dls, model, loss_func, opt_func, lr, splitter, cbs,
metrics, path, model_dir, wd, wd_bn_bias, train_bn,
moms)
def all_batches(self):
self.n_iter = len(self.dl)
for o in enumerate(self.dl): self.one_batch(*o)
def one_batch(self, i, b):
self.iter = i
try:
self._split(b); self('begin_batch')
loss_dict = self.model(*self.xb,self.yb); self('after_pred')
if len(self.yb) == 0: return
loss = sum(loss for loss in loss_dict.values())
self.loss = loss; self('after_loss')
if not self.training: return
self.loss.backward(); self('after_backward')
self.opt.step(); self('after_step')
self.opt.zero_grad()
except CancelBatchException: self('after_cancel_batch')
finally: self('after_batch')
def _do_begin_fit(self, n_epoch):
self.n_epoch,self.loss = n_epoch,tensor(0.); self('begin_fit')
def _do_epoch_train(self):
try:
self.dl = self.dls.train; self('begin_train')
self.all_batches()
except CancelTrainException: self('after_cancel_train')
finally: self('after_train')
def _do_epoch_validate(self, ds_idx=1, dl=None):
if dl is None: dl = self.dls[ds_idx]
try:
self.dl = dl; self('begin_validate')
with torch.no_grad(): self.all_batches()
except CancelValidException: self('after_cancel_validate')
finally: self('after_validate')
@log_args(but='cbs')
def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):
with self.added_cbs(cbs):
if reset_opt or not self.opt: self.create_opt()
if wd is None: wd = self.wd
if wd is not None: self.opt.set_hypers(wd=wd)
self.opt.set_hypers(lr=self.lr if lr is None else lr)
try:
self._do_begin_fit(n_epoch)
for epoch in range(n_epoch):
try:
self.epoch=epoch; self('begin_epoch')
self._do_epoch_train()
self._do_epoch_validate()
except CancelEpochException: self('after_cancel_epoch')
finally: self('after_epoch')
except CancelFitException: self('after_cancel_fit')
finally: self('after_fit')
If you look, I am just adjusting these lines of code:
loss_dict = self.model(*self.xb,self.yb); self('after_pred')
if len(self.yb) == 0: return
loss = sum(loss for loss in loss_dict.values())
self.loss = loss; self('after_loss')
This is because I am working with torchvision.models.detection.maskrcnn_resnet50_fpn. This model expects an image and a dict with the target as input.
The thing is that in evaluation mode it returns a dict with masks, boxes and labels.
I would like the accuracy metrics to be calculated just on the masks.
That's why I was asking where to modify the data passed into the metrics. The output of this model is not a usual one.
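(One hedged option, sketched here and not fastai's documented approach for this, would be a small Callback that rewrites learn.pred in after_pred during validation; the 'masks' key below is what torchvision's Mask R-CNN normally returns in eval mode.)

class KeepMasksOnly(Callback):
    "Hypothetical callback: keep only the predicted masks so metrics operate on masks alone."
    def after_pred(self):
        if not self.training:                          # in eval mode the model returns a list of dicts
            self.learn.pred = [o['masks'] for o in self.pred]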
Should an image look visually similar before and after normalization?
I've created dataloaders with no augmentations, so the images that come out of dls.one_batch() have only been converted to float tensors.
xb,_ = dls.one_batch()
norm = Normalize.from_stats(*imagenet_stats)
xb_n = norm(xb)
show_image(xb_n[0])
Then I applied the Normalize transform and viewed the image: it looks totally distorted. Some portions of the image appear masked out, while the visible portion looks as if it has gone through a major brightness/contrast change. I've also calculated my own statistics for the dataset and tried to Normalize using those, but the images look equally distorted.
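For what it's worth, a normalized image is not expected to look like the original when plotted directly: the values fall outside [0, 1] and get clipped by the display code. A minimal sketch of undoing the normalization before showing, assuming the norm and xb_n from above (Normalize also has a decode that should do the same, if I recall correctly):

# Hypothetical sketch: denormalize before displaying
mean = tensor(imagenet_stats[0]).view(1, 3, 1, 1).to(xb_n.device)
std  = tensor(imagenet_stats[1]).view(1, 3, 1, 1).to(xb_n.device)
xb_dec = xb_n * std + mean            # undo (x - mean) / std
show_image(xb_dec[0].clamp(0., 1.))   # should look like the original image again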
I have seen many decorators like @patch and @typedispatch in the code.
What do these decorators do?
@patch allows you to write extension functions for predefined types. For instance, there's a patched implementation of Principal Component Analysis for torch.Tensor, so you can simply call it as a member function of a tensor. Imagine x is a 2D tensor storing some embeddings; you can call x.pca(2) to get 2 principal components of that tensor.
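A minimal sketch of how @patch is typically used (the class and method names here are purely illustrative):

from fastcore.all import patch   # the location of `patch` may vary across fastcore versions

class Counter:
    def __init__(self): self.n = 0

@patch
def bump(self: Counter, by=1):
    "Illustrative extension method added to Counter after its definition."
    self.n += by
    return self.n

c = Counter()
print(c.bump(), c.bump(3))  # 1 4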
@typedispatch
enables dynamic dispatch of functions. We can overload a function with a behavior specific to a type and itāll act on those types only. A good example of this would be IntToFloatTensor
transform:
class IntToFloatTensor(Transform):
"Transform image to float tensor, optionally dividing by 255 (e.g. for images)."
order = 10 #Need to run after PIL transforms on the GPU
def __init__(self, div=255., div_mask=1): store_attr(self, 'div,div_mask')
def encodes(self, o:TensorImage): return o.float().div_(self.div)
def encodes(self, o:TensorMask ): return o.long() // self.div_mask
def decodes(self, o:TensorImage): return ((o.clamp(0., 1.) * self.div).long()) if self.div else o
The encodes method has two different implementations, one for TensorImage and one for TensorMask, so IntToFloatTensor will behave differently depending on which type it receives.
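A minimal standalone sketch of the @typedispatch decorator itself, separate from the Transform example above (the function name is just illustrative, and the module path may differ across fastcore versions):

from fastcore.dispatch import typedispatch

@typedispatch
def describe(x: int): return f"an int: {x}"

@typedispatch
def describe(x: str): return f"a string: {x!r}"

print(describe(3))     # dispatches to the int implementation
print(describe("hi"))  # dispatches to the str implementation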
I believe all these decorators are meta-programming aspects of fastai that are independent of any deep learning framework. The library is called fastcore and you can learn more about it here.
Looks interesting.
Could it be used for overriding predefined methods that have the same parameters?
I guess you're looking for a way to combine the two. I'm not sure I understood correctly; could you tell us more about your use case?
I would like to use @patch to easily override the fastai2.learner.Recorder.after_batch method.
If I create a new Recorder class, I need to modify the line where the Learner extracts its array of callbacks, so subclassing is not possible!
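Since @patch assigns the function as an attribute on the class, it should replace the existing method for every Recorder instance. A hedged sketch, not tested against a specific fastai2 version:

from fastcore.all import patch            # location of `patch` may vary across fastcore versions
from fastai2.learner import Recorder

_orig_after_batch = Recorder.after_batch  # keep a handle on the original if you still need it

@patch
def after_batch(self: Recorder):
    "Replacement for Recorder.after_batch; delegates to the original, then adds custom behaviour."
    _orig_after_batch(self)
    # ... your extra logic here ...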
If I create a subclass of fastai2.data.load.DataLoader(GetAttr) to change this line of code and add a collate that doesn't stack items:
def create_batch(self, b): return (fa_collate,fa_convert)[self.prebatched](b)
is it possible to link DataBlock.dataloaders to this new subclass? Or do I need to create a new DataBlock subclass tied to a Dataset subclass and a DataLoaders subclass?
However, I see something strange: DataLoader is defined in both data.core and data.load.
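If I remember correctly, the dataloaders methods accept a dl_type argument that selects which DataLoader class gets instantiated, so a full DataBlock subclass may not be needed. A hedged sketch (the dl_type kwarg, and the dblock/source names, are assumptions to check against your fastai2 version):

from fastai2.data.core import TfmdDL

class NoStackDL(TfmdDL):
    "Hypothetical DataLoader subclass whose create_batch leaves items un-collated."
    def create_batch(self, b): return list(b)

# Assumption: DataBlock.dataloaders forwards a `dl_type` kwarg down to the DataLoader factory
dls = dblock.dataloaders(source, bs=8, dl_type=NoStackDL)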
I hope this is the right place for a short question:
I need to display data of the form x,y = ((tensor, tensor), (tensor)). I generate it as shown in the previous expression, the whole item at a time. I @typedispatch show_batch() and I get the following error: AssertionError: Match length mismatch (full stack trace at the end of the post).
I declared my own tuple (that does nothing):
class HeatingTuple(Tuple):
pass
and tried to write the show_batch() (and show_results() for that matter):
def show_results(x, y, samples:HeatingTuple, ...
def show_results(x:HeatingTuple, y, samples, ...
to no avail. Calling dls.show_batch() gives me the error. Manually running show_batch works (after I remove the annotation).
DataBlock is using an ItemTransform:
DataBlock(item_tfms=HeatingItemiser(future_len))
HeatingItemiser.encodes() returns a tuple and decodes() a HeatingTuple().
This is the error from dls.show_batch():
AssertionError Traceback (most recent call last)
<ipython-input-12-785270320dfd> in <module>
56 seq_dloaders = seq_block.dataloaders(samples, bs=8)
57 # onebatch = seq_dloaders.one_batch()
---> 58 seq_dloaders.show_batch()
59 # show_batch(None, None, samples=onebatch)
60 # print(f"Data shapes: x[0] {onebatch[0][0].shape}, x[1] {onebatch[0][1].shape} y{onebatch[1].shape}")
~/work/installs/fastai2/fastai2/data/core.py in show_batch(self, b, max_n, ctxs, show, unique, **kwargs)
97 if b is None: b = self.one_batch()
98 if not show: return self._pre_show_batch(b, max_n=max_n)
---> 99 show_batch(*self._pre_show_batch(b, max_n=max_n), ctxs=ctxs, max_n=max_n, **kwargs)
100 if unique: self.get_idxs = old_get_idxs
101
~/work/installs/fastai2/fastai2/data/core.py in _pre_show_batch(self, b, max_n)
87 b = self.decode(b)
88 if hasattr(b, 'show'): return b,None,None
---> 89 its = self._decode_batch(b, max_n, full=False)
90 if not is_listy(b): b,its = [b],L((o,) for o in its)
91 return detuplify(b[:self.n_inp]),detuplify(b[self.n_inp:]),its
~/work/installs/fastai2/fastai2/data/core.py in _decode_batch(self, b, max_n, full)
81 f = self.after_item.decode
82 f = compose(f, partial(getattr(self.dataset,'decode',noop), full = full))
---> 83 return L(batch_to_samples(b, max_n=max_n)).map(f)
84
85 def _pre_show_batch(self, b, max_n=9):
~/work/installs/fastcore/fastcore/foundation.py in map(self, f, *args, **kwargs)
373 else f.format if isinstance(f,str)
374 else f.__getitem__)
--> 375 return self._new(map(g, self))
376
377 def filter(self, f, negate=False, **kwargs):
~/work/installs/fastcore/fastcore/foundation.py in _new(self, items, *args, **kwargs)
324 @property
325 def _xtra(self): return None
--> 326 def _new(self, items, *args, **kwargs): return type(self)(items, *args, use_list=None, **kwargs)
327 def __getitem__(self, idx): return self._get(idx) if is_indexer(idx) else L(self._get(idx), use_list=None)
328 def copy(self): return self._new(self.items.copy())
~/work/installs/fastcore/fastcore/foundation.py in __call__(cls, x, *args, **kwargs)
39 return x
40
---> 41 res = super().__call__(*((x,) + args), **kwargs)
42 res._newchk = 0
43 return res
~/work/installs/fastcore/fastcore/foundation.py in __init__(self, items, use_list, match, *rest)
315 if items is None: items = []
316 if (use_list is not None) or not _is_array(items):
--> 317 items = list(items) if use_list else _listify(items)
318 if match is not None:
319 if is_coll(match): match = len(match)
~/work/installs/fastcore/fastcore/foundation.py in _listify(o)
251 if isinstance(o, list): return o
252 if isinstance(o, str) or _is_array(o): return [o]
--> 253 if is_iter(o): return list(o)
254 return [o]
255
~/work/installs/fastcore/fastcore/foundation.py in __call__(self, *args, **kwargs)
217 if isinstance(v,_Arg): kwargs[k] = args.pop(v.i)
218 fargs = [args[x.i] if isinstance(x, _Arg) else x for x in self.pargs] + args[self.maxi+1:]
--> 219 return self.fn(*fargs, **kwargs)
220
221 # Cell
~/work/installs/fastcore/fastcore/utils.py in _inner(x, *args, **kwargs)
346 if order is not None: funcs = funcs.sorted(order)
347 def _inner(x, *args, **kwargs):
--> 348 for f in L(funcs): x = f(x, *args, **kwargs)
349 return x
350 return _inner
~/work/installs/fastai2/fastai2/data/core.py in decode(self, o, full)
294 def __iter__(self): return (self[i] for i in range(len(self)))
295 def __repr__(self): return coll_repr(self)
--> 296 def decode(self, o, full=True): return tuple(tl.decode(o_, full=full) for o_,tl in zip(o,tuplify(self.tls, match=o)))
297 def subset(self, i): return type(self)(tls=L(tl.subset(i) for tl in self.tls), n_inp=self.n_inp)
298 def _new(self, items, *args, **kwargs): return super()._new(items, tfms=self.tfms, do_setup=False, **kwargs)
~/work/installs/fastcore/fastcore/utils.py in tuplify(o, use_list, match)
132 def tuplify(o, use_list=False, match=None):
133 "Make `o` a tuple"
--> 134 return tuple(L(o, use_list=use_list, match=match))
135
136 # Cell
~/work/installs/fastcore/fastcore/foundation.py in __call__(cls, x, *args, **kwargs)
39 return x
40
---> 41 res = super().__call__(*((x,) + args), **kwargs)
42 res._newchk = 0
43 return res
~/work/installs/fastcore/fastcore/foundation.py in __init__(self, items, use_list, match, *rest)
319 if is_coll(match): match = len(match)
320 if len(items)==1: items = items*match
--> 321 else: assert len(items)==match, 'Match length mismatch'
322 super().__init__(items)
323
AssertionError: Match length mismatch
Thank you!
It seems that a fresh install doesn't necessarily install the correct CUDA version.
I installed as per http://dev.fast.ai/#Installing, but later I saw that torch.cuda.is_available() returned False, even though I have CUDA... so I went to the PyTorch page and grabbed their command line:
conda install pytorch torchvision cudatoolkit=10.1 -c pytorch
Collecting package metadata (current_repodata.json): done
Solving environment: done
## Package Plan ##
environment location: /home/tyoc213/miniconda3/envs/fastai2
added / updated specs:
- cudatoolkit=10.1
- pytorch
- torchvision
The following packages will be downloaded:
package | build
---------------------------|-----------------
cudatoolkit-10.1.243 | h6bb024c_0 347.4 MB
pytorch-1.5.0 |py3.7_cuda10.1.243_cudnn7.6.3_0 399.5 MB pytorch
torchvision-0.6.0 | py37_cu101 11.8 MB pytorch
------------------------------------------------------------
Total: 758.7 MB
The following packages will be DOWNGRADED:
cudatoolkit 10.2.89-hfd86e86_1 --> 10.1.243-h6bb024c_0
pytorch 1.5.0-py3.7_cuda10.2.89_cudnn7.6.5_0 --> 1.5.0-py3.7_cuda10.1.243_cudnn7.6.3_0
torchvision 0.6.0-py37_cu102 --> 0.6.0-py37_cu101
Proceed ([y]/n)? y
Downloading and Extracting Packages
pytorch-1.5.0 | 399.5 MB | ########################################################### | 100%
cudatoolkit-10.1.243 | 347.4 MB | ########################################################### | 100%
torchvision-0.6.0 | 11.8 MB | ########################################################### | 100%
Preparing transaction: done
Verifying transaction: done
Executing transaction: done
As you can see, the original install had pulled the wrong CUDA version; after switching to 10.1 instead of 10.2, torch.cuda.is_available() is now True.
I don't know if this was just a one-off failure or why it installed those other builds. Is there a way for the install to detect the correct ones? If not, people will think some things are 'slow' because they are actually running on the CPU.
Hi everyone,
In some problems, for efficient storage, we can use lmdb to store a dataset with e.g. millions of images in just one or two files, as in the PyTorch LSUN dataset or in most scene text recognition problems.
I'm facing an issue when using an lmdb dataset with fastai2. Everything works fine except exporting the trained Learner, i.e. learner.export(). The problem is that an lmdb dataset opens an environment and then returns the indexed images, labels, etc. from it as arrays of bytes (see this example in the PyTorch LSUN dataset). When a Learner uses DataLoaders created from an lmdb dataset, it can't be exported and gives TypeError: can't pickle Environment objects, which refers to the environment that was opened.
Is there anything I can do to make .export() work? Thanks.
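One common (non-fastai-specific) workaround, sketched under the assumption that the dataset class is yours to edit, is to drop the lmdb environment from the pickled state and reopen it lazily:

import lmdb

class LMDBDataset:
    "Hypothetical dataset wrapper that keeps the lmdb env out of the pickled state."
    def __init__(self, path):
        self.path = path
        self.env = None                      # opened lazily, so a freshly unpickled copy still works

    def _ensure_env(self):
        if self.env is None:
            self.env = lmdb.open(self.path, readonly=True, lock=False)

    def __getstate__(self):
        state = self.__dict__.copy()
        state['env'] = None                  # the Environment object itself is not picklable
        return state

    def __getitem__(self, i):
        self._ensure_env()
        with self.env.begin(write=False) as txn:
            return txn.get(str(i).encode())  # decode the bytes into image/label as needed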
Hi,
I am continuing to explore Siamese networks in fastai2. I am trying to 'hook' in GradCAM for a Siamese model, at the 'encoder' layer, but I am not sure how to extract the output activations and gradients for both 'passes' over the two images that form the Siamese image pair.
From the notebook, by definition the Siamese model's forward pass calls the same 'encoder' twice, once on each image, and concatenates their outputs before calling 'head'.
I have defined my hooks:
class Hook():
def __init__(self, m):
self.hook = m.register_forward_hook(self.hook_func)
def hook_func(self, m, i, o): self.stored = o.detach().clone()
def __enter__(self, *args): return self
def __exit__(self, *args): self.hook.remove()
class HookBwd():
def __init__(self, m):
self.hook = m.register_backward_hook(self.hook_func)
def hook_func(self, m, gi, go): self.stored = go[0].detach().clone()
def __enter__(self, *args): return self
def __exit__(self, *args): self.hook.remove()
And I created a test image-pair, and applied the transforms using the defined dataloaders:
img1 = PILImage.create(Path('/path/to/image1.jpg'))
img2 = PILImage.create(Path('/path/to/image2.jpg'))
siamtest = SiameseImage(img1, img2)
tdl = learn.dls.test_dl([siamtest])
x = tdl.one_batch()
And then I called the hooks and ran the model in eval mode:
cls = 1
with HookBwd(learn.model.encoder) as hookg:
with Hook(learn.model.encoder) as hook:
output = learn.model.eval()(x[0],x[1])
act = hook.stored[0].cpu()
output[0,cls].backward()
grad = hookg.stored[0].cpu()
But this will only get me one act for the activations and one grad for the gradients, presumably from the second (i.e. final) call of encoder on the second image x[1]. I am not sure how to make it output two different sets of act and grad, one for the encoder pass of x[0] and one for x[1].
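One way around this (a hedged sketch on top of the Hook classes above, not fastai's own API) is to accumulate every forward/backward call in a list instead of overwriting self.stored, so the first entry corresponds to the pass over x[0] and the second to x[1]:

class HookAll():
    "Stores the output of every forward call through the hooked module."
    def __init__(self, m):
        self.stored = []
        self.hook = m.register_forward_hook(self.hook_func)
    def hook_func(self, m, i, o): self.stored.append(o.detach().clone())
    def __enter__(self, *args): return self
    def __exit__(self, *args): self.hook.remove()

class HookBwdAll():
    "Stores the incoming gradient of every backward call through the hooked module."
    def __init__(self, m):
        self.stored = []
        self.hook = m.register_backward_hook(self.hook_func)
    def hook_func(self, m, gi, go): self.stored.append(go[0].detach().clone())
    def __enter__(self, *args): return self
    def __exit__(self, *args): self.hook.remove()

with HookBwdAll(learn.model.encoder) as hookg:
    with HookAll(learn.model.encoder) as hook:
        output = learn.model.eval()(x[0], x[1])
        act1, act2 = hook.stored      # activations for x[0] and x[1], in forward order
    output[0, cls].backward()
    # backward hooks usually fire in reverse order of the forward passes, but check this
    grad2, grad1 = hookg.stored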
Thoughts and comments welcome. Thank you.
Regards,
Yijin
Is it better to do early stopping on the loss or on a precision metric?
What do you think of the following approach:
Compare the validation loss between 2 models and select the best hyperparameters based on this.
With these hyperparameters, train a model that stops early when accuracy starts decreasing.
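For reference, a hedged sketch of how either choice might look with fastai2's EarlyStoppingCallback (the monitor names must match your recorder's metric names, and `learn` is assumed to already exist):

import numpy as np
from fastai2.callback.tracker import EarlyStoppingCallback

# Stop when the validation loss stops improving...
learn.fit_one_cycle(20, cbs=EarlyStoppingCallback(monitor='valid_loss', patience=3))

# ...or when a metric such as accuracy stops improving
learn.fit_one_cycle(20, cbs=EarlyStoppingCallback(monitor='accuracy', comp=np.greater, patience=3))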
How do I create a dataloader for multiple outputs?
@Transform
def get_x(x): return f"{path}\\train_images\\{x[4]}"
@Transform
def get_arrays(x):
return [rle_decode(x[0],(xs,ys)),
rle_decode(x[1],(xs,ys)),
rle_decode(x[2],(xs,ys)),
rle_decode(x[3],(xs,ys))]
@Transform
def array_to_mask(x):
return (PILMask.create(x[0]),PILMask.create(x[1]),PILMask.create(x[2]),PILMask.create(x[3]))
batch_tfms=[*aug_transforms(size=(xs//7,ys//7)), Normalize.from_stats(*imagenet_stats)]
dsets=Datasets(train_df_t,tfms=[[get_x,PILImage.create],[get_arrays,array_to_mask]],splits=splits)
dsets.valid[0]
gives me
(PILImage mode=RGB size=2100x1400,
(PILMask mode=I size=2100x1400,
PILMask mode=I size=2100x1400,
PILMask mode=I size=2100x1400,
PILMask mode=I size=2100x1400))
but
dls = dsets.dataloaders(bs=12,after_item=[ToTensor],before_batch=[IntToFloatTensor, Normalize.from_stats(*imagenet_stats)])
b=dls.one_batch()
len(b),len(b[0]),len(b[1])
gives me
(2, 12, 4)
I expected (2, 12, 12), but y has only one sample instead of a batch of samples.
Are those 4 masks you're getting in b[1] from only one sample?
Considering those are PILMasks, don't you want to stack them into a single tensor, i.e. a tensor of shape (4, 2100, 1400)?
There's a parameter n_inp for Datasets; have you tried setting that to 1 explicitly?
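A hedged sketch of what that stacking could look like as an extra transform (the helper name is illustrative, not part of fastai):

import numpy as np
import torch

def stack_masks(masks):
    "Hypothetical helper: turn a tuple of 4 PILMasks into one (4, H, W) long tensor."
    return torch.stack([torch.as_tensor(np.array(m), dtype=torch.long) for m in masks])

# e.g. appended after array_to_mask in the target pipeline:
# tfms=[[get_x, PILImage.create], [get_arrays, array_to_mask, stack_masks]]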
Is setups in a Transform called only once in the case of DataLoaders? Normalize uses setups to calculate mean and std if they're None. Does this mean we only calculate the mean and std of a single batch and use that for the entire dataset?
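If that single-batch estimate is a concern, a hedged sketch of computing dataset-wide statistics yourself and passing them explicitly (dls is assumed to be your existing DataLoaders, and averaging per-batch stds is only an approximation of the true dataset std):

import torch

means, stds = [], []
for xb, _ in dls.train:
    means.append(xb.mean(dim=[0, 2, 3]))   # per-channel mean of this batch
    stds.append(xb.std(dim=[0, 2, 3]))     # per-channel std of this batch
mean, std = torch.stack(means).mean(0), torch.stack(stds).mean(0)

norm = Normalize.from_stats(mean, std)     # pass explicit stats instead of relying on setups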