Hi,
I have set up fastai and am running the lesson 1 notebook. I was able to download the tar files and extract them. I have also replaced the “pat” regex to suit the Windows path format. However, the notebook keeps executing indefinitely (without aborting or getting killed) when I run the “create_cnn” step:
learn = create_cnn(data, models.resnet34, metrics=error_rate)
However, if I replace the “learn” assignment with the code below, I am able to proceed:
def get_model(pretrained=True, model_name='resnet34', **kwargs):
    """Instantiate the architecture named `model_name` from `models`.

    `pretrained` is passed positionally to the architecture constructor and
    `kwargs` are forwarded unchanged. With the default `model_name` this is
    exactly `models.resnet34(pretrained, **kwargs)`.
    """
    # Look the constructor up by name so `model_name` is honoured instead of
    # being silently ignored in favour of a hard-coded resnet34.
    arch = getattr(models, model_name)(pretrained, **kwargs)
    return arch
learn = Learner(data, get_model(), metrics=[accuracy])
I’m on Windows 10 with a GTX 1080, the latest NVIDIA (GeForce) drivers, and the latest available versions of the PyTorch and fastai libraries. Can someone suggest how to debug and resolve this issue?
Update:
I extracted the code for “create_cnn” and added a pdb breakpoint; this is where it is stuck now:
from fastai.vision import *
from fastai.metrics import error_rate
import torch
torch.cuda.set_device(0)
bs = 64
path = untar_data(URLs.PETS); path
WindowsPath(‘C:/Users/Imran/.fastai/data/oxford-iiit-pet’)
path_anno = path/'annotations'
path_img = path/'images'
fnames = get_image_files(path_img)
# Fixed seed, set before the data bunch is built.
np.random.seed(2)
# The label is the file-name stem before the trailing "_<digits>.jpg".
# Accept either path separator so the pattern matches Windows ('\') and
# POSIX ('/') style paths alike, and escape the dot so it only matches a
# literal ".jpg" extension.
pat = re.compile(r'[\\/]([^\\/]+)_\d+\.jpg$')
data = ImageDataBunch.from_name_re(path_img, fnames, pat, ds_tfms=get_transforms(), size=224, bs=bs).normalize(imagenet_stats)
import pdb;
def _default_split(m:nn.Module): return (m[1],)
def _resnet_split(m:nn.Module): return (m[0][6],m[1])
…
# Per-architecture metadata: 'cut' is the index handed to body creation and
# 'split' is the function that yields the layer-group split points.
_default_meta = {'cut':-1, 'split':_default_split}
_resnet_meta  = {'cut':-2, 'split':_resnet_split }
# Each architecture gets its own shallow copy (`{**...}`) of the resnet
# metadata, so changing one entry does not affect the others.
model_meta = {
    models.resnet18 :{**_resnet_meta}, models.resnet34: {**_resnet_meta},
    models.resnet50 :{**_resnet_meta}, models.resnet101:{**_resnet_meta}}
def cnn_config(arch):
    """Get the metadata associated with `arch`.

    Returns the `model_meta` entry for `arch`, falling back to
    `_default_meta` when the architecture has no dedicated entry.
    """
    # Side effect: cuDNN benchmark mode is switched on globally on every call.
    torch.backends.cudnn.benchmark = True
    return model_meta.get(arch, _default_meta)
…
class Hook():
    "Create a hook on `m` with `hook_func`."
    def __init__(self, m:nn.Module, hook_func:HookFunc, is_forward:bool=True, detach:bool=True):
        "Register `hook_fn` on `m` as a forward hook, or a backward hook if `is_forward` is False."
        self.hook_func,self.detach,self.stored = hook_func,detach,None
        f = m.register_forward_hook if is_forward else m.register_backward_hook
        # Keep the handle returned by the registration so the hook can be removed later.
        self.hook = f(self.hook_fn)
        self.removed = False

    def hook_fn(self, module:nn.Module, input:Tensors, output:Tensors):
        "Applies `hook_func` to `module`, `input`, `output`."
        if self.detach:
            # Listy inputs/outputs become lazy generators of detached tensors.
            input  = (o.detach() for o in input ) if is_listy(input ) else input.detach()
            output = (o.detach() for o in output) if is_listy(output) else output.detach()
        # The result of the latest call overwrites whatever was stored before.
        self.stored = self.hook_func(module, input, output)

    def remove(self):
        "Remove the hook from the model."
        # Guarded by `removed` so calling this more than once is harmless.
        if not self.removed:
            self.hook.remove()
            self.removed=True

    # Context-manager protocol: the hook is removed when the `with` block exits.
    def __enter__(self, *args): return self
    def __exit__(self, *args): self.remove()
…
class Hooks():
    "Create several hooks on the modules in `ms` with `hook_func`."
    def __init__(self, ms:Collection[nn.Module], hook_func:HookFunc, is_forward:bool=True, detach:bool=True):
        "Create one `Hook` per module in `ms`, all sharing the same settings."
        self.hooks = [Hook(m, hook_func, is_forward, detach) for m in ms]

    # Container protocol, delegating to the underlying list of hooks.
    def __getitem__(self,i:int)->Hook: return self.hooks[i]
    def __len__(self)->int: return len(self.hooks)
    def __iter__(self): return iter(self.hooks)

    @property
    def stored(self):
        "List of the `stored` value of every hook, in order."
        return [o.stored for o in self]

    def remove(self):
        "Remove the hooks from the model."
        for h in self.hooks: h.remove()

    # Context-manager protocol: all hooks are removed when the `with` block exits.
    def __enter__(self, *args): return self
    def __exit__ (self, *args): self.remove()
…
def dummy_batch(m: nn.Module, size:tuple=(64,64))->Tensor:
    """Create a dummy batch to go through `m` with `size`.

    The batch has shape `(1, ch_in, *size)`, where `ch_in` comes from
    `in_channels(m)`. It is built from `one_param(m)`, does not require
    gradients, and is filled with uniform values in [-1, 1].
    """
    ch_in = in_channels(m)
    # Debugging breakpoint deliberately inserted while investigating the hang.
    pdb.set_trace()
    # `requires_grad_` is the in-place method; `requires_grad` is a bool
    # attribute and cannot be called.
    return one_param(m).new(1, ch_in, *size).requires_grad_(False).uniform_(-1.,1.)
…
def dummy_eval(m:nn.Module, size:tuple=(64,64)):
    "Pass a `dummy_batch` in evaluation mode in `m` with `size`."
    # `m.eval()` switches `m` to evaluation mode and returns it, so the call
    # below runs the forward pass; `m` is left in evaluation mode afterwards.
    return m.eval()(dummy_batch(m, size))
…
def _hook_inner(m,i,o): return o if isinstance(o,Tensor) else o if is_listy(o) else list(o)
…
def hook_outputs(modules:Collection[nn.Module], detach:bool=True, grad:bool=False)->Hooks:
    "Return `Hooks` that store activations of all `modules` in `self.stored`"
    # `grad=True` selects backward hooks (is_forward=False); the default
    # registers forward hooks.
    return Hooks(modules, _hook_inner, detach=detach, is_forward=not grad)
…
def model_sizes(m:nn.Module, size:tuple=(64,64))->Tuple[Sizes,Tensor,Hooks]:
    """Pass a dummy input through the model `m` to get the various sizes of activations.

    Returns a list with the `.shape` of the value stored by each hook.
    NOTE(review): the return annotation does not match this list; it is left
    unchanged to preserve the signature.
    """
    # Hooks are created by iterating over `m` itself; they are removed again
    # when the `with` block exits.
    with hook_outputs(m) as hooks:
        # The forward output is discarded; only the hooked activations matter.
        dummy_eval(m, size)
        return [o.stored.shape for o in hooks]
…
def num_features_model(m:nn.Module)->int:
    """Return the number of output features for `model`.

    The value is dimension 1 of the last activation shape reported by
    `model_sizes`. The dummy input starts at 64x64 and is doubled after every
    failure; once the next size would exceed 2048 the last exception is
    re-raised.
    """
    sz = 64
    while True:
        try: return model_sizes(m, size=(sz,sz))[-1][1]
        # Deliberately broad: any failure is treated as "input too small" and
        # retried with a larger input.
        except Exception:
            sz *= 2
            if sz > 2048: raise
…
def create_cnn1(data:DataBunch, arch:Callable, cut:Union[int,Callable]=None, pretrained:bool=True,
                lin_ftrs:Optional[Collection[int]]=None, ps:Floats=0.5,
                custom_head:Optional[nn.Module]=None, split_on:Optional[SplitFuncOrIdxList]=None,
                bn_final:bool=False, **learn_kwargs:Any)->Learner:
    """Build a `Learner` for `data` from the convnet architecture `arch`.

    The model is `nn.Sequential(body, head)`: `body` comes from
    `create_body(arch, pretrained, cut)` and `head` is `custom_head` if given,
    otherwise the result of `create_head`. The learner is split with
    `split_on` (or the architecture's default 'split' from `cnn_config`) and
    frozen when `pretrained` is True. Extra `learn_kwargs` go to `Learner`.
    """
    meta = cnn_config(arch)
    body = create_body(arch, pretrained, cut)
    # NOTE(review): the feature count is doubled before building the head,
    # presumably because the generated head concatenates two poolings;
    # confirm against `create_head`.
    nf = num_features_model(body) * 2
    head = custom_head or create_head(nf, data.c, lin_ftrs, ps=ps, bn_final=bn_final)
    model = nn.Sequential(body, head)
    learn = Learner(data, model, **learn_kwargs)
    learn.split(ifnone(split_on,meta['split']))
    if pretrained: learn.freeze()
    # Only the head (`model[1]`) is re-initialised; the body is left as created.
    apply_init(model[1], nn.init.kaiming_normal_)
    return learn
…
learn = create_cnn1(data, models.resnet34, metrics=error_rate)
(5)dummy_batch()
(Pdb) n
--Return--
(5)dummy_batch()->tensor([[[[ 3…5320e-01]]]])
(Pdb) n
--Call--
c:\users\anaconda3\envs\fastai\lib\site-packages\torch\nn\modules\module.py(483)__call__()
-> def __call__(self, *input, **kwargs):
(Pdb) n
c:\users\anaconda3\envs\fastai\lib\site-packages\torch\nn\modules\module.py(484)__call__()
-> for hook in self._forward_pre_hooks.values():
(Pdb) n
c:\users\anaconda3\envs\fastai\lib\site-packages\torch\nn\modules\module.py(486)__call__()
-> if torch._C._get_tracing_state():
(Pdb) n
c:\users\anaconda3\envs\fastai\lib\site-packages\torch\nn\modules\module.py(489)__call__()
-> result = self.forward(*input, **kwargs)
(Pdb) n
Execution does not proceed after this.
Output of the install script:
=== Software ===
python : 3.6.6
fastai : 1.0.42
fastprogress : 0.1.18
torch : 1.0.1
torch cuda : 9.0 / is available
torch cudnn : 7005 / is enabled
=== Hardware ===
torch devices : 1
- gpu0 : GeForce GTX 1080
=== Environment ===
platform : Windows-10-10.0.17134-SP0
conda env : fastai
python : C:\Users\Imran\Anaconda3\envs\fastai\python.exe
sys.path :
C:\Users\Imran\Anaconda3\envs\fastai\python36.zip
C:\Users\Imran\Anaconda3\envs\fastai\DLLs
C:\Users\Imran\Anaconda3\envs\fastai\lib
C:\Users\Imran\Anaconda3\envs\fastai
C:\Users\Imran\Anaconda3\envs\fastai\lib\site-packages
no nvidia-smi is found