How can I load a pretrained model on Kaggle using fastai?


(Zarak) #1

When I try to run this code:

learn = ConvLearner.pretrained(f_model, md, metrics=[accuracy])

I get an error because the kernel on Kaggle cannot download the weights from the internet

Downloading: "https://download.pytorch.org/models/resnet34-333f7ec4.pth" to /tmp/.torch/models/resnet34-333f7ec4.pth
---------------------------------------------------------------------------
gaierror                                  Traceback (most recent call last)
/opt/conda/lib/python3.6/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1317                 h.request(req.get_method(), req.selector, req.data, headers,
-> 1318                           encode_chunked=req.has_header('Transfer-encoding'))
   1319             except OSError as err: # timeout error

/opt/conda/lib/python3.6/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1238         """Send a complete request to the server."""
-> 1239         self._send_request(method, url, body, headers, encode_chunked)
   1240 

/opt/conda/lib/python3.6/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1284             body = _encode(body, 'body')
-> 1285         self.endheaders(body, encode_chunked=encode_chunked)
   1286 

/opt/conda/lib/python3.6/http/client.py in endheaders(self, message_body, encode_chunked)
   1233             raise CannotSendHeader()
-> 1234         self._send_output(message_body, encode_chunked=encode_chunked)
   1235 

/opt/conda/lib/python3.6/http/client.py in _send_output(self, message_body, encode_chunked)
   1025         del self._buffer[:]
-> 1026         self.send(msg)
   1027 

/opt/conda/lib/python3.6/http/client.py in send(self, data)
    963             if self.auto_open:
--> 964                 self.connect()
    965             else:

/opt/conda/lib/python3.6/http/client.py in connect(self)
   1391 
-> 1392             super().connect()
   1393 

/opt/conda/lib/python3.6/http/client.py in connect(self)
    935         self.sock = self._create_connection(
--> 936             (self.host,self.port), self.timeout, self.source_address)
    937         self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

/opt/conda/lib/python3.6/socket.py in create_connection(address, timeout, source_address)
    703     err = None
--> 704     for res in getaddrinfo(host, port, 0, SOCK_STREAM):
    705         af, socktype, proto, canonname, sa = res

/opt/conda/lib/python3.6/socket.py in getaddrinfo(host, port, family, type, proto, flags)
    744     addrlist = []
--> 745     for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
    746         af, socktype, proto, canonname, sa = res

gaierror: [Errno -2] Name or service not known

During handling of the above exception, another exception occurred:

URLError                                  Traceback (most recent call last)
<ipython-input-68-b0bc3f5d394a> in <module>()
----> 1 learn = ConvLearner.pretrained(f_model, md, metrics=[accuracy])
      2 learn.opt_fn = optim.Adam

/opt/conda/lib/python3.6/site-packages/fastai-0.6-py3.6.egg/fastai/conv_learner.py in pretrained(cls, f, data, ps, xtra_fc, xtra_cut, custom_head, precompute, **kwargs)
     98     def pretrained(cls, f, data, ps=None, xtra_fc=None, xtra_cut=0, custom_head=None, precompute=False, **kwargs):
     99         models = ConvnetBuilder(f, data.c, data.is_multi, data.is_reg,
--> 100             ps=ps, xtra_fc=xtra_fc, xtra_cut=xtra_cut, custom_head=custom_head)
    101         return cls(data, models, precompute, **kwargs)
    102 

/opt/conda/lib/python3.6/site-packages/fastai-0.6-py3.6.egg/fastai/conv_learner.py in __init__(self, f, c, is_multi, is_reg, ps, xtra_fc, xtra_cut, custom_head)
     36         else: cut,self.lr_cut = 0,0
     37         cut-=xtra_cut
---> 38         layers = cut_model(f(True), cut)
     39         self.nf = model_features[f] if f in model_features else (num_features(layers)*2)
     40         if not custom_head: layers += [AdaptiveConcatPool2d(), Flatten()]

/opt/conda/lib/python3.6/site-packages/torchvision-0.2.0-py3.6.egg/torchvision/models/resnet.py in resnet34(pretrained, **kwargs)
    174     model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
    175     if pretrained:
--> 176         model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
    177     return model
    178 

/opt/conda/lib/python3.6/site-packages/torch/utils/model_zoo.py in load_url(url, model_dir, map_location, progress)
     63         sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
     64         hash_prefix = HASH_REGEX.search(filename).group(1)
---> 65         _download_url_to_file(url, cached_file, hash_prefix, progress=progress)
     66     return torch.load(cached_file, map_location=map_location)
     67 

/opt/conda/lib/python3.6/site-packages/torch/utils/model_zoo.py in _download_url_to_file(url, dst, hash_prefix, progress)
     68 
     69 def _download_url_to_file(url, dst, hash_prefix, progress):
---> 70     u = urlopen(url)
     71     if requests_available:
     72         file_size = int(u.headers["Content-Length"])

/opt/conda/lib/python3.6/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    221     else:
    222         opener = _opener
--> 223     return opener.open(url, data, timeout)
    224 
    225 def install_opener(opener):

/opt/conda/lib/python3.6/urllib/request.py in open(self, fullurl, data, timeout)
    524             req = meth(req)
    525 
--> 526         response = self._open(req, data)
    527 
    528         # post-process response

/opt/conda/lib/python3.6/urllib/request.py in _open(self, req, data)
    542         protocol = req.type
    543         result = self._call_chain(self.handle_open, protocol, protocol +
--> 544                                   '_open', req)
    545         if result:
    546             return result

/opt/conda/lib/python3.6/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    502         for handler in handlers:
    503             func = getattr(handler, meth_name)
--> 504             result = func(*args)
    505             if result is not None:
    506                 return result

/opt/conda/lib/python3.6/urllib/request.py in https_open(self, req)
   1359         def https_open(self, req):
   1360             return self.do_open(http.client.HTTPSConnection, req,
-> 1361                 context=self._context, check_hostname=self._check_hostname)
   1362 
   1363         https_request = AbstractHTTPHandler.do_request_

/opt/conda/lib/python3.6/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1318                           encode_chunked=req.has_header('Transfer-encoding'))
   1319             except OSError as err: # timeout error
-> 1320                 raise URLError(err)
   1321             r = h.getresponse()
   1322         except:

URLError: <urlopen error [Errno -2] Name or service not known>

The weights are available here, but I’m not sure how to load them without having fastai try to download the weights from the internet.

Any ideas?


(James Requa) #2

In order to access the Pytorch weights in Kaggle kernels you would need to access them first from a Kaggle dataset (like the one you linked to) but then I believe you would first need to copy the weight files to the location where Pytorch is looking for them. If Pytorch sees the weights are already there then it won’t try to download them. You can do all of this in the Kaggle kernel.

I found someone did something similar here in a Kaggle kernel for Keras weights so maybe it would be helpful as a reference.


(Zarak) #4

Hi James,

Thanks for the help!

Here’s what I did:

Create the models directory:

cache_dir = expanduser(join('~', '.torch'))
if not exists(cache_dir):
    makedirs(cache_dir)
models_dir = join(cache_dir, 'models')
if not exists(models_dir):
    makedirs(models_dir)

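Copy the weights over (in the Kaggle kernel `~` expands to `/tmp`, so this is the directory created above; the file must be renamed to the hashed name PyTorch looks for):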

!cp ../input/resnet34/resnet34.pth /tmp/.torch/models/resnet34-333f7ec4.pth

Now I get this error

---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
<ipython-input-73-b0bc3f5d394a> in <module>()
----> 1 learn = ConvLearner.pretrained(f_model, md, metrics=[accuracy])
      2 learn.opt_fn = optim.Adam

/opt/conda/lib/python3.6/site-packages/fastai-0.6-py3.6.egg/fastai/conv_learner.py in pretrained(cls, f, data, ps, xtra_fc, xtra_cut, custom_head, precompute, **kwargs)
     99         models = ConvnetBuilder(f, data.c, data.is_multi, data.is_reg,
    100             ps=ps, xtra_fc=xtra_fc, xtra_cut=xtra_cut, custom_head=custom_head)
--> 101         return cls(data, models, precompute, **kwargs)
    102 
    103     @property

/opt/conda/lib/python3.6/site-packages/fastai-0.6-py3.6.egg/fastai/conv_learner.py in __init__(self, data, models, precompute, **kwargs)
     85     def __init__(self, data, models, precompute=False, **kwargs):
     86         self.precompute = False
---> 87         super().__init__(data, models, **kwargs)
     88         if hasattr(data, 'is_multi'):
     89             self.crit = F.binary_cross_entropy if data.is_multi else F.nll_loss

/opt/conda/lib/python3.6/site-packages/fastai-0.6-py3.6.egg/fastai/learner.py in __init__(self, data, models, opt_fn, tmp_name, models_name, metrics, clip)
     22         self.tmp_path = os.path.join(self.data.path, tmp_name)
     23         self.models_path = os.path.join(self.data.path, models_name)
---> 24         os.makedirs(self.tmp_path, exist_ok=True)
     25         os.makedirs(self.models_path, exist_ok=True)
     26         self.crit,self.reg_fn = None,None

/opt/conda/lib/python3.6/os.py in makedirs(name, mode, exist_ok)
    218             return
    219     try:
--> 220         mkdir(name, mode)
    221     except OSError:
    222         # Cannot rely on checking for EEXIST, since the operating system

OSError: [Errno 30] Read-only file system: '../input/tmp'

So fastai tries to write to the data directory, which is read-only.

I set the path to the working directory

md.path = pathlib.Path('.')

and now it runs.


(Shivam) #5

@z0k I am getting the
OSError: [Errno 30] Read-only file system: '../input/tmp'
even after doing:
data.path = pathlib.Path('.')


(Jeremy Howard) #6

Sounds like you haven’t connected the Kaggle dataset.


(Shivam) #7

@jeremy I have connected both the planets as well as the resnet pretrained weights datasets and they are being shown in kaggle/input/. I also tested displaying a few images in the kernel and it is working. Here is the link to my Kaggle kernel: https://www.kaggle.com/shivamsaboo17/planetsfastai/code


Here is the code:
import pathlib
data.path = pathlib.Path('.')
data = data.resize(int(sz * 1.3), '.') # Tells to ignore training images more than sz * 1.3 to save time
learn = ConvLearner.pretrained(model, data, metrics=metrics)
And the error logs for the above code:
OSError Traceback (most recent call last)
in ()
1 import pathlib
2 data.path = pathlib.Path('.')
----> 3 data = data.resize(int(sz * 1.3), '.') # Tells to ignore training images more than sz * 1.3 to save time
4 learn = ConvLearner.pretrained(model, data, metrics=metrics)

/opt/conda/lib/python3.6/site-packages/fastai-0.6-py3.6.egg/fastai/dataset.py in resize(self, targ_sz, new_path)
344 else: dls += [None,None]
345 t = tqdm_notebook(dls)
--> 346 for dl in t: new_ds.append(self.resized(dl, targ_sz, new_path))
347 t.close()
348 return self.__class__(new_ds[0].path, new_ds, self.bs, self.num_workers, self.classes)

/opt/conda/lib/python3.6/site-packages/fastai-0.6-py3.6.egg/fastai/dataset.py in resized(self, dl, targ, new_path)
336
337 def resized(self, dl, targ, new_path):
--> 338 return dl.dataset.resize_imgs(targ,new_path) if dl else None
339
340 def resize(self, targ_sz, new_path='tmp'):

/opt/conda/lib/python3.6/site-packages/fastai-0.6-py3.6.egg/fastai/dataset.py in resize_imgs(self, targ, new_path)
233
234 def resize_imgs(self, targ, new_path):
--> 235 dest = resize_imgs(self.fnames, targ, self.path, new_path)
236 return self.__class__(self.fnames, self.y, self.transform, dest)
237

/opt/conda/lib/python3.6/site-packages/fastai-0.6-py3.6.egg/fastai/dataset.py in resize_imgs(fnames, targ, path, new_path)
29 with ThreadPoolExecutor(8) as e:
30 ims = e.map(lambda x: resize_img(x, targ, path, new_path), fnames)
---> 31 for x in tqdm(ims, total=len(fnames), leave=False): pass
32 return os.path.join(path,new_path,str(targ))
33

/opt/conda/lib/python3.6/site-packages/tqdm/_tqdm.py in __iter__(self)
895 """, fp_write=getattr(self.fp, 'write', sys.stderr.write))
896
--> 897 for obj in iterable:
898 yield obj
899 # Update and possibly print the progressbar.

/opt/conda/lib/python3.6/concurrent/futures/_base.py in result_iterator()
584 # Careful not to keep a reference to the popped future
585 if timeout is None:
--> 586 yield fs.pop().result()
587 else:
588 yield fs.pop().result(end_time - time.time())

/opt/conda/lib/python3.6/concurrent/futures/_base.py in result(self, timeout)
423 raise CancelledError()
424 elif self._state == FINISHED:
--> 425 return self.__get_result()
426
427 self._condition.wait(timeout)

/opt/conda/lib/python3.6/concurrent/futures/_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result

/opt/conda/lib/python3.6/concurrent/futures/thread.py in run(self)
54
55 try:
---> 56 result = self.fn(*self.args, **self.kwargs)
57 except BaseException as exc:
58 self.future.set_exception(exc)

/opt/conda/lib/python3.6/site-packages/fastai-0.6-py3.6.egg/fastai/dataset.py in <lambda>(x)
28 if not os.path.exists(os.path.join(path,new_path,str(targ),fnames[0])):
29 with ThreadPoolExecutor(8) as e:
---> 30 ims = e.map(lambda x: resize_img(x, targ, path, new_path), fnames)
31 for x in tqdm(ims, total=len(fnames), leave=False): pass
32 return os.path.join(path,new_path,str(targ))

/opt/conda/lib/python3.6/site-packages/fastai-0.6-py3.6.egg/fastai/dataset.py in resize_img(fname, targ, path, new_path)
22 ratio = targ/min(r,c)
23 sz = (scale_to(r, ratio, targ), scale_to(c, ratio, targ))
---> 24 os.makedirs(os.path.split(dest)[0], exist_ok=True)
25 im.resize(sz, Image.LINEAR).save(dest)
26

/opt/conda/lib/python3.6/os.py in makedirs(name, mode, exist_ok)
208 if head and tail and not path.exists(head):
209 try:
--> 210 makedirs(head, mode, exist_ok)
211 except FileExistsError:
212 # Defeats race condition when another thread created the path

/opt/conda/lib/python3.6/os.py in makedirs(name, mode, exist_ok)
218 return
219 try:
--> 220 mkdir(name, mode)
221 except OSError:
222 # Cannot rely on checking for EEXIST, since the operating system

OSError: [Errno 30] Read-only file system: '../input/planet-understanding-the-amazon-from-space/./83'


(Jeremy Howard) #8

You can skip the resizing step. Not much point using that in a kaggle kernel.


(Shivam) #9

Thanks that worked for now :slight_smile:
Just wanted to know if there is any other way to use the resizing functionality in a Kaggle kernel?


(Zarak) #10

Hi @shivamsaboo17,

I don’t know of a workaround other than what I posted above.

Maybe it would be a good idea to allow the user to specify the path for a temp directory as a global config option?