Command untar_data does not extract dataset

Hello all,

I uploaded a large dataset as a .tar.xz file (I’m using Linux) to Paperspace storage,
then ran the command `curl file:///filepath.tar.xz` in the Paperspace terminal.

Next, I opened a Jupyter notebook and ran this code:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

from fastai.vision import *
from fastai.metrics import error_rate

bs = 64
# bs = 16 # uncomment this line if you run out of memory even after clicking Kernel->R

filepath = 'http://filepath.tar.xz.tgz' 

Now, running `path = untar_data(filepath); path`
yields the following output:
Downloading http://filepath.tar.xz.tgz

---------------------------------------------------------------------------
gaierror                                  Traceback (most recent call last)
/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connection.py in _new_conn(self)
    158             conn = connection.create_connection(
--> 159                 (self._dns_host, self.port), self.timeout, **extra_kw)
    160 

/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/util/connection.py in create_connection(address, timeout, source_address, socket_options)
     56 
---> 57     for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
     58         af, socktype, proto, canonname, sa = res

/opt/conda/envs/fastai/lib/python3.6/socket.py in getaddrinfo(host, port, family, type, proto, flags)
    744     addrlist = []
--> 745     for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
    746         af, socktype, proto, canonname, sa = res

gaierror: [Errno -2] Name or service not known

During handling of the above exception, another exception occurred:

NewConnectionError                        Traceback (most recent call last)
/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    599                                                   body=body, headers=headers,
--> 600                                                   chunked=chunked)
    601 

/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    353         else:
--> 354             conn.request(method, url, **httplib_request_kw)
    355 

/opt/conda/envs/fastai/lib/python3.6/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1238         """Send a complete request to the server."""
-> 1239         self._send_request(method, url, body, headers, encode_chunked)
   1240 

/opt/conda/envs/fastai/lib/python3.6/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1284             body = _encode(body, 'body')
-> 1285         self.endheaders(body, encode_chunked=encode_chunked)
   1286 

/opt/conda/envs/fastai/lib/python3.6/http/client.py in endheaders(self, message_body, encode_chunked)
   1233             raise CannotSendHeader()
-> 1234         self._send_output(message_body, encode_chunked=encode_chunked)
   1235 

/opt/conda/envs/fastai/lib/python3.6/http/client.py in _send_output(self, message_body, encode_chunked)
   1025         del self._buffer[:]
-> 1026         self.send(msg)
   1027 

/opt/conda/envs/fastai/lib/python3.6/http/client.py in send(self, data)
    963             if self.auto_open:
--> 964                 self.connect()
    965             else:

/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connection.py in connect(self)
    180     def connect(self):
--> 181         conn = self._new_conn()
    182         self._prepare_conn(conn)

/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connection.py in _new_conn(self)
    167             raise NewConnectionError(
--> 168                 self, "Failed to establish a new connection: %s" % e)
    169 

NewConnectionError: <urllib3.connection.HTTPConnection object at 0x7fd8a6c3cd30>: Failed to establish a new connection: [Errno -2] Name or service not known

During handling of the above exception, another exception occurred:

MaxRetryError                             Traceback (most recent call last)
/opt/conda/envs/fastai/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    448                     retries=self.max_retries,
--> 449                     timeout=timeout
    450                 )

/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    666                                 release_conn=release_conn, body_pos=body_pos,
--> 667                                 **response_kw)
    668 

/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    666                                 release_conn=release_conn, body_pos=body_pos,
--> 667                                 **response_kw)
    668 

/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    666                                 release_conn=release_conn, body_pos=body_pos,
--> 667                                 **response_kw)
    668 

/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    666                                 release_conn=release_conn, body_pos=body_pos,
--> 667                                 **response_kw)
    668 

/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    666                                 release_conn=release_conn, body_pos=body_pos,
--> 667                                 **response_kw)
    668 

/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    637             retries = retries.increment(method, url, error=e, _pool=self,
--> 638                                         _stacktrace=sys.exc_info()[2])
    639             retries.sleep()

/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
    397         if new_retry.is_exhausted():
--> 398             raise MaxRetryError(_pool, url, error or ResponseError(cause))
    399 

MaxRetryError: HTTPConnectionPool(host='~', port=80): Max retries exceeded with url: /filepath.tar.xz.tgz.tgz (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fd8a6c3cd30>: Failed to establish a new connection: [Errno -2] Name or service not known',))

During handling of the above exception, another exception occurred:

ConnectionError                           Traceback (most recent call last)
<ipython-input-35-40b7ff7a4d8d> in <module>
----> 1 path = untar_data(filepath); path

/opt/conda/envs/fastai/lib/python3.6/site-packages/fastai/datasets.py in untar_data(url, fname, dest, data, force_download)
    154         shutil.rmtree(dest)
    155     if not dest.exists():
--> 156         fname = download_data(url, fname=fname, data=data)
    157         data_dir = Config().data_path()
    158         assert _check_file(fname) == _checks[url], f"Downloaded file {fname} does not match checksum expected! Remove that file from {data_dir} and try your code again."

/opt/conda/envs/fastai/lib/python3.6/site-packages/fastai/datasets.py in download_data(url, fname, data)
    136     if not fname.exists():
    137         print(f'Downloading {url}')
--> 138         download_url(f'{url}.tgz', fname)
    139     return fname
    140 

/opt/conda/envs/fastai/lib/python3.6/site-packages/fastai/core.py in download_url(url, dest, overwrite, pbar, show_progress, chunk_size, timeout, retries)
    164     s = requests.Session()
    165     s.mount('http://',requests.adapters.HTTPAdapter(max_retries=retries))
--> 166     u = s.get(url, stream=True, timeout=timeout)
    167     try: file_size = int(u.headers["Content-Length"])
    168     except: show_progress = False

/opt/conda/envs/fastai/lib/python3.6/site-packages/requests/sessions.py in get(self, url, **kwargs)
    544 
    545         kwargs.setdefault('allow_redirects', True)
--> 546         return self.request('GET', url, **kwargs)
    547 
    548     def options(self, url, **kwargs):

/opt/conda/envs/fastai/lib/python3.6/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    531         }
    532         send_kwargs.update(settings)
--> 533         resp = self.send(prep, **send_kwargs)
    534 
    535         return resp

/opt/conda/envs/fastai/lib/python3.6/site-packages/requests/sessions.py in send(self, request, **kwargs)
    644 
    645         # Send the request
--> 646         r = adapter.send(request, **kwargs)
    647 
    648         # Total elapsed time of the request (approximately)

/opt/conda/envs/fastai/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    514                 raise SSLError(e, request=request)
    515 
--> 516             raise ConnectionError(e, request=request)
    517 
    518         except ClosedPoolError as e:

ConnectionError: HTTPConnectionPool(host='~', port=80): Max retries exceeded with url: /filepath.tar.xz.tgz.tgz (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fd8a6c3cd30>: Failed to establish a new connection: [Errno -2] Name or service not known',))

It looks like the dataset is recognized, but I cannot use it for some reason.

1 Like