Hello all,
I uploaded a large dataset as a .tar.xz
file (I’m using Linux) to Paperspace storage, then
ran the command curl file:///filepath.tar.xz
in the Paperspace terminal.
Next, I opened a Jupyter notebook and ran this code:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
from fastai.vision import *
from fastai.metrics import error_rate
bs = 64
# bs = 16 # uncomment this line if you run out of memory even after clicking Kernel->Restart
filepath = 'http://filepath.tar.xz.tgz'
Now, running `path = untar_data(filepath); path`
yields the following output:
Downloading http://filepath.tar.xz.tgz
---------------------------------------------------------------------------
gaierror Traceback (most recent call last)
/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connection.py in _new_conn(self)
158 conn = connection.create_connection(
--> 159 (self._dns_host, self.port), self.timeout, **extra_kw)
160
/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/util/connection.py in create_connection(address, timeout, source_address, socket_options)
56
---> 57 for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
58 af, socktype, proto, canonname, sa = res
/opt/conda/envs/fastai/lib/python3.6/socket.py in getaddrinfo(host, port, family, type, proto, flags)
744 addrlist = []
--> 745 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
746 af, socktype, proto, canonname, sa = res
gaierror: [Errno -2] Name or service not known
During handling of the above exception, another exception occurred:
NewConnectionError Traceback (most recent call last)
/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
599 body=body, headers=headers,
--> 600 chunked=chunked)
601
/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
353 else:
--> 354 conn.request(method, url, **httplib_request_kw)
355
/opt/conda/envs/fastai/lib/python3.6/http/client.py in request(self, method, url, body, headers, encode_chunked)
1238 """Send a complete request to the server."""
-> 1239 self._send_request(method, url, body, headers, encode_chunked)
1240
/opt/conda/envs/fastai/lib/python3.6/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
1284 body = _encode(body, 'body')
-> 1285 self.endheaders(body, encode_chunked=encode_chunked)
1286
/opt/conda/envs/fastai/lib/python3.6/http/client.py in endheaders(self, message_body, encode_chunked)
1233 raise CannotSendHeader()
-> 1234 self._send_output(message_body, encode_chunked=encode_chunked)
1235
/opt/conda/envs/fastai/lib/python3.6/http/client.py in _send_output(self, message_body, encode_chunked)
1025 del self._buffer[:]
-> 1026 self.send(msg)
1027
/opt/conda/envs/fastai/lib/python3.6/http/client.py in send(self, data)
963 if self.auto_open:
--> 964 self.connect()
965 else:
/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connection.py in connect(self)
180 def connect(self):
--> 181 conn = self._new_conn()
182 self._prepare_conn(conn)
/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connection.py in _new_conn(self)
167 raise NewConnectionError(
--> 168 self, "Failed to establish a new connection: %s" % e)
169
NewConnectionError: <urllib3.connection.HTTPConnection object at 0x7fd8a6c3cd30>: Failed to establish a new connection: [Errno -2] Name or service not known
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
/opt/conda/envs/fastai/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
448 retries=self.max_retries,
--> 449 timeout=timeout
450 )
/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
666 release_conn=release_conn, body_pos=body_pos,
--> 667 **response_kw)
668
/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
666 release_conn=release_conn, body_pos=body_pos,
--> 667 **response_kw)
668
/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
666 release_conn=release_conn, body_pos=body_pos,
--> 667 **response_kw)
668
/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
666 release_conn=release_conn, body_pos=body_pos,
--> 667 **response_kw)
668
/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
666 release_conn=release_conn, body_pos=body_pos,
--> 667 **response_kw)
668
/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
637 retries = retries.increment(method, url, error=e, _pool=self,
--> 638 _stacktrace=sys.exc_info()[2])
639 retries.sleep()
/opt/conda/envs/fastai/lib/python3.6/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
397 if new_retry.is_exhausted():
--> 398 raise MaxRetryError(_pool, url, error or ResponseError(cause))
399
MaxRetryError: HTTPConnectionPool(host='~', port=80): Max retries exceeded with url: /filepath.tar.xz.tgz.tgz (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fd8a6c3cd30>: Failed to establish a new connection: [Errno -2] Name or service not known',))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
<ipython-input-35-40b7ff7a4d8d> in <module>
----> 1 path = untar_data(filepath); path
/opt/conda/envs/fastai/lib/python3.6/site-packages/fastai/datasets.py in untar_data(url, fname, dest, data, force_download)
154 shutil.rmtree(dest)
155 if not dest.exists():
--> 156 fname = download_data(url, fname=fname, data=data)
157 data_dir = Config().data_path()
158 assert _check_file(fname) == _checks[url], f"Downloaded file {fname} does not match checksum expected! Remove that file from {data_dir} and try your code again."
/opt/conda/envs/fastai/lib/python3.6/site-packages/fastai/datasets.py in download_data(url, fname, data)
136 if not fname.exists():
137 print(f'Downloading {url}')
--> 138 download_url(f'{url}.tgz', fname)
139 return fname
140
/opt/conda/envs/fastai/lib/python3.6/site-packages/fastai/core.py in download_url(url, dest, overwrite, pbar, show_progress, chunk_size, timeout, retries)
164 s = requests.Session()
165 s.mount('http://',requests.adapters.HTTPAdapter(max_retries=retries))
--> 166 u = s.get(url, stream=True, timeout=timeout)
167 try: file_size = int(u.headers["Content-Length"])
168 except: show_progress = False
/opt/conda/envs/fastai/lib/python3.6/site-packages/requests/sessions.py in get(self, url, **kwargs)
544
545 kwargs.setdefault('allow_redirects', True)
--> 546 return self.request('GET', url, **kwargs)
547
548 def options(self, url, **kwargs):
/opt/conda/envs/fastai/lib/python3.6/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
531 }
532 send_kwargs.update(settings)
--> 533 resp = self.send(prep, **send_kwargs)
534
535 return resp
/opt/conda/envs/fastai/lib/python3.6/site-packages/requests/sessions.py in send(self, request, **kwargs)
644
645 # Send the request
--> 646 r = adapter.send(request, **kwargs)
647
648 # Total elapsed time of the request (approximately)
/opt/conda/envs/fastai/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
514 raise SSLError(e, request=request)
515
--> 516 raise ConnectionError(e, request=request)
517
518 except ClosedPoolError as e:
ConnectionError: HTTPConnectionPool(host='~', port=80): Max retries exceeded with url: /filepath.tar.xz.tgz.tgz (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fd8a6c3cd30>: Failed to establish a new connection: [Errno -2] Name or service not known',))
It looks like the dataset is recognized, but I cannot use it for some reason.