Multifit issues loading "from_pretrained" on Paperspace

Hi all,

I am having trouble loading a language model for Multifit.

My setup is Paperspace; I am running fastai 1.0.59, and multifit is properly installed. When I run:

lang='de'
exp = multifit.from_pretrained(f'{lang}_multifit_paper_version')

I get an error message:


EmptyHeaderError Traceback (most recent call last)
/usr/lib/python3.6/tarfile.py in next(self)
2296 try:
-> 2297 tarinfo = self.tarinfo.fromtarfile(self)
2298 except EOFHeaderError as e:

/usr/lib/python3.6/tarfile.py in fromtarfile(cls, tarfile)
1092 buf = tarfile.fileobj.read(BLOCKSIZE)
-> 1093 obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1094 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE

/usr/lib/python3.6/tarfile.py in frombuf(cls, buf, encoding, errors)
1028 if len(buf) == 0:
-> 1029 raise EmptyHeaderError("empty header")
1030 if len(buf) != BLOCKSIZE:

EmptyHeaderError: empty header

During handling of the above exception, another exception occurred:

ReadError Traceback (most recent call last)
in
----> 1 exp = multifit.from_pretrained(f'{lang}_multifit_paper_version')

/usr/local/lib/python3.6/dist-packages/multifit/training.py in from_pretrained(name)
583 #TODO: Detect name and load configuration
584 from . import configurations
--> 585 return configurations.multifit_paper_version().from_pretrained_(name)

/usr/local/lib/python3.6/dist-packages/multifit/training.py in from_pretrained_(self, name, repo)
576 name = name.rstrip(".tgz") # incase someone put's tgz name the name
577 url = f"https://github.com/{repo}/releases/download/{name}/{name}.tgz"
--> 578 path = untar_data(url.rstrip(".tgz"), data=False) # untar_data adds .tgz
579 return self.load_(path)
580

/usr/local/lib/python3.6/dist-packages/fastai/datasets.py in untar_data(url, fname, dest, data, force_download)
231 if url in _checks:
232 assert _check_file(fname) == _checks[url], f"Downloaded file {fname} does not match checksum expected! Remove that file from {Config().data_archive_path()} and try your code again."
--> 233 tarfile.open(fname, 'r:gz').extractall(dest.parent)
234 return dest

/usr/lib/python3.6/tarfile.py in open(cls, name, mode, fileobj, bufsize, **kwargs)
1587 else:
1588 raise CompressionError("unknown compression type %r" % comptype)
-> 1589 return func(name, filemode, fileobj, **kwargs)
1590
1591 elif "|" in mode:

/usr/lib/python3.6/tarfile.py in gzopen(cls, name, mode, fileobj, compresslevel, **kwargs)
1641
1642 try:
-> 1643 t = cls.taropen(name, mode, fileobj, **kwargs)
1644 except OSError:
1645 fileobj.close()

/usr/lib/python3.6/tarfile.py in taropen(cls, name, mode, fileobj, **kwargs)
1617 if mode not in ("r", "a", "w", "x"):
1618 raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
-> 1619 return cls(name, mode, fileobj, **kwargs)
1620
1621 @classmethod

/usr/lib/python3.6/tarfile.py in __init__(self, name, mode, fileobj, format, tarinfo, dereference, ignore_zeros, encoding, errors, pax_headers, debug, errorlevel, copybufsize)
1480 if self.mode == "r":
1481 self.firstmember = None
-> 1482 self.firstmember = self.next()
1483
1484 if self.mode == "a":

/usr/lib/python3.6/tarfile.py in next(self)
2310 except EmptyHeaderError:
2311 if self.offset == 0:
-> 2312 raise ReadError("empty file")
2313 except TruncatedHeaderError as e:
2314 if self.offset == 0:

ReadError: empty file

I tried downloading and untarring the model myself and then calling the load_ function directly, but I get a NameError:
path = "de_multifit_paper_version"
exp = load_(path)


NameError Traceback (most recent call last)
in
----> 1 exp = load_(path)

NameError: name 'load_' is not defined

Is this a Paperspace-related issue?

Thank you for any help!