I’ll have to take a closer look at XLNet as I haven’t tested that architecture with exporting/loading.
Do you have a gist I could look at so I can try to replicate what you’re seeing?
I’ll have to take a closer look at XLNet as I haven’t tested that architecture with exporting/loading.
Do you have a gist I could look at so I can try to replicate what you’re seeing?
Cool thanks.
Give me a bit to run and figure out where things are going astray with XLNet (looks like fastai is having an issue persisting the SentencePiece bits, but I’m not 100% sure).
A side question, any particular reason you are using/trying XLNet? Just curious.
-wg
I’m getting RuntimeError: CUDA error: device-side assert triggered
on Colab when calling learn.fit(1, lr=0.01)
on a classification task with distilbert. I can call dls.one_batch()
before fit, but after that I get the same device-side assert triggered
transformers==3.5.1
ohmeow-blurr==0.0.18
=== Software ===
python : 3.6.9
fastai : 2.1.8
fastprogress : 0.2.7
torch : 1.7.0+cu101
nvidia driver : 418.67
torch cuda : 10.1 / is available
torch cudnn : 7603 / is enabled
=== Hardware ===
nvidia gpus : 1
torch devices : 1
- gpu0 : Tesla T4
=== Environment ===
platform : Linux-4.19.112+-x86_64-with-Ubuntu-18.04-bionic
distro : #1 SMP Thu Jul 23 08:00:38 PDT 2020
conda env : Unknown
python : /usr/bin/python3
sys.path :
/env/python
/usr/lib/python36.zip
/usr/lib/python3.6
/usr/lib/python3.6/lib-dynload
/usr/local/lib/python3.6/dist-packages
/usr/lib/python3/dist-packages
/usr/local/lib/python3.6/dist-packages/IPython/extensions
/root/.ipython
epoch train_loss valid_loss accuracy time
0 nan 00:00
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-15-a00043b61013> in <module>()
2 os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
3
----> 4 learn.fit(1,lr=0.01)
15 frames
/usr/local/lib/python3.6/dist-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
203 self.opt.set_hypers(lr=self.lr if lr is None else lr)
204 self.n_epoch = n_epoch
--> 205 self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
206
207 def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None
/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
152
153 def _with_events(self, f, event_type, ex, final=noop):
--> 154 try: self(f'before_{event_type}') ;f()
155 except ex: self(f'after_cancel_{event_type}')
156 finally: self(f'after_{event_type}') ;final()
/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_fit(self)
194 for epoch in range(self.n_epoch):
195 self.epoch=epoch
--> 196 self._with_events(self._do_epoch, 'epoch', CancelEpochException)
197
198 def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):
/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
152
153 def _with_events(self, f, event_type, ex, final=noop):
--> 154 try: self(f'before_{event_type}') ;f()
155 except ex: self(f'after_cancel_{event_type}')
156 finally: self(f'after_{event_type}') ;final()
/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_epoch(self)
188
189 def _do_epoch(self):
--> 190 self._do_epoch_train()
191 self._do_epoch_validate()
192
/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_epoch_train(self)
180 def _do_epoch_train(self):
181 self.dl = self.dls.train
--> 182 self._with_events(self.all_batches, 'train', CancelTrainException)
183
184 def _do_epoch_validate(self, ds_idx=1, dl=None):
/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
152
153 def _with_events(self, f, event_type, ex, final=noop):
--> 154 try: self(f'before_{event_type}') ;f()
155 except ex: self(f'after_cancel_{event_type}')
156 finally: self(f'after_{event_type}') ;final()
/usr/local/lib/python3.6/dist-packages/fastai/learner.py in all_batches(self)
158 def all_batches(self):
159 self.n_iter = len(self.dl)
--> 160 for o in enumerate(self.dl): self.one_batch(*o)
161
162 def _do_one_batch(self):
/usr/local/lib/python3.6/dist-packages/fastai/data/load.py in __iter__(self)
100 self.__idxs=self.get_idxs() # called in context of main process (not workers/subprocesses)
101 for b in _loaders[self.fake_l.num_workers==0](self.fake_l):
--> 102 if self.device is not None: b = to_device(b, self.device)
103 yield self.after_batch(b)
104 self.after_iter()
/usr/local/lib/python3.6/dist-packages/fastai/torch_core.py in to_device(b, device)
249 elif device is None: device=default_device()
250 def _inner(o): return o.to(device, non_blocking=True) if isinstance(o,Tensor) else o.to_device(device) if hasattr(o, "to_device") else o
--> 251 return apply(_inner, b)
252
253 # Cell
/usr/local/lib/python3.6/dist-packages/fastai/torch_core.py in apply(func, x, *args, **kwargs)
197 def apply(func, x, *args, **kwargs):
198 "Apply `func` recursively to `x`, passing on args"
--> 199 if is_listy(x): return type(x)([apply(func, o, *args, **kwargs) for o in x])
200 if isinstance(x,dict): return {k: apply(func, v, *args, **kwargs) for k,v in x.items()}
201 res = func(x, *args, **kwargs)
/usr/local/lib/python3.6/dist-packages/fastai/torch_core.py in <listcomp>(.0)
197 def apply(func, x, *args, **kwargs):
198 "Apply `func` recursively to `x`, passing on args"
--> 199 if is_listy(x): return type(x)([apply(func, o, *args, **kwargs) for o in x])
200 if isinstance(x,dict): return {k: apply(func, v, *args, **kwargs) for k,v in x.items()}
201 res = func(x, *args, **kwargs)
/usr/local/lib/python3.6/dist-packages/fastai/torch_core.py in apply(func, x, *args, **kwargs)
198 "Apply `func` recursively to `x`, passing on args"
199 if is_listy(x): return type(x)([apply(func, o, *args, **kwargs) for o in x])
--> 200 if isinstance(x,dict): return {k: apply(func, v, *args, **kwargs) for k,v in x.items()}
201 res = func(x, *args, **kwargs)
202 return res if x is None else retain_type(res, x)
/usr/local/lib/python3.6/dist-packages/fastai/torch_core.py in <dictcomp>(.0)
198 "Apply `func` recursively to `x`, passing on args"
199 if is_listy(x): return type(x)([apply(func, o, *args, **kwargs) for o in x])
--> 200 if isinstance(x,dict): return {k: apply(func, v, *args, **kwargs) for k,v in x.items()}
201 res = func(x, *args, **kwargs)
202 return res if x is None else retain_type(res, x)
/usr/local/lib/python3.6/dist-packages/fastai/torch_core.py in apply(func, x, *args, **kwargs)
199 if is_listy(x): return type(x)([apply(func, o, *args, **kwargs) for o in x])
200 if isinstance(x,dict): return {k: apply(func, v, *args, **kwargs) for k,v in x.items()}
--> 201 res = func(x, *args, **kwargs)
202 return res if x is None else retain_type(res, x)
203
/usr/local/lib/python3.6/dist-packages/fastai/torch_core.py in _inner(o)
248 if defaults.use_cuda==False: device='cpu'
249 elif device is None: device=default_device()
--> 250 def _inner(o): return o.to(device, non_blocking=True) if isinstance(o,Tensor) else o.to_device(device) if hasattr(o, "to_device") else o
251 return apply(_inner, b)
252
RuntimeError: CUDA error: device-side assert triggered
Probably doing something wrong?
I am having some trouble using blurr. In particular, I have created a colab notebook to apply an approach similar to the blurr tutorial on the webpage https://ohmeow.github.io/blurr/examples-multilabel/, but using the data from the Kaggle arxiv dataset. Specifically, I preprocess the data into a DataFrame called papers and a list of labels called labels
and then
pretrained_model_name = 'allenai/scibert_scivocab_uncased'
task = HF_TASKS_AUTO.TokenClassification
hf_arch, hf_config, hf_tokenizer, hf_model = BLURR_MODEL_HELPER.get_hf_objects(pretrained_model_name, task=task)
hf_arch, type(hf_tokenizer), type(hf_config), type(hf_model)
config = AutoConfig.from_pretrained(pretrained_model_name)
config.num_labels = len(labels)
blocks = (
HF_TextBlock(hf_arch, hf_config, hf_tokenizer, hf_model),
MultiCategoryBlock(encoded=True, vocab=labels)
)
dblock = DataBlock(blocks=blocks,
get_x=ColReader('abstract'), get_y=ColReader(labels),
splitter=RandomSplitter())
dls = dblock.dataloaders(papers, bs=16)
model = HF_BaseModelWrapper(hf_model)
learn = Learner(dls,
model,
opt_func=partial(Adam),
loss_func=BCEWithLogitsLossFlat(),
metrics=[partial(accuracy_multi, thresh=0.2)],
cbs=[HF_BaseModelCallback],
splitter=hf_splitter).to_fp16()
Finally, I use
learn.loss_func.thresh = 0.2
learn.create_opt() # -> will create your layer groups based on your "splitter" function
learn.freeze()
but when I try to get a summary of the model or in fact do anything with the model
learn.blurr_summary()
there seems to be a mismatch of the dimension number
/usr/local/lib/python3.6/dist-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, token_type_ids, position_ids, inputs_embeds, past_key_values_length)
204 if self.position_embedding_type == "absolute":
205 position_embeddings = self.position_embeddings(position_ids)
--> 206 embeddings += position_embeddings
207 embeddings = self.LayerNorm(embeddings)
208 embeddings = self.dropout(embeddings)
RuntimeError: The size of tensor a (686) must match the size of tensor b (512) at non-singleton dimension 1
You may check the colab notebook (you will need to upload the kaggle.json of your own into your drive). Does anyone know what I might be doing wrong? Perhaps @wgpubs?
Thanks a lot in advance!
You’re changing the config after it’s already been used to configure the tokenizer and model. The correct order, if you look at the multilabel example in the docs, is this:
task = HF_TASKS_ALL.SequenceClassification
pretrained_model_name = "distilroberta-base"
config = AutoConfig.from_pretrained(pretrained_model_name)
config.num_labels = len(lbl_cols)
hf_arch, hf_config, hf_tokenizer, hf_model = BLURR_MODEL_HELPER.get_hf_objects(pretrained_model_name,
task=task,
config=config)
… with the config passed in to the get_hf_objects
method.
@wgpubs thanks a lot for the help, but unfortunately it was not only that because after correcting that I still get the same error.
Not sure if you are still having problems with this. I don’t think you are after TokenClassification
here? It should be SequenceClassification
?
Yijin
I’ve only just had a chance to have a play with blurr
. It is amazing. Thank you for your great work. Hopefully we can all help to continue making blurr
better, with PR, bugfixes, and tests.
Yijin
Hello, I got a similar problem.
This is the version I installed
!pip install ohmeow-blurr==0.0.22 datasets==1.3.0 fsspec==0.8.5 -qq
I wonder if I need to install the latest version for the code to work, since I’m not familiar with blurr and fastai.
I’m using this model
pretrained_model_name = 'bert-base-multilingual-cased'
model_name = 'bert-base-multilingual-cased'
This is the library I imported
from transformers import *
from fastai.text.all import *
from blurr.data.all import *
from blurr.modeling.all import *
SEED = 42
set_seed(SEED, True)
import json
with open('/content/wordlist.json', 'r') as f:
wordlist = json.load(f)
import ast
df_converters = {'tokens': ast.literal_eval, 'labels': ast.literal_eval}
train_df = pd.read_csv('/content/train_pre.csv', converters=df_converters)
valid_df = pd.read_csv('/content/valid.csv', converters=df_converters)
labels = sorted(list(set([lbls for sublist in train_df.labels.tolist() for lbls in sublist])))
print(labels)
This is where I get an error: HF_TASKS_AUTO is not recognized, and TokenClassification is also not recognized. I tried SequenceClassification, which also does not work, so I tried HF_TASKS_ALL instead and it worked, though the problem has now moved to the labels.
task = HF_TASKS_AUTO.TokenClassification
config = AutoConfig.from_pretrained(pretrained_model_name)
config.num_labels = len(labels)
hf_arch, hf_config, hf_tokenizer, hf_model = BLURR_MODEL_HELPER.get_hf_objects(pretrained_model_name,
task=task,
config=config)
hf_arch, type(hf_config), type(hf_tokenizer), type(hf_model)
These are the changes
task = HF_TASKS_ALL.TokenClassification
config = AutoConfig.from_pretrained(pretrained_model_name)
config.num_labels = len(labels)
hf_arch, hf_config, hf_tokenizer, hf_model = BLURR_MODEL_HELPER.get_hf_objects(pretrained_model_name,
task=task,
config=config)
hf_arch, type(hf_config), type(hf_tokenizer), type(hf_model)
This is the error I’m getting
IndexError Traceback (most recent call last)
<ipython-input-14-8c8982c723b4> in <module>()
5 hf_arch, hf_config, hf_tokenizer, hf_model = BLURR_MODEL_HELPER.get_hf_objects(pretrained_model_name,
6 task=task,
----> 7 config=config)
8 hf_arch, type(hf_config), type(hf_tokenizer), type(hf_model)
/usr/local/lib/python3.7/dist-packages/blurr/utils.py in get_hf_objects(self, pretrained_model_name_or_path, task, config, tokenizer_cls, model_cls, config_kwargs, tokenizer_kwargs, model_kwargs, cache_dir)
173 else:
174 if (model_cls is None and task is not None):
--> 175 model_cls = self.get_models(arch="auto", task=task.name)[0]
176
177 model = model_cls.from_pretrained(pretrained_model_name_or_path,
IndexError: list index out of range
Why is the index out of range?
Not the answer to your question.
A new 1.0 version of blurr was recently released. Looking at the docs, one now needs to specify the model class ‘AutoModelForTokenClassification’.
model_cls = AutoModelForTokenClassification
pretrained_model_name = "roberta-base"
config = AutoConfig.from_pretrained(pretrained_model_name)
config.num_labels = len(labels)
hf_arch, hf_config, hf_tokenizer, hf_model = get_hf_objects(pretrained_model_name, model_cls=model_cls, config=config)
hf_arch, type(hf_config), type(hf_tokenizer), type(hf_model)
After using AutoModelForTokenClassification I got a different error.
AttributeError Traceback (most recent call last)
in ()
5 hf_arch, hf_config, hf_tokenizer, hf_model = BLURR_MODEL_HELPER.get_hf_objects(pretrained_model_name,
6 task=task,
----> 7 config=config)
8 hf_arch, type(hf_config), type(hf_tokenizer), type(hf_model)
/usr/local/lib/python3.7/dist-packages/blurr/utils.py in get_hf_objects(self, pretrained_model_name_or_path, task, config, tokenizer_cls, model_cls, config_kwargs, tokenizer_kwargs, model_kwargs, cache_dir)
173 else:
174 if (model_cls is None and task is not None):
--> 175 model_cls = self.get_models(arch="auto", task=task.name)[0]
176
177 model = model_cls.from_pretrained(pretrained_model_name_or_path,
AttributeError: type object 'AutoModelForTokenClassification' has no attribute 'name'
Can you post your code in colab or a github gist? And also, can you confirm the version of blurr you installed?
Yah, you definitely want to check your version of blurr and grab the latest.
Also, refer to the docs here for how to set things up for token classification. For example, using the helper singleton has been deprecated and the approach for getting your HF objects looks like this:
hf_arch, hf_config, hf_tokenizer, hf_model = get_hf_objects(pretrained_model_name, model_cls=model_cls, config=config)