Hello colleagues.
I need some help with a rather broad question. I have some sample data in the form of files. They are categorized into two categories - [‘benign’, ‘malicious’]. To prepare the data for a ResNet learner, I apply a basic function which turns the contents of a file into a 64x16 tensor of digits:
So, I have everything set up:
db = DataBlock(
blocks=(RegressionBlock, CategoryBlock),
get_items=get_train_files,
splitter=RandomSplitter(),
get_y=label_func,
get_x=extract_features
)
db.summary seems to tell me everything is OK.
Setting-up type transforms pipelines
Collecting items from /mnt/ramdisk/data
Found 90000 items
2 datasets of sizes 72000,18000
Setting up Pipeline: extract_features -> RegressionSetup -- {'c': None}
Setting up Pipeline: label_func -> Categorize -- {'vocab': None, 'sort': True, 'add_na': False}
Building one sample
Pipeline: extract_features -> RegressionSetup -- {'c': None}
starting from
/mnt/ramdisk/data/malicious_files/training/82c5b186a102632fb40c2bf89e9f0c68b229768d6e5e11e7fc0827171589a56f.utf8
applying extract_features gives
Tensor of size 16x64
applying RegressionSetup -- {'c': None} gives
Tensor of size 16x64
Pipeline: label_func -> Categorize -- {'vocab': None, 'sort': True, 'add_na': False}
starting from
/mnt/ramdisk/data/malicious_files/training/82c5b186a102632fb40c2bf89e9f0c68b229768d6e5e11e7fc0827171589a56f.utf8
applying label_func gives
malicious
applying Categorize -- {'vocab': None, 'sort': True, 'add_na': False} gives
TensorCategory(1)
Final sample: (tensor([[0., 1., 0., ..., 0., 1., 1.],
[0., 1., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 1., 0.],
[1., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]]), TensorCategory(1))
Collecting items from /mnt/ramdisk/data
Found 90000 items
2 datasets of sizes 72000,18000
Setting up Pipeline: extract_features -> RegressionSetup -- {'c': None}
Setting up Pipeline: label_func -> Categorize -- {'vocab': None, 'sort': True, 'add_na': False}
Setting up after_item: Pipeline: ToTensor
Setting up before_batch: Pipeline:
Setting up after_batch: Pipeline:
Building one batch
Applying item_tfms to the first sample:
Pipeline: ToTensor
starting from
(Tensor of size 16x64, TensorCategory(1))
applying ToTensor gives
(Tensor of size 16x64, TensorCategory(1))
Adding the next 3 samples
No before_batch transform to apply
Collating items in a batch
No batch_tfms to apply
But when I launch the actual training, I receive an exception that is beyond my understanding.
learn = vision_learner(test, resnet18, metrics=error_rate)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Input In [24], in <cell line: 1>()
----> 1 learn = vision_learner(test, resnet18, metrics=error_rate)
2 learn.fine_tune(1)
File ~/.local/lib/python3.9/site-packages/fastai/vision/learner.py:225, in vision_learner(dls, arch, normalize, n_out, pretrained, loss_func, opt_func, lr, splitter, cbs, metrics, path, model_dir, wd, wd_bn_bias, train_bn, moms, cut, init, custom_head, concat_pool, pool, lin_ftrs, ps, first_bn, bn_final, lin_first, y_range, **kwargs)
223 else:
224 if normalize: _add_norm(dls, meta, pretrained)
--> 225 model = create_vision_model(arch, n_out, pretrained=pretrained, **model_args)
227 splitter=ifnone(splitter, meta['split'])
228 learn = Learner(dls=dls, model=model, loss_func=loss_func, opt_func=opt_func, lr=lr, splitter=splitter, cbs=cbs,
229 metrics=metrics, path=path, model_dir=model_dir, wd=wd, wd_bn_bias=wd_bn_bias, train_bn=train_bn, moms=moms)
File ~/.local/lib/python3.9/site-packages/fastai/vision/learner.py:167, in create_vision_model(arch, n_out, pretrained, cut, n_in, init, custom_head, concat_pool, pool, lin_ftrs, ps, first_bn, bn_final, lin_first, y_range)
165 body = create_body(model, n_in, pretrained, ifnone(cut, meta['cut']))
166 nf = num_features_model(nn.Sequential(*body.children())) if custom_head is None else None
--> 167 return add_head(body, nf, n_out, init=init, head=custom_head, concat_pool=concat_pool, pool=pool,
168 lin_ftrs=lin_ftrs, ps=ps, first_bn=first_bn, bn_final=bn_final, lin_first=lin_first, y_range=y_range)
File ~/.local/lib/python3.9/site-packages/fastai/vision/learner.py:153, in add_head(body, nf, n_out, init, head, concat_pool, pool, lin_ftrs, ps, first_bn, bn_final, lin_first, y_range)
151 "Add a head to a vision body"
152 if head is None:
--> 153 head = create_head(nf, n_out, concat_pool=concat_pool, pool=pool,
154 lin_ftrs=lin_ftrs, ps=ps, first_bn=first_bn, bn_final=bn_final, lin_first=lin_first, y_range=y_range)
155 model = nn.Sequential(body, head)
156 if init is not None: apply_init(model[1], init)
File ~/.local/lib/python3.9/site-packages/fastai/vision/learner.py:100, in create_head(nf, n_out, lin_ftrs, ps, pool, concat_pool, first_bn, bn_final, lin_first, y_range)
98 if lin_first: layers.append(nn.Dropout(ps.pop(0)))
99 for ni,no,bn,p,actn in zip(lin_ftrs[:-1], lin_ftrs[1:], bns, ps, actns):
--> 100 layers += LinBnDrop(ni, no, bn=bn, p=p, act=actn, lin_first=lin_first)
101 if lin_first: layers.append(nn.Linear(lin_ftrs[-2], n_out))
102 if bn_final: layers.append(nn.BatchNorm1d(lin_ftrs[-1], momentum=0.01))
File ~/.local/lib/python3.9/site-packages/fastai/layers.py:179, in LinBnDrop.__init__(self, n_in, n_out, bn, p, act, lin_first)
177 layers = [BatchNorm(n_out if lin_first else n_in, ndim=1)] if bn else []
178 if p != 0: layers.append(nn.Dropout(p))
--> 179 lin = [nn.Linear(n_in, n_out, bias=not bn)]
180 if act is not None: lin.append(act)
181 layers = lin+layers if lin_first else layers+lin
File ~/.local/lib/python3.9/site-packages/torch/nn/modules/linear.py:85, in Linear.__init__(self, in_features, out_features, bias, device, dtype)
83 self.in_features = in_features
84 self.out_features = out_features
---> 85 self.weight = Parameter(torch.empty((out_features, in_features), **factory_kwargs))
86 if bias:
87 self.bias = Parameter(torch.empty(out_features, **factory_kwargs))
TypeError: empty() received an invalid combination of arguments - got (tuple, dtype=NoneType, device=NoneType), but expected one of:
* (tuple of ints size, *, tuple of names names, torch.memory_format memory_format, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
* (tuple of ints size, *, torch.memory_format memory_format, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
Help please.