I am trying to use a fastai Learner with DNA sequences for deep learning, but I am running into an error. Given the error below, do I need to reshape my input tensor?
I have tried to reduce my input's dimensions to match the output tensor:
x = x.permute(0,2,1).mean( dim = [1,2], keepdim = True).squeeze(dim= -1)
but then I get
Given groups=1, weight of size [64, 4, 8], expected input[1, 512, 2] to have 4 channels, but got 512 channels instead
Do I need a different loss function, or should I set a different reduction option?
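For reference, nn.Conv1d expects its input as (batch, channels, length), so the 4 one-hot channels must sit in dimension 1. A minimal sketch of that layout check, with an assumed batch size of 512 and sequence length of 1000:

import torch
import torch.nn as nn

conv = nn.Conv1d(in_channels=4, out_channels=64, kernel_size=8, padding='same')
x = torch.randn(512, 1000, 4)      # assumed (batch, length, channels) one-hot batch
out = conv(x.permute(0, 2, 1))     # move the 4 channels to dim 1 -> (512, 4, 1000)
print(out.shape)                   # torch.Size([512, 64, 1000]); padding='same' keeps the length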
Input:
def one_hot_encode(seq):
    """
    Given a DNA sequence, return its one-hot encoding
    """
    # Make sure seq has only allowed bases
    allowed = set("ACTGN")
    if not set(seq).issubset(allowed):
        invalid = set(seq) - allowed
        raise ValueError(f"Sequence contains chars not in allowed DNA alphabet (ACGTN): {invalid}")

    # Dictionary returning one-hot encoding for each nucleotide
    nuc_d = {'A': [1.0, 0.0, 0.0, 0.0],
             'C': [0.0, 1.0, 0.0, 0.0],
             'G': [0.0, 0.0, 1.0, 0.0],
             'T': [0.0, 0.0, 0.0, 1.0],
             'N': [0.0, 0.0, 0.0, 0.0]}

    # Create array from nucleotide sequence
    vec = np.array([nuc_d[x] for x in seq])
    return vec
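As a quick sanity check of the encoder, running it on a short hypothetical sequence should yield one row per base and one column per channel:

import numpy as np

toy = "ACGTN"            # hypothetical 5-base sequence
vec = one_hot_encode(toy)
print(vec.shape)         # (5, 4)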
class SeqDatasetOHE(Dataset):
    '''
    Dataset for one-hot-encoded sequences
    '''
    def __init__(self,
                 df,
                 seq_col='Seq',
                 target_col=target_col):
        # extract the DNA from the appropriate column in the df
        self.seqs = list(df[seq_col].values)
        self.seq_len = len(self.seqs[0])

        # one-hot encode sequences, then stack in a torch tensor
        self.ohe_seqs = torch.stack([torch.tensor(one_hot_encode(x), dtype=torch.float, device=device)
                                     for x in self.seqs])
        self.labels = torch.tensor(list(df[target_col].values), dtype=torch.float, device=device).unsqueeze(1)

    def __len__(self): return len(self.seqs)

    def __getitem__(self, idx):
        # Given an index, return a tuple of an X with its associated Y
        # This is called inside DataLoader
        seq = self.ohe_seqs[idx]
        label = self.labels[idx]
        return seq, label
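To see what a single item looks like before it reaches the model, a sketch of building the dataset from a toy dataframe (column names, values, and the global device are assumptions):

import pandas as pd

# assumes the global `device` used above is already defined, e.g. device = 'cpu'
toy_df = pd.DataFrame({'Seq': ['ACGT' * 250, 'TTAA' * 250],   # two hypothetical 1000-bp sequences
                       'Target': [0.5, 1.2]})                 # hypothetical regression targets
ds = SeqDatasetOHE(toy_df, seq_col='Seq', target_col='Target')
x, y = ds[0]
print(x.shape, y.shape)   # expected: torch.Size([1000, 4]) torch.Size([1])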
## constructed DataLoaders from Datasets.
def build_dataloaders(train_df,
                      test_df,
                      seq_col='Seq',
                      target_col=target_col,
                      batch_size=batch_size_init,
                      shuffle=True):
    # create Datasets
    train_ds = SeqDatasetOHE(train_df, seq_col=seq_col, target_col=target_col)
    test_ds = SeqDatasetOHE(test_df, seq_col=seq_col, target_col=target_col)

    # Put DataSets into DataLoaders
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=shuffle)
    test_dl = DataLoader(test_ds, batch_size=batch_size)

    # train_dl.ohe_seqs.to("cuda")  # put data onto the GPU entirely
    # train_dl.labels.to("cuda")
    # test_dl.ohe_seqs.to("cuda")
    # test_dl.labels.to("cuda")

    return train_dl, test_dl
train_dl, val_dl = build_dataloaders(train_df, val_df)
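Inspecting one batch from the resulting loader pins down the shapes the model actually receives (a sketch, assuming the loaders built above):

xb, yb = next(iter(train_dl))
print(xb.shape, yb.shape)   # e.g. torch.Size([512, 1000, 4]) torch.Size([512, 1]) for batch_size=512, seq_len=1000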
Model:
# import the necessary packages
from torch.nn import Module, Conv1d, Linear, MaxPool1d, ReLU, Dropout

class model_test(nn.Module):  # deepcre model
    def __init__(self,
                 seq_len: int = 1000,
                 kernel_size: int = 8,
                 p=0.25):  # dropout probability
        super().__init__()
        self.seq_len = seq_len

        # adjust pooling window size corresponding to sequence length
        window_size = int(seq_len * (8 / 3000))

        # CNN module
        self.conv11 = Conv1d(4, 64, kernel_size=kernel_size, padding='same')
        self.relu11 = ReLU()
        self.conv12 = Conv1d(64, 64, kernel_size=kernel_size, padding='same')
        self.relu12 = ReLU()
        self.maxpool1 = MaxPool1d(kernel_size=window_size)
        self.Dropout1 = Dropout(p)
        self.fc1 = Linear(seq_len*p, 1)

    def forward(self, x: torch.Tensor):
        """Forward pass."""
        x = x.permute(0, 2, 1)
        print(x.shape)
        x = self.conv11(x)
        x = self.relu11(x)
        x = self.conv12(x)
        x = self.relu12(x)
        x = self.maxpool1(x)
        x = self.Dropout1(x)
        x = self.fc1(x)
        return x
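As a standalone shape trace (dummy data, not the class above), the same conv/pool stack shows why the prediction comes out as [512, 64, 1] rather than [512, 1]: the final Linear layer acts only on the last dimension, and the 64 channel dimension is never flattened away.

import torch
import torch.nn as nn

seq_len, batch = 1000, 512
x = torch.randn(batch, seq_len, 4)                          # assumed (batch, length, channels) batch
x = x.permute(0, 2, 1)                                      # -> (512, 4, 1000)
x = nn.Conv1d(4, 64, kernel_size=8, padding='same')(x)      # -> (512, 64, 1000)
x = nn.Conv1d(64, 64, kernel_size=8, padding='same')(x)     # -> (512, 64, 1000)
x = nn.MaxPool1d(kernel_size=int(seq_len * (8 / 3000)))(x)  # kernel 2 -> (512, 64, 500)
print(x.shape)                                              # a Linear(..., 1) on this leaves (512, 64, 1)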
Executing the model:
from fastai.optimizer import OptimWrapper
from torch import optim
from functools import partial

# get the sequence length from the first seq in the df
seq_len = len(train_df['Seq'].values[0])
device = 'cuda'

# set up the model
model_test = model_test(seq_len)
model_test.to(device)
# cast model parameters to float32
model_test.float()

# set up the optimizer
opt = torch.optim.SGD(model_test.parameters(), lr=lr)
opt_func = OptimWrapper(opt=opt)

# set up the Learner
learn = Learner(data, model_test, loss_func=nn.MSELoss(), metrics=accuracy)
learn.opt = OptimWrapper(opt=opt)
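Before calling fit, comparing the prediction and target shapes on a single batch makes the mismatch visible directly (a sketch, assuming the loaders and model instance above, with everything on the same device):

xb, yb = next(iter(train_dl))
with torch.no_grad():
    pred = model_test(xb.to(device))
print(pred.shape, yb.shape)   # MSELoss needs these to match, e.g. both torch.Size([512, 1])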
Error:
UserWarning: Using a target size (torch.Size([512, 1])) that is different to the input size (torch.Size([512, 64, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
return F.mse_loss(input, target, reduction=self.reduction)
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[178], line 1
----> 1 learn.fit_one_cycle(n_epoch=1, lr_max=1e-2)
File /mnt/biostat/environments/parkj/dna2rna/lib/python3.11/site-packages/fastai/callback/schedule.py:119, in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt, start_epoch)
116 lr_max = np.array([h['lr'] for h in self.opt.hypers])
117 scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
118 'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 119 self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd, start_epoch=start_epoch)
File /mnt/biostat/environments/parkj/dna2rna/lib/python3.11/site-packages/fastai/learner.py:264, in Learner.fit(self, n_epoch, lr, wd, cbs, reset_opt, start_epoch)
262 self.opt.set_hypers(lr=self.lr if lr is None else lr)
263 self.n_epoch = n_epoch
--> 264 self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
File /mnt/biostat/environments/parkj/dna2rna/lib/python3.11/site-packages/fastai/learner.py:199, in Learner._with_events(self, f, event_type, ex, final)
198 def _with_events(self, f, event_type, ex, final=noop):
--> 199 try: self(f'before_{event_type}'); f()
200 except ex: self(f'after_cancel_{event_type}')
201 self(f'after_{event_type}'); final()
File /mnt/biostat/environments/parkj/dna2rna/lib/python3.11/site-packages/fastai/learner.py:253, in Learner._do_fit(self)
251 for epoch in range(self.n_epoch):
252 self.epoch=epoch
--> 253 self._with_events(self._do_epoch, 'epoch', CancelEpochException)
File /mnt/biostat/environments/parkj/dna2rna/lib/python3.11/site-packages/fastai/learner.py:199, in Learner._with_events(self, f, event_type, ex, final)
198 def _with_events(self, f, event_type, ex, final=noop):
--> 199 try: self(f'before_{event_type}'); f()
200 except ex: self(f'after_cancel_{event_type}')
201 self(f'after_{event_type}'); final()
File /mnt/biostat/environments/parkj/dna2rna/lib/python3.11/site-packages/fastai/learner.py:247, in Learner._do_epoch(self)
246 def _do_epoch(self):
--> 247 self._do_epoch_train()
248 self._do_epoch_validate()
File /mnt/biostat/environments/parkj/dna2rna/lib/python3.11/site-packages/fastai/learner.py:239, in Learner._do_epoch_train(self)
237 def _do_epoch_train(self):
238 self.dl = self.dls.train
--> 239 self._with_events(self.all_batches, 'train', CancelTrainException)
File /mnt/biostat/environments/parkj/dna2rna/lib/python3.11/site-packages/fastai/learner.py:199, in Learner._with_events(self, f, event_type, ex, final)
198 def _with_events(self, f, event_type, ex, final=noop):
--> 199 try: self(f'before_{event_type}'); f()
200 except ex: self(f'after_cancel_{event_type}')
201 self(f'after_{event_type}'); final()
File /mnt/biostat/environments/parkj/dna2rna/lib/python3.11/site-packages/fastai/learner.py:205, in Learner.all_batches(self)
203 def all_batches(self):
204 self.n_iter = len(self.dl)
--> 205 for o in enumerate(self.dl): self.one_batch(*o)
File /mnt/biostat/environments/parkj/dna2rna/lib/python3.11/site-packages/fastai/learner.py:235, in Learner.one_batch(self, i, b)
233 b = self._set_device(b)
234 self._split(b)
--> 235 self._with_events(self._do_one_batch, 'batch', CancelBatchException)
File /mnt/biostat/environments/parkj/dna2rna/lib/python3.11/site-packages/fastai/learner.py:199, in Learner._with_events(self, f, event_type, ex, final)
198 def _with_events(self, f, event_type, ex, final=noop):
--> 199 try: self(f'before_{event_type}'); f()
200 except ex: self(f'after_cancel_{event_type}')
201 self(f'after_{event_type}'); final()
File /mnt/biostat/environments/parkj/dna2rna/lib/python3.11/site-packages/fastai/learner.py:219, in Learner._do_one_batch(self)
217 self('after_pred')
218 if len(self.yb):
--> 219 self.loss_grad = self.loss_func(self.pred, *self.yb)
220 self.loss = self.loss_grad.clone()
221 self('after_loss')
File /mnt/biostat/environments/parkj/dna2rna/lib/python3.11/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
File /mnt/biostat/environments/parkj/dna2rna/lib/python3.11/site-packages/torch/nn/modules/loss.py:536, in MSELoss.forward(self, input, target)
535 def forward(self, input: Tensor, target: Tensor) -> Tensor:
--> 536 return F.mse_loss(input, target, reduction=self.reduction)
File /mnt/biostat/environments/parkj/dna2rna/lib/python3.11/site-packages/torch/nn/functional.py:3294, in mse_loss(input, target, size_average, reduce, reduction)
3291 if size_average is not None or reduce is not None:
3292 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 3294 expanded_input, expanded_target = torch.broadcast_tensors(input, target)
3295 return torch._C._nn.mse_loss(expanded_input, expanded_target, _Reduction.get_enum(reduction))
File /mnt/biostat/environments/parkj/dna2rna/lib/python3.11/site-packages/torch/functional.py:74, in broadcast_tensors(*tensors)
72 if has_torch_function(tensors):
73 return handle_torch_function(broadcast_tensors, tensors, *tensors)
---> 74 return _VF.broadcast_tensors(tensors)
RuntimeError: The size of tensor a (64) must match the size of tensor b (512) at non-singleton dimension 1