Embedding error: "Expected tensor for argument #1 'indices' to have scalar type Long; but got torch.FloatTensor instead"

Error message:
RuntimeError: Expected tensor for argument #1 'indices' to have scalar type Long; but got torch.FloatTensor instead (while checking arguments for embedding)
(when calling learner.fit_one_cycle(1, 1e-3))

I'm using a concatenated model that includes both tabular and text data. There is no error when I drop the text data or when I use a separate model for the text data.

The last part of the collate function (x4 is the text data):
x4, y = pad_collate(list(zip(x4, y)), pad_idx=1, pad_first=True)
x4 = to_data(x4)  # this line was not used at first; the error occurs in both cases
return (x1, x2, x3, x4), y

Part of the concatenated model:
(nn_lstm): Sequential(
(0): Lambda()
(1): Embedding(10140, 400, padding_idx=1)
(2): EmbeddingDropout(
(emb): Embedding(10140, 400, padding_idx=1)
)
(3): RNNBlock(
(rnn): LSTM(400, 64)
)
(4): RNNBlock(
(rnn): LSTM(64, 64)
)
)

  • The error message didn't change when the initial Lambda(lambda x: x.permute(1,0)) was removed.
  • The model uses the embedding and embedding dropout from the AWD_LSTM language model, and a plain PyTorch LSTM for speed.

I've been trying to fix this bug myself for many hours without success. Some solutions I found online were .data[0] (which only seems to work for a tensor holding a single value) and .numpy() (a NumPy array can't be used here).

No one can help you without seeing the whole code you’re using and the full error message. The problem can come from your data, your model or your loss function and we’re not magicians :wink:


But what does the error message mean? Should I just quit?

The error message means that at some point PyTorch expected a tensor of type long (integer indices) and got a tensor of type float instead, which could come from any of the things I spelled out earlier.
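
For example, nn.Embedding only accepts Long (int64) index tensors, since it uses them to look up rows of its weight matrix. A minimal standalone sketch of how the error appears and how casting the indices fixes it:

import torch
import torch.nn as nn

emb = nn.Embedding(num_embeddings=10, embedding_dim=4, padding_idx=1)

idx_float = torch.tensor([[2.0, 3.0, 1.0]])  # float indices, like the batch in the error
# emb(idx_float)                             # raises: expected scalar type Long but got Float
out = emb(idx_float.long())                  # casting the indices to int64 makes the lookup work
print(out.shape)                             # torch.Size([1, 3, 4])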

I got a similar error and the fix below helped!

b_input_ids = torch.tensor(b_input_ids).to(device).long()
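
If the batch comes straight out of a DataLoader, the same cast can go right before the forward pass. A sketch along those lines, where train_dataloader, model, and device are placeholder names for your own objects:

for b_input_ids, b_labels in train_dataloader:
    # embedding layers index into their weight matrix, so the token ids must be int64 (Long)
    b_input_ids = torch.as_tensor(b_input_ids).to(device).long()
    b_labels = b_labels.to(device)
    logits = model(b_input_ids)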

This is my code; it produces the same error and I can't fix it. Please help, thank you!
import torch
import transformers
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

MODEL_NAME = "vilsonrodrigues/falcon-7b-instruct-sharded"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)
model.print_trainable_parameters()  # report how many parameters LoRA leaves trainable

dataset = tokenizer(data, padding=True, truncation=True, return_tensors='pt')
tokenizerer = lambda x: tokenizer(x, padding=True, truncation=True, return_tensors='pt')
new_data = [tokenizer(item) for item in data]
data_tok = new_data
data_chuncked = []
max_tok = 2048
for item in data_tok:
    if len(item['input_ids']) > max_tok:
        # split long sequences into windows; stride is the remainder of the length modulo max_tok
        stride = len(item['input_ids']) % max_tok
        datasize = len(item['input_ids'])
        for i in range(0, datasize, stride):
            # if (i+stride) < datasize:
            if i < len(item['input_ids']) - stride:
                data_chuncked.append({
                    'input_ids': item['input_ids'][i:i + stride],
                    'attention_mask': item['attention_mask'][i:i + stride]
                })
            else:
                data_chuncked.append({
                    'input_ids': item['input_ids'][i:],
                    'attention_mask': item['attention_mask'][i:]
                })
    else:
        data_chuncked.append({
            'input_ids': item['input_ids'],
            'attention_mask': item['attention_mask']
        })

# Check if a GPU is available

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class MyDataset(Dataset):
    def __init__(self, data, device='cpu'):
        self.data = data
        self.device = torch.device(device)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        item = self.data[index]

        # Convert 'input_ids' and 'attention_mask' to tensors
        item['input_ids'] = torch.tensor(item['input_ids'], dtype=torch.int64).to(self.device)
        item['attention_mask'] = torch.tensor(item['attention_mask'], dtype=torch.int64).to(self.device)

        return item

first_data = data_chuncked[:4000]
dataset = MyDataset(first_data)
training_args = transformers.TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=20,
    num_train_epochs=1,
    learning_rate=2e-4,
    fp16=True,
    save_total_limit=3,
    logging_steps=1,
    output_dir="experiments",
    optim="paged_adamw_8bit",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
)

trainer = transformers.Trainer(
    model=model,
    train_dataset=dataset,
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
model.config.use_cache = False
trainer.train()
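
One way to narrow down where the float tensor sneaks in with a setup like this is to run a couple of samples through the data collator by hand and check the dtypes before calling trainer.train(). A sketch, assuming the dataset, tokenizer, and collator defined above:

collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
batch = collator([dataset[0], dataset[1]])  # collate two samples by hand
for name, tensor in batch.items():
    print(name, tensor.dtype, tensor.shape)
# input_ids, attention_mask and labels should all come out as torch.int64 (Long);
# if any of them show up as a float dtype, cast them with .long() in __getitem__
# before they reach the model's embedding layer.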