This is my code. It keeps producing the same error and I can't fix it. Please help, thank you!
import torch
import transformers
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
MODEL_NAME = "vilsonrodrigues/falcon-7b-instruct-sharded"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, config)
model.print_trainable_parameters()  # PEFT models expose this helper directly
# NOTE: `data` (the list of raw training texts) must already be defined at this point.
# Tokenize each example individually; padding is handled later by the data collator.
data_tok = [tokenizer(item) for item in data]
data_chuncked = []
max_tok=2048
for item in data_tok:
    if len(item['input_ids']) > max_tok:
        # Split sequences longer than max_tok into chunks of at most max_tok tokens
        # (the final slice may be shorter).
        datasize = len(item['input_ids'])
        for i in range(0, datasize, max_tok):
            data_chuncked.append({
                'input_ids': item['input_ids'][i:i + max_tok],
                'attention_mask': item['attention_mask'][i:i + max_tok]
            })
    else:
        data_chuncked.append({
            'input_ids': item['input_ids'],
            'attention_mask': item['attention_mask']
        })
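# Optional sanity check on the chunking above: no example should exceed max_tok tokens.
assert all(len(c['input_ids']) <= max_tok for c in data_chuncked)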
# Check if a GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class MyDataset(Dataset):
    def __init__(self, data, device='cpu'):
        self.data = data
        self.device = torch.device(device)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        item = self.data[index]
        # Convert 'input_ids' and 'attention_mask' to tensors
        item['input_ids'] = torch.tensor(item['input_ids'], dtype=torch.int64).to(self.device)
        item['attention_mask'] = torch.tensor(item['attention_mask'], dtype=torch.int64).to(self.device)
        return item
first_data = data_chuncked[:4000]
dataset = MyDataset(first_data)
training_args = transformers.TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=20,
    num_train_epochs=1,
    learning_rate=2e-4,
    fp16=True,
    save_total_limit=3,
    logging_steps=1,
    output_dir="experiments",
    optim="paged_adamw_8bit",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
)
trainer = transformers.Trainer(
    model=model,
    train_dataset=dataset,
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
model.config.use_cache = False
trainer.train()