IndexError: размер вне диапазона (ожидается, что он будет в диапазоне [-2, 1], но получено 2) на трансформаторах в PyTorch

У меня есть 14 свойств и 1 вывод метки и выходной класс номер 10, я хочу запустить его на трансформаторах, но получаю эту ошибку. Когда я запускал этот код в модели MLP, у меня не было проблем, но у меня есть проблемы здесь .

Вот пример значения моих ключей: [ 1.3870e-01, -5.7892e-02, 2.0621e-01, -6.8972e-02, 7.8414e-02, 5.7779e-02, 3.0018e-01, -6.9849e-02] ошибка уже здесь, на multiHeadAttetionself._scores = torch.bmm(queries, keys.transpose(1,2)) / np.sqrt(K)

https://github.com/nurkbts/error/blob/main/error.ipynb

К=64

      IndexError Traceback (most recent call last)
<ipython-input-191-239442fa3bfb> in <module>()
    227         optimizer.zero_grad()
    228         print(x)
--> 229         netout = net(x.to(device))
    230         # calculate loss
    231         loss = loss_function(yhat, y)

5 frames
/content/multiHeadAttention.py in forward(self, query, key, value, mask)
     89 
     90         # Scaled Dot Product
---> 91         self._scores = torch.bmm(queries, keys.transpose(1, 2)) / np.sqrt(K)
     92 
     93         # Compute local map mask

IndexError: Dimension out of range (expected to be in range of [-2, 1], but got 2)

Вот код:

      # dataset definition
class CSVDataset(Dataset):
    # load the dataset
    def __init__(self, path):
        # load the csv file as a dataframe
        df = read_csv(path,sep=',',names=KINEMATICS_COL_NAMES,usecols=KINEMATICS_USECOLS)
        # store the inputs and outputs
        self.X = df.values[:, :-1]
        self.y = df.values[:, -1]
        # ensure input data is floats
        self.X = self.X.astype('float32')
        # label encode target and ensure the values are floats
        self.y = LabelEncoder().fit_transform(self.y)
        print(self.X.shape)
        print(self.y.shape)
                # Convert to float32
        self.X = torch.Tensor(self.X)
        self.y = torch.Tensor(self.y)

    # number of rows in the dataset
    def __len__(self):
        return len(self.X)

    # get a row at an index
    def __getitem__(self, idx):
      if torch.is_tensor(idx):
       idx = idx.tolist()
      return [self.X[idx], self.y[idx]]

    # get indexes for train and test rows
    def get_splits(self, n_test=0.33):
        # determine sizes
        test_size = round(n_test * len(self.X))
        val_size =100
        train_size = len(self.X) - test_size-100
        # calculate the split
        return random_split(self, [train_size, val_size, test_size])
     
# prepare the dataset
def prepare_data(path):
    # load the dataset
    
    dataset = CSVDataset(path)
    # calculate split
    train, val, test = dataset.get_splits()
    # prepare data loaders
    train_dl = DataLoader(train, batch_size=BATCH_SIZE,
                              shuffle=True,
                              num_workers=NUM_WORKERS,
                              pin_memory=False)
    val_dl= DataLoader(val, batch_size=BATCH_SIZE,
                            shuffle=True,
                            num_workers=NUM_WORKERS
                              )
    test_dl = DataLoader(test, batch_size=BATCH_SIZE,
                             shuffle=False,
                             num_workers=NUM_WORKERS)
    return train_dl, val_dl,test_dl

# prepare the data
path = '_B001_train.txt'
train_dl, val_dl, test_dl = prepare_data(path)
print(train_dl)
print(len(train_dl.dataset), len(val_dl.dataset),  len(test_dl.dataset))
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device {device}")

BATCH_SIZE = 8
NUM_WORKERS = 0
LR = 2e-4
EPOCHS = 30
# Model parameters
d_model = 64 # Lattent dim #???
q = 8 # Query size
v = 8 # Value size
h = 8 # Number of heads
N = 4 # Number of encoder and decoder to stack
attention_size = 12 # Attention window size
dropout = 0.2 # Dropout rate
pe = None # Positional encoding
chunk_mode = None
d_input = 14 # From dataset
d_output = 10 # From dataset#???
#model = MLP(14)
# Load transformer with Adam optimizer and MSE loss function
sns.set()
net = Transformer(d_input, d_model, d_output, q, v, h, N, attention_size=attention_size, dropout=dropout, chunk_mode=chunk_mode, pe=pe).to(device)
optimizer = optim.Adam(net.parameters(), lr=LR)
#loss_function = OZELoss(alpha=0.3)
from torch.nn import CrossEntropyLoss
loss_function = CrossEntropyLoss
# Prepare loss history
   
hist_loss = np.zeros(EPOCHS)
hist_loss_val = np.zeros(EPOCHS)
for idx_epoch in range(EPOCHS):
    running_loss = 0
      for idx_batch, (x, y) in enumerate(train_dl):
        optimizer.zero_grad()
        netout = net(x.to(device)) ----> error 
        loss = loss_function(yhat, y)
        loss.backward()
        optimizer.step()

0 ответов

Другие вопросы по тегам