Глубокое обучение NLIP Keras: TextVectorization IndexError: индекс кортежа вне допустимого диапазона

После очистки текста я пытаюсь векторизовать текст, после чего получаю следующую ошибку: «индекс кортежа вне диапазона»

Я не уверен, что я делаю что-то не так? Пожалуйста, помогите.

заранее спасибо

      from __future__ import print_function
from tensorflow.keras.callbacks import LambdaCallback
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import get_file
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import Input, Embedding, Dropout, Activation
import numpy as np
import random
import sys
import io
import string
      path = get_file('21000-8.txt',
                origin='http://www.gutenberg.org/files/21000/21000-8.txt')
    

with io.open(path, encoding='ISO-8859-1', errors='ignore') as f:
    text = f.read()

# print somewhere in the middle of the text 200 characters
print(text[1200:1400])

# print corpus length
print('corpus length in characters:', len(text))
      # turn a doc into clean tokens
def clean_doc(doc):
    # replace '--' with a space ' '
    doc = doc.replace('--', ' ')
    # split into tokens by white space
    tokens = doc.split()
    # remove punctuation from each token
    table = str.maketrans('', '', string.punctuation)
    tokens = [w.translate(table) for w in tokens]
    # remove remaining tokens that are not alphabetic
    tokens = [word for word in tokens if word.isalpha()]
    # make lower case
    tokens = [word.lower() for word in tokens]
    return tokens
      max_features = 10000
sequence_length = 250

text_vectorization = TextVectorization(
    standardize= clean_doc,
    max_tokens=20000,
    # Encode the output tokens 
    output_mode="int"

)
      text_vectorization.adapt(text)
      IndexError                                Traceback (most recent call last)
<ipython-input-41-8eabca3cac65> in <module>
----> 1 text_vectorization.adapt(text)

6 frames
/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/tensor_shape.py in __getitem__(self, key)
    907       else:
    908         if self._v2_behavior:
--> 909           return self._dims[key]
    910         else:
    911           return self.dims[key]

IndexError: tuple index out of range

0 ответов

Другие вопросы по тегам