Глубокое обучение NLIP Keras: TextVectorization IndexError: индекс кортежа вне допустимого диапазона
После очистки текста я пытаюсь векторизовать текст, после чего получаю следующую ошибку: «индекс кортежа вне диапазона»
Я не уверен, что я делаю что-то не так? Пожалуйста, помогите.
заранее спасибо
from __future__ import print_function
from tensorflow.keras.callbacks import LambdaCallback
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import get_file
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import Input, Embedding, Dropout, Activation
import numpy as np
import random
import sys
import io
import string
path = get_file('21000-8.txt',
origin='http://www.gutenberg.org/files/21000/21000-8.txt')
with io.open(path, encoding='ISO-8859-1', errors='ignore') as f:
text = f.read()
# print somewhere in the middle of the text 200 characters
print(text[1200:1400])
# print corpus length
print('corpus length in characters:', len(text))
# turn a doc into clean tokens
def clean_doc(doc):
# replace '--' with a space ' '
doc = doc.replace('--', ' ')
# split into tokens by white space
tokens = doc.split()
# remove punctuation from each token
table = str.maketrans('', '', string.punctuation)
tokens = [w.translate(table) for w in tokens]
# remove remaining tokens that are not alphabetic
tokens = [word for word in tokens if word.isalpha()]
# make lower case
tokens = [word.lower() for word in tokens]
return tokens
max_features = 10000
sequence_length = 250
text_vectorization = TextVectorization(
standardize= clean_doc,
max_tokens=20000,
# Encode the output tokens
output_mode="int"
)
text_vectorization.adapt(text)
IndexError Traceback (most recent call last)
<ipython-input-41-8eabca3cac65> in <module>
----> 1 text_vectorization.adapt(text)
6 frames
/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/tensor_shape.py in __getitem__(self, key)
907 else:
908 if self._v2_behavior:
--> 909 return self._dims[key]
910 else:
911 return self.dims[key]
IndexError: tuple index out of range