Pytorchを使って映画に対するコメントのネガポジ分析をしています。エラーの原因がわかりません。
class Sentiment_data_set(Dataset):
def __init__(self, data_dir, data_file, wv_file):
data = self._get_data(data_dir, data_file)
wvd = self._get_wv(data_dir, wv_file)
self.data = self._convert_data(data, wvd)
def __getitem__(self, index):
return self.data[index]
def __len__(self):
return len(self.data)
def _get_data(self, data_dir, file_name):
data_path = os.path.join(data_dir, file_name)
if not os.path.exists(data_path):
print("DATA FILE NOT FOUND")
return None
data = []
#00:20
with open(data_path, encoding="utf-8") as f:
for line in f:
text = [w.lower().strip() for w in line.split()[:-1]]
sentiment = int(line.split()[-1].strip())
example = (text, sentiment)
data.append(example)
return data
def _get_wv(self, data_dir, wv_file):
data_path = os.path.join(data_dir, wv_file)
if not os.path.exists(data_path):
print("Word Vector FILE NOT FOUND")
return None
#00:27
wvd =dict()
with open(data_path, encoding="utf-8") as f:
for line in f:
key = line.split()[0].lower().strip()
vec = np.array([float(x) for x in line.split()[1:]])
wvd[key] = vec
return wvd
#00:35
def _convert_data(self, data, wvd):
s = next(iter(wvd.values())).shape
result = []
vl = []
for d in data:
words = d[0]
label = d[1]
vs = np.array([wvd.get(w, np.zeros(s)) for w in words])
av = np.mean(vs, axis = 0)
result.append((av, label))
return result
ds_train = Sentiment_data_set(data_dir, training_file, wv_file)
train_data_loader = torch.utils.data.DataLoader(dataset=ds_train, batch_size=batch_size, shuffle=True)
next(iter(train_data_loader))
実行結果
[tensor([[ 2.3268e-01, 2.8075e-01, 5.1483e-02, -1.1740e-01, 7.2760e-01,
-9.8404e-02, -3.3779e-01, -6.0064e-02, 1.3120e-02, 1.4159e-01,
-1.0131e-01, -3.4091e-01, 1.3472e-02, -6.8300e-03, 5.5985e-01,
2.6401e-01, 9.5887e-02, 2.7210e-01, -2.7710e-01, -2.4456e-01,
9.1790e-02, 4.5828e-01, 5.5885e-02, 3.6671e-01, 5.3345e-01,
-1.3135e+00, -6.5693e-01, 2.3515e-01, 1.8337e-01, -4.1516e-01,
2.5550e+00, 4.6701e-01, 1.5873e-02, -6.1980e-01, -3.9348e-01,
-1.1219e-01, 1.0424e-01, 5.8896e-02, -1.7226e-01, -2.8285e-01,
-9.2470e-02, 5.5494e-02, -2.6005e-01, 3.1202e-01, -3.5885e-01,
7.2462e-02, -1.6250e-01, -1.9688e-01, -9.7038e-02, 1.9388e-01],
...
[ 4.8415e-01, 3.7791e-01, -1.9045e-01, 4.9401e-02, 2.9869e-01,
3.2306e-01, -3.5511e-01, -1.0950e-01, 1.0837e-02, -1.3993e-01,
-8.2864e-02, -2.0526e-01, -3.8563e-01, 5.0814e-02, 4.3061e-01,
-6.6627e-02, 7.1168e-02, -2.2624e-02, -6.3862e-01, -9.9058e-02,
1.1668e-01, 8.0578e-02, 2.3456e-02, -1.3852e-01, 7.7296e-02,
-1.4985e+00, -4.2845e-01, -4.2376e-02, 4.3659e-02, 4.6751e-02,
3.2178e+00, -1.6511e-02, -2.7963e-01, -3.4116e-01, 1.0839e-01,
1.2656e-01, -4.8618e-02, 7.1068e-02, -8.9768e-02, -2.1865e-01,
-3.1404e-01, 2.7174e-02, 3.3482e-02, 6.7144e-02, -1.6599e-01,
9.8886e-02, 5.0593e-02, -4.5888e-02, -1.2297e-01, -4.2410e-01]],
dtype=torch.float64),
tensor([0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1])]
この後に
class Model(nn.Module):
def __init__(self):
super(Model, self).__init__()
self.l1 = nn.Linear(50, 20)
self.l2 = nn.Linear(20, 10)
self.l3 = nn.Linear(10, 1)
self.sigmoid = torch.nn.Sigmoid()
def forward(self,x):
out1 = self.l1(x)
out2 = self.sigmoid(self.l2(out1))
y_pred = self.sigmoid(self.l3(out2))
return y_pred
model = Model()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
ここまでOK
この下を実行するとエラー
for epoch in range(2):
for i, data in enumerate(train_data_loader, 0):
# get the inputs
inputs, labels = data
# wrap them in Variable
inputs, labels = Variable(inputs), Variable(labels)
y_pred = model(inputs)
#Compute and print loss
loss = criterion (y_pred, labels)
print(epoch, i, loss.data[0])
optimizer.zero_grad()
loss.backward()
optimizer.step()
これを実行すると、以下のようなエラーがでてしまいます。どこが間違っているのでしょうか。教えてください。よろしくお願いします。
RuntimeError: Assertion `cur_target >= 0 && cur_target < n_classes' failed. at /Users/administrator/nightlies/pytorch-1.0.0/wheel_build_dirs/conda_3.6/conda/conda-bld/pytorch_1544137972173/work/aten/src/THNN/generic/ClassNLLCriterion.c:93