スクレイピングでmecabが動きません
スクレイピングで以下のコードを動かしたいのですが、以下のエラーが出てしまいます。
もしよろしければ、ご教授お願いいたします。
fetching data... http://www.haiku-data.jp/work_detail.php?cd=9 result: SUCCESS
Traceback (most recent call last):
File "/home/yudai/Desktop/haiku_correct.py", line 52, in
for fname, word in zip(poem, morphes):
NameError: name 'poem' is not defined
# -*- coding: utf-8 -*-
import time
from urllib import request
from bs4 import BeautifulSoup
import MeCab
import codecs
import re
def get_data(url):
req = request.urlopen(url)
soup = BeautifulSoup(req.read(), 'html.parser')
# 俳句
poem_elem = soup.select('td[height=40] b')[0]
poem = poem_elem.text.replace('*', '').strip() # サニタイズ
if not poem:
return False # 存在しないID
return {'poem': poem}
morphes = []
MAX_ID = 10 # 登録されているIDの最大値
for i in range(1, MAX_ID):
url = 'http://www.haiku-data.jp/work_detail.php?cd={id}'.format(id=i)
print('fetching data... ' + url, end=' ')
d = get_data(url)
if d:
print('result: SUCCESS')
morphes.append(d)
else:
print('result: MISSING')
time.sleep(1) # アクセス間隔
def write2file(fname, sentences):
with codecs.open(fname, 'w', 'utf-8') as f:
f.write("".join(f, fieldnames=['poem']))
def get_morphemes(sentences):
morphemes = []
for sent in sentences:
if len(sent) == 0:
continue
temp = tagger.parse(sent).split()
temp.append("。\n")
morphemes.append(" ".join(temp))
return morphemes if morphemes else -1
tagger = MeCab.Tagger("-Owakati")
for fname, word in zip(poem, morphes):
with req.urlopen(link + par.quote_plus(word)) as response:
html = response.read().decode('utf-8')
morphes = []
for p in all_p_tag:
# 半角文字を削除
p = re.sub("[\s!-~]*", "", p)
p = p.split("。")
# 分かち書き
morphemes = get_morphemes(p)
if morphemes == -1:
continue
temp = temp + morphemes
write2file(fname + ".txt", temp)