# Python: PDF -> text extraction
from io import StringIO
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
# from pdfminer.high_level import extract_text
#実行ファイルのパスを取得
# pwd = os.getcwd()
# print(pwd)
# Script body: extract the text of 'sample.pdf' with pdfminer, strip newlines,
# split the result on single spaces and print the pieces back-to-back.

# pdfminer resource manager plus an in-memory buffer that receives the text.
rsrcmgr = PDFResourceManager()
outfp = StringIO()
# Layout-analysis parameters; detect_vertical is switched on so that
# vertically written text is also considered during layout analysis.
laparams = LAParams()
laparams.detect_vertical = True
# pdf_filename = "sample.pdf"
get_list = []
# The converter device writes the text of every processed page into outfp.
# It must exist before the interpreter that drives it is created.
device = TextConverter(rsrcmgr, outfp, laparams=laparams)
interpreter = PDFPageInterpreter(rsrcmgr, device)
############### Render the PDF into a string
try:
    # The context manager closes the file even if a page fails to parse.
    with open('sample.pdf', 'rb') as pdf:
        for page in PDFPage.get_pages(pdf):
            interpreter.process_page(page)
    # outfp accumulates the output of all pages, so read it exactly once
    # after the loop; appending it per page would duplicate earlier pages.
    get_str = outfp.getvalue()
finally:
    device.close()
    outfp.close()
#print(get_list)
####### Remove newline characters
a_str = get_str.replace('\n', '')
####### Build a list by splitting on single spaces
a_list = a_str.split(" ")
# Print the pieces without separators or trailing newlines.
for a in a_list:
    print(a, end='')
### PDF 読み取り
# text = extract_text(pdf_filename,laparams)