I found out that `file_name` holds the full path to the file, and this path replaces `src` and `dst` in
src / file_name
dst / file_name
You have to use only the file's name, `file_name.name`:
src / file_name.name
dst / file_name.name
BTW:
Full path
print( file_name.path )
only filename
print( file_name.name )
BTW: You copy the same file after every match, but you could do it only once
by using a variable `found` and copying after the `for i` loop:
from pathlib import Path
import PyPDF2
import re
import os
import shutil
# Copy every PDF in `src` whose extracted text matches the user-supplied
# regular expression (case-insensitive) into `dst`. Each matching file is
# copied once, after its pages have been checked.
pattern = input("Enter string pattern to search: ")
basepath = Path('hrdinhalDataDesktopAnalizeSearch engine')
src = basepath / 'Folder 1'
dst = basepath / 'Folder 2'

# Compile once: the same pattern is applied to every page of every file.
matcher = re.compile(pattern, re.IGNORECASE)

# The context manager closes the directory iterator even if a PDF fails to parse.
with os.scandir(src) as entries:
    for file_name in entries:
        # A sub-directory cannot be opened as a PDF, so skip it.
        if not file_name.is_file():
            continue

        # Join with .name only; joining the DirEntry itself would bring its
        # own directory part along.
        src_path = src / file_name.name

        # The second positional parameter of PdfFileReader is `strict`, not a
        # file mode. The original passed 'rb' there, which is truthy and thus
        # enabled strict parsing; that behaviour is kept, but spelled out.
        file = PyPDF2.PdfFileReader(str(src_path), strict=True)
        numPages = file.getNumPages()

        # any() stops extracting text at the first matching page.
        found = any(
            matcher.search(file.getPage(i).extractText())
            for i in range(numPages)
        )

        if found:
            shutil.copyfile(str(src_path), str(dst / file_name.name))
Or use `break` to leave the `for i` loop after the first copy:
from pathlib import Path
import PyPDF2
import re
import os
import shutil
# Copy every PDF in `src` whose extracted text matches the user-supplied
# regular expression (case-insensitive) into `dst`. The page loop is left
# as soon as the first match has triggered the copy.
pattern = input("Enter string pattern to search: ")
basepath = Path('hrdinhalDataDesktopAnalizeSearch engine')
src = basepath / 'Folder 1'
dst = basepath / 'Folder 2'

# Compile once: the same pattern is applied to every page of every file.
matcher = re.compile(pattern, re.IGNORECASE)

# The context manager closes the directory iterator even if a PDF fails to parse.
with os.scandir(src) as entries:
    for file_name in entries:
        # A sub-directory cannot be opened as a PDF, so skip it.
        if not file_name.is_file():
            continue

        # Join with .name only; joining the DirEntry itself would bring its
        # own directory part along.
        src_path = src / file_name.name

        # The second positional parameter of PdfFileReader is `strict`, not a
        # file mode. The original passed 'rb' there, which is truthy and thus
        # enabled strict parsing; that behaviour is kept, but spelled out.
        file = PyPDF2.PdfFileReader(str(src_path), strict=True)
        numPages = file.getNumPages()

        for i in range(numPages):
            text = file.getPage(i).extractText()
            if matcher.search(text):
                shutil.copyfile(str(src_path), str(dst / file_name.name))
                break  # there is no need to check the rest of the PDF
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…