有没有更好的方法,在Python中使用glob.glob获取多种文件类型(例如.txt、.mdown和.markdown)的文件列表?现在我的代码是这样的:
# Run one glob per extension inside projectDir; each extension's matches
# are kept in their own list (txt, mdown, markdown, in that order).
projectFiles1, projectFiles2, projectFiles3 = (
    glob.glob(os.path.join(projectDir, pattern))
    for pattern in ('*.txt', '*.mdown', '*.markdown')
)
例如:
import glob
lst_img = []
base_dir = '/home/xy/img/'
# get all the jpg files in base_dir
lst_img += glob.glob(base_dir + '*.jpg')
# print() call form works on Python 3 (the bare print statement does not)
print(lst_img)
# e.g. ['/home/xy/img/2.jpg', '/home/xy/img/1.jpg']
# append all the png files in base_dir to lst_img
lst_img += glob.glob(base_dir + '*.png')
print(lst_img)
# e.g. ['/home/xy/img/2.jpg', '/home/xy/img/1.jpg', '/home/xy/img/3.png']
一个函数:
import glob
def get_files(base_dir='/home/xy/img/', lst_extension=('*.jpg', '*.png')):
    """
    Collect the files in a directory that match any of several glob patterns.

    :param base_dir: base directory to search; a trailing path separator
        is optional
    :param lst_extension: iterable of glob patterns like
        ['*.jpg', '*.png', ...]
    :return: list of matching paths like
        ['/home/xy/img/2.jpg', '/home/xy/img/3.png'], grouped in the order
        the patterns were given
    """
    import os

    lst_files = []
    for ext in lst_extension:
        # os.path.join inserts the separator when base_dir lacks one; plain
        # string concatenation would build '/home/xy/img*.jpg' instead.
        lst_files += glob.glob(os.path.join(base_dir, ext))
    return lst_files
from glob import glob
# Gather the image files from the current directory: one glob per
# extension, concatenated in gif, png, jpg order.
files = glob('*.gif') + glob('*.png') + glob('*.jpg')
print(files)
如果需要指定路径,可以遍历各个匹配模式,并把路径拼接(join)放在循环内部,这样写起来比较简单:
from os.path import join
from glob import glob
# Join the directory with each pattern in turn and flatten all matches
# into a single list, keeping the gif, png, jpg pattern order.
files = [
    match
    for pattern in ('*.gif', '*.png', '*.jpg')
    for match in glob(join("path/to/dir", pattern))
]
print(files)
与@BPL的答案思路相同(计算效率高),但它可以处理任意glob模式,而不仅限于文件扩展名:
import os
from fnmatch import fnmatch
folder = "path/to/folder/"
patterns = ("*.txt", "*.md", "*.markdown")
# Scan the folder once and keep every entry whose name matches a pattern.
# The pattern is tested against entry.name, not the entry itself: a DirEntry
# is converted to its full path, so a pattern anchored at the start of the
# file name (e.g. "README*") would never match.
# The with-block closes the scandir iterator as soon as the scan finishes.
with os.scandir(folder) as entries:
    files = [
        entry.path
        for entry in entries
        if any(fnmatch(entry.name, pattern) for pattern in patterns)
    ]
这种解决方案既高效又方便。它还与glob的行为紧密匹配(请参阅文档)。
注意,使用内置包pathlib会更简单:
from pathlib import Path
folder = Path("/path/to/folder")
patterns = ("*.txt", "*.md", "*.markdown")
# Keep each direct child of the folder that matches at least one pattern;
# the results are Path objects, not strings.
files = [
    child
    for child in folder.iterdir()
    if any(child.match(pattern) for pattern in patterns)
]
下面的函数_glob用于多个文件扩展名。
import glob
import os
def _glob(path, *exts):
"""Glob for multiple file extensions
Parameters
----------
path : str
A file name without extension, or directory name
exts : tuple
File extensions to glob for
Returns
-------
files : list
list of files matching extensions in exts in path
"""
path = os.path.join(path, "*") if os.path.isdir(path) else path + "*"
return [f for files in [glob.glob(path + ext) for ext in exts] for f in files]
# Example call: collect the .txt, .mdown and .markdown files for projectDir.
# NOTE: projectDir is not defined in this snippet; it must be supplied
# by the surrounding code.
files = _glob(projectDir, ".txt", ".mdown", ".markdown")