长话短说,我的Gist。
给定一个没有requirements.txt
的Python项目,如果想知道需要安装哪些包才能满足这个项目的依赖需求,一个容易想到的方法就是对每一个.py
文件,用模式匹配(如正则表达式)找import xxx
,并记录xxx
为需要的包。然而import
语句有很多形式,如:import xxx
、import xxx as aaa
、import xxx as aaa, yyy as bbb
、from xxx.yyy import fff as ccc
、from .zzz import ggg
。因此,更好的方法是利用抽象语法树ast
模块来找出所有import
语句。
Python的import
语句对应ast
的两种节点:ast.Import
和ast.ImportFrom
。要从ast.Import
获取导入包的列表,可用:
[a.name for a in node.names] # 其中node是ast.Import类型的
要从ast.ImportFrom
获取导入的包,可用:
node.module # 其中node是ast.ImportFrom类型的
值得注意的是如果当前import
语句是from . import xxx
,node.module
将会是None
,此时node.level > 0
,意味着相对导入。因此,要想获得所有导入的包(除了相对导入外,因为相对导入的包绝不会是需要安装的依赖),可以这样:
import ast

# Assumes `source` holds the source code to be parsed.
root = ast.parse(source)
result = []
for node in ast.walk(root):
    if isinstance(node, ast.Import):
        # `import a.b as c, d` carries one alias per imported name; keep
        # only the top-level package of each.
        result.extend(alias.name.partition('.')[0] for alias in node.names)
    elif isinstance(node, ast.ImportFrom) and node.level == 0:
        # level == 0 marks an absolute import; relative imports are skipped.
        result.append(node.module.partition('.')[0])
然而,绝对导入的包也有可能是工作目录中已存在的模块或包。此时我们可以根据导入路径判断它是否指向工作目录下的包:
def exists_local(path, rootpkg):
    """Return True if the dotted import ``path`` resolves under ``rootpkg``.

    ``path`` is a dotted import path (e.g. ``"pkg.mod"``) and ``rootpkg`` is
    the directory to search in. The path counts as local when it maps either
    to a directory containing ``__init__.py`` (a package) or to a ``.py``
    file (a module).
    """
    base = os.path.join(rootpkg, path.replace('.', os.path.sep))
    is_package = os.path.isdir(base) and os.path.isfile(
        os.path.join(base, '__init__.py'))
    # Fall back to the plain-module check only when it is not a package.
    return is_package or os.path.isfile(base + '.py')
其中path
是导入路径,rootpkg
是根包所在目录(定义见 https://docs.python.org/3.7/distutils/setupscript.html#listing-whole-packages )。
把这个核心功能稍作包装,便可写出下面的完整可执行代码:
import argparse
import ast
import itertools
import os
import pkgutil
import sys


def make_parser():
    """Build the command-line argument parser for the script."""
    parser = argparse.ArgumentParser(
        description=('List all root imports. The *root* import of '
                     '`import pkg1.mod1` is "pkg1".'))
    parser.add_argument(
        '-g',
        '--greedy',
        action='store_true',
        help=('find also import statements within try block, '
              'if block, while block, function definition, '
              'etc.'))
    parser.add_argument(
        '-i',
        '--include-installed',
        action='store_true',
        help='include installed/built-in modules/packages')
    parser.add_argument(
        '--rootpkg',
        metavar='DIR',
        help=('the directory of the root package. See '
              'https://docs.python.org/3.7/distutils/setupscript.html#listing-whole-packages '
              'about *root package*. If supplied, local packages/modules will be '
              'excluded from the results. For example, if '
              'there are "mod1.py" and "mod2.py", and in '
              '"mod2.py" there is `import mod1`, then "mod1" '
              'won\'t be listed in the result. Without it, '
              'only relative imports are excluded'))
    parser.add_argument(
        'filenames',
        metavar='SOURCE_FILE',
        nargs='*',
        help=('if specified multiple SOURCE_FILEs, the '
              'results will be joined; if specified no '
              'SOURCE_FILE, the stdin will be read for the '
              'source'))
    return parser


def exists_local(path, rootpkg):
    """Return True if the dotted import ``path`` resolves under ``rootpkg``.

    The path counts as local when it maps either to a directory containing
    ``__init__.py`` (a package) or to a ``.py`` file (a module).
    """
    filepath = os.path.join(rootpkg, path.replace('.', os.path.sep))
    # see if path is a local package
    if os.path.isdir(filepath) and os.path.isfile(
            os.path.join(filepath, '__init__.py')):
        return True
    # see if path is a local module
    return os.path.isfile(filepath + '.py')


# Reference: https://stackoverflow.com/a/9049549/7881370
def yield_imports(root, greedy: bool, rootpkg):
    """Yield the root package name of each absolute import found in ``root``.

    :param root: the parsed AST of a source file
    :param greedy: if true, walk the whole tree (imports nested in
        functions, ``try`` blocks, etc.); otherwise look only at the direct
        children of ``root``
    :param rootpkg: directory of the root package, or a falsy value; when
        given, imports that ``exists_local`` finds under it are skipped
    """
    traverse = ast.walk if greedy else ast.iter_child_nodes
    for node in traverse(root):
        if isinstance(node, ast.Import):
            names = [a.name for a in node.names]
        elif isinstance(node, ast.ImportFrom) and node.level == 0:
            # node.level > 0 would mean a relative import, which is skipped
            names = [node.module]
        else:
            continue
        for name in names:
            if not rootpkg or not exists_local(name, rootpkg):
                yield name.split('.', maxsplit=1)[0]


def collect_results(filenames, greedy: bool, include_installed: bool,
                    rootpkg):
    """Return the sorted, de-duplicated root imports of the given sources.

    :param filenames: paths of the source files to scan; if empty, the
        source is read from stdin
    :param greedy: forwarded to ``yield_imports``
    :param include_installed: if false, drop names that are built-in modules
        or are reported by ``pkgutil.iter_modules()``
    :param rootpkg: forwarded to ``yield_imports``
    :return: a sorted list of root import names
    """
    roots = []
    if not filenames:
        try:
            roots.append(ast.parse(sys.stdin.read()))
        except KeyboardInterrupt:
            pass
    else:
        for filename in filenames:
            # Read bytes rather than text so that the parser itself detects
            # the source encoding (BOM / PEP 263 coding cookie) instead of
            # relying on the locale's default encoding.
            with open(filename, 'rb') as infile:
                roots.append(ast.parse(infile.read(), filename))
    found = set(
        itertools.chain.from_iterable(
            yield_imports(root, greedy, rootpkg) for root in roots))
    if not include_installed:
        installed = set(
            itertools.chain(sys.builtin_module_names,
                            (x[1] for x in pkgutil.iter_modules())))
        found -= installed
    return sorted(found)


def main():
    """Parse the command line and print one root import per line."""
    args = make_parser().parse_args()
    agg = collect_results(args.filenames, args.greedy, args.include_installed,
                          args.rootpkg)
    if agg:
        print('\n'.join(agg))


if __name__ == '__main__':
    main()
需要注意的是,程序输出的是导入名,并不一定与PyPI上的包名一致(例如,代码中写的是import bs4
,而安装时需要pip install beautifulsoup4
)。