# Source code for rituals.util.antglob

# -*- coding: utf-8 -*-
# pylint: disable=too-few-public-methods
""" Recursive globbing with ant-style syntax.
"""
#
# The MIT License (MIT)
#
# Original source (2014-02-17) from https://github.com/zacherates/fileset.py
# Copyright (c) 2012 Aaron Maenpaa
#
# Modifications at https://github.com/jhermann/rituals
# Copyright ⓒ  2015 Jürgen Hermann
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from __future__ import absolute_import, unicode_literals, print_function

import os
import re

from ._compat import string_types

# TODO: allow '?'
# TODO: matching for Windows? (need to canonize to forward slashes in 'root')

__all__ = ['FileSet', 'includes', 'excludes']


def glob2re(part):
    """Translate one path segment of a glob into regex source text.

    The segment is split at every ``*``; each literal piece is regex-escaped
    and the pieces are re-joined with ``[^/]*``, so a wildcard never crosses
    a path separator. Square brackets (including a leading ``[^``) are
    un-escaped again, which lets character sets pass through to the regex.
    """
    # Order matters: the negated-set opener has to be restored before the
    # plain opening bracket, or its escaped caret would be left behind.
    unescape = ((r'\[\^', '[^'), (r'\[', '['), (r'\]', ']'))

    pieces = []
    for literal in part.split("*"):
        quoted = re.escape(literal)
        for escaped, plain in unescape:
            quoted = quoted.replace(escaped, plain)
        pieces.append(quoted)

    return "[^/]*".join(pieces)


def parse_glob(pattern):
    """Yield the regex fragments that together represent glob `pattern`.

    The pattern is split on ``/``. Every directory segment produces one
    fragment ending in a slash (``**`` becomes "nothing, or any number of
    directories"); the final segment is yielded as the file name fragment.
    An empty pattern yields nothing at all.
    """
    if pattern:
        segments = pattern.split("/")
        filename = segments.pop()

        for segment in segments:
            if segment != "**":
                yield glob2re(segment) + "/"
            else:
                yield  "(|.+/)"

        yield glob2re(filename)


def compile_glob(spec):
    """Build a compiled regex, anchored at both ends, from glob `spec`."""
    fragments = parse_glob(spec)
    return re.compile("^{0}$".format("".join(fragments)))


class Pattern(object):
    """One glob pattern together with its include / exclude polarity."""

    def __init__(self, spec, inclusive):
        """Compile glob `spec`; a trailing slash marks a directory pattern."""
        self.is_dir = spec.endswith('/')
        self.inclusive = inclusive
        # The trailing slash only flags "directory"; it is not matched itself.
        self.compiled = compile_glob(spec.rstrip('/'))

    def __str__(self):
        """Return '+' or '-' (include / exclude) followed by the regex source."""
        sign = '+' if self.inclusive else '-'
        return sign + self.compiled.pattern

    def matches(self, path):
        """Return whether the compiled pattern matches `path`."""
        return self.compiled.match(path) is not None


class FileSet(object):
    """ Ant-style file and directory matching.

        Produces an iterator of all of the files that match the provided patterns.
        Note that directory matches must end with a slash, and if they're exclusions,
        they won't be scanned (which prunes anything in that directory that would
        otherwise match).

        Directory specifiers:
            **          matches zero or more directories.
            /           path separator.

        File specifiers:
            *           glob style wildcard.
            [chars]     inclusive character sets.
            [^chars]    exclusive character sets.

        Examples:
            **/*.py         recursively match all python files.
            foo/**/*.py     recursively match all python files in the 'foo' directory.
            *.py            match all the python files in the current directory.
            */*.txt         match all the text files in top-level directories.
            foo/**/*        all files under directory 'foo'.
            */              top-level directories.
            foo/            the directory 'foo' itself.
            **/foo/         any directory named 'foo'.
            **/.*           hidden files.
            **/.*/          hidden directories.
    """

    def __init__(self, root, patterns):
        """Create a file set rooted at `root`.

        `patterns` is either a single glob string or an iterable whose items
        are glob strings and / or pattern objects (anything that has an
        ``inclusive`` attribute). Plain strings become inclusive patterns.
        """
        if isinstance(patterns, string_types):
            patterns = [patterns]

        self.root = root
        self.patterns = [i if hasattr(i, 'inclusive') else includes(i) for i in patterns]

    def __repr__(self):
        """Return a debug representation showing the root and all patterns."""
        return "<FileSet at {0} {1}>".format(repr(self.root), ' '.join(str(i) for i in self.patterns))

    def included(self, path, is_dir=False):
        """Check patterns in order, last match that includes or excludes `path` wins.

        Only patterns whose directory flag equals `is_dir` are considered.
        Return `None` on undecided, else the ``inclusive`` value of the last
        matching pattern.
        """
        inclusive = None
        for pattern in self.patterns:
            if pattern.is_dir == is_dir and pattern.matches(path):
                inclusive = pattern.inclusive

        return inclusive

    def __iter__(self):
        """Iterate over the paths generated by `walk()` with default options."""
        for path in self.walk():
            yield path

    def __or__(self, other):
        """Return the union of this file set's paths and `other` as a `set`."""
        return set(self) | set(other)

    def __ror__(self, other):
        """Reflected union; same result as ``self | other``."""
        return self | other

    def __and__(self, other):
        """Return the intersection of this file set's paths and `other` as a `set`."""
        return set(self) & set(other)

    def __rand__(self, other):
        """Reflected intersection; same result as ``self & other``."""
        return self & other

    def walk(self, **kwargs):
        """ Like `os.walk` and taking the same keyword arguments,
            but generating paths relative to the root.

            Starts in the fileset's root and filters based on its patterns.
            If ``with_root=True`` is passed in, the generated paths include
            the root path.

            Generated paths always use ``/`` between their components, and
            matched directories are yielded with a trailing ``/``.
        """
        # 'with_root' is our own option and must never reach `os.walk`.
        lead = ''
        if kwargs.pop('with_root', False):
            lead = self.root.rstrip(os.sep) + os.sep

        for base, dirs, files in os.walk(self.root, **kwargs):
            # Path of the current directory relative to the root, as components.
            prefix = base[len(self.root):].lstrip(os.sep)
            bits = prefix.split(os.sep) if prefix else []

            # Iterate over a copy, since excluded directories are removed from
            # `dirs` in place, which stops `os.walk` from descending into them
            # (this pruning is only honored by `os.walk` in top-down mode).
            for dirname in dirs[:]:
                path = '/'.join(bits + [dirname])
                inclusive = self.included(path, is_dir=True)
                if inclusive:
                    yield lead + path + '/'
                elif inclusive is False:
                    dirs.remove(dirname)

            for filename in files:
                path = '/'.join(bits + [filename])
                if self.included(path):
                    yield lead + path
def includes(pattern):
    """A single inclusive glob pattern.

    Returns a `Pattern` built from glob `pattern` with ``inclusive=True``.
    """
    return Pattern(pattern, inclusive=True)
def excludes(pattern):
    """A single exclusive glob pattern.

    Returns a `Pattern` built from glob `pattern` with ``inclusive=False``.
    """
    return Pattern(pattern, inclusive=False)