# Source code for rituals.util.antglob

# -*- coding: utf-8 -*-
# pylint: disable=too-few-public-methods
""" Recursive globbing with ant-style syntax.
"""
#
# The MIT License (MIT)
#
# Original source (2014-02-17) from https://github.com/zacherates/fileset.py
# Copyright (c) 2012 Aaron Maenpaa
#
# Modifications at https://github.com/jhermann/rituals
# Copyright ⓒ  2015 Jürgen Hermann
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from __future__ import absolute_import, unicode_literals, print_function

import os
import re

from ._compat import string_types

# TODO: allow '?'
# TODO: matching for Windows? (need to canonicalize 'root' to forward slashes)

__all__ = ['FileSet', 'includes', 'excludes']


def glob2re(part):
    """Translate one slash-free glob component into regex source.

    Each ``*`` becomes ``[^/]*`` (any run of characters that does not cross
    a path separator). The text between the stars is regex-escaped, after
    which ``[``, ``[^`` and ``]`` are un-escaped again so that bracketed
    character sets keep working as regex character classes.
    """
    def _literal(text):
        """Escape `text` for regex use, but keep character-set brackets live."""
        quoted = re.escape(text)
        # Order matters: the negated opener has to be restored before the
        # plain opener, otherwise its leading '\[' would be consumed first.
        for escaped, plain in ((r'\[\^', '[^'), (r'\[', '['), (r'\]', ']')):
            quoted = quoted.replace(escaped, plain)
        return quoted

    return "[^/]*".join(_literal(chunk) for chunk in part.split("*"))


def parse_glob(pattern):
    """Yield regex fragments that, concatenated, match the glob `pattern`.

    The pattern is split on ``/``. Every component but the last is treated
    as a directory: ``**`` yields a fragment matching zero or more complete
    directory levels, anything else is converted by `glob2re` and followed
    by a slash. The final component is converted by `glob2re` as-is.
    An empty (falsy) pattern yields nothing at all.
    """
    if pattern:
        segments = pattern.split("/")
        basename = segments.pop()

        for segment in segments:
            # "(|.+/)" is either nothing, or any path prefix ending in a slash.
            yield "(|.+/)" if segment == "**" else (glob2re(segment) + "/")

        yield glob2re(basename)


def compile_glob(spec):
    """Compile the glob `spec` into a regex anchored at both ends.

    The fragments produced by `parse_glob` are concatenated and wrapped
    in ``^`` ... ``$`` before being handed to `re.compile`.
    """
    body = "".join(parse_glob(spec))
    return re.compile("^" + body + "$")


class Pattern(object):
    """One glob pattern, flagged as either an inclusion or an exclusion."""

    def __init__(self, spec, inclusive):
        """Build a regex-backed matcher from the glob `spec`.

        A trailing slash on `spec` marks a directory pattern (``is_dir``);
        trailing slashes are stripped before the glob is compiled.
        `inclusive` is stored unchanged and tells whether a match
        includes or excludes the path.
        """
        self.is_dir = spec.endswith('/')
        self.inclusive = inclusive
        self.compiled = compile_glob(spec.rstrip('/'))

    def __str__(self):
        """Return ``+`` or ``-`` followed by the compiled regex source.

        Note that this is the regex the glob was translated to, not the
        glob text that was originally passed in.
        """
        if self.inclusive:
            sign = '+'
        else:
            sign = '-'
        return sign + self.compiled.pattern

    def matches(self, path):
        """Return ``True`` if the whole of `path` matches this pattern."""
        return self.compiled.match(path) is not None


class FileSet(object):
    """ Ant-style file and directory matching.

        Produces an iterator of all of the files that match the provided patterns.
        Note that directory matches must end with a slash, and if they're exclusions,
        they won't be scanned (which prunes anything in that directory that would
        otherwise match).

        Directory specifiers:
            **              matches zero or more directories.
            /               path separator.

        File specifiers:
            *               glob style wildcard.
            [chars]         inclusive character sets.
            [^chars]        exclusive character sets.

        Examples:
            **/*.py         recursively match all python files.
            foo/**/*.py     recursively match all python files in the 'foo' directory.
            *.py            match all the python files in the current directory.
            */*.txt         match all the text files in top-level directories.
            foo/**/*        all files under directory 'foo'.
            */              top-level directories.
            foo/            the directory 'foo' itself.
            **/foo/         any directory named 'foo'.
            **/.*           hidden files.
            **/.*/          hidden directories.
    """

    def __init__(self, root, patterns):
        """ Bind the directory `root` to one or more patterns.

            `patterns` is either a single glob string, or an iterable whose
            items are glob strings (taken as inclusions) or ready-made pattern
            objects, i.e. anything that carries an ``inclusive`` attribute.
        """
        if isinstance(patterns, string_types):
            patterns = [patterns]

        self.root = root
        self.patterns = [i if hasattr(i, 'inclusive') else includes(i) for i in patterns]

    def __repr__(self):
        """Return a debug representation showing the root and all patterns."""
        return "<FileSet at {0} {1}>".format(repr(self.root), ' '.join(str(i) for i in self.patterns))

    def included(self, path, is_dir=False):
        """ Check patterns in order, last match that includes or excludes `path` wins.

            Only patterns whose ``is_dir`` flag equals `is_dir` are considered.
            Returns the ``inclusive`` flag of the last matching pattern,
            or `None` if no pattern matched (undecided).
        """
        inclusive = None
        for pattern in self.patterns:
            if pattern.is_dir == is_dir and pattern.matches(path):
                inclusive = pattern.inclusive

        return inclusive

    def __iter__(self):
        """Iterate over the matching paths, relative to the root (see `walk`)."""
        for path in self.walk():
            yield path

    def __or__(self, other):
        """Return the union of this fileset's paths and `other` as a plain `set`."""
        return set(self) | set(other)

    def __ror__(self, other):
        """Reflected union; delegates to `__or__` since the operation is symmetric."""
        return self | other

    def __and__(self, other):
        """Return the intersection of this fileset's paths and `other` as a plain `set`."""
        return set(self) & set(other)

    def __rand__(self, other):
        """Reflected intersection; delegates to `__and__` since the operation is symmetric."""
        return self & other

    def walk(self, **kwargs):
        """ Like `os.walk` and taking the same keyword arguments,
            but generating paths relative to the root.

            Starts in the fileset's root and filters based on its patterns.
            If ``with_root=True`` is passed in, the generated paths include
            the root path.

            Generated paths use forward slashes between their components,
            and included directories are yielded with a trailing slash.
            Directories hit by an exclusion are removed from the scan;
            undecided ones are scanned, but not yielded themselves.
        """
        # 'with_root' is our own option and must not reach os.walk().
        with_root = kwargs.pop('with_root', False)
        lead = (self.root.rstrip(os.sep) + os.sep) if with_root else ''

        for base, dirs, files in os.walk(self.root, **kwargs):
            prefix = base[len(self.root):].lstrip(os.sep)
            bits = prefix.split(os.sep) if prefix else []

            # Loop over a copy, since excluded names get removed from 'dirs'
            # in place, which is how os.walk() is told not to descend there.
            for dirname in dirs[:]:
                path = '/'.join(bits + [dirname])
                inclusive = self.included(path, is_dir=True)
                if inclusive:
                    yield lead + path + '/'
                elif inclusive is False:
                    dirs.remove(dirname)

            for filename in files:
                path = '/'.join(bits + [filename])
                if self.included(path):
                    yield lead + path


def includes(pattern):
    """Return a `Pattern` for the glob `pattern` that includes whatever it matches."""
    return Pattern(pattern, inclusive=True)


def excludes(pattern):
    """Return a `Pattern` for the glob `pattern` that excludes whatever it matches."""
    return Pattern(pattern, inclusive=False)