#! /usr/bin/env python3
# -*- coding: utf-8 -*-
# File : matching.py
# Author : Jiayuan Mao
# Email : maojiayuan@gmail.com
# Date : 03/02/2017
#
# This file is part of Jacinle.
# Distributed under terms of the MIT license.
"""Functions to match names using glob patterns."""
import fnmatch
import re
import collections
from typing import Any, Optional, Union, Iterable, Tuple, List, Dict
__all__ = ['NameMatcher', 'IENameMatcher']
class NameMatcher(object):
    """A name matcher based on an ordered set of glob patterns.

    The rule set is a list of ``(pattern, value)`` pairs. Each pattern is a glob
    pattern (translated with :func:`fnmatch.translate`), and the value is what
    :meth:`match` returns when the pattern matches. Rules are tried in order and
    the first matching rule wins. Patterns are compiled with ``re.IGNORECASE``,
    so matching is case-insensitive.

    Rules changed through :meth:`append_rule`, :meth:`insert_rule` or
    :meth:`pop_rule` are picked up by the next :meth:`begin` (or an explicit
    :meth:`compile`). Mutating the list returned by :attr:`rules` directly is
    not tracked; use ``begin(force_compile=True)`` in that case.

    Example:
        .. code-block:: python

            matcher = NameMatcher({'*.jpg': 'image', '*.png': 'image', '*.txt': 'text'})
            with matcher:
                matcher.match('a.jpg')  # 'image'
                matcher.match('a.png')  # 'image'

            # A tuple of (matched records, unused patterns).
            matched, unused = matcher.get_last_stat()
            print(matched)  # [('a.jpg', '*.jpg', 'image'), ('a.png', '*.png', 'image')]
            print(unused)  # {'*.txt'}
    """

    def __init__(self, rules: Optional[Union[Iterable[Tuple[str, Any]], Dict[str, Any]]] = None):
        """Initialize the name matcher.

        Args:
            rules: A list of (pattern, value) pairs, or a dict of {pattern: value}.

        Raises:
            TypeError: if ``rules`` (or one of its entries) is not iterable.
        """
        if rules is None:
            self._rules = []
        elif isinstance(rules, dict):
            self._rules = list(rules.items())
        else:
            # Iterating raises TypeError for non-iterables, so no explicit
            # isinstance check is needed (``collections.Iterable`` no longer
            # exists on Python >= 3.10). Entries are normalised to tuples,
            # matching what ``append_rule`` stores.
            self._rules = [tuple(rule) for rule in rules]

        self._map = {}
        self._compiled_rules = []
        self._compiled = False

        self._matched = []
        self._unused = set()
        self._last_stat = None

    @property
    def rules(self) -> List[Tuple[str, Any]]:
        """Get the rules as a list of (pattern, value) pairs."""
        return self._rules

    def map(self) -> Dict[str, Any]:
        """Get the map of {pattern: value}, compiling the rule set first if needed."""
        if not self._compiled:
            self.compile()
        return self._map

    def append_rule(self, rule: Tuple[str, Any]):
        """Append a rule to the rule set. The rule is a (pattern, value) pair.

        Args:
            rule: the rule to be appended.
        """
        self._rules.append(tuple(rule))
        # Force a re-compile at the next ``begin()`` so the new rule is used.
        self._compiled = False

    def insert_rule(self, index: int, rule: Tuple[str, Any]):
        """Insert a rule to the rule set at a given position (priority).

        Args:
            index: the position to insert at; earlier rules take precedence.
            rule: the (pattern, value) pair to be inserted.
        """
        self._rules.insert(index, tuple(rule))
        self._compiled = False

    def pop_rule(self, index: Optional[int] = None) -> Tuple[str, Any]:
        """Pop a rule from the rule set.

        Args:
            index: the index of the rule to be popped. If None, the last rule will be popped.

        Returns:
            The removed (pattern, value) pair.

        Raises:
            IndexError: if the rule set is empty or ``index`` is out of range.
        """
        # ``list.pop(None)`` is a TypeError, so the "last rule" case is explicit.
        popped = self._rules.pop() if index is None else self._rules.pop(index)
        self._compiled = False
        return popped

    def begin(self, *, force_compile=False):
        """Begin a matching session.

        Args:
            force_compile: re-compile the rule set even if it is already compiled.
        """
        if not self._compiled or force_compile:
            self.compile()
        self._matched = []
        # Track unused rules by index; every rule starts out unused.
        self._unused = set(range(len(self._compiled_rules)))

    def end(self):
        """End a matching session.

        Returns:
            A tuple of ``(matched, unused)``: ``matched`` is a list of
            ``(name, pattern, value)`` records, one per successful :meth:`match`
            call; ``unused`` is the set of patterns that never matched.
        """
        return self._matched, {self._compiled_rules[i][0] for i in self._unused}

    def match(self, k: str) -> Optional[Any]:
        """Match a name against the rule set.

        Args:
            k: the name to be matched.

        Returns:
            The value of the first matching rule, otherwise None.
        """
        for i, (pattern, regex, value) in enumerate(self._compiled_rules):
            if regex.match(k):
                self._unused.discard(i)
                self._matched.append((k, pattern, value))
                return value
        return None

    def compile(self):
        """Compile the rule set into regular expressions and rebuild the pattern map."""
        self._map = {}
        self._compiled_rules = []
        for pattern, value in self._rules:
            self._map[pattern] = value
            regex = re.compile(fnmatch.translate(pattern), flags=re.IGNORECASE)
            self._compiled_rules.append((pattern, regex, value))
        self._compiled = True

    def __enter__(self):
        self.begin()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._last_stat = self.end()

    def get_last_stat(self):
        """Get the result of the last ``with`` session, or None if there was none."""
        return self._last_stat
class IENameMatcher(object):
    """A name matcher based on two sets of glob patterns: one for inclusion and one for exclusion.

    - When ``include`` is None, ``exclude`` is not None, the matcher will match all names that are not excluded.
    - When ``include`` is not None, ``exclude`` is None, the matcher will match all names that are included.
    - When ``include`` is not None, ``exclude`` is not None, the matcher will match all names that are included and not excluded.
    - When both are None, every name matches.

    The ``exclude`` rule set has higher priority than the ``include`` rule set.

    A session's result is a ``(stat_type, things)`` tuple that reports whichever
    side is smaller: ``('included', accepted_names)`` when fewer names were
    accepted than rejected, otherwise ``('excluded', rejected_names)``.
    ``things`` is a set.

    Example:
        .. code-block:: python

            matcher = IENameMatcher(include=['*.jpg', '*.png'], exclude=['*.bak.png'])
            with matcher:
                matcher.match('a.jpg')  # True
                matcher.match('a.png')  # True
                matcher.match('a.bak.png')  # False
                matcher.match('a.txt')  # False
                matcher.match('a.bak.txt')  # False

            stat_type, things = matcher.get_last_stat()
            # Two names were accepted and three rejected, so the (smaller)
            # accepted set is the one reported.
            print(stat_type)  # 'included'
            print(things)  # {'a.jpg', 'a.png'}
    """

    def __init__(self, include: Optional[Iterable[str]] = None, exclude: Optional[Iterable[str]] = None):
        """Initialize the name matcher.

        Args:
            include: a list of glob patterns for inclusion, or None to include everything.
            exclude: a list of glob patterns for exclusion, or None to exclude nothing.
        """
        if include is None:
            self.include = None
        else:
            self.include = NameMatcher([(pattern, True) for pattern in include])

        if exclude is None:
            self.exclude = None
        else:
            self.exclude = NameMatcher([(pattern, True) for pattern in exclude])

        # Per-session accumulators are kept apart from the published result so
        # that ``match`` works before ``begin`` and after ``end``.
        self._accepted = set()
        self._rejected = set()
        self._last_stat = None

    def begin(self):
        """Begin a matching session, resetting the accepted/rejected name sets."""
        if self.include is not None:
            self.include.begin()
        if self.exclude is not None:
            self.exclude.begin()
        self._accepted = set()
        self._rejected = set()

    def end(self):
        """End a matching session.

        Returns:
            A tuple of ``(stat_type, things)``: ``('included', accepted_names)``
            if fewer names were accepted than rejected, otherwise
            ``('excluded', rejected_names)``. The same tuple is available
            afterwards through :meth:`get_last_stat`.
        """
        if self.include is not None:
            self.include.end()
        if self.exclude is not None:
            self.exclude.end()

        # Publish a snapshot so later ``match`` calls cannot alter the result.
        if len(self._accepted) < len(self._rejected):
            self._last_stat = ('included', set(self._accepted))
        else:
            self._last_stat = ('excluded', set(self._rejected))
        return self._last_stat

    def match(self, k: str) -> bool:
        """Match a name against the rule set.

        Args:
            k: the name to be matched.

        Returns:
            True if the name is included and not excluded, otherwise False.
        """
        if self.include is None:
            ret = True
        else:
            ret = bool(self.include.match(k))

        # The exclude rules are only consulted for names that passed inclusion.
        if ret and self.exclude is not None:
            ret = not bool(self.exclude.match(k))

        if ret:
            self._accepted.add(k)
        else:
            self._rejected.add(k)
        return ret

    def __enter__(self):
        self.begin()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.end()

    def get_last_stat(self):
        """Get the result of the last finished session, or None if no session has ended yet."""
        return self._last_stat