Source code for ocdskit.hierarchy

from collections import Counter
from itertools import chain

from concepts import Context

from ocdskit.util import _dedupe_with_counter, _get_prop_name, _split_camel_case, longest_common_subsequence


def get_base_class_name(class_names, prefix=""):
    """
    Derive a base class name from the longest common subsequence of words within class names.

    Each class name is split into words with ``_split_camel_case``. The word sequences are then folded
    pairwise with ``longest_common_subsequence``. Repeated words in the result are dropped, keeping the
    first occurrence, and the remaining words are concatenated after ``prefix``.

    :param list[str] class_names: a list of class names
    :param str prefix: a prefix for the base class name
    :returns: a base class name, or ``None`` if fewer than two class names are given or if the common
      subsequence of words is empty
    :rtype: str or None
    """
    # A single class (or none) has nothing to be generalized from.
    if len(class_names) < 2:
        return None

    sequences = [_split_camel_case(name) for name in class_names]

    lcs = sequences[0]
    for sequence in sequences[1:]:
        lcs = longest_common_subsequence(lcs, sequence)
        # An empty common subsequence stays empty, so stop folding as soon as nothing is shared.
        if not lcs:
            return None

    # `dict.fromkeys` removes repeated words while preserving first-occurrence order.
    return prefix + "".join(dict.fromkeys(lcs))
# https://en.wikipedia.org/wiki/Formal_concept_analysis
def get_base_classes_via_fca(
    classes, min_intent=2, min_extent=2, max_field_prevalence=1.0, base_class_name_prefix=""
):
    """
    Identify base classes using
    `Formal Concept Analysis <https://en.wikipedia.org/wiki/Formal_concept_analysis>`__.

    Builds a concept lattice from the property sets of each class. Concepts are filtered to those with at
    least ``min_extent`` member classes and ``min_intent`` non-inherited, non-common properties. Properties
    found in more than ``max_field_prevalence`` of classes are considered common and ignored for the
    ``min_intent`` threshold.

    If ``classes`` is empty, or none of the classes has any property, an empty list is returned without
    building a lattice.

    :param dict classes: mapping of definition names to sets of ``{prop}:{hash}`` strings
    :param int min_intent: minimum number of non-inherited, non-common properties for a base class
    :param int min_extent: minimum number of member classes for a base class
    :param float max_field_prevalence: fields found in more than this proportion of classes are considered
      common
    :param str base_class_name_prefix: a prefix to disambiguate base class names from existing class names
    :returns: a list of dicts with ``name``, ``members``, and ``props`` keys
    :rtype: list[dict]
    """
    # Sort the properties to achieve deterministic behavior.
    properties = sorted(set().union(*classes.values()))

    # Without any property, every concept has an empty intent, which can never contain a field that is not
    # covered by its parents, so no base class can be produced. Returning early also avoids handing empty
    # input to `Context`.
    # NOTE(review): `concepts.Context` presumably rejects empty objects/properties - confirm.
    if not properties:
        return []

    bools = [tuple(prop in class_properties for prop in properties) for class_properties in classes.values()]
    context = Context(classes, properties, bools)

    # Determine the common fields.
    n = len(classes)
    counts = Counter(chain.from_iterable(classes.values()))
    common_properties = {prop for prop, count in counts.items() if count / n > max_field_prevalence}

    # Iterate general-to-specific (reversed lattice) so the most general concept claims the base name,
    # and more specific concepts get a minimal()-based suffix for disambiguation.
    names = set(classes)
    base_classes = []
    for concept in reversed(context.lattice):
        # `intent` is a tuple of the shared properties.
        intent = set(concept.intent)
        # `extent` is a tuple of the classes with the shared properties.
        extent = list(concept.extent)

        # Base classes must have at least `min_extent` specialized classes.
        if len(extent) < min_extent:
            continue

        # `upper_neighbors` are the concept's parents in the lattice (with strictly fewer `intent` properties).
        parents = concept.upper_neighbors
        if parents:
            best_parent_properties = set(max(parents, key=lambda c: len(c.intent)).intent)
            inherited_properties = set().union(*(set(parent.intent) for parent in parents))
        else:
            best_parent_properties = set()
            inherited_properties = set()

        # Base classes must have at least `min_intent` non-common fields more than the best parent.
        if len(intent - common_properties - best_parent_properties) < min_intent:
            continue

        # Base classes must have at least one field not covered by any parent.
        if intent <= inherited_properties:
            continue

        name = get_base_class_name(extent, prefix=base_class_name_prefix)
        if name is None or name in names:
            # Disambiguate (or synthesize) the name from the words of the concept's minimal properties.
            suffix = "".join(
                word for prop in concept.minimal() for word in _split_camel_case(_get_prop_name(prop))
            )
            name = _dedupe_with_counter(f"{name or base_class_name_prefix}{suffix or 'Base'}", names)

        names.add(name)
        base_classes.append({"name": name, "members": extent, "props": intent})

    return base_classes