# Source code for frex.utils.class_generator

# NOTE: the owlready2 star import is deliberately kept first so that the
# explicitly named imports below always take precedence over anything it exports.
import owlready2.class_construct
from owlready2 import *
from pathlib import Path
from typing import List, Tuple, Any
import argparse
import json
import re
from rdflib import URIRef


# Header shared by every generated module: the imports the generated dataclass
# relies on, followed by the two decorators. The template intentionally ends
# right after the decorators, because ClassGenerator.populate_template appends
# the "class <Name>(...):" line directly after this text.
BASIC_CLASS_TEMPLATE = """from dataclasses import dataclass
from dataclasses_json import dataclass_json
from rdflib import URIRef
from frex.models import DomainObject


@dataclass_json
@dataclass(frozen=True)
"""


class ClassGenerator:
    """
    The ClassGenerator utility is used to generate python dataclasses based on an ontology's data models.
    These generated dataclasses should be suitable for use with the DomainKgQueryService, as its basic
    query implementation relies on some properties that are automatically included in classes generated
    by this tool.

    Dataclasses generated by this utility have some type hints, but the type hints are not extremely
    detailed. In particular, in cases where properties are known to point to a certain data type that is
    another domain object, this utility will simply add a type hint of "URIRef".
    This implementation is partially based on the fact that some restrictions on property ranges in owl
    are difficult to parse in a meaningful way, and partially based on the fact that we can't necessarily
    guarantee that a user would want to fully parse through the URIs that a given property points to.
    Trying to re-query and convert results to objects might also result in cycles if URIs point to each
    other for certain properties, so simply stopping at the point where a property refers to a URI
    simplifies the process.
    """

    def __init__(self, *, onto_file: str, save_dir: Path):
        """
        Load the ontology and remember where generated files should be written.

        :param onto_file: Either a local file path or a full IRI/URL (e.g. ``http://...``) of the ontology
        :param save_dir: The directory that generated python modules are written to
        """
        self.onto = get_ontology(self._ontology_iri(onto_file)).load()
        self.save_dir = save_dir

    @staticmethod
    def _ontology_iri(onto_file: str) -> str:
        """
        Turn the user-supplied ontology location into the IRI handed to owlready2.

        Plain file paths get a ``file://`` prefix; strings that already start with a URL scheme
        (``http://``, ``https://``, ``file://``, ...) are returned untouched so that remote
        ontologies are not mangled into ``file://http://...``.

        :param onto_file: A local path or a full IRI
        :return: The IRI to pass to ``get_ontology``
        """
        if re.match(r"[A-Za-z][A-Za-z0-9+.\-]*://", onto_file):
            return onto_file
        return f"file://{onto_file}"

    def to_snake_case(self, name: str) -> str:
        """
        Convert a CamelCase / mixedCase identifier to snake_case.

        :param name: The identifier to convert
        :return: The lower-cased, underscore-separated identifier
        """
        # split before a capitalised word ("FirstName" -> "First_Name")
        name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
        # collapse the double underscore produced when the input already had one
        name = re.sub("__([A-Z])", r"_\1", name)
        # split the remaining lower/digit -> upper boundaries ("tH" -> "t_H")
        name = re.sub("([a-z0-9])([A-Z])", r"\1_\2", name)
        return name.lower()

    @staticmethod
    def _import_order(file_deps: dict) -> List[str]:
        """
        Order class names so that every class appears after all of its superclasses.

        Classes whose dependencies can never be satisfied (e.g. because of a cycle) are left out
        of the result, so the caller can detect the problem by comparing lengths.

        :param file_deps: Mapping of class name -> list of superclass names it depends on
        :return: The class names in an import-safe order
        """
        ordered: List[str] = []
        placed = set()
        progressed = True
        # keep sweeping until everything is placed or a full sweep places nothing
        while progressed and len(ordered) < len(file_deps):
            progressed = False
            for class_name, class_deps in file_deps.items():
                if class_name in placed:
                    continue
                if placed.issuperset(class_deps):
                    ordered.append(class_name)
                    placed.add(class_name)
                    progressed = True
        return ordered

    def generate_classes(self):
        """
        Generate python dataclasses based on classes present in an ontology.

        One module is written per ontology class, plus an ``__init__.py`` that imports the
        generated classes in dependency order.
        """
        # make the generator usable without the CLI wrapper having created the directory
        self.save_dir.mkdir(parents=True, exist_ok=True)

        class_to_file = {}
        file_deps = {}
        for c in self.onto.classes():
            if not isinstance(c, owlready2.ThingClass):
                continue
            python_rep_string, super_cl = self.convert_to_py_class(c)
            class_name = str(c.name)
            file_name = self.to_snake_case(class_name)
            class_to_file[class_name] = file_name
            file_deps[class_name] = super_cl
            # python source files are UTF-8 by default, so don't rely on the locale encoding
            with open(
                (self.save_dir / f"{file_name}.py").resolve(), "w", encoding="utf-8"
            ) as f:
                f.write(python_rep_string)

        # go through the dependencies of classes to make sure that the init file has them imported in
        # an order that won't cause a circular import error
        init_order = self._import_order(file_deps)
        if len(init_order) != len(class_to_file):
            print(
                "WARNING: a circular import probably exists in your object models. "
                "Offending classes, and their subclasses, will not be included in the init file."
            )

        init_file_contents = "".join(
            f"from .{class_to_file[class_name]} import {class_name}\n"
            for class_name in init_order
        )
        with open(
            (self.save_dir / "__init__.py").resolve(), "w", encoding="utf-8"
        ) as f:
            f.write(init_file_contents)

    def get_superclass_names(self, c: "owlready2.ThingClass") -> List[str]:
        """
        Identify all superclasses of a given owl class, and if those classes are present in the main
        ontology, return a list of their names.

        :param c: The target owl class to get superclasses for
        :return: a list of class names that are valid superclasses of the target class
        """
        # materialise once instead of re-scanning the ontology for every entry of c.is_a
        known_classes = set(self.onto.classes())
        return [
            str(parent.name)
            for parent in c.is_a
            if isinstance(parent, owlready2.ThingClass) and parent in known_classes
        ]

    def add_restriction(
        self, *, p: "owlready2.class_construct.Restriction", properties: List
    ):
        """
        Add properties based on owl restrictions. Restrictions should correspond to class restrictions
        in owl, such as requiring some property be filled for a class to be valid.

        The new entry is inserted at the front of ``properties`` as a
        ``(prop_name, prop_type, prop_iri)`` tuple.

        :param p: The target class restriction to parse into a property for code generation
        :param properties: The ongoing list of properties for the current class that is being updated
        """
        if isinstance(p.property, str):
            # if a namespace isn't properly specified, the property is just a string
            # instead of having a namespace.
            prop_name = p.property.split("/")[-1]
            prop_iri = str(p.property)
        else:
            prop_name = p.property.name
            prop_iri = str(p.property.iri)

        # TODO: currently only supporting str, int, and float data types for specific type hints
        prop_type = "URIRef"
        for py_type, type_hint in ((str, "str"), (int, "int"), (float, "float")):
            if p.value == py_type:
                prop_type = type_hint
                break

        # convert the property name to snake case, to follow PEP guidelines
        prop_name = self.to_snake_case(prop_name)
        properties.insert(0, (prop_name, prop_type, prop_iri))

    def get_inner_restrictions(
        self,
        *,
        p: "owlready2.class_construct.LogicalClassConstruct",
        properties: List,
    ):
        """
        Parse restrictions that are nested within a class construct. This should be getting called when
        a restriction that is a logical construct (AND and OR types) occurs.

        Members of the construct that are neither restrictions nor nested logical constructs
        (for example a plain named class in ``Person and (hasName some str)``) carry no property
        information and are skipped.

        :param p: The target class construct to parse into a property for code generation
        :param properties: The ongoing list of properties for the current class that is being updated
        """
        for v in p.Classes:
            if isinstance(v, owlready2.class_construct.Restriction):
                self.add_restriction(p=v, properties=properties)
            elif isinstance(v, owlready2.class_construct.LogicalClassConstruct):
                # only AND/OR constructs have nested members to descend into
                self.get_inner_restrictions(p=v, properties=properties)

    def get_property_names_and_types(
        self, c: "owlready2.ThingClass"
    ) -> List[Tuple[str, Any, str]]:
        """
        For the target owl class, extract the property names and types that the class should have.
        These properties are based on owl restrictions that define the class.

        :param c: The target class to extract property names for
        :return: A list of tuples, ordered as (prop_name, prop_type, prop_iri).
        """
        properties = []
        for p in c.is_a + c.equivalent_to:
            if isinstance(p, owlready2.class_construct.Restriction):
                self.add_restriction(p=p, properties=properties)
            elif isinstance(p, owlready2.class_construct.LogicalClassConstruct):
                # AND and OR -type class constructions
                self.get_inner_restrictions(p=p, properties=properties)
        return properties

    def populate_template(
        self,
        *,
        name: str,
        superclasses: List[str],
        properties: List[Tuple[str, Any, str]],
    ) -> str:
        """
        Populate a template for producing generated python dataclasses. The current template is based on
        implementations in python 3.8 - in future versions, some minor details (like keyword-only
        dataclasses) might be introduced, which may call for change.

        For the moment, templates are populated to assume that none of the dataclass's properties have
        default values, and instead we will assume that the querying service will properly handle adding
        default values in cases where the appropriate properties weren't returned as part of a SPARQL query.

        :param name: The name of the class to be generating
        :param superclasses: A list of superclass names that the generated class should inherit from.
            All superclasses in this list are expected to also be generated by this same code generation
            script.
        :param properties: A list of tuples, ordered as (prop_name, prop_type, prop_iri) corresponding to
            the properties that this dataclass should include.
        :return: A string, corresponding to the content of the new python dataclass that will be written
            to a file
        """
        # default to using 4 spaces for indentation
        indent = " " * 4
        parts = []

        if superclasses:
            import_str = ", ".join(superclasses)
            package = str(self.save_dir.name).replace("/", ".")
            parts.append(f"from {package} import {import_str}\n")

        parts.append(BASIC_CLASS_TEMPLATE)
        bases = ", ".join([*superclasses, "DomainObject"])
        parts.append(f"class {name}({bases}):")

        # using default values (e.g. = None) for properties can cause significant issues when
        # handling inheritance, since if any super class has a default value, all subsequent
        # properties in subclasses must all have default values.
        # the current compromise for this situation is to just make all classes have no default values.
        property_to_uri_dict = {}
        seen_lines = set()
        for prop_name, prop_type, prop_iri in properties:
            line = f"\n{indent}{prop_name}: {prop_type}"
            # the same restriction can be reported more than once (e.g. via both is_a and
            # equivalent_to); only the first occurrence of an identical field is emitted
            if line in seen_lines:
                continue
            seen_lines.add(line)
            parts.append(line)
            property_to_uri_dict[str(prop_name)] = prop_iri

        parts.append("\n\n")
        parts.append(f"{indent}prop_to_uri = {{\n")
        for field_name, iri in property_to_uri_dict.items():
            parts.append(f"{indent * 2}URIRef(\"{iri}\"): '{field_name}',\n")
        parts.append(f"{indent}}}\n")
        # inherit the IRI -> field mapping of every generated superclass
        parts.extend(
            f"{indent}prop_to_uri.update({sup_cls}.prop_to_uri)\n"
            for sup_cls in superclasses
        )
        return "".join(parts)

    def convert_to_py_class(
        self, c: "owlready2.ThingClass"
    ) -> Tuple[str, List[str]]:
        """
        Produce a string to generate a python dataclass corresponding to the input owl class.

        :param c: The target owl class to generate code for
        :return: A tuple, containing the string that will be output to a file for the class and a list
            of superclasses that the generated class will inherit from. The superclasses are necessary
            to ensure that import ordering is correct and circular import errors aren't caused down the
            line.
        """
        superclasses = self.get_superclass_names(c)
        properties = self.get_property_names_and_types(c)
        write_string = self.populate_template(
            name=str(c.name), superclasses=superclasses, properties=properties
        )
        return write_string, superclasses
def build_arg_parser() -> argparse.ArgumentParser:
    """
    Build the command line parser for the class generator script.

    ``--onto`` and ``--save_dir`` are mandatory; ``--local`` / ``--remote`` toggle whether the
    ontology string is treated as a local path (the default) or as a URL.

    :return: The configured argument parser
    """
    parser = argparse.ArgumentParser(
        description="Specify an ontology, either from the web or stored locally, "
        "to use to generate Python classes."
    )
    parser.add_argument(
        "--onto",
        type=str,
        required=True,
        help="The URL or local directory for the target ontology",
    )
    parser.add_argument(
        "--local",
        dest="onto_local",
        action="store_true",
        help="Specify that the onto string refers to a local directory (default true)",
    )
    parser.add_argument(
        "--remote",
        dest="onto_local",
        action="store_false",
        help="Specify that the onto string refers to a URL.",
    )
    parser.set_defaults(onto_local=True)
    parser.add_argument(
        "--save_dir",
        type=str,
        required=True,
        help="The directory to save the generated Python classes",
    )
    return parser


def main(argv=None) -> None:
    """
    Parse the command line and generate the dataclass modules.

    :param argv: Argument list to parse; ``None`` means ``sys.argv[1:]``
    """
    args = build_arg_parser().parse_args(argv)

    onto_file = args.onto
    if args.onto_local:
        # local ontologies are handed over as absolute paths
        onto_file = str(Path(onto_file).resolve())

    target_dir = Path(args.save_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    cg = ClassGenerator(onto_file=onto_file, save_dir=target_dir.resolve())
    cg.generate_classes()


if __name__ == "__main__":
    main()