Add more structured syntax

2025-12-20 17:22:01 -06:00 · 2025-05-13 16:17:42 -05:00 · 2025-05-13 16:17:42 -05:00 · 66026fe74b
commit 66026fe74b
parent d330f85c67
11 changed files with 18956 additions and 4528 deletions
--- a/test/highlight/data_augmenter.hy
+++ b/test/highlight/data_augmenter.hy
@ -0,0 +1,266 @@
+#!/usr/bin/env hy
+
+(import xml.etree.ElementTree :as ET)
+
+(require hyrule [-> doto meth ncut])
+(import catboost :as cb)
+(import numpy :as np)
+(import pandas :as pd)
+(import rdkit [Chem RDLogger])
+(import tqdm [tqdm])
+
+(import maplight-gnn)
+
+
+;; Streams a DrugBank XML export and matches each drug entry against
+;; caller-supplied identifier, identifier-type and name columns.
+(defclass DrugBank []
+  ;; The empty prefix maps the default namespace, so the unprefixed paths
+  ;; passed to find/iterfind/findtext below resolve to DrugBank elements.
+  (setv namespaces {"" "http://www.drugbank.ca"})
+  ;; Namespace-qualified tag of a drug entry, as ElementTree reports it.
+  ;; Used instead of slicing the tag at a hard-coded prefix length.
+  (setv drug-tag (+ "{" (get namespaces "") "}drug"))
+
+  ;; Anaphoric find: binds `it` to the child `name` of `element` and
+  ;; evaluates `if-found` only when that child exists; otherwise the whole
+  ;; form evaluates to None.
+  (defmacro ap-find [element name if-found]
+    `(do
+       (setv it (.find ~element ~name self.namespaces))
+       (if-let it ~if-found)))
+
+  ;; Evaluates `execute` only when `maybe` is not None.
+  (defmacro if-let [maybe execute]
+    `(when (is-not ~maybe None)
+       ~execute))
+
+  ;; filename: path of the DrugBank XML file.
+  ;; ids / id-types / names: query columns compared element-wise, so they
+  ;; must share an index. Names are lower-cased once here because the names
+  ;; collected from the XML are lower-cased as well.
+  (meth __init__ [@filename @ids @id-types names]
+    (setv @names (.str.lower names))
+    ;; id-type label (as it appears in id-types) -> extractor method
+    (setv @get-ids {"ChEBI" @chebi
+                    "ChEMBL" @chembl
+                    "drugbank-id" @drugbank
+                    "InChIKey" @inchikey
+                    "PubChem Compound" @pubchem-compound
+                    "PubChem Substance" @pubchem-substance
+                    "unii" @unii}))
+
+  ;; Generator yielding #(matches element) for every drug element that
+  ;; matches at least one query row; `matches` is a boolean Series.
+  ;; NOTE(review): parsed elements are never cleared, so memory presumably
+  ;; grows with the size of the file -- confirm whether that matters here.
+  (meth get-matches []
+    (for [#(_ element) (tqdm (ET.iterparse @filename ["end"]))]
+      ;; don't care about non-drug entries
+      (when (!= element.tag @drug-tag)
+        (continue))
+      (setv matches (@check-match element))
+      ;; make sure there are matches before doing more work
+      (when (not (matches.any))
+        (continue))
+      (yield #(matches element))))
+
+  ;; Returns a boolean Series (indexed like ids) marking the query rows
+  ;; that refer to `element` by any supported identifier or by name.
+  (meth check-match [element]
+    (setv matches (pd.Series False :index @ids.index))
+    (for [#(id-type id-func) (.items @get-ids)]
+      (setv id-val (id-func element))
+      (when (is id-val None) (continue))
+      ;; a row matches only if both its id type and its id value agree
+      (setv id-matches (& (= @id-types id-type) (= @ids id-val)))
+      (setv matches (| matches id-matches)))
+    ;; names can't use the same logic as the other id types
+    (setv #(generic-names brand-names) (@all-names element))
+    (setv matches (| matches (@names.isin generic-names)))
+    (setv matches (| matches (@names.isin brand-names)))
+    (return matches))
+
+  ;; Returns #(generic-names brand-names): tuples of lower-cased strings
+  ;; taken from the entry's name and synonyms, and from its product names.
+  (meth all-names [element]
+    (setv generic-names (set))
+    (setv brand-names (set))
+    (setv main-name (@name element))
+    (when (is-not main-name None) (generic-names.add (.lower main-name)))
+    (ap-find element "synonyms"
+      (for [synonym (.iter it)]
+        (when (and (is-not synonym None) (is-not synonym.text None))
+          (generic-names.add (.lower synonym.text)))))
+    (ap-find element "products"
+      (for [product (.iter it)]
+        (setv brand-name (product.find "name" @namespaces))
+        ;; an empty <name/> has no text; skip it rather than calling
+        ;; .lower on None
+        (when (and (is-not brand-name None) (is-not brand-name.text None))
+          (brand-names.add (.lower brand-name.text)))))
+    ;; .iter also visits the container element itself; presumably this
+    ;; filter exists to drop its whitespace-only text, which has a newline
+    (setv generic-names (tuple (filter (fn [s] (not-in "\n" s)) generic-names)))
+    (setv brand-names (tuple (filter (fn [s] (not-in "\n" s)) brand-names)))
+    (return #(generic-names brand-names)))
+
+  ;; The single-value extractors below return the text of one child
+  ;; element, or None when that child is absent.
+  (meth cas-number [element]
+    (ap-find element "cas-number" it.text))
+
+  (meth chebi [element]
+    (@from-external-identifiers element "ChEBI"))
+
+  (meth chembl [element]
+    (@from-external-identifiers element "ChEMBL"))
+
+  (meth drugbank [element]
+    (ap-find element "drugbank-id" it.text))
+
+  ;; True/False depending on whether some text node under <groups> is
+  ;; exactly "approved"; None when there is no <groups> child.
+  (meth fda-approval [element]
+    (ap-find element "groups" (in "approved" (tuple (it.itertext)))))
+
+  (meth inchikey [element]
+    (@from-calculated-properties element "InChIKey"))
+
+  (meth indication [element]
+    (ap-find element "indication" it.text))
+
+  (meth mechanism [element]
+    (ap-find element "mechanism-of-action" it.text))
+
+  (meth name [element]
+    (ap-find element "name" it.text))
+
+  ;; Returns a list of "<cost><currency>" strings, one per <price> that has
+  ;; a non-empty <cost>; None when there is no <prices> child.
+  (meth prices [element]
+    (ap-find element "prices"
+      (do
+        (setv prices (list))
+        (for [price-element (it.iterfind "price" @namespaces)]
+          (setv cost (price-element.find "cost" @namespaces))
+          ;; a cost without text is skipped and a missing currency
+          ;; attribute contributes "", so the concatenation cannot fail
+          (when (and (is-not cost None) (is-not cost.text None))
+            (.append prices (+ cost.text (cost.attrib.get "currency" "")))))
+        (return prices))))
+
+  (meth pubchem-compound [element]
+    (@from-external-identifiers element "PubChem Compound"))
+
+  (meth pubchem-substance [element]
+    (@from-external-identifiers element "PubChem Substance"))
+
+  (meth smiles [element]
+    (@from-calculated-properties element "SMILES"))
+
+  (meth unii [element]
+    (ap-find element "unii" it.text))
+
+  ;; Returns the <identifier> text of the first <external-identifier> whose
+  ;; <resource> equals resource-type; None when nothing matches.
+  (meth from-external-identifiers [element resource-type]
+    (ap-find element "external-identifiers"
+      (for [external-identifier (it.iterfind "external-identifier" @namespaces)]
+        (when (= (external-identifier.findtext "resource" :namespaces @namespaces) resource-type)
+          (return (external-identifier.findtext "identifier" :namespaces @namespaces))))))
+
+  ;; Returns the <value> text of the first <property> whose <kind> equals
+  ;; kind-type; None when nothing matches.
+  (meth from-calculated-properties [element kind-type]
+    (ap-find element "calculated-properties"
+      (for [property (it.iterfind "property" @namespaces)]
+        (when (= (property.findtext "kind" :namespaces @namespaces) kind-type)
+          (return (property.findtext "value" :namespaces @namespaces)))))))
+
+
+;; Loads a table of drug queries, enriches it with DrugBank fields,
+;; merges duplicate drugs and adds ADMET model predictions.
+(defclass DataAugmenter []
+  ;; Binds `var-name` to the column title `col-name` and creates that
+  ;; column on self.drug-list, filled with `col-initial-value`.
+  (defmacro create-var-column [var-name col-name col-initial-value]
+    `(do
+       (setv ~var-name ~col-name)
+       (setv (get self.drug-list ~var-name) ~col-initial-value)))
+
+  ;; filename: path of the query file (.csv or .json); nothing is read
+  ;; until load-drug-queries is called.
+  (meth __init__ [@filename]
+    (setv @drug-list None)
+    (setv @admet-models None))
+
+  ;; Reads the query file into @drug-list and returns self for chaining.
+  ;; Raises ValueError for any extension other than .csv or .json.
+  (meth load-drug-queries []
+    (cond
+      (@filename.endswith ".csv")
+      (with [f (open @filename "r")]
+        (setv @drug-list (pd.read-csv f)))
+      (@filename.endswith ".json")
+      (with [f (open @filename "r")]
+        (setv @drug-list (pd.read-json f :orient "records")))
+      True
+      (raise (ValueError "Data file must be .csv or .json")))
+    (return self))
+
+  ;; models: mapping of output column name -> saved CatBoost model path.
+  ;; Loads every model into @admet-models and returns self for chaining.
+  (meth load-admet-models [models]
+    (setv @admet-models (dict))
+    (for [#(name path) (models.items)]
+      (setv model (cb.CatBoostClassifier))
+      (model.load-model path)
+      (setv (get @admet-models name) model))
+    (return self))
+
+  ;; Writes @drug-list to `filename` as a JSON array of records.
+  ;; Raises ValueError when no drug list has been loaded.
+  (meth save-drug-info [filename]
+    (when (is @drug-list None)
+      (raise (ValueError "drug-list must be loaded first.")))
+    (with [f (open filename "w")]
+      (@drug-list.to-json f :orient "records")))
+
+  ;; Scans the DrugBank XML at `filename` and copies the fields of every
+  ;; matching drug entry into new columns of @drug-list. The three
+  ;; *-col-name arguments name the query columns holding the identifier,
+  ;; its type and the drug name. Raises ValueError when no drug list has
+  ;; been loaded.
+  (meth match-drugbank [filename id-col-name id-type-col-name name-col-name]
+    (when (is @drug-list None)
+      (raise (ValueError "drug-list is not defined. Call load-drug-queries before match-drugbank.")))
+    ;; make sure the cols are strings and not lists of strings; an empty
+    ;; list has no first element, so it becomes None instead of raising
+    (setv unwrap-list (fn [x] (if (isinstance x list) (if x (get x 0) None) x)))
+    (setv id-col (.apply (get @drug-list id-col-name) unwrap-list))
+    (setv id-type-col (.apply (get @drug-list id-type-col-name) unwrap-list))
+    (setv name-col (.apply (get @drug-list name-col-name) unwrap-list))
+    ;; tedious column making for what we're about to store
+    ;; variable name, column title, initial value
+    (create-var-column cas-column "CAS Registry Number" None)
+    (create-var-column fda-column "FDA Approved" None)
+    (create-var-column indication-column "Indication" None)
+    (create-var-column mechanism-column "Mechanism" None)
+    (create-var-column name-column "DrugBank Name" None)
+    ;; every row gets its own empty list rather than one shared object
+    (create-var-column price-column "Prices" (@drug-list.apply (fn [_] (list)) :axis 1))
+    (create-var-column smiles-column "SMILES" None)
+    (create-var-column unii-column "UNII" None)
+    (setv drugbank (DrugBank filename id-col id-type-col name-col))
+    (for [#(matches element) (drugbank.get-matches)]
+      (setv (ncut @drug-list.loc matches cas-column) (drugbank.cas-number element))
+      (setv (ncut @drug-list.loc matches fda-column) (drugbank.fda-approval element))
+      (setv (ncut @drug-list.loc matches indication-column) (drugbank.indication element))
+      (setv (ncut @drug-list.loc matches mechanism-column) (drugbank.mechanism element))
+      (setv (ncut @drug-list.loc matches name-column) (drugbank.name element))
+      (setv (ncut @drug-list.loc matches price-column)
+        (.apply (ncut @drug-list.loc matches price-column) (fn [_] (drugbank.prices element)))) ; prices is a list
+      (setv (ncut @drug-list.loc matches smiles-column) (drugbank.smiles element))
+      (setv (ncut @drug-list.loc matches unii-column) (drugbank.unii element))))
+
+  ;; Collapses rows that share a "DrugBank Name" into one row per name.
+  ;; Raises ValueError when the drug list or that column is missing.
+  ;; NOTE(review): groupby drops rows whose key is missing by default, so
+  ;; unmatched queries presumably disappear here -- confirm this is wanted.
+  (meth deduplicate []
+    (when (is @drug-list None)
+      (raise (ValueError "drug-list is not defined. Call load-drug-queries before deduplicate.")))
+    (when (not-in "DrugBank Name" @drug-list.columns)
+      (raise (ValueError "ID data does not exist yet. Run match-drugbank to create it.")))
+    (setv @drug-list
+      (-> @drug-list
+        (.groupby "DrugBank Name")
+        (.agg
+          ;; per column and group: flatten list cells, drop None, then
+          ;; keep nothing (None), the single value, or the set of values
+          (fn [x]
+            (setv y [])
+            (for [item x]
+              (if (isinstance item list)
+                (y.extend item)
+                (y.append item)))
+            (setv z (set y))
+            (z.discard None)
+            (cond
+              (= (len z) 0) None
+              (= (len z) 1) (.pop z)
+              True z)))
+        (.reset-index))))
+
+  ;; Adds one column per loaded ADMET model holding column 1 of the model's
+  ;; predict-proba output, for rows whose SMILES parses into a molecule.
+  ;; Raises ValueError when the drug list, the models or the SMILES column
+  ;; are missing.
+  (meth predict-admet []
+    (when (is @drug-list None)
+      (raise (ValueError "drug-list is not defined. Call load-drug-queries before predict-admet.")))
+    (when (is @admet-models None)
+      (raise (ValueError "admet-models is not defined. Call load-admet-models before predict-admet.")))
+    (when (not-in "SMILES" @drug-list.columns)
+      (raise (ValueError "SMILES data does not exist yet. Run match-drugbank to create it.")))
+    (RDLogger.DisableLog "rdApp.*")
+    (setv smiles-mask (.notna (get @drug-list "SMILES")))
+    (setv smiles (ncut @drug-list.loc smiles-mask "SMILES"))
+    (setv molecules (smiles.apply Chem.MolFromSmiles))
+    ;; rows whose SMILES did not produce a molecule are left out
+    (setv molecules-mask (.notna molecules))
+    (setv fingerprints (@get-fingerprints (get molecules molecules-mask)))
+    ;; mask over the whole drug list selecting exactly the fingerprinted rows
+    (setv combined-mask (pd.Series False :index @drug-list.index))
+    (setv (ncut combined-mask.loc (. (get smiles molecules-mask) index)) True)
+    (for [#(name model) (@admet-models.items)]
+      (setv predictions (model.predict-proba fingerprints))
+      (setv (ncut @drug-list.loc combined-mask name) (ncut predictions : 1))))
+
+  ;; Returns the five maplight-gnn feature blocks for `molecules`,
+  ;; concatenated along axis 1.
+  (meth get-fingerprints [molecules]
+    (setv fingerprints (list))
+    (fingerprints.append (maplight-gnn.get-morgan-fingerprints molecules))
+    (fingerprints.append (maplight-gnn.get-avalon-fingerprints molecules))
+    (fingerprints.append (maplight-gnn.get-erg-fingerprints molecules))
+    (fingerprints.append (maplight-gnn.get-rdkit-features molecules))
+    (fingerprints.append (maplight-gnn.get-gin-supervised-masking molecules))
+    (np.concatenate fingerprints :axis 1)))
+
+
+;; Script entry point: load the queries and the ADMET models, then match
+;; against DrugBank, deduplicate, predict and save.
+(when (= __name__ "__main__")
+  (setv augmenter
+    (.load-admet-models
+      (.load-drug-queries (DataAugmenter "data/translator_drugs.json"))
+      {"Blood Brain Barrier" "data/admet/bbb_martins-0.916-0.002.dump"
+       "Bioavailability" "data/admet/bioavailability_ma-0.74-0.01.dump"
+       "Human Intestinal Absorption" "data/admet/hia_hou-0.989-0.001.dump"}))
+  ;; the remaining steps are run for their side effects on augmenter
+  (doto augmenter
+    (.match-drugbank "data/src/drugbank.xml" "result_id" "id_type" "result_name")
+    (.deduplicate)
+    (.predict-admet)
+    (.save-drug-info "data/translator_drug_list.json")))
--- a/test/highlight/data_augmenter.py
+++ b/test/highlight/data_augmenter.py
@ -0,0 +1,347 @@
+import hy
+import xml.etree.ElementTree as ET
+hy.macros.require('hyrule', None, target_module_name='data_augmenter', assignments=[['->', '->'], ['doto', 'doto'], ['meth', 'meth'], ['ncut', 'ncut']], prefix='')
+import catboost as cb
+import numpy as np
+import pandas as pd
+from rdkit import Chem, RDLogger
+from tqdm import tqdm
+import maplight_gnn
+
+class DrugBank:
+    """Stream a DrugBank XML export and match its drug entries against query columns.
+
+    NOTE(review): this file looks like hy2py output for data_augmenter.hy;
+    confirm whether the Hy source has to be kept in step with edits made here.
+    """
+    # The empty prefix maps the default namespace so unprefixed paths resolve.
+    namespaces = {'': 'http://www.drugbank.ca'}
+    # Namespace-qualified tag of a drug entry, as ElementTree reports it.
+    drug_tag = '{' + namespaces[''] + '}drug'
+    _hy_local_macro__ap_find = lambda element, name, if_found: hy.models.Expression([hy.models.Symbol('do', from_parser=True), hy.models.Expression([hy.models.Symbol('setv', from_parser=True), hy.models.Symbol('it', from_parser=True), hy.models.Expression([hy.models.Expression([hy.models.Symbol('.', from_parser=True), hy.models.Symbol('None', from_parser=True), hy.models.Symbol('find', from_parser=True)]), element, name, hy.models.Expression([hy.models.Symbol('.', from_parser=True), hy.models.Symbol('self', from_parser=True), hy.models.Symbol('namespaces', from_parser=True)])])]), hy.models.Expression([hy.models.Symbol('if-let', from_parser=True), hy.models.Symbol('it', from_parser=True), if_found])])
+    _hy_local_macro__if_let = lambda maybe, execute: hy.models.Expression([hy.models.Symbol('when', from_parser=True), hy.models.Expression([hy.models.Symbol('is-not', from_parser=True), maybe, hy.models.Symbol('None', from_parser=True)]), execute])
+
+    def __init__(self, filename, ids, id_types, names):
+        """Store the XML path and the query columns.
+
+        ``ids``, ``id_types`` and ``names`` are compared element-wise, so they
+        must share an index. Names are lower-cased once here because the names
+        collected from the XML are lower-cased as well.
+        """
+        self.filename = filename
+        self.ids = ids
+        self.id_types = id_types
+        self.names = names.str.lower()
+        # id-type label (as it appears in id_types) -> extractor method
+        self.get_ids = {'ChEBI': self.chebi, 'ChEMBL': self.chembl, 'drugbank-id': self.drugbank, 'InChIKey': self.inchikey, 'PubChem Compound': self.pubchem_compound, 'PubChem Substance': self.pubchem_substance, 'unii': self.unii}
+
+    def get_matches(self):
+        """Yield ``(matches, element)`` for each drug element matching a query row.
+
+        NOTE(review): parsed elements are never cleared, so memory presumably
+        grows with the size of the file -- confirm whether that matters here.
+        """
+        for (_, element) in tqdm(ET.iterparse(self.filename, ['end'])):
+            # Only drug entries are of interest.
+            if element.tag != self.drug_tag:
+                continue
+            matches = self.check_match(element)
+            # Skip entries that match no query row before doing more work.
+            if not matches.any():
+                continue
+            yield (matches, element)
+
+    def check_match(self, element):
+        """Return a boolean Series marking the query rows that refer to ``element``."""
+        matches = pd.Series(False, index=self.ids.index)
+        for (id_type, id_func) in self.get_ids.items():
+            id_val = id_func(element)
+            if id_val is None:
+                continue
+            # A row matches only if both its id type and its id value agree.
+            matches = matches | ((self.id_types == id_type) & (self.ids == id_val))
+        # Names cannot use the same logic as the other id types.
+        (generic_names, brand_names) = self.all_names(element)
+        matches = matches | self.names.isin(generic_names)
+        matches = matches | self.names.isin(brand_names)
+        return matches
+
+    def all_names(self, element):
+        """Return ``(generic_names, brand_names)`` as tuples of lower-cased strings."""
+        generic_names = set()
+        brand_names = set()
+        main_name = self.name(element)
+        if main_name is not None:
+            generic_names.add(main_name.lower())
+        synonyms = element.find('synonyms', self.namespaces)
+        if synonyms is not None:
+            for synonym in synonyms.iter():
+                if synonym is not None and synonym.text is not None:
+                    generic_names.add(synonym.text.lower())
+        products = element.find('products', self.namespaces)
+        if products is not None:
+            for product in products.iter():
+                brand_name = product.find('name', self.namespaces)
+                # An empty <name/> has no text; skip it rather than calling
+                # .lower() on None.
+                if brand_name is not None and brand_name.text is not None:
+                    brand_names.add(brand_name.text.lower())
+        # iter() also visits the container element itself; presumably this
+        # filter exists to drop its whitespace-only text, which has a newline.
+        generic_names = tuple(filter(lambda s: '\n' not in s, generic_names))
+        brand_names = tuple(filter(lambda s: '\n' not in s, brand_names))
+        return (generic_names, brand_names)
+
+    def _child_text(self, element, tag):
+        """Return the text of the direct child ``tag``, or None if it is absent."""
+        child = element.find(tag, self.namespaces)
+        return child.text if child is not None else None
+
+    def cas_number(self, element):
+        """Return the text of the ``cas-number`` child, or None."""
+        return self._child_text(element, 'cas-number')
+
+    def chebi(self, element):
+        """Return the ChEBI external identifier, or None."""
+        return self.from_external_identifiers(element, 'ChEBI')
+
+    def chembl(self, element):
+        """Return the ChEMBL external identifier, or None."""
+        return self.from_external_identifiers(element, 'ChEMBL')
+
+    def drugbank(self, element):
+        """Return the text of the first ``drugbank-id`` child, or None."""
+        return self._child_text(element, 'drugbank-id')
+
+    def fda_approval(self, element):
+        """Return whether a text node under ``groups`` is exactly 'approved'; None without ``groups``."""
+        groups = element.find('groups', self.namespaces)
+        return 'approved' in tuple(groups.itertext()) if groups is not None else None
+
+    def inchikey(self, element):
+        """Return the InChIKey calculated property, or None."""
+        return self.from_calculated_properties(element, 'InChIKey')
+
+    def indication(self, element):
+        """Return the text of the ``indication`` child, or None."""
+        return self._child_text(element, 'indication')
+
+    def mechanism(self, element):
+        """Return the text of the ``mechanism-of-action`` child, or None."""
+        return self._child_text(element, 'mechanism-of-action')
+
+    def name(self, element):
+        """Return the text of the ``name`` child, or None."""
+        return self._child_text(element, 'name')
+
+    def prices(self, element):
+        """Return a list of '<cost><currency>' strings, or None without a ``prices`` child.
+
+        A cost without text is skipped and a missing ``currency`` attribute
+        contributes an empty string, so the concatenation cannot fail.
+        """
+        container = element.find('prices', self.namespaces)
+        if container is None:
+            return None
+        prices = list()
+        for price_element in container.iterfind('price', self.namespaces):
+            cost = price_element.find('cost', self.namespaces)
+            if cost is not None and cost.text is not None:
+                prices.append(cost.text + cost.attrib.get('currency', ''))
+        return prices
+
+    def pubchem_compound(self, element):
+        """Return the PubChem Compound external identifier, or None."""
+        return self.from_external_identifiers(element, 'PubChem Compound')
+
+    def pubchem_substance(self, element):
+        """Return the PubChem Substance external identifier, or None."""
+        return self.from_external_identifiers(element, 'PubChem Substance')
+
+    def smiles(self, element):
+        """Return the SMILES calculated property, or None."""
+        return self.from_calculated_properties(element, 'SMILES')
+
+    def unii(self, element):
+        """Return the text of the ``unii`` child, or None."""
+        return self._child_text(element, 'unii')
+
+    def from_external_identifiers(self, element, resource_type):
+        """Return the identifier of the first external-identifier whose resource is ``resource_type``."""
+        container = element.find('external-identifiers', self.namespaces)
+        if container is None:
+            return None
+        for external_identifier in container.iterfind('external-identifier', self.namespaces):
+            if external_identifier.findtext('resource', namespaces=self.namespaces) == resource_type:
+                return external_identifier.findtext('identifier', namespaces=self.namespaces)
+        return None
+
+    def from_calculated_properties(self, element, kind_type):
+        """Return the value of the first calculated property whose kind is ``kind_type``."""
+        container = element.find('calculated-properties', self.namespaces)
+        if container is None:
+            return None
+        for property in container.iterfind('property', self.namespaces):
+            if property.findtext('kind', namespaces=self.namespaces) == kind_type:
+                return property.findtext('value', namespaces=self.namespaces)
+        return None
+
+class DataAugmenter:
+    """Load drug queries, enrich them from DrugBank, merge duplicates and add ADMET predictions.
+
+    NOTE(review): this file looks like hy2py output for data_augmenter.hy;
+    confirm whether the Hy source has to be kept in step with edits made here.
+    """
+    _hy_local_macro__create_var_column = lambda var_name, col_name, col_initial_value: hy.models.Expression([hy.models.Symbol('do', from_parser=True), hy.models.Expression([hy.models.Symbol('setv', from_parser=True), var_name, col_name]), hy.models.Expression([hy.models.Symbol('setv', from_parser=True), hy.models.Expression([hy.models.Symbol('get', from_parser=True), hy.models.Expression([hy.models.Symbol('.', from_parser=True), hy.models.Symbol('self', from_parser=True), hy.models.Symbol('drug-list', from_parser=True)]), var_name]), col_initial_value])])
+
+    def __init__(self, filename):
+        """Remember the query file path; nothing is read until load_drug_queries."""
+        self.filename = filename
+        self.drug_list = None
+        self.admet_models = None
+
+    def load_drug_queries(self):
+        """Read the query file into ``drug_list`` and return self.
+
+        Raises ValueError for any extension other than .csv or .json.
+        """
+        if self.filename.endswith('.csv'):
+            with open(self.filename, 'r') as f:
+                self.drug_list = pd.read_csv(f)
+        elif self.filename.endswith('.json'):
+            with open(self.filename, 'r') as f:
+                self.drug_list = pd.read_json(f, orient='records')
+        else:
+            raise ValueError('Data file must be .csv or .json')
+        return self
+
+    def load_admet_models(self, models):
+        """Load a CatBoost model per ``name -> path`` entry of ``models`` and return self."""
+        self.admet_models = dict()
+        for (name, path) in models.items():
+            model = cb.CatBoostClassifier()
+            model.load_model(path)
+            self.admet_models[name] = model
+        return self
+
+    def save_drug_info(self, filename):
+        """Write ``drug_list`` to ``filename`` as a JSON array of records.
+
+        Raises ValueError when no drug list has been loaded.
+        """
+        if self.drug_list is None:
+            raise ValueError('drug-list must be loaded first.')
+        with open(filename, 'w') as f:
+            return self.drug_list.to_json(f, orient='records')
+
+    @staticmethod
+    def _unwrap_list(x):
+        """Return the first item of a list cell (None if the list is empty); other values pass through."""
+        if isinstance(x, list):
+            return x[0] if x else None
+        return x
+
+    @staticmethod
+    def _merge_values(values):
+        """Flatten list cells, drop None, and return None, the single value, or the set of values."""
+        flat = []
+        for item in values:
+            if isinstance(item, list):
+                flat.extend(item)
+            else:
+                flat.append(item)
+        distinct = set(flat)
+        distinct.discard(None)
+        if len(distinct) == 0:
+            return None
+        if len(distinct) == 1:
+            return distinct.pop()
+        return distinct
+
+    def match_drugbank(self, filename, id_col_name, id_type_col_name, name_col_name):
+        """Copy the fields of every matching DrugBank entry into new ``drug_list`` columns.
+
+        ``filename`` is the DrugBank XML path; the three ``*_col_name``
+        arguments name the query columns holding the identifier, its type and
+        the drug name. Raises ValueError when no drug list has been loaded.
+        """
+        if self.drug_list is None:
+            raise ValueError('drug-list is not defined. Call load-drug-queries before match-drugbank.')
+        # Make sure the columns hold strings and not lists of strings; an
+        # empty list has no first element, so it becomes None.
+        id_col = self.drug_list[id_col_name].apply(self._unwrap_list)
+        id_type_col = self.drug_list[id_type_col_name].apply(self._unwrap_list)
+        name_col = self.drug_list[name_col_name].apply(self._unwrap_list)
+        cas_column = 'CAS Registry Number'
+        self.drug_list[cas_column] = None
+        fda_column = 'FDA Approved'
+        self.drug_list[fda_column] = None
+        indication_column = 'Indication'
+        self.drug_list[indication_column] = None
+        mechanism_column = 'Mechanism'
+        self.drug_list[mechanism_column] = None
+        name_column = 'DrugBank Name'
+        self.drug_list[name_column] = None
+        price_column = 'Prices'
+        # Every row gets its own empty list rather than one shared object.
+        self.drug_list[price_column] = self.drug_list.apply(lambda _: list(), axis=1)
+        smiles_column = 'SMILES'
+        self.drug_list[smiles_column] = None
+        unii_column = 'UNII'
+        self.drug_list[unii_column] = None
+        drugbank = DrugBank(filename, id_col, id_type_col, name_col)
+        for (matches, element) in drugbank.get_matches():
+            self.drug_list.loc[matches, cas_column] = drugbank.cas_number(element)
+            self.drug_list.loc[matches, fda_column] = drugbank.fda_approval(element)
+            self.drug_list.loc[matches, indication_column] = drugbank.indication(element)
+            self.drug_list.loc[matches, mechanism_column] = drugbank.mechanism(element)
+            self.drug_list.loc[matches, name_column] = drugbank.name(element)
+            # prices is a list, so it is assigned row by row through apply
+            self.drug_list.loc[matches, price_column] = self.drug_list.loc[matches, price_column].apply(lambda _: drugbank.prices(element))
+            self.drug_list.loc[matches, smiles_column] = drugbank.smiles(element)
+            self.drug_list.loc[matches, unii_column] = drugbank.unii(element)
+
+    def deduplicate(self):
+        """Collapse rows sharing a 'DrugBank Name' into one row per name.
+
+        Raises ValueError when the drug list or that column is missing.
+        NOTE(review): groupby drops rows whose key is missing by default, so
+        unmatched queries presumably disappear here -- confirm this is wanted.
+        """
+        if self.drug_list is None:
+            raise ValueError('drug-list is not defined. Call load-drug-queries before deduplicate.')
+        if 'DrugBank Name' not in self.drug_list.columns:
+            raise ValueError('ID data does not exist yet. Run match-drugbank to create it.')
+        grouped = self.drug_list.groupby('DrugBank Name')
+        self.drug_list = grouped.agg(self._merge_values).reset_index()
+
+    def predict_admet(self):
+        """Add one column per ADMET model holding column 1 of its predict_proba output.
+
+        Only rows whose SMILES parses into a molecule receive a value. Raises
+        ValueError when the drug list, the models or the SMILES column are
+        missing.
+        """
+        if self.drug_list is None:
+            raise ValueError('drug-list is not defined. Call load-drug-queries before predict-admet.')
+        if self.admet_models is None:
+            raise ValueError('admet-models is not defined. Call load-admet-models before predict-admet.')
+        if 'SMILES' not in self.drug_list.columns:
+            raise ValueError('SMILES data does not exist yet. Run match-drugbank to create it.')
+        RDLogger.DisableLog('rdApp.*')
+        smiles_mask = self.drug_list['SMILES'].notna()
+        smiles = self.drug_list.loc[smiles_mask, 'SMILES']
+        molecules = smiles.apply(Chem.MolFromSmiles)
+        # Rows whose SMILES did not produce a molecule are left out.
+        molecules_mask = molecules.notna()
+        fingerprints = self.get_fingerprints(molecules[molecules_mask])
+        # Mask over the whole drug list selecting exactly the fingerprinted rows.
+        combined_mask = pd.Series(False, index=self.drug_list.index)
+        combined_mask.loc[smiles[molecules_mask].index] = True
+        for (name, model) in self.admet_models.items():
+            predictions = model.predict_proba(fingerprints)
+            self.drug_list.loc[combined_mask, name] = predictions[:, 1]
+
+    def get_fingerprints(self, molecules):
+        """Return the five maplight_gnn feature blocks for ``molecules``, concatenated along axis 1."""
+        fingerprints = [
+            maplight_gnn.get_morgan_fingerprints(molecules),
+            maplight_gnn.get_avalon_fingerprints(molecules),
+            maplight_gnn.get_erg_fingerprints(molecules),
+            maplight_gnn.get_rdkit_features(molecules),
+            maplight_gnn.get_gin_supervised_masking(molecules),
+        ]
+        return np.concatenate(fingerprints, axis=1)
+# Script entry point. The _hy_* names are kept because the original code
+# binds them as module globals.
+if __name__ != '__main__':
+    _hy_anon_var_25 = None
+else:
+    augmenter = DataAugmenter('data/translator_drugs.json')
+    augmenter = augmenter.load_drug_queries()
+    augmenter = augmenter.load_admet_models({
+        'Blood Brain Barrier': 'data/admet/bbb_martins-0.916-0.002.dump',
+        'Bioavailability': 'data/admet/bioavailability_ma-0.74-0.01.dump',
+        'Human Intestinal Absorption': 'data/admet/hia_hou-0.989-0.001.dump',
+    })
+    _hy_gensym_f_1 = augmenter
+    # The remaining steps are run for their side effects on the augmenter.
+    _hy_gensym_f_1.match_drugbank('data/src/drugbank.xml', 'result_id', 'id_type', 'result_name')
+    _hy_gensym_f_1.deduplicate()
+    _hy_gensym_f_1.predict_admet()
+    _hy_gensym_f_1.save_drug_info('data/translator_drug_list.json')
+    _hy_anon_var_25 = _hy_gensym_f_1
--- a/test/highlight/highlight_examples.hy
+++ b/test/highlight/highlight_examples.hy
@ -0,0 +1,30 @@
+;; Assignment forms: a single binding, destructuring a list into two
+;; names, and several name/value pairs in one setv.
+(setv foobar (+ 2 2))
+(setv [tim eric] ["jim" "derrick"])
+(setv  alpha "a"  beta "b")
+
+;; Keyword argument whose value is an anonymous function.
+(sorted "abcBC"
+  :key (fn [x] (.lower x)))
+
+;; Function with positional parameters, an optional parameter `c`
+;; defaulting to "x", and `#* d` collecting the remaining arguments.
+(defn test [a b [c "x"] #* d]
+  [a b c d])
+
+;; Context manager around a while loop that appends items read with
+;; `next` until the buffer holds 10 of them.
+(with [o (open "file.txt" "rt")]
+  (setv buffer [])
+  (while (< (len buffer) 10)
+    (.append buffer (next o))))
+
+;; List comprehension over two ranges with an :if filter; the last form
+;; is the value collected for each accepted pair.
+(lfor
+  x (range 3)
+  y (range 3)
+  :if (= (+ x y) 3)
+  (* x y))
+
+;; Macro that runs `body` once unconditionally and then repeats it while
+;; `test` is true; `~@` splices the body forms into the template.
+(defmacro do-while [test #* body]
+  `(do
+    ~@body
+    (while ~test
+      ~@body)))
+
+;; x is 0, so the loop condition is false and the body runs once.
+(setv x 0)
+(do-while x
+  (print "Printed once."))