added deepsad base code

Author: Jan Kowalczyk
Date: 2024-06-28 07:42:12 +02:00
parent 2eb1bf2e05
commit 914bb020d0
57 changed files with 4974 additions and 0 deletions

View File

@@ -0,0 +1,128 @@
import json
import torch
from base.base_dataset import BaseADDataset
from networks.main import build_network, build_autoencoder
from optim import SemiDeepGenerativeTrainer, VAETrainer
class SemiDeepGenerativeModel(object):
"""A class for the Semi-Supervised Deep Generative model (M1+M2 model).
Paper: Kingma et al. (2014). Semi-supervised learning with deep generative models. In NIPS (pp. 3581-3589).
Link: https://papers.nips.cc/paper/5352-semi-supervised-learning-with-deep-generative-models.pdf
Attributes:
net_name: A string indicating the name of the neural network to use.
net: The neural network.
trainer: SemiDeepGenerativeTrainer to train a Semi-Supervised Deep Generative model.
optimizer_name: A string indicating the optimizer to use for training.
results: A dictionary to save the results.
"""
def __init__(self, alpha: float = 0.1):
"""Inits SemiDeepGenerativeModel."""
self.alpha = alpha
self.net_name = None
self.net = None
self.trainer = None
self.optimizer_name = None
self.vae_net = None # variational autoencoder network for pretraining
self.vae_trainer = None
self.vae_optimizer_name = None
self.results = {
'train_time': None,
'test_auc': None,
'test_time': None,
'test_scores': None,
}
self.vae_results = {
'train_time': None,
'test_auc': None,
'test_time': None
}
def set_vae(self, net_name):
"""Builds the variational autoencoder network for pretraining."""
self.net_name = net_name
self.vae_net = build_autoencoder(self.net_name) # VAE for pretraining
def set_network(self, net_name):
"""Builds the neural network."""
self.net_name = net_name
self.net = build_network(net_name, ae_net=self.vae_net) # full M1+M2 model
def train(self, dataset: BaseADDataset, optimizer_name: str = 'adam', lr: float = 0.001, n_epochs: int = 50,
lr_milestones: tuple = (), batch_size: int = 128, weight_decay: float = 1e-6, device: str = 'cuda',
n_jobs_dataloader: int = 0):
"""Trains the Semi-Supervised Deep Generative model on the training data."""
self.optimizer_name = optimizer_name
self.trainer = SemiDeepGenerativeTrainer(alpha=self.alpha, optimizer_name=optimizer_name, lr=lr,
n_epochs=n_epochs, lr_milestones=lr_milestones, batch_size=batch_size,
weight_decay=weight_decay, device=device,
n_jobs_dataloader=n_jobs_dataloader)
self.net = self.trainer.train(dataset, self.net)
self.results['train_time'] = self.trainer.train_time
def test(self, dataset: BaseADDataset, device: str = 'cuda', n_jobs_dataloader: int = 0):
"""Tests the Semi-Supervised Deep Generative model on the test data."""
if self.trainer is None:
self.trainer = SemiDeepGenerativeTrainer(alpha=self.alpha, device=device,
n_jobs_dataloader=n_jobs_dataloader)
self.trainer.test(dataset, self.net)
# Get results
self.results['test_auc'] = self.trainer.test_auc
self.results['test_time'] = self.trainer.test_time
self.results['test_scores'] = self.trainer.test_scores
def pretrain(self, dataset: BaseADDataset, optimizer_name: str = 'adam', lr: float = 0.001, n_epochs: int = 100,
lr_milestones: tuple = (), batch_size: int = 128, weight_decay: float = 1e-6, device: str = 'cuda',
n_jobs_dataloader: int = 0):
"""Pretrains a variational autoencoder (M1) for the Semi-Supervised Deep Generative model."""
# Train
self.vae_optimizer_name = optimizer_name
self.vae_trainer = VAETrainer(optimizer_name=optimizer_name, lr=lr, n_epochs=n_epochs,
lr_milestones=lr_milestones, batch_size=batch_size, weight_decay=weight_decay,
device=device, n_jobs_dataloader=n_jobs_dataloader)
self.vae_net = self.vae_trainer.train(dataset, self.vae_net)
# Get train results
self.vae_results['train_time'] = self.vae_trainer.train_time
# Test
self.vae_trainer.test(dataset, self.vae_net)
# Get test results
self.vae_results['test_auc'] = self.vae_trainer.test_auc
self.vae_results['test_time'] = self.vae_trainer.test_time
def save_model(self, export_model):
"""Save a Semi-Supervised Deep Generative model to export_model."""
net_dict = self.net.state_dict()
torch.save({'net_dict': net_dict}, export_model)
def load_model(self, model_path):
"""Load a Semi-Supervised Deep Generative model from model_path."""
model_dict = torch.load(model_path)
self.net.load_state_dict(model_dict['net_dict'])
def save_results(self, export_json):
"""Save results dict to a JSON-file."""
with open(export_json, 'w') as fp:
json.dump(self.results, fp)
def save_vae_results(self, export_json):
"""Save variational autoencoder results dict to a JSON-file."""
with open(export_json, 'w') as fp:
json.dump(self.vae_results, fp)
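
For orientation, the intended call order is: build and pretrain the VAE (M1), build the full M1+M2 network on top of it, then train, test, and export. Below is a minimal usage sketch; the dataset object, the network name 'mnist_LeNet', and the import path are assumptions for illustration, not guarantees about this repository's layout.

```python
# Hypothetical driver; `dataset` must be a BaseADDataset whose loaders yield
# (inputs, labels, semi_targets, idx) tuples, and 'mnist_LeNet' is a placeholder net name.
from baselines import SemiDeepGenerativeModel  # import path is an assumption

model = SemiDeepGenerativeModel(alpha=0.1)
model.set_vae('mnist_LeNet')                                   # VAE (M1) for pretraining
model.pretrain(dataset, optimizer_name='adam', lr=1e-3, n_epochs=100, device='cuda')
model.set_network('mnist_LeNet')                               # full M1+M2 model on top of the VAE
model.train(dataset, optimizer_name='adam', lr=1e-3, n_epochs=50, device='cuda')
model.test(dataset, device='cuda')
model.save_results('results_semidgm.json')
model.save_model('model_semidgm.tar')
```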

View File

@@ -0,0 +1,6 @@
from .SemiDGM import SemiDeepGenerativeModel
from .ocsvm import OCSVM
from .kde import KDE
from .isoforest import IsoForest
from .ssad import SSAD
from .shallow_ssad.ssad_convex import ConvexSSAD

View File

@@ -0,0 +1,147 @@
import json
import logging
import time
import torch
import numpy as np
from torch.utils.data import DataLoader
from sklearn.ensemble import IsolationForest
from sklearn.metrics import roc_auc_score
from base.base_dataset import BaseADDataset
from networks.main import build_autoencoder
class IsoForest(object):
"""A class for Isolation Forest models."""
def __init__(self, hybrid=False, n_estimators=100, max_samples='auto', contamination=0.1, n_jobs=-1, seed=None,
**kwargs):
"""Init Isolation Forest instance."""
self.n_estimators = n_estimators
self.max_samples = max_samples
self.contamination = contamination
self.n_jobs = n_jobs
self.seed = seed
self.model = IsolationForest(n_estimators=n_estimators, max_samples=max_samples, contamination=contamination,
n_jobs=n_jobs, random_state=seed, **kwargs)
self.hybrid = hybrid
self.ae_net = None # autoencoder network for the case of a hybrid model
self.results = {
'train_time': None,
'test_time': None,
'test_auc': None,
'test_scores': None
}
def train(self, dataset: BaseADDataset, device: str = 'cpu', n_jobs_dataloader: int = 0):
"""Trains the Isolation Forest model on the training data."""
logger = logging.getLogger()
# do not drop the last batch (non-SGD optimization for the shallow baselines)
train_loader = DataLoader(dataset=dataset.train_set, batch_size=128, shuffle=True,
num_workers=n_jobs_dataloader, drop_last=False)
# Get data from loader
X = ()
for data in train_loader:
inputs, _, _, _ = data
inputs = inputs.to(device)
if self.hybrid:
inputs = self.ae_net.encoder(inputs) # in hybrid approach, take code representation of AE as features
X_batch = inputs.view(inputs.size(0), -1) # X_batch.shape = (batch_size, n_channels * height * width)
X += (X_batch.cpu().data.numpy(),)
X = np.concatenate(X)
# Training
logger.info('Starting training...')
start_time = time.time()
self.model.fit(X)
train_time = time.time() - start_time
self.results['train_time'] = train_time
logger.info('Training Time: {:.3f}s'.format(self.results['train_time']))
logger.info('Finished training.')
def test(self, dataset: BaseADDataset, device: str = 'cpu', n_jobs_dataloader: int = 0):
"""Tests the Isolation Forest model on the test data."""
logger = logging.getLogger()
_, test_loader = dataset.loaders(batch_size=128, num_workers=n_jobs_dataloader)
# Get data from loader
idx_label_score = []
X = ()
idxs = []
labels = []
for data in test_loader:
inputs, label_batch, _, idx = data
inputs, label_batch, idx = inputs.to(device), label_batch.to(device), idx.to(device)
if self.hybrid:
inputs = self.ae_net.encoder(inputs) # in hybrid approach, take code representation of AE as features
X_batch = inputs.view(inputs.size(0), -1) # X_batch.shape = (batch_size, n_channels * height * width)
X += (X_batch.cpu().data.numpy(),)
idxs += idx.cpu().data.numpy().astype(np.int64).tolist()
labels += label_batch.cpu().data.numpy().astype(np.int64).tolist()
X = np.concatenate(X)
# Testing
logger.info('Starting testing...')
start_time = time.time()
scores = (-1.0) * self.model.decision_function(X)
self.results['test_time'] = time.time() - start_time
scores = scores.flatten()
# Save triples of (idx, label, score) in a list
idx_label_score += list(zip(idxs, labels, scores.tolist()))
self.results['test_scores'] = idx_label_score
# Compute AUC
_, labels, scores = zip(*idx_label_score)
labels = np.array(labels)
scores = np.array(scores)
self.results['test_auc'] = roc_auc_score(labels, scores)
# Log results
logger.info('Test AUC: {:.2f}%'.format(100. * self.results['test_auc']))
logger.info('Test Time: {:.3f}s'.format(self.results['test_time']))
logger.info('Finished testing.')
def load_ae(self, dataset_name, model_path):
"""Load pretrained autoencoder from model_path for feature extraction in a hybrid Isolation Forest model."""
model_dict = torch.load(model_path, map_location='cpu')
ae_net_dict = model_dict['ae_net_dict']
if dataset_name in ['mnist', 'fmnist', 'cifar10']:
net_name = dataset_name + '_LeNet'
else:
net_name = dataset_name + '_mlp'
if self.ae_net is None:
self.ae_net = build_autoencoder(net_name)
# update keys (since there was a change in network definition)
ae_keys = list(self.ae_net.state_dict().keys())
for i in range(len(ae_net_dict)):
k, v = ae_net_dict.popitem(False)
new_key = ae_keys[i]
ae_net_dict[new_key] = v
self.ae_net.load_state_dict(ae_net_dict)
self.ae_net.eval()
def save_model(self, export_path):
"""Save Isolation Forest model to export_path."""
pass
def load_model(self, import_path, device: str = 'cpu'):
"""Load Isolation Forest model from import_path."""
pass
def save_results(self, export_json):
"""Save results dict to a JSON-file."""
with open(export_json, 'w') as fp:
json.dump(self.results, fp)
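
One detail worth noting above is the sign flip on decision_function: scikit-learn returns larger values for inliers, whereas roc_auc_score here treats label 1 as the anomaly, so scores are negated to mean "higher = more anomalous". A tiny self-contained check of that convention on synthetic data (independent of the loaders above):

```python
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.metrics import roc_auc_score

rng = np.random.RandomState(0)
X_train = rng.randn(500, 2)                                    # inliers only
X_test = np.vstack([rng.randn(95, 2), rng.uniform(4, 6, size=(5, 2))])
y_test = np.array([0] * 95 + [1] * 5)                          # 1 = anomaly, as in the loaders

iso = IsolationForest(n_estimators=100, random_state=0).fit(X_train)
scores = -iso.decision_function(X_test)                        # flip sign: higher = more anomalous
print('AUC: {:.3f}'.format(roc_auc_score(y_test, scores)))
```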

View File

@@ -0,0 +1,164 @@
import json
import logging
import time
import torch
import numpy as np
from torch.utils.data import DataLoader
from sklearn.neighbors import KernelDensity
from sklearn.metrics import roc_auc_score
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.model_selection import GridSearchCV
from base.base_dataset import BaseADDataset
from networks.main import build_autoencoder
class KDE(object):
"""A class for Kernel Density Estimation models."""
def __init__(self, hybrid=False, kernel='gaussian', n_jobs=-1, seed=None, **kwargs):
"""Init Kernel Density Estimation instance."""
self.kernel = kernel
self.n_jobs = n_jobs
self.seed = seed
self.model = KernelDensity(kernel=kernel, **kwargs)
self.bandwidth = self.model.bandwidth
self.hybrid = hybrid
self.ae_net = None # autoencoder network for the case of a hybrid model
self.results = {
'train_time': None,
'test_time': None,
'test_auc': None,
'test_scores': None
}
def train(self, dataset: BaseADDataset, device: str = 'cpu', n_jobs_dataloader: int = 0,
bandwidth_GridSearchCV: bool = True):
"""Trains the Kernel Density Estimation model on the training data."""
logger = logging.getLogger()
# do not drop the last batch (non-SGD optimization for the shallow baselines)
train_loader = DataLoader(dataset=dataset.train_set, batch_size=128, shuffle=True,
num_workers=n_jobs_dataloader, drop_last=False)
# Get data from loader
X = ()
for data in train_loader:
inputs, _, _, _ = data
inputs = inputs.to(device)
if self.hybrid:
inputs = self.ae_net.encoder(inputs) # in hybrid approach, take code representation of AE as features
X_batch = inputs.view(inputs.size(0), -1) # X_batch.shape = (batch_size, n_channels * height * width)
X += (X_batch.cpu().data.numpy(),)
X = np.concatenate(X)
# Training
logger.info('Starting training...')
start_time = time.time()
if bandwidth_GridSearchCV:
# use grid search cross-validation to select bandwidth
logger.info('Using GridSearchCV for bandwidth selection...')
params = {'bandwidth': np.logspace(0.5, 5, num=10, base=2)}
hyper_kde = GridSearchCV(KernelDensity(kernel=self.kernel), params, n_jobs=self.n_jobs, cv=5, verbose=0)
hyper_kde.fit(X)
self.bandwidth = hyper_kde.best_estimator_.bandwidth
logger.info('Best bandwidth: {:.8f}'.format(self.bandwidth))
self.model = hyper_kde.best_estimator_
else:
# if exponential kernel, re-initialize kde with bandwidth minimizing the numerical error
if self.kernel == 'exponential':
self.bandwidth = np.max(pairwise_distances(X)) ** 2
self.model = KernelDensity(kernel=self.kernel, bandwidth=self.bandwidth)
self.model.fit(X)
train_time = time.time() - start_time
self.results['train_time'] = train_time
logger.info('Training Time: {:.3f}s'.format(self.results['train_time']))
logger.info('Finished training.')
def test(self, dataset: BaseADDataset, device: str = 'cpu', n_jobs_dataloader: int = 0):
"""Tests the Kernel Density Estimation model on the test data."""
logger = logging.getLogger()
_, test_loader = dataset.loaders(batch_size=128, num_workers=n_jobs_dataloader)
# Get data from loader
idx_label_score = []
X = ()
idxs = []
labels = []
for data in test_loader:
inputs, label_batch, _, idx = data
inputs, label_batch, idx = inputs.to(device), label_batch.to(device), idx.to(device)
if self.hybrid:
inputs = self.ae_net.encoder(inputs) # in hybrid approach, take code representation of AE as features
X_batch = inputs.view(inputs.size(0), -1) # X_batch.shape = (batch_size, n_channels * height * width)
X += (X_batch.cpu().data.numpy(),)
idxs += idx.cpu().data.numpy().astype(np.int64).tolist()
labels += label_batch.cpu().data.numpy().astype(np.int64).tolist()
X = np.concatenate(X)
# Testing
logger.info('Starting testing...')
start_time = time.time()
scores = (-1.0) * self.model.score_samples(X)
self.results['test_time'] = time.time() - start_time
scores = scores.flatten()
# Save triples of (idx, label, score) in a list
idx_label_score += list(zip(idxs, labels, scores.tolist()))
self.results['test_scores'] = idx_label_score
# Compute AUC
_, labels, scores = zip(*idx_label_score)
labels = np.array(labels)
scores = np.array(scores)
self.results['test_auc'] = roc_auc_score(labels, scores)
# Log results
logger.info('Test AUC: {:.2f}%'.format(100. * self.results['test_auc']))
logger.info('Test Time: {:.3f}s'.format(self.results['test_time']))
logger.info('Finished testing.')
def load_ae(self, dataset_name, model_path):
"""Load pretrained autoencoder from model_path for feature extraction in a hybrid KDE model."""
model_dict = torch.load(model_path, map_location='cpu')
ae_net_dict = model_dict['ae_net_dict']
if dataset_name in ['mnist', 'fmnist', 'cifar10']:
net_name = dataset_name + '_LeNet'
else:
net_name = dataset_name + '_mlp'
if self.ae_net is None:
self.ae_net = build_autoencoder(net_name)
# update keys (since there was a change in network definition)
ae_keys = list(self.ae_net.state_dict().keys())
for i in range(len(ae_net_dict)):
k, v = ae_net_dict.popitem(False)
new_key = ae_keys[i]
ae_net_dict[new_key] = v
self.ae_net.load_state_dict(ae_net_dict)
self.ae_net.eval()
def save_model(self, export_path):
"""Save KDE model to export_path."""
pass
def load_model(self, import_path, device: str = 'cpu'):
"""Load KDE model from import_path."""
pass
def save_results(self, export_json):
"""Save results dict to a JSON-file."""
with open(export_json, 'w') as fp:
json.dump(self.results, fp)
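
The bandwidth selection in train() is a plain GridSearchCV over KernelDensity, scored by held-out log-likelihood. The same idea as a standalone snippet on synthetic data, reusing the grid np.logspace(0.5, 5, num=10, base=2) from above:

```python
import numpy as np
from sklearn.neighbors import KernelDensity
from sklearn.model_selection import GridSearchCV

rng = np.random.RandomState(0)
X = rng.randn(300, 5)

params = {'bandwidth': np.logspace(0.5, 5, num=10, base=2)}    # ~1.41 ... 32.0
search = GridSearchCV(KernelDensity(kernel='gaussian'), params, cv=5)
search.fit(X)

kde = search.best_estimator_
print('best bandwidth:', kde.bandwidth)
scores = -kde.score_samples(X)   # as in test(): lower density -> higher anomaly score
```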

View File

@@ -0,0 +1,221 @@
import json
import logging
import time
import torch
import numpy as np
from torch.utils.data import DataLoader
from sklearn.svm import OneClassSVM
from sklearn.metrics import roc_auc_score
from base.base_dataset import BaseADDataset
from networks.main import build_autoencoder
class OCSVM(object):
"""A class for One-Class SVM models."""
def __init__(self, kernel='rbf', nu=0.1, hybrid=False):
"""Init OCSVM instance."""
self.kernel = kernel
self.nu = nu
self.rho = None
self.gamma = None
self.model = OneClassSVM(kernel=kernel, nu=nu)
self.hybrid = hybrid
self.ae_net = None # autoencoder network for the case of a hybrid model
self.linear_model = None # also init a model with linear kernel if hybrid approach
self.results = {
'train_time': None,
'test_time': None,
'test_auc': None,
'test_scores': None,
'train_time_linear': None,
'test_time_linear': None,
'test_auc_linear': None
}
def train(self, dataset: BaseADDataset, device: str = 'cpu', n_jobs_dataloader: int = 0):
"""Trains the OC-SVM model on the training data."""
logger = logging.getLogger()
# do not drop the last batch (non-SGD optimization for the shallow baselines)
train_loader = DataLoader(dataset=dataset.train_set, batch_size=128, shuffle=True,
num_workers=n_jobs_dataloader, drop_last=False)
# Get data from loader
X = ()
for data in train_loader:
inputs, _, _, _ = data
inputs = inputs.to(device)
if self.hybrid:
inputs = self.ae_net.encoder(inputs) # in hybrid approach, take code representation of AE as features
X_batch = inputs.view(inputs.size(0), -1) # X_batch.shape = (batch_size, n_channels * height * width)
X += (X_batch.cpu().data.numpy(),)
X = np.concatenate(X)
# Training
logger.info('Starting training...')
# Select model via a hold-out set drawn from the test set (10% of test samples, e.g. 1000 for a 10k test set)
gammas = np.logspace(-7, 2, num=10, base=2)
best_auc = 0.0
# Sample hold-out set from test set
_, test_loader = dataset.loaders(batch_size=128, num_workers=n_jobs_dataloader)
X_test = ()
labels = []
for data in test_loader:
inputs, label_batch, _, _ = data
inputs, label_batch = inputs.to(device), label_batch.to(device)
if self.hybrid:
inputs = self.ae_net.encoder(inputs) # in hybrid approach, take code representation of AE as features
X_batch = inputs.view(inputs.size(0), -1) # X_batch.shape = (batch_size, n_channels * height * width)
X_test += (X_batch.cpu().data.numpy(),)
labels += label_batch.cpu().data.numpy().astype(np.int64).tolist()
X_test, labels = np.concatenate(X_test), np.array(labels)
n_test, n_normal, n_outlier = len(X_test), np.sum(labels == 0), np.sum(labels == 1)
n_val = int(0.1 * n_test)
n_val_normal, n_val_outlier = int(n_val * (n_normal/n_test)), int(n_val * (n_outlier/n_test))
perm = np.random.permutation(n_test)
X_val = np.concatenate((X_test[perm][labels[perm] == 0][:n_val_normal],
X_test[perm][labels[perm] == 1][:n_val_outlier]))
labels = np.array([0] * n_val_normal + [1] * n_val_outlier)
i = 1
for gamma in gammas:
# Model candidate
model = OneClassSVM(kernel=self.kernel, nu=self.nu, gamma=gamma)
# Train
start_time = time.time()
model.fit(X)
train_time = time.time() - start_time
# Test on small hold-out set from test set
scores = (-1.0) * model.decision_function(X_val)
scores = scores.flatten()
# Compute AUC
auc = roc_auc_score(labels, scores)
logger.info(f' | Model {i:02}/{len(gammas):02} | Gamma: {gamma:.8f} | Train Time: {train_time:.3f}s '
f'| Val AUC: {100. * auc:.2f} |')
if auc > best_auc:
best_auc = auc
self.model = model
self.gamma = gamma
self.results['train_time'] = train_time
i += 1
# If hybrid, also train a model with linear kernel
if self.hybrid:
self.linear_model = OneClassSVM(kernel='linear', nu=self.nu)
start_time = time.time()
self.linear_model.fit(X)
train_time = time.time() - start_time
self.results['train_time_linear'] = train_time
logger.info(f'Best Model: | Gamma: {self.gamma:.8f} | AUC: {100. * best_auc:.2f}')
logger.info('Training Time: {:.3f}s'.format(self.results['train_time']))
logger.info('Finished training.')
def test(self, dataset: BaseADDataset, device: str = 'cpu', n_jobs_dataloader: int = 0):
"""Tests the OC-SVM model on the test data."""
logger = logging.getLogger()
_, test_loader = dataset.loaders(batch_size=128, num_workers=n_jobs_dataloader)
# Get data from loader
idx_label_score = []
X = ()
idxs = []
labels = []
for data in test_loader:
inputs, label_batch, _, idx = data
inputs, label_batch, idx = inputs.to(device), label_batch.to(device), idx.to(device)
if self.hybrid:
inputs = self.ae_net.encoder(inputs) # in hybrid approach, take code representation of AE as features
X_batch = inputs.view(inputs.size(0), -1) # X_batch.shape = (batch_size, n_channels * height * width)
X += (X_batch.cpu().data.numpy(),)
idxs += idx.cpu().data.numpy().astype(np.int64).tolist()
labels += label_batch.cpu().data.numpy().astype(np.int64).tolist()
X = np.concatenate(X)
# Testing
logger.info('Starting testing...')
start_time = time.time()
scores = (-1.0) * self.model.decision_function(X)
self.results['test_time'] = time.time() - start_time
scores = scores.flatten()
self.rho = -self.model.intercept_[0]
# Save triples of (idx, label, score) in a list
idx_label_score += list(zip(idxs, labels, scores.tolist()))
self.results['test_scores'] = idx_label_score
# Compute AUC
_, labels, scores = zip(*idx_label_score)
labels = np.array(labels)
scores = np.array(scores)
self.results['test_auc'] = roc_auc_score(labels, scores)
# If hybrid, also test model with linear kernel
if self.hybrid:
start_time = time.time()
scores_linear = (-1.0) * self.linear_model.decision_function(X)
self.results['test_time_linear'] = time.time() - start_time
scores_linear = scores_linear.flatten()
self.results['test_auc_linear'] = roc_auc_score(labels, scores_linear)
logger.info('Test AUC linear model: {:.2f}%'.format(100. * self.results['test_auc_linear']))
logger.info('Test Time linear model: {:.3f}s'.format(self.results['test_time_linear']))
# Log results
logger.info('Test AUC: {:.2f}%'.format(100. * self.results['test_auc']))
logger.info('Test Time: {:.3f}s'.format(self.results['test_time']))
logger.info('Finished testing.')
def load_ae(self, dataset_name, model_path):
"""Load pretrained autoencoder from model_path for feature extraction in a hybrid OC-SVM model."""
model_dict = torch.load(model_path, map_location='cpu')
ae_net_dict = model_dict['ae_net_dict']
if dataset_name in ['mnist', 'fmnist', 'cifar10']:
net_name = dataset_name + '_LeNet'
else:
net_name = dataset_name + '_mlp'
if self.ae_net is None:
self.ae_net = build_autoencoder(net_name)
# update keys (since there was a change in network definition)
ae_keys = list(self.ae_net.state_dict().keys())
for i in range(len(ae_net_dict)):
k, v = ae_net_dict.popitem(False)
new_key = ae_keys[i]
ae_net_dict[new_key] = v
self.ae_net.load_state_dict(ae_net_dict)
self.ae_net.eval()
def save_model(self, export_path):
"""Save OC-SVM model to export_path."""
pass
def load_model(self, import_path, device: str = 'cpu'):
"""Load OC-SVM model from import_path."""
pass
def save_results(self, export_json):
"""Save results dict to a JSON-file."""
with open(export_json, 'w') as fp:
json.dump(self.results, fp)
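
The gamma search above simply refits a fresh OneClassSVM per candidate and keeps the one with the best hold-out AUC. A compressed, self-contained version of that loop on synthetic data (the grid mirrors np.logspace(-7, 2, num=10, base=2) from train()):

```python
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.metrics import roc_auc_score

rng = np.random.RandomState(0)
X_train = rng.randn(500, 2)
X_val = np.vstack([rng.randn(95, 2), rng.uniform(4, 6, size=(5, 2))])
y_val = np.array([0] * 95 + [1] * 5)                           # 1 = anomaly

best_auc, best_gamma, best_model = 0.0, None, None
for gamma in np.logspace(-7, 2, num=10, base=2):
    model = OneClassSVM(kernel='rbf', nu=0.1, gamma=gamma).fit(X_train)
    auc = roc_auc_score(y_val, -model.decision_function(X_val))
    if auc > best_auc:
        best_auc, best_gamma, best_model = auc, gamma, model
print('best gamma: {:.8f} | val AUC: {:.2f}%'.format(best_gamma, 100. * best_auc))
```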

View File

@@ -0,0 +1 @@
from .ssad_convex import ConvexSSAD

View File

@@ -0,0 +1,186 @@
########################################################################################################################
# Acknowledgements: https://github.com/nicococo/tilitools
########################################################################################################################
import numpy as np
from cvxopt import matrix, spmatrix, sparse, spdiag
from cvxopt.solvers import qp
class ConvexSSAD:
""" Convex semi-supervised anomaly detection with hinge-loss and L2 regularizer
as described in Goernitz et al., Towards Supervised Anomaly Detection, JAIR, 2013
minimize 0.5 ||w||^2_2 - rho - kappa*gamma + eta_u sum_i xi_i + eta_l sum_j xi_j
{w,rho,gamma>=0,xi>=0}
subject to <w,phi(x_i)> >= rho - xi_i
y_j<w,phi(x_j)> >= y_j*rho + gamma - xi_j
And the corresponding dual optimization problem:
maximize -0.5 sum_(i,j) alpha_i alpha_j y_i y_j k(x_i,x_j)
{0<=alpha_i<=eta_i}
subject to kappa <= sum_j alpha_j (for all labeled examples)
1 = sum_j y_j alpha_j (for all examples)
We introduce labels y_i = +1 for all unlabeled examples which enables us to combine sums.
Note: Only dual solution is supported.
Written by: Nico Goernitz, TU Berlin, 2013/14
"""
PRECISION = 1e-9 # important: affects the threshold, support vectors and speed!
def __init__(self, kernel, y, kappa=1.0, Cp=1.0, Cu=1.0, Cn=1.0):
assert(len(y.shape) == 1)
self.kernel = kernel
self.y = y # (vector) corresponding labels (+1,-1 and 0 for unlabeled)
self.kappa = kappa # (scalar) regularizer for importance of the margin
self.Cp = Cp # (scalar) the regularization constant for positively labeled samples > 0
self.Cu = Cu # (scalar) the regularization constant for unlabeled samples > 0
self.Cn = Cn # (scalar) the regularization constant for outliers > 0
self.samples = y.size
self.labeled = np.sum(np.abs(y))
# cy: (vector) converted label vector (+1 for pos and unlabeled, -1 for outliers)
self.cy = y.copy().reshape((y.size, 1))
self.cy[y == 0] = 1 # cy=+1.0 (unlabeled,pos) & cy=-1.0 (neg)
# cl: (vector) converted label vector (+1 for labeled examples, 0.0 for unlabeled)
self.cl = np.abs(y.copy()) # cl=+1.0 (labeled) cl=0.0 (unlabeled)
# (vector) converted upper bound box constraint for each example
self.cC = np.zeros(y.size) # cC=Cu (unlabeled) cC=Cp (pos) cC=Cn (neg)
self.cC[y == 0] = Cu
self.cC[y == 1] = Cp
self.cC[y ==-1] = Cn
self.alphas = None
self.svs = None # (vector) list of support vector (contains indices)
self.threshold = 0.0 # (scalar) the optimized threshold (rho)
# if there are no labeled examples, then set kappa to 0.0 otherwise
# the dual constraint kappa <= sum_{i \in labeled} alpha_i = 0.0 will
# prohibit a solution
if self.labeled == 0:
print('There are no labeled examples, hence setting kappa=0.0')
self.kappa = 0.0
print('Convex semi-supervised anomaly detection with {0} samples ({1} labeled).'.format(self.samples, self.labeled))
def set_train_kernel(self, kernel):
dim1, dim2 = kernel.shape
print([dim1, dim2])
assert(dim1 == dim2 and dim1 == self.samples)
self.kernel = kernel
def fit(self, check_psd_eigs=False):
# number of training examples
N = self.samples
# generate the label kernel
Y = self.cy.dot(self.cy.T)
# generate the final PDS kernel
P = matrix(self.kernel*Y)
# check for PSD
if check_psd_eigs:
eigs = np.linalg.eigvalsh(np.array(P))
if eigs[0] < 0.0:
print('Smallest eigenvalue is {0}'.format(eigs[0]))
P += spdiag([-eigs[0] for i in range(N)])
# there is no linear part of the objective
q = matrix(0.0, (N, 1))
# sum_i y_i alpha_i = A alpha = b = 1.0
A = matrix(self.cy, (1, self.samples), 'd')
b = matrix(1.0, (1, 1))
# inequality constraints: G alpha <= h
# 1) alpha_i <= C_i
# 2) -alpha_i <= 0
G12 = spmatrix(1.0, range(N), range(N))
h1 = matrix(self.cC)
h2 = matrix(0.0, (N, 1))
G = sparse([G12, -G12])
h = matrix([h1, h2])
if self.labeled > 0:
# 3) kappa <= \sum_i labeled_i alpha_i -> -cl' alpha <= -kappa
print('Labeled data found.')
G3 = -matrix(self.cl, (1, self.cl.size), 'd')
h3 = -matrix(self.kappa, (1, 1))
G = sparse([G12, -G12, G3])
h = matrix([h1, h2, h3])
# solve the quadratic program
sol = qp(P, -q, G, h, A, b)
# store solution
self.alphas = np.array(sol['x'])
# find all support vectors, i.e. indices i with alpha_i >= PRECISION (0 < alpha_i <= C_i)
self.svs = np.where(self.alphas >= ConvexSSAD.PRECISION)[0]
# these should sum to one
print('Validate solution:')
print('- found {0} support vectors'.format(len(self.svs)))
print('0 <= alpha_i : {0} of {1}'.format(np.sum(0. <= self.alphas), N))
print('- sum_(i) alpha_i cy_i = {0} = 1.0'.format(np.sum(self.alphas*self.cy)))
print('- sum_(i in sv) alpha_i cy_i = {0} ~ 1.0 (approx error)'.format(np.sum(self.alphas[self.svs]*self.cy[self.svs])))
print('- sum_(i in labeled) alpha_i = {0} >= {1} = kappa'.format(np.sum(self.alphas[self.cl == 1]), self.kappa))
print('- sum_(i in unlabeled) alpha_i = {0}'.format(np.sum(self.alphas[self.y == 0])))
print('- sum_(i in positives) alpha_i = {0}'.format(np.sum(self.alphas[self.y == 1])))
print('- sum_(i in negatives) alpha_i = {0}'.format(np.sum(self.alphas[self.y ==-1])))
# infer threshold (rho)
psvs = np.where(self.y[self.svs] == 0)[0]
# case 1: unlabeled support vectors available
self.threshold = 0.
unl_threshold = -1e12
lbl_threshold = -1e12
if psvs.size > 0:
k = self.kernel[:, self.svs]
k = k[self.svs[psvs], :]
unl_threshold = np.max(self.apply(k))
if np.sum(self.cl) > 1e-12:
# case 2: labeled support vectors available
k = self.kernel[:, self.svs]
k = k[self.svs, :]
thres = self.apply(k)
pinds = np.where(self.y[self.svs] == +1)[0]
ninds = np.where(self.y[self.svs] == -1)[0]
# having only negative labeled support vectors should not occur
if ninds.size > 0 and pinds.size == 0:
print('ERROR: Check pre-defined PRECISION.')
lbl_threshold = np.max(thres[ninds])
elif ninds.size == 0:
lbl_threshold = np.max(thres[pinds])
else:
# smallest negative + largest positive
p = np.max(thres[pinds])
n = np.min(thres[ninds])
lbl_threshold = (n+p)/2.
self.threshold = np.max((unl_threshold, lbl_threshold))
def get_threshold(self):
return self.threshold
def get_support_dual(self):
return self.svs
def get_alphas(self):
return self.alphas
def apply(self, kernel):
""" Application of dual trained ssad.
kernel = get_kernel(Y, X[:, cssad.svs], kernel_type, kernel_param)
"""
if kernel.shape[1] == self.samples:
# if kernel is not restricted to support vectors
ay = self.alphas * self.cy
else:
ay = self.alphas[self.svs] * self.cy[self.svs]
return ay.T.dot(kernel.T).T - self.threshold
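
For readability, the two optimization problems from the class docstring can be typeset as follows. This only restates the docstring's ASCII math; here eta_i denotes the per-sample box constraint stored in cC, and U and L are the unlabeled and labeled index sets. The primal problem is:

```latex
\begin{aligned}
\min_{w,\;\rho,\;\gamma \ge 0,\;\xi \ge 0}\quad
  & \tfrac{1}{2}\lVert w\rVert_2^2 \;-\; \rho \;-\; \kappa\gamma
    \;+\; \eta_u \sum_{i \in U} \xi_i \;+\; \eta_l \sum_{j \in L} \xi_j \\
\text{s.t.}\quad
  & \langle w, \phi(x_i)\rangle \;\ge\; \rho - \xi_i \qquad (i \in U), \\
  & y_j \langle w, \phi(x_j)\rangle \;\ge\; y_j\rho + \gamma - \xi_j \qquad (j \in L),
\end{aligned}
```

and the corresponding dual, which is what fit() solves via cvxopt's qp:

```latex
\begin{aligned}
\max_{0 \le \alpha_i \le \eta_i}\quad
  & -\tfrac{1}{2} \sum_{i,j} \alpha_i \alpha_j\, y_i y_j\, k(x_i, x_j) \\
\text{s.t.}\quad
  & \sum_{j \in L} \alpha_j \;\ge\; \kappa,
    \qquad \sum_{j} y_j \alpha_j \;=\; 1 .
\end{aligned}
```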

View File

@@ -0,0 +1,244 @@
import json
import logging
import time
import torch
import numpy as np
from torch.utils.data import DataLoader
from .shallow_ssad.ssad_convex import ConvexSSAD
from sklearn.metrics import roc_auc_score
from sklearn.metrics.pairwise import pairwise_kernels
from base.base_dataset import BaseADDataset
from networks.main import build_autoencoder
class SSAD(object):
"""
A class for kernel SSAD models as described in Goernitz et al., Towards Supervised Anomaly Detection, JAIR, 2013.
"""
def __init__(self, kernel='rbf', kappa=1.0, Cp=1.0, Cu=1.0, Cn=1.0, hybrid=False):
"""Init SSAD instance."""
self.kernel = kernel
self.kappa = kappa
self.Cp = Cp
self.Cu = Cu
self.Cn = Cn
self.rho = None
self.gamma = None
self.model = None
self.X_svs = None
self.hybrid = hybrid
self.ae_net = None # autoencoder network for the case of a hybrid model
self.linear_model = None # also init a model with linear kernel if hybrid approach
self.linear_X_svs = None
self.results = {
'train_time': None,
'test_time': None,
'test_auc': None,
'test_scores': None,
'train_time_linear': None,
'test_time_linear': None,
'test_auc_linear': None
}
def train(self, dataset: BaseADDataset, device: str = 'cpu', n_jobs_dataloader: int = 0):
"""Trains the SSAD model on the training data."""
logger = logging.getLogger()
# do not drop the last batch (non-SGD optimization for the shallow baselines)
train_loader = DataLoader(dataset=dataset.train_set, batch_size=128, shuffle=True,
num_workers=n_jobs_dataloader, drop_last=False)
# Get data from loader
X = ()
semi_targets = []
for data in train_loader:
inputs, _, semi_targets_batch, _ = data
inputs, semi_targets_batch = inputs.to(device), semi_targets_batch.to(device)
if self.hybrid:
inputs = self.ae_net.encoder(inputs) # in hybrid approach, take code representation of AE as features
X_batch = inputs.view(inputs.size(0), -1) # X_batch.shape = (batch_size, n_channels * height * width)
X += (X_batch.cpu().data.numpy(),)
semi_targets += semi_targets_batch.cpu().data.numpy().astype(np.int64).tolist()
X, semi_targets = np.concatenate(X), np.array(semi_targets)
# Training
logger.info('Starting training...')
# Select model via a hold-out set drawn from the test set (10% of test samples, e.g. 1000 for a 10k test set)
gammas = np.logspace(-7, 2, num=10, base=2)
best_auc = 0.0
# Sample hold-out set from test set
_, test_loader = dataset.loaders(batch_size=128, num_workers=n_jobs_dataloader)
X_test = ()
labels = []
for data in test_loader:
inputs, label_batch, _, _ = data
inputs, label_batch = inputs.to(device), label_batch.to(device)
if self.hybrid:
inputs = self.ae_net.encoder(inputs) # in hybrid approach, take code representation of AE as features
X_batch = inputs.view(inputs.size(0), -1) # X_batch.shape = (batch_size, n_channels * height * width)
X_test += (X_batch.cpu().data.numpy(),)
labels += label_batch.cpu().data.numpy().astype(np.int64).tolist()
X_test, labels = np.concatenate(X_test), np.array(labels)
n_test, n_normal, n_outlier = len(X_test), np.sum(labels == 0), np.sum(labels == 1)
n_val = int(0.1 * n_test)
n_val_normal, n_val_outlier = int(n_val * (n_normal/n_test)), int(n_val * (n_outlier/n_test))
perm = np.random.permutation(n_test)
X_val = np.concatenate((X_test[perm][labels[perm] == 0][:n_val_normal],
X_test[perm][labels[perm] == 1][:n_val_outlier]))
labels = np.array([0] * n_val_normal + [1] * n_val_outlier)
i = 1
for gamma in gammas:
# Build the training kernel
kernel = pairwise_kernels(X, X, metric=self.kernel, gamma=gamma)
# Model candidate
model = ConvexSSAD(kernel, semi_targets, Cp=self.Cp, Cu=self.Cu, Cn=self.Cn)
# Train
start_time = time.time()
model.fit()
train_time = time.time() - start_time
# Test on small hold-out set from test set
kernel_val = pairwise_kernels(X_val, X[model.svs, :], metric=self.kernel, gamma=gamma)
scores = (-1.0) * model.apply(kernel_val)
scores = scores.flatten()
# Compute AUC
auc = roc_auc_score(labels, scores)
logger.info(f' | Model {i:02}/{len(gammas):02} | Gamma: {gamma:.8f} | Train Time: {train_time:.3f}s '
f'| Val AUC: {100. * auc:.2f} |')
if auc > best_auc:
best_auc = auc
self.model = model
self.gamma = gamma
self.results['train_time'] = train_time
i += 1
# Get support vectors for testing
self.X_svs = X[self.model.svs, :]
# If hybrid, also train a model with linear kernel
if self.hybrid:
linear_kernel = pairwise_kernels(X, X, metric='linear')
self.linear_model = ConvexSSAD(linear_kernel, semi_targets, Cp=self.Cp, Cu=self.Cu, Cn=self.Cn)
start_time = time.time()
self.linear_model.fit()
train_time = time.time() - start_time
self.results['train_time_linear'] = train_time
self.linear_X_svs = X[self.linear_model.svs, :]
logger.info(f'Best Model: | Gamma: {self.gamma:.8f} | AUC: {100. * best_auc:.2f}')
logger.info('Training Time: {:.3f}s'.format(self.results['train_time']))
logger.info('Finished training.')
def test(self, dataset: BaseADDataset, device: str = 'cpu', n_jobs_dataloader: int = 0):
"""Tests the SSAD model on the test data."""
logger = logging.getLogger()
_, test_loader = dataset.loaders(batch_size=128, num_workers=n_jobs_dataloader)
# Get data from loader
idx_label_score = []
X = ()
idxs = []
labels = []
for data in test_loader:
inputs, label_batch, _, idx = data
inputs, label_batch, idx = inputs.to(device), label_batch.to(device), idx.to(device)
if self.hybrid:
inputs = self.ae_net.encoder(inputs) # in hybrid approach, take code representation of AE as features
X_batch = inputs.view(inputs.size(0), -1) # X_batch.shape = (batch_size, n_channels * height * width)
X += (X_batch.cpu().data.numpy(),)
idxs += idx.cpu().data.numpy().astype(np.int64).tolist()
labels += label_batch.cpu().data.numpy().astype(np.int64).tolist()
X = np.concatenate(X)
# Testing
logger.info('Starting testing...')
start_time = time.time()
# Build kernel
kernel = pairwise_kernels(X, self.X_svs, metric=self.kernel, gamma=self.gamma)
scores = (-1.0) * self.model.apply(kernel)
self.results['test_time'] = time.time() - start_time
scores = scores.flatten()
self.rho = -self.model.threshold
# Save triples of (idx, label, score) in a list
idx_label_score += list(zip(idxs, labels, scores.tolist()))
self.results['test_scores'] = idx_label_score
# Compute AUC
_, labels, scores = zip(*idx_label_score)
labels = np.array(labels)
scores = np.array(scores)
self.results['test_auc'] = roc_auc_score(labels, scores)
# If hybrid, also test model with linear kernel
if self.hybrid:
start_time = time.time()
linear_kernel = pairwise_kernels(X, self.linear_X_svs, metric='linear')
scores_linear = (-1.0) * self.linear_model.apply(linear_kernel)
self.results['test_time_linear'] = time.time() - start_time
scores_linear = scores_linear.flatten()
self.results['test_auc_linear'] = roc_auc_score(labels, scores_linear)
logger.info('Test AUC linear model: {:.2f}%'.format(100. * self.results['test_auc_linear']))
logger.info('Test Time linear model: {:.3f}s'.format(self.results['test_time_linear']))
# Log results
logger.info('Test AUC: {:.2f}%'.format(100. * self.results['test_auc']))
logger.info('Test Time: {:.3f}s'.format(self.results['test_time']))
logger.info('Finished testing.')
def load_ae(self, dataset_name, model_path):
"""Load pretrained autoencoder from model_path for feature extraction in a hybrid SSAD model."""
model_dict = torch.load(model_path, map_location='cpu')
ae_net_dict = model_dict['ae_net_dict']
if dataset_name in ['mnist', 'fmnist', 'cifar10']:
net_name = dataset_name + '_LeNet'
else:
net_name = dataset_name + '_mlp'
if self.ae_net is None:
self.ae_net = build_autoencoder(net_name)
# update keys (since there was a change in network definition)
ae_keys = list(self.ae_net.state_dict().keys())
for i in range(len(ae_net_dict)):
k, v = ae_net_dict.popitem(False)
new_key = ae_keys[i]
ae_net_dict[new_key] = v
self.ae_net.load_state_dict(ae_net_dict)
self.ae_net.eval()
def save_model(self, export_path):
"""Save SSAD model to export_path."""
pass
def load_model(self, import_path, device: str = 'cpu'):
"""Load SSAD model from import_path."""
pass
def save_results(self, export_json):
"""Save results dict to a JSON-file."""
with open(export_json, 'w') as fp:
json.dump(self.results, fp)
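
Tying ssad.py and ssad_convex.py together: ConvexSSAD works entirely on precomputed kernels, fit on the full train-vs-train kernel and then scored against the support vectors only, with the usual sign flip. A small sketch on synthetic data; labels follow the convention read from the code above (+1 labeled normal, -1 labeled anomaly, 0 unlabeled), and the import path is an assumption:

```python
import numpy as np
from sklearn.metrics.pairwise import pairwise_kernels
# from baselines.shallow_ssad import ConvexSSAD   # import path is an assumption

rng = np.random.RandomState(0)
X_train = np.vstack([rng.randn(90, 2), rng.uniform(4, 6, size=(10, 2))])
semi_targets = np.zeros(100, dtype=np.int64)
semi_targets[:5] = 1            # a few known normals
semi_targets[90:95] = -1        # a few known anomalies

gamma = 0.5
K_train = pairwise_kernels(X_train, X_train, metric='rbf', gamma=gamma)
model = ConvexSSAD(K_train, semi_targets, kappa=1.0, Cp=1.0, Cu=1.0, Cn=1.0)
model.fit()

X_test = rng.randn(20, 2)
K_test = pairwise_kernels(X_test, X_train[model.svs, :], metric='rbf', gamma=gamma)
scores = -model.apply(K_test).flatten()   # as in SSAD.test(): higher = more anomalous
```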