# -*- coding: utf-8 -*-
"""These are test functions for MDP classifiers.
"""
from _tools import *
from mdp import ClassifierNode
from mdp.nodes import (SignumClassifier, PerceptronClassifier,
                       SimpleMarkovClassifier,
                       DiscreteHopfieldClassifier, KMeansClassifier)


def _sigmoid(t):
    # logistic function, used to fabricate probabilities below
    return 1.0 / (1.0 + numx.exp(-t))


class _BogusClassifier(ClassifierNode):
    """Minimal untrainable classifier whose probabilities are a
    logistic function of the row sum -- only for testing the
    ClassifierNode interface."""

    @staticmethod
    def is_trainable():
        return False

    def _label(self, x):
        # the highest-ranked label of each sample
        return [r[0] for r in self.rank(x)]

    def _prob(self, x):
        probs = []
        for xi in x:
            p = _sigmoid(sum(xi))
            probs.append({-1: p, 1: 1 - p})
        return probs


def testClassifierNode_ranking():
    clf = _BogusClassifier()
    samples = numx_rand.random((30, 20)) - 0.5
    for ranking, probs in zip(clf.rank(samples), clf.prob(samples)):
        # labels must come out sorted by decreasing probability
        assert probs[ranking[0]] >= probs[ranking[1]], \
            "Rank returns labels in incorrect order"
        # the two class probabilities must add up to one
        assert 0.999 < probs[ranking[0]] + probs[ranking[1]] < 1.001


def testClassifier_execute_method():
    """Test that the execute result has the correct format when
    execute_method is used.
    """
    clf = _BogusClassifier(execute_method="label")
    data = numx_rand.random((5, 20)) - 0.5

    labels = clf.execute(data)
    assert isinstance(labels, list)
    assert isinstance(labels[0], int)

    # switching execute_method changes the element type of the result
    for method, expected_type in [("prob", dict), ("rank", list)]:
        clf.execute_method = method
        result = clf.execute(data)
        assert isinstance(result, list)
        assert isinstance(result[0], expected_type)


def testSignumClassifier():
    clf = SignumClassifier()
    labels = clf.label(mdp.numx.array([[1, 2, -3, -4], [1, 2, 3, 4]]))
    # input_dim is inferred from the data
    assert clf.input_dim == 4
    # the label is the sign of the row sum
    assert labels.tolist() == [-1, 1]


def testPerceptronClassifier():
    # OR is linearly separable and must be learned exactly
    or_classifier = PerceptronClassifier()
    for _ in xrange(100):
        or_classifier.train(mdp.numx.array([[0., 0.]]), -1)
        or_classifier.train(mdp.numx.array([[0., 1.], [1., 0.], [1., 1.]]), 1)
    assert or_classifier.input_dim == 2
    labels = or_classifier.label(mdp.numx.array([[0., 0.], [0., 1.],
                                                 [1., 0.], [1., 1.]]))
    assert labels.tolist() == [-1, 1, 1, 1]

    # AND is linearly separable as well
    and_classifier = PerceptronClassifier()
    for _ in xrange(100):
        and_classifier.train(mdp.numx.array([[0., 0.], [0., 1.], [1., 0.]]),
                             -1)
        and_classifier.train(mdp.numx.array([[1., 1.]]), 1)
    labels = and_classifier.label(mdp.numx.array([[0., 0.], [0., 1.],
                                                  [1., 0.], [1., 1.]]))
    assert labels.tolist() == [-1, -1, -1, 1]

    # XOR is NOT linearly separable, so a single perceptron must fail
    xor_classifier = PerceptronClassifier()
    for _ in xrange(100):
        xor_classifier.train(mdp.numx.array([[0., 0.], [1., 1.]]), -1)
        xor_classifier.train(mdp.numx.array([[0., 1.], [1., 0.]]), 1)
    labels = xor_classifier.label(mdp.numx.array([[0., 0.], [0., 1.],
                                                  [1., 0.], [1., 1.]]))
    assert labels.tolist() != [-1, 1, 1, -1], \
        "Something must be wrong here. XOR is impossible in a single-layered perceptron."
def testSimpleMarkovClassifier():
    """Learn character transitions from a short text and check that the
    transition probabilities are normalized and correct."""
    markov = SimpleMarkovClassifier(dtype="c")
    text = "after the letter e follows either space or the letters r t or i"

    for token in text.split():
        token = token.lower()
        # single-character features, each labeled with the next character
        features = zip(" " + token)
        labels = list(token + " ")
        markov.train(mdp.numx.array(features), labels)

    assert markov.input_dim == 1

    num_transitions = 0
    for feature, count in markov.features.items():
        if not count:
            continue
        prob = markov.prob(mdp.numx.array([feature]))
        prob_sum = 0
        for p in prob:
            for k, v in p.items():
                prob_sum += v
                if v:
                    num_transitions += 1
        # probabilities for each feature must sum to one
        assert abs(prob_sum - 1.0) < 1e-5

    # count the distinct transitions in the text; subtracting the
    # space-to-space pair removes the artefact of two adjacent spaces
    normalized = " ".join(text.split())
    trans = len(set(zip(normalized + " ", " " + normalized)) -
                set([(' ', ' ')]))
    assert num_transitions == trans

    # only these characters ever follow an 'e' in the text
    letters_following_e = [' ', 'r', 't', 'i']
    letters_prob = markov.prob(mdp.numx.array([['e']]))[0]
    prob_sum = 0
    for letter, prob in letters_prob.items():
        prob_sum += prob
        if prob > 1e-5:
            assert letter in letters_following_e
    assert abs(prob_sum - 1.0) < 1e-5


def testDiscreteHopfieldClassifier():
    """Stored patterns must be fixpoints and be recovered from noise."""
    net = DiscreteHopfieldClassifier()
    memory_size = 100
    # binarized sine waves of decreasing frequency as training patterns
    patterns = numx.array(
        [numx.sin(numx.linspace(0, freq * numx.pi, memory_size)) > 0
         for freq in [100, 50, 20, 15, 10, 5, 2]])
    net.train(patterns)
    net.input_dim = memory_size

    # every stored pattern must be a fixpoint of the network
    for pattern in patterns:
        assert numx.all(pattern == net.label(numx.array([pattern])))

    # a pattern with ~5% of its bits flipped must still be recovered
    for pattern in patterns:
        noisy = numx.array(pattern)
        for i in xrange(len(noisy)):
            if numx.random.random() > 0.95:
                noisy[i] = not noisy[i]
        retrieved = net.label(numx.array([noisy]))
        # Hopfield nets cannot tell a pattern from its inversion,
        # so accept either one
        assert numx.all(retrieved == pattern) or \
               numx.all(retrieved != pattern)


def testKMeansClassifier():
    num_centroids = 3
    classifier = KMeansClassifier(num_centroids)
    samples = numx.random.rand(50, 2)
    classifier.train(samples)
    labels = classifier.label(samples)
    # every requested centroid should end up being used
    assert len(set(labels)) == num_centroids

    # two well-separated clouds must map to two distinct centroids
    classifier = KMeansClassifier(2)
    cloud_low = numx.random.rand(50, 2) - 1
    cloud_high = numx.random.rand(50, 2) + 1
    classifier.train(cloud_low)
    classifier.train(cloud_high)
    labels_low = classifier.label(cloud_low)
    labels_high = classifier.label(cloud_high)
    assert (len(set(labels_low)) == 1 and
            len(set(labels_high)) == 1 and
            set(labels_low) != set(labels_high)
            ), ("Error in K-Means classifier. "
                "This might be a bug or just a local minimum.")