# Multi-label learning for BP
# Published:
# Views:
# Views
# -*- coding: utf-8 -*-
"""
Created on 2017/4/5 9:52 2017
@author: Randolph.Lee
"""
from __future__ import division
from pybrain.structure import *
from Evaluation_metrics import *
from Threshold_function import get_threshold
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import BackpropTrainer
import scipy.io as scio
import numpy as np
class MLBP(object):
    """Multi-label classifier built on a three-layer back-propagation network.

    The network is a PyBrain ``FeedForwardNetwork`` with a linear input layer,
    one hidden layer and one output unit per label.  Features are min-max
    scaled before training and prediction.

    Attributes filled in by the methods:
        fnn: the network built by ``train_mlbp``.
        min_max: 2 x N array (row 0 = per-feature minimum, row 1 = maximum)
            used for scaling; derived from the training data when not given.
        output: second value returned by ``get_threshold`` in ``test_mlbp``;
            ``evaluation`` treats it as per-instance, per-label scores.
        hamming_loss, ranking_loss, one_error, coverage, average_precision:
            metrics computed by ``evaluation``.
    """

    def __init__(self, hidden_neuron, alpha=0.05, epochs=100, in_type=2, out_type=2, cost=0.1, min_max=None):
        """
        :param hidden_neuron: Number of hidden neurons used in the network
        :param alpha: Learning rate for updating weights and biases, default=0.05
        :param epochs: Maximum number of training epochs, default=100
        :param in_type: Activation of the hidden neurons, 1 for 'logsig', anything else for 'tansig'
        :param out_type: Activation of the output neurons, 1 for 'logsig', anything else for 'tansig'
        :param cost: Cost parameter used for regularization, default=0.1
        :param min_max: Optional 2 x N array of per-feature minima (row 0) and maxima (row 1)
        """
        # hyper-parameters
        self.hidden_neuron = hidden_neuron
        self.alpha = alpha
        self.epochs = epochs
        self.in_type = in_type
        self.out_type = out_type
        self.cost = cost
        self.min_max = min_max
        # results, populated by test_mlbp() / evaluation()
        self.output = None
        self.hamming_loss = 0.0
        self.ranking_loss = 0.0
        self.one_error = 0.0
        self.coverage = 0.0
        self.average_precision = 0.0
        self.fnn = None

    def set_activation(self):
        """
        Select the PyBrain layer classes for the hidden and output layers.
        :return: (hidden layer class, output layer class)
        """
        hidden_activate = SigmoidLayer if self.in_type == 1 else TanhLayer
        out_activate = SigmoidLayer if self.out_type == 1 else TanhLayer
        return hidden_activate, out_activate

    def _scale_features(self, data):
        """
        Min-max scale ``data`` column-wise with ``self.min_max``.
        :param data: An MxN array-like of raw feature values
        :return: A new MxN float array; the input is left untouched
        """
        bounds = np.asarray(self.min_max, dtype=float)
        lower = bounds[0]
        span = bounds[1] - lower
        # A constant feature has zero span; dividing by 1 instead maps it to
        # (value - minimum) rather than producing NaN/inf.
        safe_span = np.where(span == 0, 1.0, span)
        return (np.asarray(data, dtype=float) - lower) / safe_span

    def train_mlbp(self, data_train, dimension, num_class):
        """
        Build BP neural network and train it
        :param data_train: PyBrain data set holding data_input and data_target
        :param dimension: the number of features -> input
        :param num_class: the number of class -> output
        :return: the trained BP network (also stored in self.fnn)
        """
        # The trainer's weight decay is twice the cost parameter.  It is kept
        # in a local so that self.cost is not doubled again on every call.
        weight_decay = 2 * self.cost

        # Initialize the multi-label neural network
        self.fnn = FeedForwardNetwork()
        hidden_activate, out_activate = self.set_activation()

        # three layers: input_layer, hidden_layer, output_layer
        input_layer = LinearLayer(dimension, name="input_layer")
        hidden_layer = hidden_activate(self.hidden_neuron, name="hidden_layer")
        output_layer = out_activate(num_class, name="output_layer")
        self.fnn.addInputModule(input_layer)
        self.fnn.addModule(hidden_layer)
        self.fnn.addOutputModule(output_layer)

        # fully connect input -> hidden -> output
        self.fnn.addConnection(FullConnection(input_layer, hidden_layer))
        self.fnn.addConnection(FullConnection(hidden_layer, output_layer))

        # finalize the topology so the network can be used
        self.fnn.sortModules()

        # Train the multi-label neural network using back-propagation
        trainer = BackpropTrainer(self.fnn, data_train, verbose=True, learningrate=self.alpha,
                                  weightdecay=weight_decay)
        trainer.trainUntilConvergence(maxEpochs=self.epochs)
        return self.fnn

    def test_mlbp(self, train_data, train_target, test_data, test_target):
        """
        Scale the data, train the network and compute thresholds for the test set.

        The caller's arrays are not modified: scaling is done on float copies,
        so integer inputs are not truncated and calling this method twice with
        the same arrays does not scale them twice.

        :param train_data: An M1xN array, the ith training instance is stored in train_data[i, :]
        :param train_target: A M1xQ array, if the ith training instance belongs to the jth class,
                             then train_target[i,j] equals +1, otherwise train_target[i,j] equals -1
        :param test_data: An M2xN array, the ith testing instance is stored in test_data[i, :]
        :param test_target: An M2xQ array, the label set of testing instances
        :return: threshold (first value returned by get_threshold)
        """
        train_data = np.array(train_data, dtype=float)
        test_data = np.array(test_data, dtype=float)
        _, dimension = train_data.shape
        _, num_class = test_target.shape

        # derive the scaling bounds from the training data unless supplied
        if self.min_max is None:
            self.min_max = np.vstack((train_data.min(axis=0), train_data.max(axis=0)))

        # standardize training and testing data with the same bounds
        train_data = self._scale_features(train_data)
        test_data = self._scale_features(test_data)

        data_set = SupervisedDataSet(inp=dimension, target=num_class)
        for features, labels in zip(train_data, train_target):
            data_set.addSample(inp=features, target=labels)

        # train the multi-label back_propagation
        self.train_mlbp(data_train=data_set, dimension=dimension, num_class=num_class)
        threshold, self.output = get_threshold(train_data, train_target, test_data, self.fnn)
        return threshold

    def evaluation(self, test_target, threshold):
        """
        compute the different evaluation indicators
        :param test_target: An M2xQ array, the label set of testing instances
        :param threshold: one threshold per testing instance (length M2 or M2x1)
        :return: None; the metrics are stored on the instance
        :raises RuntimeError: if test_mlbp() has not produced self.output yet
        """
        if self.output is None:
            raise RuntimeError("no test output available; call test_mlbp() before evaluation()")

        num_testing, num_class = test_target.shape
        scores = np.asarray(self.output)[:num_testing, :num_class]
        cutoffs = np.asarray(threshold, dtype=float).reshape(-1)[:num_testing]
        # a label is predicted (+1) when its score reaches the instance's threshold, else -1
        pre_labels = np.where(scores >= cutoffs[:, np.newaxis], 1.0, -1.0)

        # evaluation metrics
        self.hamming_loss = compute_hamming_loss(pre_labels, test_target)
        self.ranking_loss = compute_ranking_loss(self.output, test_target)
        self.one_error = compute_one_error(self.output, test_target)
        self.coverage = compute_coverage(self.output, test_target)
        self.average_precision = compute_average_precision(self.output, test_target)
if __name__ == "__main__":
    # read the data set from a MATLAB .mat file
    path = r"D:\Randolph\Learning Programming\myPython\Thesis_reading\datasets\sample_data.mat"
    data = scio.loadmat(path)
    training_data = data["train_data"]
    # targets are transposed so that each row is one instance, which is the
    # layout MLBP.test_mlbp unpacks (instances x classes)
    training_target = data["train_target"].transpose()
    testing_data = data["test_data"]
    testing_target = data["test_target"].transpose()

    # set the basic parameters
    hidden_neuron = 60
    ml_bp = MLBP(hidden_neuron)
    testing_threshold = ml_bp.test_mlbp(training_data, training_target, testing_data, testing_target)
    ml_bp.evaluation(testing_target, testing_threshold)

    # print the result of ML-BP; a single-argument print(...) call produces
    # the same text on Python 2 and also runs on Python 3
    print("average_precision:  %s" % (ml_bp.average_precision,))
    print("ranking_loss:  %s" % (ml_bp.ranking_loss,))
    print("hamming_loss:  %s" % (ml_bp.hamming_loss,))
    print("coverage:  %s" % (ml_bp.coverage,))
    # one_error is computed by evaluation() as well, so report it too
    print("one_error:  %s" % (ml_bp.one_error,))
    # kept as a module-level name for inspection after the run
    output = ml_bp.output
# All comments (0)
# No comments yet.
