
dilated convolutions
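The two files below are a Chinese NER tagger built on TensorFlow 1.x: model.py, a Model class that encodes each sentence with either a BiLSTM or an iterated dilated CNN (IDCNN) and decodes tags with a CRF layer, and the training/inference script that drives it. The appeal of dilated convolutions here is that stacking a few of them gives every character a wide context window without pooling or recurrence. The following is a small sketch in plain Python, using only numbers taken from the model below (filter width 3, dilation schedule [1, 1, 2], repeated 4 times), of how quickly the receptive field grows.

# Receptive-field sketch for the IDCNN block defined in model.py below:
# filter width 3, dilations [1, 1, 2], applied repeat_times = 4 times with shared weights.
filter_width = 3
dilations = [1, 1, 2] * 4              # the same 3-layer block is reused 4 times
receptive_field = 1
for d in dilations:
    # each stride-1 dilated convolution widens the context by (filter_width - 1) * dilation
    receptive_field += (filter_width - 1) * d
print(receptive_field)                 # 33: each position sees about 33 characters of context

First, model.py: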

# encoding=utf-8

    
 import numpy as np
    
 import tensorflow as tf
    
 from tensorflow.contrib.crf import crf_log_likelihood
    
 from tensorflow.contrib.crf import viterbi_decode
    
 from tensorflow.contrib.layers.python.layers import initializers
    
  
    
 from utils import result_to_json
    
 from data_utils import create_input, iobes_iob,iob_iobes
    
  
    
  
    
 class Model(object):
    
    # initialize model parameters
    
     def __init__(self, config):
    
  
    
     self.config = config
    
     
    
     self.lr = config["lr"]
    
     self.char_dim = config["char_dim"]
    
     self.lstm_dim = config["lstm_dim"]
    
     self.seg_dim = config["seg_dim"]
    
  
    
     self.num_tags = config["num_tags"]
    
    self.num_chars = config["num_chars"]  # total number of distinct characters in the samples
    
     self.num_segs = 4
    
  
    
     self.global_step = tf.Variable(0, trainable=False)
    
     self.best_dev_f1 = tf.Variable(0.0, trainable=False)
    
     self.best_test_f1 = tf.Variable(0.0, trainable=False)
    
     self.initializer = initializers.xavier_initializer()
    
     
    
     
    
  
    
     # add placeholders for the model
    
  
    
     self.char_inputs = tf.placeholder(dtype=tf.int32,
    
                                       shape=[None, None],
    
                                       name="ChatInputs")
    
     self.seg_inputs = tf.placeholder(dtype=tf.int32,
    
                                      shape=[None, None],
    
                                      name="SegInputs")
    
  
    
     self.targets = tf.placeholder(dtype=tf.int32,
    
                                   shape=[None, None],
    
                                   name="Targets")
    
     # dropout keep prob
    
     self.dropout = tf.placeholder(dtype=tf.float32,
    
                                   name="Dropout")
    
  
    
     used = tf.sign(tf.abs(self.char_inputs))
    
     length = tf.reduce_sum(used, reduction_indices=1)
    
     self.lengths = tf.cast(length, tf.int32)
    
     self.batch_size = tf.shape(self.char_inputs)[0]
    
     self.num_steps = tf.shape(self.char_inputs)[-1]
    
     
    
     
    
    # model type (added by crownpku): bilstm or idcnn
    
     self.model_type = config['model_type']
    
     #parameters for idcnn
    
     self.layers = [
    
         {
    
             'dilation': 1
    
         },
    
         {
    
             'dilation': 1
    
         },
    
         {
    
             'dilation': 2
    
         },
    
     ]
    
     self.filter_width = 3
    
     self.num_filter = self.lstm_dim 
    
     self.embedding_dim = self.char_dim + self.seg_dim
    
     self.repeat_times = 4
    
     self.cnn_output_width = 0
    
     
    
     # embeddings for chinese character and segmentation representation
    
     embedding = self.embedding_layer(self.char_inputs, self.seg_inputs, config)
    
  
    
     if self.model_type == 'bilstm':
    
         # apply dropout before feed to lstm layer
    
         model_inputs = tf.nn.dropout(embedding, self.dropout)
    
  
    
         # bi-directional lstm layer
    
         model_outputs = self.biLSTM_layer(model_inputs, self.lstm_dim, self.lengths)
    
  
    
         # logits for tags
    
         self.logits = self.project_layer_bilstm(model_outputs)
    
     
    
     elif self.model_type == 'idcnn':
    
         # apply dropout before feed to idcnn layer
    
         model_inputs = tf.nn.dropout(embedding, self.dropout)
    
  
    
        # idcnn layer
    
         model_outputs = self.IDCNN_layer(model_inputs)
    
  
    
         # logits for tags
    
         self.logits = self.project_layer_idcnn(model_outputs)
    
     
    
     else:
    
         raise KeyError
    
  
    
     # loss of the model
    
     self.loss = self.loss_layer(self.logits, self.lengths)
    
  
    
     with tf.variable_scope("optimizer"):
    
         optimizer = self.config["optimizer"]
    
         if optimizer == "sgd":
    
             self.opt = tf.train.GradientDescentOptimizer(self.lr)
    
         elif optimizer == "adam":
    
             self.opt = tf.train.AdamOptimizer(self.lr)
    
        elif optimizer == "adagrad":
    
             self.opt = tf.train.AdagradOptimizer(self.lr)
    
         else:
    
             raise KeyError
    
  
    
         # apply grad clip to avoid gradient explosion
    
         grads_vars = self.opt.compute_gradients(self.loss)
    
         capped_grads_vars = [[tf.clip_by_value(g, -self.config["clip"], self.config["clip"]), v]
    
                             for g, v in grads_vars]  # clip each gradient to [-config["clip"], config["clip"]], i.e. [-5, 5] by default
    
         self.train_op = self.opt.apply_gradients(capped_grads_vars, self.global_step)
    
  
    
     # saver of the model
    
     self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
    
  
    
     def embedding_layer(self, char_inputs, seg_inputs, config, name=None):
    
     """
    
     :param char_inputs: one-hot encoding of sentence
    
     :param seg_inputs: segmentation feature
    
    :param config: whether to use the segmentation feature
    :return: [batch_size, num_steps, char_dim + seg_dim]
    
     """
    
    # Example: 高:3 血:22 糖:23 和:24 高:3 血:22 压:25  =>  char_inputs=[3,22,23,24,3,22,25]
    # Segmentation of 高血糖 / 和 / 高血压: 高血糖=[1,2,3], 和=[0], 高血压=[1,2,3]  =>  seg_inputs=[1,2,3,0,1,2,3]
    
     embedding = []
    
     self.char_inputs_test=char_inputs
    
     self.seg_inputs_test=seg_inputs
    
     with tf.variable_scope("char_embedding" if not name else name), tf.device('/cpu:0'):
    
         self.char_lookup = tf.get_variable(
    
                 name="char_embedding",
    
                 shape=[self.num_chars, self.char_dim],
    
                 initializer=self.initializer)
    
        # e.g. the input character 常 maps to index 8 in the character dictionary
        # self.char_lookup is the [num_chars, char_dim] embedding matrix (e.g. 2677 x 100);
        # char_inputs holds those dictionary indices
    
         embedding.append(tf.nn.embedding_lookup(self.char_lookup, char_inputs))
    
         #self.embedding1.append(tf.nn.embedding_lookup(self.char_lookup, char_inputs))
    
         if config["seg_dim"]:
    
             with tf.variable_scope("seg_embedding"), tf.device('/cpu:0'):
    
                 self.seg_lookup = tf.get_variable(
    
                     name="seg_embedding",
    
                    # shape = [num_segs, seg_dim], e.g. [4, 20]
    
                     shape=[self.num_segs, self.seg_dim],
    
                     initializer=self.initializer)
    
                 embedding.append(tf.nn.embedding_lookup(self.seg_lookup, seg_inputs))
    
         embed = tf.concat(embedding, axis=-1)
    
     self.embed_test=embed
    
     self.embedding_test=embedding
    
     return embed
    
  
    
     def biLSTM_layer(self, model_inputs, lstm_dim, lengths, name=None):
    
     """
    
    :param model_inputs: [batch_size, num_steps, emb_size]
    
     :return: [batch_size, num_steps, 2*lstm_dim]
    
     """
    
     with tf.variable_scope("char_BiLSTM" if not name else name):
    
         lstm_cell = {}
    
         for direction in ["forward", "backward"]:
    
             with tf.variable_scope(direction):
    
                 lstm_cell[direction] = tf.contrib.rnn.CoupledInputForgetGateLSTMCell(
    
                     lstm_dim,
    
                     use_peepholes=True,
    
                     initializer=self.initializer,
    
                     state_is_tuple=True)
    
         outputs, final_states = tf.nn.bidirectional_dynamic_rnn(
    
             lstm_cell["forward"],
    
             lstm_cell["backward"],
    
             model_inputs,
    
             dtype=tf.float32,
    
             sequence_length=lengths)
    
     return tf.concat(outputs, axis=2)
    
  
    
     #IDCNN layer
    
     def IDCNN_layer(self, model_inputs, 
    
                 name=None):
    
     """
    
    :param model_inputs: [batch_size, num_steps, emb_size]
    
     :return: [batch_size, num_steps, cnn_output_width]
    
     """
    
    # tf.expand_dims inserts a dimension of size 1 at the given position (axes are 0-based)
    
     model_inputs = tf.expand_dims(model_inputs, 1)
    
     self.model_inputs_test=model_inputs
    
     reuse = False
    
     if self.dropout == 1.0:
    
         reuse = True
    
     with tf.variable_scope("idcnn" if not name else name):
    
        # filter shape = [1, filter_width, embedding_dim, num_filter], e.g. [1, 3, 120, 100]
    
         filter_weights = tf.get_variable(
    
             "idcnn_filter",
    
             shape=[1, self.filter_width, self.embedding_dim,
    
                    self.num_filter],
    
             initializer=self.initializer)
    
         
    
         """
    
         shape of input = [batch, in_height, in_width, in_channels]
    
         shape of filter = [filter_height, filter_width, in_channels, out_channels]
    
         """
    
         layerInput = tf.nn.conv2d(model_inputs,
    
                                   filter_weights,
    
                                   strides=[1, 1, 1, 1],
    
                                   padding="SAME",
    
                                   name="init_layer",use_cudnn_on_gpu=False)
    
         self.layerInput_test=layerInput
    
         finalOutFromLayers = []
    
         
    
         totalWidthForLastDim = 0
    
         for j in range(self.repeat_times):
    
             for i in range(len(self.layers)):
    
                # dilation rate of this layer: the schedule is 1, 1, 2
    
                 dilation = self.layers[i]['dilation']
    
                 isLast = True if i == (len(self.layers) - 1) else False
    
                 with tf.variable_scope("atrous-conv-layer-%d" % i,
    
                                        reuse=True
    
                                        if (reuse or j > 0) else False):
    
                    # w: [filter_height, filter_width, in_channels, out_channels]
    
                     w = tf.get_variable(
    
                         "filterW",
    
                         shape=[1, self.filter_width, self.num_filter,
    
                                self.num_filter],
    
                         initializer=tf.contrib.layers.xavier_initializer())
    
                     if j==1 and i==1:
    
                         self.w_test_1=w
    
                     if j==2 and i==1:
    
                         self.w_test_2=w                            
    
                     b = tf.get_variable("filterB", shape=[self.num_filter])
    
                    # tf.nn.atrous_conv2d(value, filters, rate, padding, name=None)
                    # value:   a 4-D tensor [batch, height, width, channels] -- the input feature map
                    # filters: a 4-D tensor [filter_height, filter_width, channels, out_channels];
                    #          its third dimension must equal the channel dimension of value
                    # rate:    a positive int. A dilated convolution has no stride argument (the
                    #          stride is fixed at 1); instead, rate sets the sampling interval on
                    #          the input, equivalent to inserting (rate - 1) zeros between the
                    #          filter taps, punching "holes" into the kernel. With rate=1 nothing
                    #          is inserted and this reduces to an ordinary convolution.
                    # padding: "SAME" or "VALID". With "SAME" the output keeps the spatial shape
                    #          [batch, height, width, out_channels]; with "VALID" each spatial
                    #          dimension shrinks by (filter_size - 1) * rate.
                    # A short demo of dilated convolution is sketched after the class definition below.
    
                     conv = tf.nn.atrous_conv2d(layerInput,
    
                                                w,
    
                                                rate=dilation,
    
                                                padding="SAME")
    
                     self.conv_test=conv 
    
                     conv = tf.nn.bias_add(conv, b)
    
                     conv = tf.nn.relu(conv)
    
                     if isLast:
    
                         finalOutFromLayers.append(conv)
    
                         totalWidthForLastDim += self.num_filter
    
                     layerInput = conv
    
         finalOut = tf.concat(axis=3, values=finalOutFromLayers)
    
         keepProb = 1.0 if reuse else 0.5
    
         finalOut = tf.nn.dropout(finalOut, keepProb)
    
        # tf.squeeze removes dimensions of size 1 from the shape of a tensor
        # (all of them, or only the axes listed in squeeze_dims); here it drops
        # the dummy height dimension that was added with tf.expand_dims above
    
         finalOut = tf.squeeze(finalOut, [1])
    
         finalOut = tf.reshape(finalOut, [-1, totalWidthForLastDim])
    
         self.cnn_output_width = totalWidthForLastDim
    
         return finalOut
    
  
    
     def project_layer_bilstm(self, lstm_outputs, name=None):
    
     """
    
     hidden layer between lstm layer and logits
    
     :param lstm_outputs: [batch_size, num_steps, emb_size] 
    
     :return: [batch_size, num_steps, num_tags]
    
     """
    
     with tf.variable_scope("project"  if not name else name):
    
         with tf.variable_scope("hidden"):
    
             W = tf.get_variable("W", shape=[self.lstm_dim*2, self.lstm_dim],
    
                                 dtype=tf.float32, initializer=self.initializer)
    
  
    
             b = tf.get_variable("b", shape=[self.lstm_dim], dtype=tf.float32,
    
                                 initializer=tf.zeros_initializer())
    
             output = tf.reshape(lstm_outputs, shape=[-1, self.lstm_dim*2])
    
             hidden = tf.tanh(tf.nn.xw_plus_b(output, W, b))
    
  
    
         # project to score of tags
    
         with tf.variable_scope("logits"):
    
             W = tf.get_variable("W", shape=[self.lstm_dim, self.num_tags],
    
                                 dtype=tf.float32, initializer=self.initializer)
    
  
    
             b = tf.get_variable("b", shape=[self.num_tags], dtype=tf.float32,
    
                                 initializer=tf.zeros_initializer())
    
  
    
             pred = tf.nn.xw_plus_b(hidden, W, b)
    
  
    
         return tf.reshape(pred, [-1, self.num_steps, self.num_tags])
    
     
    
     #Project layer for idcnn by crownpku
    
     #Delete the hidden layer, and change bias initializer
    
     def project_layer_idcnn(self, idcnn_outputs, name=None):
    
     """
    
    :param idcnn_outputs: [batch_size * num_steps, cnn_output_width]
    
     :return: [batch_size, num_steps, num_tags]
    
     """
    
     with tf.variable_scope("project"  if not name else name):
    
         
    
         # project to score of tags
    
         with tf.variable_scope("logits"):
    
             W = tf.get_variable("W", shape=[self.cnn_output_width, self.num_tags],
    
                                 dtype=tf.float32, initializer=self.initializer)
    
  
    
             b = tf.get_variable("b",  initializer=tf.constant(0.001, shape=[self.num_tags]))
    
  
    
             pred = tf.nn.xw_plus_b(idcnn_outputs, W, b)
    
  
    
         return tf.reshape(pred, [-1, self.num_steps, self.num_tags])
    
  
    
     def loss_layer(self, project_logits, lengths, name=None):
    
     """
    
     calculate crf loss
    
    :param project_logits: [batch_size, num_steps, num_tags]
    
     :return: scalar loss
    
     """
    
     with tf.variable_scope("crf_loss"  if not name else name):
    
         small = -1000.0
    
         # pad logits for crf loss
    
         start_logits = tf.concat(
    
             [small * tf.ones(shape=[self.batch_size, 1, self.num_tags]), tf.zeros(shape=[self.batch_size, 1, 1])], axis=-1)
    
         pad_logits = tf.cast(small * tf.ones([self.batch_size, self.num_steps, 1]), tf.float32)
    
         logits = tf.concat([project_logits, pad_logits], axis=-1)
    
         logits = tf.concat([start_logits, logits], axis=1)
    
         targets = tf.concat(
    
             [tf.cast(self.num_tags*tf.ones([self.batch_size, 1]), tf.int32), self.targets], axis=-1)
    
  
    
         self.trans = tf.get_variable(
    
             "transitions",
    
             shape=[self.num_tags + 1, self.num_tags + 1],
    
             initializer=self.initializer)
    
        # crf_log_likelihood computes the log-likelihood of tag sequences in a linear-chain CRF
        # inputs: a [batch_size, max_seq_len, num_tags] tensor of unary scores, here the
        #         projected BiLSTM/IDCNN outputs reshaped into this form
        # tag_indices: a [batch_size, max_seq_len] matrix of gold tag indices
        # sequence_lengths: a [batch_size] vector holding the true length of each sequence
        # transition_params: a [num_tags, num_tags] transition matrix
        # returns:
        #   log_likelihood: a scalar log-likelihood
        #   transition_params: the [num_tags, num_tags] transition matrix
    
         log_likelihood, self.trans = crf_log_likelihood(
    
             inputs=logits,
    
             tag_indices=targets,
    
             transition_params=self.trans,
    
             sequence_lengths=lengths+1)
    
         return tf.reduce_mean(-log_likelihood)
    
  
    
     def create_feed_dict(self, is_train, batch):
    
     """
    
     :param is_train: Flag, True for train batch
    
     :param batch: list train/evaluate data 
    
     :return: structured data to feed
    
     """
    
     _, chars, segs, tags = batch
    
     feed_dict = {
    
         self.char_inputs: np.asarray(chars),
    
         self.seg_inputs: np.asarray(segs),
    
         self.dropout: 1.0,
    
     }
    
     if is_train:
    
         feed_dict[self.targets] = np.asarray(tags)
    
         feed_dict[self.dropout] = self.config["dropout_keep"]
    
     return feed_dict
    
  
    
     def run_step(self, sess, is_train, batch):
    
     """
    
     :param sess: session to run the batch
    
     :param is_train: a flag indicate if it is a train batch
    
     :param batch: a dict containing batch data
    
     :return: batch result, loss of the batch or logits
    
     """
    
     feed_dict = self.create_feed_dict(is_train, batch)
    
     if is_train:
    
         global_step, loss,_,char_lookup_out,seg_lookup_out,char_inputs_test,seg_inputs_test,embed_test,embedding_test,\
    
             model_inputs_test,layerInput_test,conv_test,w_test_1,w_test_2,char_inputs_test= sess.run(
    
             [self.global_step, self.loss, self.train_op,self.char_lookup,self.seg_lookup,self.char_inputs_test,self.seg_inputs_test,\
    
              self.embed_test,self.embedding_test,self.model_inputs_test,self.layerInput_test,self.conv_test,self.w_test_1,self.w_test_2,self.char_inputs],
    
             feed_dict)
    
         return global_step, loss
    
     else:
    
         lengths, logits = sess.run([self.lengths, self.logits], feed_dict)
    
         return lengths, logits
    
  
    
     def decode(self, logits, lengths, matrix):
    
     """
    
     :param logits: [batch_size, num_steps, num_tags]float32, logits
    
     :param lengths: [batch_size]int32, real length of each sequence
    
    :param matrix: transition matrix for inference
    
     :return:
    
     """
    
    # infer the final label sequence with the Viterbi algorithm
    
     paths = []
    
     small = -1000.0
    
     start = np.asarray([[small]*self.num_tags +[0]])
    
     for score, length in zip(logits, lengths):
    
         score = score[:length]
    
         pad = small * np.ones([length, 1])
    
         logits = np.concatenate([score, pad], axis=1)
    
         logits = np.concatenate([start, logits], axis=0)
    
         path, _ = viterbi_decode(logits, matrix)
    
  
    
         paths.append(path[1:])
    
     return paths
    
  
    
     def evaluate(self, sess, data_manager, id_to_tag):
    
     """
    
     :param sess: session  to run the model 
    
     :param data: list of data
    
     :param id_to_tag: index to tag name
    
     :return: evaluate result
    
     """
    
     results = []
    
     trans = self.trans.eval()
    
     for batch in data_manager.iter_batch():
    
         strings = batch[0]
    
         tags = batch[-1]
    
         lengths, scores = self.run_step(sess, False, batch)
    
         batch_paths = self.decode(scores, lengths, trans)
    
         for i in range(len(strings)):
    
             result = []
    
             string = strings[i][:lengths[i]]
    
             gold = iobes_iob([id_to_tag[int(x)] for x in tags[i][:lengths[i]]])
    
             pred = iobes_iob([id_to_tag[int(x)] for x in batch_paths[i][:lengths[i]]])
    
             #gold = iob_iobes([id_to_tag[int(x)] for x in tags[i][:lengths[i]]])
    
             #pred = iob_iobes([id_to_tag[int(x)] for x in batch_paths[i][:lengths[i]]])                
    
             for char, gold, pred in zip(string, gold, pred):
    
                 result.append(" ".join([char, gold, pred]))
    
             results.append(result)
    
     return results
    
  
    
     def evaluate_line(self, sess, inputs, id_to_tag):
    
     trans = self.trans.eval(session=sess)
    
     lengths, scores = self.run_step(sess, False, inputs)
    
     batch_paths = self.decode(scores, lengths, trans)
    
     tags = [id_to_tag[idx] for idx in batch_paths[0]]
    
     return result_to_json(inputs[0][0], tags)
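The long comment inside IDCNN_layer describing tf.nn.atrous_conv2d ends by promising a short program that demonstrates dilated convolution. Here is a minimal sketch of such a demo, assuming TensorFlow 1.x as used above; the shapes (7 time steps, 120 input channels, 100 filters) are illustrative values chosen to match embedding_dim = char_dim + seg_dim = 120 and num_filter = 100 from the default flags, not output from the original post.

# Minimal dilated-convolution demo (assumes TensorFlow 1.x, as in the code above):
# atrous_conv2d with rate=1 equals an ordinary conv2d, and with "SAME" padding
# the spatial shape of the output is preserved for any rate.
import numpy as np
import tensorflow as tf

x = tf.constant(np.random.randn(1, 1, 7, 120).astype(np.float32))    # [batch, height, width, channels]
w = tf.constant(np.random.randn(1, 3, 120, 100).astype(np.float32))  # [filter_h, filter_w, in_ch, out_ch]

plain = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding="SAME")
rate1 = tf.nn.atrous_conv2d(x, w, rate=1, padding="SAME")
rate2 = tf.nn.atrous_conv2d(x, w, rate=2, padding="SAME")

with tf.Session() as sess:
    p, r1, r2 = sess.run([plain, rate1, rate2])
    print(p.shape, r1.shape, r2.shape)   # all (1, 1, 7, 100)
    print(np.allclose(p, r1))            # True: rate=1 is an ordinary convolution

The second file below is the training and inference script: it defines the command-line flags, builds the config, prepares the data, and runs the training loop.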
    
    
    
    
 # encoding=utf8

    
  
    
  
    
 import pickle
    
 import itertools
    
 from collections import OrderedDict
    
 import os
    
 import tensorflow as tf
    
 import numpy as np
    
 from model import Model
    
 from loader import load_sentences, update_tag_scheme
    
 from loader import char_mapping, tag_mapping
    
 from loader import augment_with_pretrained, prepare_dataset
    
 from utils import get_logger, make_path, clean, create_model, save_model
    
 from utils import print_config, save_config, load_config, test_ner
    
 from data_utils import load_word2vec, create_input, input_from_line, BatchManager
    
 root_path=os.getcwd()+os.sep
    
 flags = tf.app.flags
    
 flags.DEFINE_boolean("clean",       True,      "clean train folder")
    
 flags.DEFINE_boolean("train",       False,      "Whether train the model")
    
 # configurations for the model
    
 flags.DEFINE_integer("seg_dim",     20,         "Embedding size for segmentation, 0 if not used")
    
 flags.DEFINE_integer("char_dim",    100,        "Embedding size for characters")
    
 flags.DEFINE_integer("lstm_dim",    100,        "Num of hidden units in LSTM, or num of filters in IDCNN")
    
 flags.DEFINE_string("tag_schema",   "iobes",    "tagging schema iobes or iob")
    
  
    
 # configurations for training
    
 flags.DEFINE_float("clip",          5,          "Gradient clip")
    
 flags.DEFINE_float("dropout",       0.5,        "Dropout rate")
    
 flags.DEFINE_float("batch_size",    60,         "batch size")
    
 flags.DEFINE_float("lr",            0.001,      "Initial learning rate")
    
 flags.DEFINE_string("optimizer",    "adam",     "Optimizer for training")
    
 flags.DEFINE_boolean("pre_emb",     True,       "Wither use pre-trained embedding")
    
 flags.DEFINE_boolean("zeros",       True,      "Wither replace digits with zero")
    
 flags.DEFINE_boolean("lower",       False,       "Wither lower case")
    
  
    
 flags.DEFINE_integer("max_epoch",   100,        "maximum training epochs")
    
 flags.DEFINE_integer("steps_check", 100,        "steps per checkpoint")
    
 flags.DEFINE_string("ckpt_path",    "ckpt",      "Path to save model")
    
 flags.DEFINE_string("summary_path", "summary",      "Path to store summaries")
    
 flags.DEFINE_string("log_file",     "train.log",    "File for log")
    
 flags.DEFINE_string("map_file",     "./config/maps.pkl",     "file for maps")
    
 flags.DEFINE_string("vocab_file",   "vocab.json",   "File for vocab")
    
 flags.DEFINE_string("config_file",  "./config/config_file",  "File for config")
    
 flags.DEFINE_string("tag_to_id_path",  "./config/tag_to_id.txt",  "File for tag_to_id.txt")
    
 flags.DEFINE_string("id_to_tag_path",  "./config/id_to_tag.txt",  "File for id_to_tag.txt")
    
 flags.DEFINE_string("script",       "conlleval",    "evaluation script")
    
 flags.DEFINE_string("result_path",  "result",       "Path for results")
    
 flags.DEFINE_string("emb_file",     os.path.join(root_path+"data", "vec.txt"),  "Path for pre_trained embedding")
    
 flags.DEFINE_string("train_file",   os.path.join(root_path+"data", "example.dev"),  "Path for train data")  # example.train
    
 flags.DEFINE_string("dev_file",     os.path.join(root_path+"data", "example.dev"),    "Path for dev data")   # 注意,这里的数据集我都做了修改,为了在我的机器上运行
    
 flags.DEFINE_string("test_file",    os.path.join(root_path+"data", "example.dev"),   "Path for test data")   # example.test
    
  
    
 flags.DEFINE_string("model_type", "idcnn", "Model type, can be idcnn or bilstm")
    
 #flags.DEFINE_string("model_type", "bilstm", "Model type, can be idcnn or bilstm")
    
  
    
 FLAGS = tf.app.flags.FLAGS
    
assert FLAGS.clip < 5.1, "gradient clip shouldn't be too large"
assert 0 <= FLAGS.dropout < 1, "dropout rate between 0 and 1"
assert FLAGS.lr > 0, "learning rate must be larger than zero"
    
 assert FLAGS.optimizer in ["adam", "sgd", "adagrad"]
    
  
    
  
    
 # config for the model
    
 def config_model(char_to_id, tag_to_id):
    
     config = OrderedDict()
    
     config["model_type"] = FLAGS.model_type
    
     config["num_chars"] = len(char_to_id)
    
     config["char_dim"] = FLAGS.char_dim
    
     config["num_tags"] = len(tag_to_id)
    
     config["seg_dim"] = FLAGS.seg_dim
    
     config["lstm_dim"] = FLAGS.lstm_dim
    
     config["batch_size"] = FLAGS.batch_size
    
  
    
     config["emb_file"] = FLAGS.emb_file
    
     config["clip"] = FLAGS.clip
    
     config["dropout_keep"] = 1.0 - FLAGS.dropout
    
     config["optimizer"] = FLAGS.optimizer
    
     config["lr"] = FLAGS.lr
    
     config["tag_schema"] = FLAGS.tag_schema
    
     config["pre_emb"] = FLAGS.pre_emb
    
     config["zeros"] = FLAGS.zeros
    
     config["lower"] = FLAGS.lower
    
     return config
    
  
    
  
    
def evaluate(sess, model, name, data, id_to_tag, logger):
    logger.info("evaluate:{}".format(name))
    ner_results = model.evaluate(sess, data, id_to_tag)
    eval_lines = test_ner(ner_results, FLAGS.result_path)
    for line in eval_lines:
        logger.info(line)
    f1 = float(eval_lines[1].strip().split()[-1])

    if name == "dev":
        best_test_f1 = model.best_dev_f1.eval()
        if f1 > best_test_f1:
            tf.assign(model.best_dev_f1, f1).eval()
            logger.info("new best dev f1 score:{:>.3f}".format(f1))
        return f1 > best_test_f1
    elif name == "test":
        best_test_f1 = model.best_test_f1.eval()
        if f1 > best_test_f1:
            tf.assign(model.best_test_f1, f1).eval()
            logger.info("new best test f1 score:{:>.3f}".format(f1))
        return f1 > best_test_f1
    
  
    
  
    
 def train():
    
     # load data sets
    
     train_sentences = load_sentences(FLAGS.train_file, FLAGS.lower, FLAGS.zeros)
    
     dev_sentences = load_sentences(FLAGS.dev_file, FLAGS.lower, FLAGS.zeros)
    
     test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower, FLAGS.zeros)
    
  
    
    # Use the selected tagging scheme (IOB / IOBES): I = inside, O = outside, B = begin | E = end, S = single
    
     update_tag_scheme(train_sentences, FLAGS.tag_schema)
    
     update_tag_scheme(test_sentences, FLAGS.tag_schema)
    
     update_tag_scheme(dev_sentences, FLAGS.tag_schema)
    
    # create maps if they do not exist
    if not os.path.isfile(FLAGS.map_file):       # maps.pkl stores char_to_id, id_to_char, tag_to_id, id_to_tag
        # create a dictionary for characters
        if FLAGS.pre_emb:
            dico_chars_train = char_mapping(train_sentences, FLAGS.lower)[0]
            dico_chars, char_to_id, id_to_char = augment_with_pretrained(
                dico_chars_train.copy(),
                FLAGS.emb_file,
                list(itertools.chain.from_iterable(
                    [[w[0] for w in s] for s in test_sentences])
                )
            )
        else:
            _c, char_to_id, id_to_char = char_mapping(train_sentences, FLAGS.lower)

        # Create a dictionary and a mapping for tags
        _t, tag_to_id, id_to_tag = tag_mapping(train_sentences, FLAGS.id_to_tag_path, FLAGS.tag_to_id_path)
        #with open('maps.txt','w',encoding='utf8') as f1:
            #f1.writelines(str(char_to_id)+" "+id_to_char+" "+str(tag_to_id)+" "+id_to_tag+'\n')
        with open(FLAGS.map_file, "wb") as f:
            pickle.dump([char_to_id, id_to_char, tag_to_id, id_to_tag], f)
    else:
        with open(FLAGS.map_file, "rb") as f:
            char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    
  
    
    # prepare data: get collections of lists of indices
    # train_data[0][0]: the sentence; train_data[0][1]: the id of each character;
    # train_data[0][2]: segmentation features (0 for a single-character word; for longer
    # words 1 marks the first character, 2 the middle characters, 3 the last);
    # train_data[0][3]: the tag of each character
    train_data = prepare_dataset(
        train_sentences, char_to_id, tag_to_id, FLAGS.lower
    )
    dev_data = prepare_dataset(
        dev_sentences, char_to_id, tag_to_id, FLAGS.lower
    )
    test_data = prepare_dataset(
        test_sentences, char_to_id, tag_to_id, FLAGS.lower
    )
    print("%i / %i / %i sentences in train / dev / test." % (
        len(train_data), len(dev_data), len(test_data)))

    # split the data into batches of FLAGS.batch_size (60) sentences
    train_manager = BatchManager(train_data, FLAGS.batch_size)
    dev_manager = BatchManager(dev_data, 100)
    test_manager = BatchManager(test_data, 100)
    
    # make paths for storing logs and models if they do not exist
    make_path(FLAGS)
    if os.path.isfile(FLAGS.config_file):
        config = load_config(FLAGS.config_file)
    else:
        config = config_model(char_to_id, tag_to_id)
        save_config(config, FLAGS.config_file)
    make_path(FLAGS)
    
  
    
     log_path = os.path.join("log", FLAGS.log_file)
    
     logger = get_logger(log_path)
    
     print_config(config, logger)
    
  
    
     # limit GPU memory
    
     tf_config = tf.ConfigProto()
    
     tf_config.gpu_options.allow_growth = True
    
     steps_per_epoch = train_manager.len_data
    
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec, config, id_to_char, logger)
        logger.info("start training")
        loss = []
        with tf.device("/cpu:0"):
            for i in range(100):
                for batch in train_manager.iter_batch(shuffle=True):
                    # train on one batch; this call is the entry point of the whole training
                    # computation, so the network can be traced backwards from here
                    step, batch_loss = model.run_step(sess, True, batch)
                    loss.append(batch_loss)
                    if step % FLAGS.steps_check == 0:
                        iteration = step // steps_per_epoch + 1
                        logger.info("iteration:{} step:{}/{}, "
                                    "NER loss:{:>9.6f}".format(
                            iteration, step % steps_per_epoch, steps_per_epoch, np.mean(loss)))
                        loss = []

                # best = evaluate(sess, model, "dev", dev_manager, id_to_tag, logger)
                if i % 7 == 0:
                    save_model(sess, model, FLAGS.ckpt_path, logger)
            # evaluate(sess, model, "test", test_manager, id_to_tag, logger)
    
  
    
  
    
def evaluate_line():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec, config, id_to_char, logger)
        while True:
            # try:
            #     line = input("Please enter a test sentence: ")
            #     result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
            #     print(result)
            # except Exception as e:
            #     logger.info(e)

            line = input("Please enter a test sentence: ")
            result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
            print(result)
    
  
    
  
    
def main(is_train):

    if is_train:
        if FLAGS.clean:
            clean(FLAGS)
        train()                       # training mode
    else:
        evaluate_line()               # inference mode


if __name__ == "__main__":
    is_train = True
    main(is_train)
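
Model.decode in model.py is what evaluate_line ultimately relies on to turn per-character tag scores into a label sequence. Here is a minimal sketch of the start-tag padding it performs before Viterbi decoding, assuming the same tf.contrib.crf.viterbi_decode import used in model.py; the shapes and random scores are illustrative only.

# Sketch of the padding done in Model.decode before Viterbi decoding
# (assumes TensorFlow 1.x with tf.contrib.crf available, as imported in model.py).
import numpy as np
from tensorflow.contrib.crf import viterbi_decode

num_tags, length = 3, 4
small = -1000.0
score = np.random.randn(length, num_tags)              # per-step tag scores for one sentence
pad = small * np.ones([length, 1])                     # a column for the artificial "start" tag
start = np.asarray([[small] * num_tags + [0]])         # first step: only the start tag is allowed
logits = np.concatenate([start,
                         np.concatenate([score, pad], axis=1)], axis=0)
trans = np.random.randn(num_tags + 1, num_tags + 1)    # in the model this is the learned transition matrix
path, _ = viterbi_decode(logits, trans)
print(path[1:])                                        # drop the synthetic start step, as decode() does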
    
  
    
  
    
  
    
    
    
    