Stanford POS Tagger Demo

Method 1: POS-tag a single word

import java.io.IOException;

import edu.stanford.nlp.tagger.maxent.MaxentTagger;

public class Tagger {

    public static void main(String[] args) throws IOException, ClassNotFoundException {
        // Initialize the tagger with the left3words English model
        MaxentTagger tagger = new MaxentTagger("F:/stanford-postagger-2014-06-16/models/english-left3words-distsim.tagger");

        // The sample string
        String sample = "text";

        // The tagged string
        String tagged = tagger.tagString(sample);

        // Output the result
        System.out.println(tagged);
    }
}
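For reference, tagString tokenizes its input internally and returns each token joined to its tag with an underscore, so it can also be handed a whole sentence rather than a single word. A minimal sketch under the same assumption about the local model path as above (adjust the F: path to your own install; the class name is made up for illustration, and the output shown in the comment is indicative only):

import edu.stanford.nlp.tagger.maxent.MaxentTagger;

public class SentenceStringTagger {   // hypothetical class name, not part of the original demo

    public static void main(String[] args) throws Exception {
        // Assumed local model path, same as in the demo above.
        MaxentTagger tagger = new MaxentTagger("F:/stanford-postagger-2014-06-16/models/english-left3words-distsim.tagger");

        // tagString tokenizes the text itself, so a full sentence works directly.
        String tagged = tagger.tagString("A quick brown fox jumped over the lazy dog.");

        // Prints something like: A_DT quick_JJ brown_JJ fox_NN jumped_VBD over_IN the_DT lazy_JJ dog_NN ._.
        System.out.println(tagged);
    }
}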

Method 2: POS-tag a single sentence

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.List;

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;

class TaggerDemo {

    private TaggerDemo() {}

    public static void main(String[] args) throws Exception {
        // Model and input paths are hardcoded here; TaggerDemo2 below reads them from the command line.
        MaxentTagger tagger = new MaxentTagger("F:/stanford-postagger-2014-06-16/models/english-bidirectional-distsim.tagger");

        // Tokenize the input file into sentences.
        List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new BufferedReader(new FileReader("F:/trigger.txt")));

        // Tag each sentence and print it in "word_TAG" form.
        for (List<HasWord> sentence : sentences) {
            List<TaggedWord> tSentence = tagger.tagSentence(sentence);
            System.out.println(Sentence.listToString(tSentence, false));
        }
    }
}
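The same loop also works without an input file: MaxentTagger.tokenizeText accepts any Reader, so a StringReader can be used to tag a sentence held in memory. A minimal sketch, reusing the assumed F: model path from above (the class name is just for illustration):

import java.io.StringReader;
import java.util.List;

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;

class InMemorySentenceTagger {   // hypothetical class name, not part of the original demo

    public static void main(String[] args) throws Exception {
        // Assumed local model path, same as in the demo above.
        MaxentTagger tagger = new MaxentTagger("F:/stanford-postagger-2014-06-16/models/english-bidirectional-distsim.tagger");

        // tokenizeText takes any Reader, so a StringReader replaces the file here.
        List<List<HasWord>> sentences = MaxentTagger.tokenizeText(
                new StringReader("The slimy slug crawled over the long, green grass."));

        for (List<HasWord> sentence : sentences) {
            List<TaggedWord> tSentence = tagger.tagSentence(sentence);
            System.out.println(Sentence.listToString(tSentence, false));
        }
    }
}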

Method 3: Read a text file and POS-tag its contents

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.List;

import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;

/** This demo shows user-provided sentences (i.e., {@code List<HasWord>})
 *  being tagged by the tagger. The sentences are generated by direct use
 *  of the DocumentPreprocessor class.
 *
 *  @author Christopher Manning
 */
class TaggerDemo2 {

    private TaggerDemo2() {}

    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("usage: java TaggerDemo2 modelFile fileToTag");
            return;
        }

        // Load the model given as the first command-line argument.
        MaxentTagger tagger = new MaxentTagger(args[0]);

        // Build a PTB tokenizer that keeps untokenizable characters.
        TokenizerFactory<CoreLabel> ptbTokenizerFactory =
                PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep");

        // Read the input file as UTF-8 and write the tagged output as UTF-8.
        BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
        PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"));

        // Split the document into sentences with DocumentPreprocessor.
        DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
        documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);

        // Tag each sentence and print it in "word_TAG" form.
        for (List<HasWord> sentence : documentPreprocessor) {
            List<TaggedWord> tSentence = tagger.tagSentence(sentence);
            pw.println(Sentence.listToString(tSentence, false));
        }

        // Print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence.
        List<HasWord> sent = Sentence.toWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");
        List<TaggedWord> taggedSent = tagger.tagSentence(sent);
        for (TaggedWord tw : taggedSent) {
            if (tw.tag().startsWith("JJ")) {
                pw.println(tw.word());
            }
        }

        pw.close();
    }
}
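Building on the adjective example at the end of TaggerDemo2, the word() and tag() accessors on TaggedWord can also be used to summarize a tagged sentence, for instance by counting how often each tag occurs. A minimal sketch (the class name is made up for illustration; the model file is passed as the first argument, as in TaggerDemo2):

import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;

class TagCountDemo {   // hypothetical class name, not part of the original demo

    public static void main(String[] args) throws Exception {
        // Assumption: the model path is given as the first command-line argument, as in TaggerDemo2.
        MaxentTagger tagger = new MaxentTagger(args[0]);

        List<HasWord> sent = Sentence.toWordList("The", "slimy", "slug", "crawled",
                "over", "the", "long", ",", "green", "grass", ".");
        List<TaggedWord> taggedSent = tagger.tagSentence(sent);

        // Count how often each POS tag occurs in the tagged sentence.
        Map<String, Integer> tagCounts = new TreeMap<>();
        for (TaggedWord tw : taggedSent) {
            tagCounts.merge(tw.tag(), 1, Integer::sum);
        }
        System.out.println(tagCounts);
    }
}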
