Advertisement

Stanford CoreNLP在Android中的使用

阅读量:

下载

斯坦福核心NLPer

jar包导入与处理

在这里插入图片描述

因为只实现了部分功能,为了避免apk过大,对第二个包(models)进行了删减。

在这里插入图片描述

解决导包的各种报错:build.gradle(app)

复制代码
    android {// configures the build properties of the project
    defaultConfig {
        ...
        multiDexEnabled true// resolves the errors raised when importing the third-party jars
    }
    // enable JDK 1.8 language support
    compileOptions {
        targetCompatibility JavaVersion.VERSION_1_8
        sourceCompatibility JavaVersion.VERSION_1_8
    }
    packagingOptions {
        // drop the duplicated files so packaging does not fail on them
        exclude 'edu/stanford/nlp/pipeline/demo/*'
    }
    }
    dependencies {
    ...
    // Stanford CoreNLP toolkit jars (models + core), loaded from the local libs directory
    implementation files('libs/stanford-corenlp-3.9.2-models.jar')
    implementation files('libs/stanford-corenlp-3.9.2.jar')
    // multidex support library; resolves the errors raised when importing the third-party jars
    implementation 'com.android.support:multidex:1.0.3'
    
    
    }
    
    
    
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
    代码解读

主要代码

复制代码
    /**
     * Runs the tokenize, ssplit, pos and lemma annotators over the text read from
     * {@code s_value} and writes a per-token report (word, lemma, part of speech)
     * to {@code s_result}.
     *
     * <p>Tokens for which {@code check} returns {@code true} are left out of the report.
     * NOTE(review): {@code check} is documented as "contains punctuation", so tokens
     * such as "n't" are presumably skipped as well - confirm this is intended.
     */
    private void nlp() {
        // Text entered by the user.
        final String input = s_value.getText().toString();

        // Configure and build the pipeline with the annotators to run.
        Properties config = new Properties();
        config.setProperty("annotators", "tokenize,ssplit,pos,lemma");
        StanfordCoreNLP nlpPipeline = new StanfordCoreNLP(config);

        // Wrap the raw text in an Annotation and run every annotator on it.
        Annotation annotated = new Annotation(input);
        nlpPipeline.annotate(annotated);

        // A CoreMap is a map keyed by class objects; one entry per sentence here.
        List<CoreMap> sentenceList = annotated.get(CoreAnnotations.SentencesAnnotation.class);

        StringBuilder report = new StringBuilder();
        for (CoreMap current : sentenceList) {
            // A CoreLabel is a CoreMap with extra token-specific accessors.
            List<CoreLabel> tokens = current.get(CoreAnnotations.TokensAnnotation.class);
            for (CoreLabel tok : tokens) {
                String surface = tok.get(CoreAnnotations.TextAnnotation.class);
                if (!check(surface)) {
                    // Part-of-speech tag, converted to its Chinese label.
                    String tagLabel = partOfSpeech(tok.get(CoreAnnotations.PartOfSpeechAnnotation.class));
                    // Lemma (dictionary form) of the token.
                    String lemma = tok.get(CoreAnnotations.LemmaAnnotation.class);
                    report.append(String.format("句中单词:%s\n词形还原:%s\n词性分析:%s\n\n", surface, lemma, tagLabel));
                }
            }
        }
        s_result.setText(report.toString());
    }
    /**
     * Translates a Penn Treebank part-of-speech tag into a Chinese label.
     *
     * <p>Tags without a mapping (for example punctuation tags) are returned unchanged.
     *
     * @param p the part-of-speech tag, e.g. {@code "NN"}; may be {@code null}
     * @return the Chinese label for the tag, the tag itself when it is not mapped,
     *         or {@code null} when {@code p} is {@code null}
     */
    private String partOfSpeech(String p) {
        // A switch on a null String throws NullPointerException, so guard first.
        if (p == null) {
            return null;
        }
        switch (p) {
            case "CC":
                return "连接词";
            case "CD":
                return "基数词";
            case "DT":
                return "限定词";
            case "EX":
                return "存在句";
            case "FW":
                return "外来词";
            case "IN":
                return "介词或从属连词";
            case "JJ":
                // Label previously carried a stray leading space.
                return "形容词或序数词";
            case "JJR":
                return "形容词比较级";
            case "JJS":
                return "形容词最高级";
            case "LS":
                return "列表标记";
            case "MD":
                return "情态词";
            case "NN":
                return "单数名词或不可数名词";
            case "NNS":
                return "复数名词";
            case "NNP":
                return "单数专有名词";
            case "NNPS":
                return "复数专有名词";
            case "PDT":
                return "前限定词";
            case "POS":
                return "所有格结束词";
            case "PRP":
                return "人称代词";
            case "PRP$":
                return "所有格代词";
            case "RB":
                return "副词";
            case "RBR":
                return "副词比较级";
            case "RBS":
                return "副词最高级";
            case "RP":
                return "小品词";
            case "SYM":
                return "符号";
            case "TO":
                return "to作为介词或不定式格式";
            case "UH":
                return "感叹词";
            case "VB":
                return "动词";
            case "VBD":
                return "动词过去式";
            case "VBG":
                return "动名词和现在分词";
            case "VBN":
                return "过去分词";
            case "VBP":
                return "动词非第三人称单数";
            case "VBZ":
                return "动词第三人称单数";
            case "WDT":
                return "疑问限定词";
            case "WP":
                return "疑问代词";
            case "WP$":
                // Possessive wh-pronoun ("whose"); previously shared the PRP$ label.
                return "所有格疑问代词";
            case "WRB":
                // Wh-adverb ("when", "where"); previously mislabelled as a wh-pronoun.
                return "疑问副词";
            default:
                return p;
        }
    }
    /** * 该函数判断一个字符串是否包含标点符号(中文英文标点符号)。
     * 原理是原字符串做一次清洗,清洗掉所有标点符号。
     * 此时,如果原字符串包含标点符号,那么清洗后的长度和原字符串长度不同。返回true。
     * 如果原字符串未包含标点符号,则清洗后长度不变。返回false。
     */
    public boolean check(String s) {
    boolean b = false;
    
    String tmp = s;
    tmp = tmp.replaceAll("\ p{P}", "");
    if (s.length() != tmp.length()) {
        b = true;
    }
    return b;
    }
    
    
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
    代码解读

未完成功能——语法树,依存句法

以下功能在Eclipse环境中可以正常运行,但在Android系统上会出现崩溃。可能是Stanford CoreNLP 3.9.2模型包(models)中某个文件在加载时出了问题,目前尚未找到有效的解决方案。

在这里插入图片描述

下面是java中的代码

复制代码
    /**
     * Desktop demo: annotates a fixed sentence with the tokenize, ssplit, pos, lemma
     * and parse annotators, then prints the parse tree and the collapsed,
     * CC-processed dependency graph of every sentence to standard output.
     */
    public class BasicPipelineExample {

        public static void main(String[] args) {
            // Configure and build the pipeline.
            Properties config = new Properties();
            config.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse");
            StanfordCoreNLP nlpPipeline = new StanfordCoreNLP(config);

            // Annotate the sample text.
            Annotation annotated = new Annotation("I love you.");
            nlpPipeline.annotate(annotated);

            List<CoreMap> sentenceList = annotated.get(CoreAnnotations.SentencesAnnotation.class);
            for (CoreMap current : sentenceList) {
                printParse(current);
            }
        }

        /** Prints the parse tree, then the dependency graph, of one sentence. */
        private static void printParse(CoreMap sentence) {
            Tree parseTree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
            System.out.println("语法树:");
            System.out.println(parseTree.toString());

            SemanticGraph graph =
                    sentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
            System.out.println("依存句法:");
            System.out.println(graph.toString());
        }
    }
    
    
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
    代码解读
在这里插入图片描述

最后一行在android中无法加载。

在这里插入图片描述

全部评论 (0)

还没有任何评论哟~