Stanford CoreNLP在Android中的使用
发布时间
阅读量:
阅读量
下载
Stanford CoreNLP
jar包导入与处理

因为只实现了部分功能,为了避免 apk 体积过大,对第二个 jar 包进行了删减。

解决导包的各种报错:build.gradle(app)
android {// configures the build properties of the project
defaultConfig {
...
multiDexEnabled true// enable multidex; works around errors raised when importing the third-party jars
}
// add JDK 1.8 (Java 8) support
compileOptions {
targetCompatibility JavaVersion.VERSION_1_8
sourceCompatibility JavaVersion.VERSION_1_8
}
packagingOptions {
// drop duplicated files from the packaged output
exclude 'edu/stanford/nlp/pipeline/demo/*'
}
}
dependencies {
...
// NLP toolkit (Stanford CoreNLP code jar and models jar, loaded from libs/)
implementation files('libs/stanford-corenlp-3.9.2-models.jar')
implementation files('libs/stanford-corenlp-3.9.2.jar')
// multidex support library; works around errors raised when importing the third-party jars
implementation 'com.android.support:multidex:1.0.3'
}
代码解读
主要代码
/**
 * Natural-language processing entry point.
 *
 * Reads the text held by {@code s_value}, runs a CoreNLP pipeline
 * (tokenize, ssplit, pos, lemma) over it and writes one formatted entry per
 * token (word, lemma, part of speech) into {@code s_result}. Tokens for which
 * {@code check} returns true (punctuation) are left out.
 */
private void nlp() {
    StringBuilder report = new StringBuilder(); // text that will be displayed
    final String input = s_value.getText().toString(); // text to analyse

    // Pipeline configuration: the list of annotators to run.
    Properties config = new Properties();
    config.setProperty("annotators", "tokenize,ssplit,pos,lemma");
    StanfordCoreNLP nlpPipeline = new StanfordCoreNLP(config);

    // Wrap the raw text in an Annotation and run every annotator on it.
    Annotation annotated = new Annotation(input);
    nlpPipeline.annotate(annotated);

    // A CoreMap is a map keyed by class objects; each one here is a sentence.
    List<CoreMap> sentenceList = annotated.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap currentSentence : sentenceList) {
        // A CoreLabel is a CoreMap with extra token-specific accessors.
        List<CoreLabel> tokens = currentSentence.get(CoreAnnotations.TokensAnnotation.class);
        for (CoreLabel tok : tokens) {
            String surface = tok.get(CoreAnnotations.TextAnnotation.class); // token text
            if (!check(surface)) { // only report tokens that are not punctuation
                // Part-of-speech tag, converted to its Chinese label.
                String tagLabel = partOfSpeech(tok.get(CoreAnnotations.PartOfSpeechAnnotation.class));
                // Lemma (dictionary form) of the token.
                String lemma = tok.get(CoreAnnotations.LemmaAnnotation.class);
                report.append(String.format("句中单词:%s\n词形还原:%s\n词性分析:%s\n\n", surface, lemma, tagLabel));
            }
        }
    }
    s_result.setText(report.toString());
}
/**
 * Translates a Penn Treebank part-of-speech tag into its Chinese label.
 *
 * @param p the part-of-speech tag, e.g. {@code "NN"} or {@code "VBZ"}; may be null
 * @return the Chinese label for the tag, or {@code p} unchanged when it is
 *         null or not one of the known tags
 */
private String partOfSpeech(String p) {
    if (p == null) {
        // A string switch throws NullPointerException on null; pass it through instead.
        return p;
    }
    switch (p) {
        case "CC":
            return "连接词";
        case "CD":
            return "基数词";
        case "DT":
            return "限定词";
        case "WP":
            return "疑问代词";
        case "VBZ":
            return "动词第三人称单数";
        case "NNP":
            return "单数专有名词";
        case "IN":
            return "介词或从属连词";
        case "RB":
            return "副词";
        case "EX":
            return "存在句";
        case "FW":
            return "外来词";
        case "JJ":
            // Label previously carried a stray leading space.
            return "形容词或序数词";
        case "JJR":
            return "形容词比较级";
        case "JJS":
            return "形容词最高级";
        case "LS":
            return "列表标记";
        case "MD":
            return "情态词";
        case "NN":
            return "单数名词或不可数名词";
        case "NNS":
            return "复数名词";
        case "NNPS":
            return "复数专有名词";
        case "PDT":
            return "前限定词";
        case "POS":
            return "所有格结束词";
        case "PRP":
            return "人称代词";
        case "PRP$":
            return "所有格代词";
        case "RBR":
            return "副词比较级";
        case "RBS":
            return "副词最高级";
        case "RP":
            return "小品词";
        case "SYM":
            return "符号";
        case "TO":
            return "to作为介词或不定式格式";
        case "UH":
            return "感叹词";
        case "VB":
            return "动词";
        case "VBD":
            return "动词过去式";
        case "VBG":
            return "动名词和现在分词";
        case "VBN":
            return "过去分词";
        case "VBP":
            return "动词非第三人称单数";
        case "WDT":
            return "疑问限定词";
        case "WP$":
            // Possessive wh-pronoun ("whose"); previously shared the PRP$ label.
            return "所有格疑问代词";
        case "WRB":
            // Wh-adverb ("where", "when"); previously shared the WP label.
            return "疑问副词";
        default:
            // Unknown tags are returned as-is so the caller still shows something.
            return p;
    }
}
/** * 该函数判断一个字符串是否包含标点符号(中文英文标点符号)。
* 原理是原字符串做一次清洗,清洗掉所有标点符号。
* 此时,如果原字符串包含标点符号,那么清洗后的长度和原字符串长度不同。返回true。
* 如果原字符串未包含标点符号,则清洗后长度不变。返回false。
*/
public boolean check(String s) {
boolean b = false;
String tmp = s;
tmp = tmp.replaceAll("\ p{P}", "");
if (s.length() != tmp.length()) {
b = true;
}
return b;
}
代码解读
未完成功能——语法树,依存句法
该功能在 Eclipse 环境(桌面 Java)中可以正常运行,但在 Android 上会崩溃。问题可能出在 Stanford CoreNLP 3.9.2 模型包(models jar)中某个文件的加载过程,目前尚未找到有效的解决方案。

下面是java中的代码
/**
 * Plain-Java demo: runs a CoreNLP pipeline that includes the {@code parse}
 * annotator on a fixed sentence and prints, for every sentence, its
 * constituency tree and its collapsed CC-processed dependency graph.
 */
public class BasicPipelineExample {
    public static void main(String[] args) {
        // Pipeline configuration; "parse" is the annotator that yields the tree.
        Properties settings = new Properties();
        settings.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse");
        StanfordCoreNLP nlp = new StanfordCoreNLP(settings);

        String text = "I love you.";
        Annotation doc = new Annotation(text);
        nlp.annotate(doc);

        for (CoreMap current : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
            // Constituency (syntax) tree of the sentence.
            Tree parseTree = current.get(TreeCoreAnnotations.TreeAnnotation.class);
            System.out.println("语法树:");
            String treeText = parseTree.toString();
            System.out.println(treeText);

            // Dependency graph of the sentence.
            SemanticGraph graph = current.get(
                    SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
            System.out.println("依存句法:");
            String graphText = graph.toString();
            System.out.println(graphText);
        }
    }
}
代码解读

最后一行在android中无法加载。

全部评论 (0)
还没有任何评论哟~
