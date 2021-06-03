Provide a high level wrapper for kuromoji.js.
npm install kuromojin
Exports two APIs.
getTokenizer() returns
a Promise that resolves with kuromoji.js's
tokenizer instance.
tokenize() returns
a Promise that resolves with the analyzed tokens.
import {tokenize, getTokenizer} from "kuromojin";
getTokenizer().then(tokenizer => {
// kuromoji.js's `tokenizer` instance
});
tokenize(text).then(tokens => {
console.log(tokens)
/*
[ {
word_id: 509800, // 辞書内での単語ID
word_type: 'KNOWN', // 単語タイプ(辞書に登録されている単語ならKNOWN, 未知語ならUNKNOWN)
word_position: 1, // 単語の開始位置
surface_form: '黒文字', // 表層形
pos: '名詞', // 品詞
pos_detail_1: '一般', // 品詞細分類1
pos_detail_2: '*', // 品詞細分類2
pos_detail_3: '*', // 品詞細分類3
conjugated_type: '*', // 活用型
conjugated_form: '*', // 活用形
basic_form: '黒文字', // 基本形
reading: 'クロモジ', // 読み
pronunciation: 'クロモジ' // 発音
} ]
*/
});
If
window.kuromojin.dicPath is defined, kuromojin uses it as the default dictionary path.
import {getTokenizer} from "kuromojin";
// Affects all modules that use kuromojin.
window.kuromojin = {
dicPath: "https://cdn.jsdelivr.net/npm/kuromoji@0.1.2/dict"
};
// this `getTokenizer` call uses "https://cdn.jsdelivr.net/npm/kuromoji@0.1.2/dict"
getTokenizer();
// ===
getTokenizer({dicPath: "https://cdn.jsdelivr.net/npm/kuromoji@0.1.2/dict"})
📝 Test dictionary URL
kuromojin v1.1.0 exports
tokenize as the default function.
kuromojin v2.0.0 removes the default function.
import kuromojin from "kuromojin";
// kuromojin === tokenize
Recommended: use
import {tokenize} from "kuromojin" instead of the default import.
import {tokenize} from "kuromojin";
kuromojin pins kuromoji's version.
This aims to dedupe kuromoji's dictionary. The dictionary is large, so pinning avoids installing duplicate copies of it.
