import { builder, IpadicFeatures, Tokenizer } from 'kuromoji';

/**
 * Splits a sentence into its overlapping substrings of length `n`.
 *
 * Lengths and offsets are counted in UTF-16 code units, because the windows
 * are cut with `String.prototype.slice`.
 *
 * @param sentence Text to split.
 * @param n Window length.
 * @returns Every window of `n` code units in left-to-right order, or a
 *   single-element array holding `sentence` itself when it is not longer than `n`.
 */
export const nGram = (sentence: string, n: number): string[] => {
  // Too short to slide a window over: the sentence is its own only gram.
  if (sentence.length <= n) {
    return [sentence];
  }
  const windowCount = 1 + sentence.length - n;
  // keys() yields 0..windowCount-1, i.e. the start offset of every window.
  return Array.from(Array(windowCount).keys(), (start) => sentence.slice(start, start + n));
};

/**
 * Encodes `left` as a 0/1 vector describing which of its entries also occur in `right`.
 *
 * @param left Grams that define the vector's positions (one element per gram, duplicates kept).
 * @param right Grams to test membership against.
 * @returns An array with the same length as `left`; position `i` is 1 when
 *   `left[i]` is present in `right`, otherwise 0.
 */
export const nGramToVector = (left: string[], right: string[]): number[] => {
  // Build the lookup once so each membership test is O(1) instead of a scan of `right`.
  const rightGrams = new Set(right);
  return left.map(gram => (rightGrams.has(gram) ? 1 : 0));
};

/**
 * Builds a kuromoji tokenizer and exposes the callback-based build as a promise.
 *
 * The dictionary is read from `public/dict` when `NODE_ENV` is `'test'` and
 * from `/dict` otherwise.
 *
 * @returns A promise that resolves with the built tokenizer, or rejects with
 *   the error passed to the build callback.
 */
export const getTokenizer = (): Promise<Tokenizer<IpadicFeatures>> =>
  new Promise((resolve, reject) => {
    const dicPath = process.env.NODE_ENV === 'test' ? 'public/dict' : '/dict';
    builder({ dicPath }).build((err, tokenizer) => {
      if (err) {
        reject(err);
        // Stop here so a failed build never also reaches resolve().
        return;
      }
      resolve(tokenizer);
    });
  });

/**
 * Normalizes a sentence by applying four conversions in this order:
 *   1. full-width alphanumerics -> half-width alphanumerics
 *   2. half-width katakana      -> full-width katakana
 *   3. tokens known to the tokenizer -> their dictionary reading
 *   4. katakana                 -> hiragana
 *
 * @param sentence Text to normalize.
 * @param tokenizer kuromoji tokenizer used for step 3.
 * @returns The normalized text.
 */
export const normalizeSentence = (sentence: string, tokenizer: Tokenizer<IpadicFeatures>): string => {
  const halfWidthAlphanumeric = fullAlphanumericToHalfAlphanumeric(sentence);
  const fullWidthKatakana = halfKataToFullKata(halfWidthAlphanumeric);
  const readings = kanjiToKata(fullWidthKatakana, tokenizer);
  return kataToHira(readings);
};

/**
 * Replaces full-width Latin letters and digits (Ａ-Ｚ, ａ-ｚ, ０-９) with their
 * half-width counterparts. All other characters are left untouched.
 *
 * @param sentence Text to convert.
 * @returns The converted text.
 */
const fullAlphanumericToHalfAlphanumeric = (sentence: string) => {
  // Distance between a matched full-width character and its half-width form.
  const widthOffset = 0xFEE0;
  const toHalfWidth = (fullWidthChar: string) =>
    String.fromCharCode(fullWidthChar.charCodeAt(0) - widthOffset);
  return sentence.replace(/[Ａ-Ｚａ-ｚ０-９]/g, toHalfWidth);
};

// Half-width katakana and half-width Japanese punctuation mapped to their
// full-width forms. The two-character voiced / semi-voiced sequences come first
// so the alternation built from the keys tries them before the bare base character.
const HALF_TO_FULL_KATA_MAP: Readonly<Record<string, string>> = {
  'ｶﾞ': 'ガ', 'ｷﾞ': 'ギ', 'ｸﾞ': 'グ', 'ｹﾞ': 'ゲ', 'ｺﾞ': 'ゴ',
  'ｻﾞ': 'ザ', 'ｼﾞ': 'ジ', 'ｽﾞ': 'ズ', 'ｾﾞ': 'ゼ', 'ｿﾞ': 'ゾ',
  'ﾀﾞ': 'ダ', 'ﾁﾞ': 'ヂ', 'ﾂﾞ': 'ヅ', 'ﾃﾞ': 'デ', 'ﾄﾞ': 'ド',
  'ﾊﾞ': 'バ', 'ﾋﾞ': 'ビ', 'ﾌﾞ': 'ブ', 'ﾍﾞ': 'ベ', 'ﾎﾞ': 'ボ',
  'ﾊﾟ': 'パ', 'ﾋﾟ': 'ピ', 'ﾌﾟ': 'プ', 'ﾍﾟ': 'ペ', 'ﾎﾟ': 'ポ',
  'ｳﾞ': 'ヴ', 'ﾜﾞ': 'ヷ', 'ｦﾞ': 'ヺ',
  'ｱ': 'ア', 'ｲ': 'イ', 'ｳ': 'ウ', 'ｴ': 'エ', 'ｵ': 'オ',
  'ｶ': 'カ', 'ｷ': 'キ', 'ｸ': 'ク', 'ｹ': 'ケ', 'ｺ': 'コ',
  'ｻ': 'サ', 'ｼ': 'シ', 'ｽ': 'ス', 'ｾ': 'セ', 'ｿ': 'ソ',
  'ﾀ': 'タ', 'ﾁ': 'チ', 'ﾂ': 'ツ', 'ﾃ': 'テ', 'ﾄ': 'ト',
  'ﾅ': 'ナ', 'ﾆ': 'ニ', 'ﾇ': 'ヌ', 'ﾈ': 'ネ', 'ﾉ': 'ノ',
  'ﾊ': 'ハ', 'ﾋ': 'ヒ', 'ﾌ': 'フ', 'ﾍ': 'ヘ', 'ﾎ': 'ホ',
  'ﾏ': 'マ', 'ﾐ': 'ミ', 'ﾑ': 'ム', 'ﾒ': 'メ', 'ﾓ': 'モ',
  'ﾔ': 'ヤ', 'ﾕ': 'ユ', 'ﾖ': 'ヨ',
  'ﾗ': 'ラ', 'ﾘ': 'リ', 'ﾙ': 'ル', 'ﾚ': 'レ', 'ﾛ': 'ロ',
  'ﾜ': 'ワ', 'ｦ': 'ヲ', 'ﾝ': 'ン',
  'ｧ': 'ァ', 'ｨ': 'ィ', 'ｩ': 'ゥ', 'ｪ': 'ェ', 'ｫ': 'ォ',
  'ｯ': 'ッ', 'ｬ': 'ャ', 'ｭ': 'ュ', 'ｮ': 'ョ',
  '｡': '。', '､': '、', 'ｰ': 'ー', '｢': '「', '｣': '」', '･': '・'
};

// Built once at module load instead of on every call.
const HALF_KATA_PATTERN = new RegExp('(' + Object.keys(HALF_TO_FULL_KATA_MAP).join('|') + ')', 'g');

/**
 * Converts half-width katakana and half-width Japanese punctuation to their
 * full-width forms.
 *
 * A base character followed by a half-width voiced (U+FF9E) or semi-voiced
 * (U+FF9F) sound mark is merged into the single precomposed full-width
 * katakana when the table has an entry for that pair. Any sound mark left
 * over afterwards is widened to its standalone full-width form
 * (U+309B / U+309C).
 *
 * @param sentence Text to convert.
 * @returns The text with half-width katakana replaced by full-width katakana.
 */
const halfKataToFullKata = (sentence: string) =>
  sentence
    .replace(HALF_KATA_PATTERN, (match) => HALF_TO_FULL_KATA_MAP[match] ?? match)
    // Stray half-width voiced mark -> full-width voiced mark.
    .replace(/\uFF9E/g, '\u309B')
    // Stray half-width semi-voiced mark -> full-width semi-voiced mark.
    .replace(/\uFF9F/g, '\u309C');

// Note: Use getTokenizer in this file to get the tokenizer.
/**
 * Rebuilds a sentence from tokenizer output, replacing each token whose
 * `word_type` is `'KNOWN'` with its dictionary `reading` (presumably katakana
 * for the IPADIC dictionary — confirm against kuromoji's docs).
 *
 * Tokens that are not `'KNOWN'`, and known tokens that carry no reading, are
 * kept as their original surface form so that no input text is dropped.
 *
 * @param sentence Text to convert.
 * @param tokenizer kuromoji tokenizer used to split the sentence.
 * @returns The concatenated readings / surface forms, in token order.
 */
const kanjiToKata = (sentence: string, tokenizer: Tokenizer<IpadicFeatures>) =>
  tokenizer.tokenize(sentence)
    .map(token => (token.word_type === 'KNOWN' ? token.reading ?? token.surface_form : token.surface_form))
    .join('')

/**
 * Converts full-width katakana to hiragana.
 *
 * Only U+30A1 (ァ) through U+30F6 (ヶ) are shifted: these are the katakana whose
 * hiragana counterpart sits exactly 0x60 code points lower. ヷ ヸ ヹ ヺ
 * (U+30F7–U+30FA) have no single-code-point hiragana form — shifting them would
 * produce unassigned code points or bare combining sound marks — so they, like
 * every other character, are left unchanged.
 *
 * @param sentence Text to convert.
 * @returns The text with katakana replaced by hiragana.
 */
const kataToHira = (sentence: string) =>
  sentence.replace(/[\u30A1-\u30F6]/g, ch => String.fromCharCode(ch.charCodeAt(0) - 0x60));
 