package org.elasticsearch.index.analysis;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.elasticsearch.analysis.PinyinConfig;
import org.nlpcn.commons.lang.pinyin.Pinyin;

/* loaded from: input_file:org/elasticsearch/index/analysis/PinyinTokenizer.class */
public class PinyinTokenizer extends Tokenizer {
    private static final int DEFAULT_BUFFER_SIZE = 256;
    private final CharTermAttribute termAtt;
    private boolean done;
    private boolean processedCandidate;
    private boolean processedSortCandidate;
    private boolean processedFirstLetter;
    private boolean processedFullPinyinLetter;
    private boolean processedOriginal;
    protected int position;
    protected int lastOffset;
    private OffsetAttribute offsetAtt;
    private PositionIncrementAttribute positionAttr;
    private PinyinConfig config;
    ArrayList<TermItem> candidate;
    protected int candidateOffset;
    private HashSet<String> termsFilter;
    StringBuilder firstLetters;
    StringBuilder fullPinyinLetters;
    private int lastIncrementPosition;
    String source;

    public PinyinTokenizer(PinyinConfig pinyinConfig) {
        this(DEFAULT_BUFFER_SIZE);
        this.config = pinyinConfig;
        if (!pinyinConfig.keepFirstLetter && !pinyinConfig.keepSeparateFirstLetter && !pinyinConfig.keepFullPinyin && !pinyinConfig.keepJoinedFullPinyin) {
            throw new ConfigErrorException("pinyin config error, can't disable separate_first_letter, first_letter and full_pinyin at the same time.");
        }
        this.candidate = new ArrayList<>();
        this.termsFilter = new HashSet<>();
        this.firstLetters = new StringBuilder();
        this.fullPinyinLetters = new StringBuilder();
    }

    public PinyinTokenizer(int i) {
        this.termAtt = addAttribute(CharTermAttribute.class);
        this.done = false;
        this.processedCandidate = false;
        this.processedSortCandidate = false;
        this.processedFirstLetter = false;
        this.processedFullPinyinLetter = false;
        this.processedOriginal = false;
        this.position = 0;
        this.lastOffset = 0;
        this.offsetAtt = addAttribute(OffsetAttribute.class);
        this.positionAttr = addAttribute(PositionIncrementAttribute.class);
        this.candidateOffset = 0;
        this.lastIncrementPosition = 0;
        this.termAtt.resizeBuffer(i);
    }

    void addCandidate(TermItem termItem) {
        String str = termItem.term;
        if (this.config.lowercase) {
            str = str.toLowerCase();
        }
        if (this.config.trimWhitespace) {
            str = str.trim();
        }
        termItem.term = str;
        if (str.length() == 0) {
            return;
        }
        String str2 = str + termItem.position;
        if (this.config.removeDuplicateTerm) {
            str2 = str;
        }
        if (this.termsFilter.contains(str2)) {
            return;
        }
        this.termsFilter.add(str2);
        this.candidate.add(termItem);
    }

    void setTerm(String str, int i, int i2, int i3) {
        if (this.config.lowercase) {
            str = str.toLowerCase();
        }
        if (this.config.trimWhitespace) {
            str = str.trim();
        }
        if (str.length() == 0) {
            return;
        }
        this.termAtt.setEmpty();
        this.termAtt.append(str);
        if (i < 0) {
            i = 0;
        }
        if (i2 < i) {
            i2 = i + str.length();
        }
        if (!this.config.ignorePinyinOffset) {
            this.offsetAtt.setOffset(correctOffset(i), correctOffset(i2));
        }
        int i4 = i3 - this.lastIncrementPosition;
        if (i4 < 0) {
            i4 = 0;
        }
        this.positionAttr.setPositionIncrement(i4);
        this.lastIncrementPosition = i3;
    }

    public final boolean incrementToken() throws IOException {
        clearAttributes();
        if (this.done) {
            return false;
        }
        if (!this.processedCandidate) {
            this.processedCandidate = true;
            int i = 0;
            char[] buffer = this.termAtt.buffer();
            while (true) {
                int read = this.input.read(buffer, i, buffer.length - i);
                if (read == -1) {
                    break;
                }
                i += read;
                if (i == buffer.length) {
                    buffer = this.termAtt.resizeBuffer(1 + buffer.length);
                }
            }
            this.termAtt.setLength(i);
            this.source = this.termAtt.toString();
            List pinyin = Pinyin.pinyin(this.source);
            if (pinyin.size() == 0) {
                return false;
            }
            StringBuilder sb = new StringBuilder();
            int i2 = 0;
            int i3 = 0;
            this.position = 0;
            for (int i4 = 0; i4 < this.source.length(); i4++) {
                char charAt = this.source.charAt(i4);
                if (charAt < 128) {
                    if (sb.length() <= 0) {
                        i2 = i4 + 1;
                    }
                    if ((charAt > '`' && charAt < '{') || ((charAt > '@' && charAt < '[') || (charAt > '/' && charAt < ':'))) {
                        if (this.config.keepNoneChinese && this.config.keepNoneChinese) {
                            if (this.config.keepNoneChineseTogether) {
                                sb.append(charAt);
                                i3++;
                            } else {
                                addCandidate(new TermItem(String.valueOf(charAt), i4, i4 + 1, i2));
                            }
                        }
                        if (this.config.keepNoneChineseInFirstLetter) {
                            this.firstLetters.append(charAt);
                        }
                        if (this.config.keepNoneChineseInJoinedFullPinyin) {
                            this.fullPinyinLetters.append(charAt);
                        }
                    }
                } else {
                    if (sb.length() > 0) {
                        i3 = parseBuff(sb, i3, i2);
                    }
                    String str = (String) pinyin.get(i4);
                    if (str != null && str.length() > 0) {
                        this.position++;
                        this.firstLetters.append(str.charAt(0));
                        if (this.config.keepSeparateFirstLetter & (str.length() > 1)) {
                            addCandidate(new TermItem(String.valueOf(str.charAt(0)), i4, i4 + 1, this.position));
                        }
                        if (this.config.keepFullPinyin) {
                            addCandidate(new TermItem(str, i4, i4 + 1, this.position));
                        }
                        if (this.config.keepJoinedFullPinyin) {
                            this.fullPinyinLetters.append(str);
                        }
                    }
                }
                this.lastOffset = i4;
            }
            if (sb.length() > 0) {
                parseBuff(sb, i3, i2);
            }
        }
        if (this.config.keepOriginal && !this.processedOriginal) {
            this.processedOriginal = true;
            addCandidate(new TermItem(this.source, 0, this.source.length(), 1));
        }
        if (this.config.keepJoinedFullPinyin && !this.processedFullPinyinLetter && this.fullPinyinLetters.length() > 0) {
            this.processedFullPinyinLetter = true;
            addCandidate(new TermItem(this.fullPinyinLetters.toString(), 0, this.source.length(), 1));
            this.fullPinyinLetters.setLength(0);
        }
        if (this.config.keepFirstLetter && this.firstLetters.length() > 0 && !this.processedFirstLetter) {
            this.processedFirstLetter = true;
            String sb2 = (this.firstLetters.length() <= this.config.LimitFirstLetterLength || this.config.LimitFirstLetterLength <= 0) ? this.firstLetters.toString() : this.firstLetters.substring(0, this.config.LimitFirstLetterLength);
            if (this.config.lowercase) {
                sb2 = sb2.toLowerCase();
            }
            if (!this.config.keepSeparateFirstLetter || sb2.length() > 1) {
                addCandidate(new TermItem(sb2, 0, sb2.length(), 1));
            }
        }
        if (!this.processedSortCandidate) {
            this.processedSortCandidate = true;
            Collections.sort(this.candidate);
        }
        if (this.candidateOffset >= this.candidate.size()) {
            this.done = true;
            return false;
        }
        TermItem termItem = this.candidate.get(this.candidateOffset);
        this.candidateOffset++;
        setTerm(termItem.term, termItem.startOffset, termItem.endOffset, termItem.position);
        return true;
    }

    private int parseBuff(StringBuilder sb, int i, int i2) {
        int i3;
        int length;
        if (this.config.keepNoneChinese) {
            if (this.config.noneChinesePinyinTokenize) {
                List<String> walk = PinyinAlphabetTokenizer.walk(sb.toString());
                int i4 = (this.lastOffset - i) + 1;
                for (int i5 = 0; i5 < walk.size(); i5++) {
                    String str = walk.get(i5);
                    if (this.config.fixedPinyinOffset) {
                        i3 = i4;
                        length = 1;
                    } else {
                        i3 = i4;
                        length = str.length();
                    }
                    int i6 = i3 + length;
                    int i7 = this.position + 1;
                    this.position = i7;
                    addCandidate(new TermItem(walk.get(i5), i4, i6, i7));
                    i4 = i6;
                }
            } else if (this.config.keepFirstLetter || this.config.keepSeparateFirstLetter || this.config.keepFullPinyin || !this.config.keepNoneChineseInJoinedFullPinyin) {
                String sb2 = sb.toString();
                int i8 = this.lastOffset - i;
                int i9 = this.lastOffset;
                int i10 = this.position + 1;
                this.position = i10;
                addCandidate(new TermItem(sb2, i8, i9, i10));
            }
        }
        sb.setLength(0);
        return 0;
    }

    public final void end() throws IOException {
        super.end();
    }

    public void reset() throws IOException {
        super.reset();
        this.position = 0;
        this.candidateOffset = 0;
        this.done = false;
        this.processedCandidate = false;
        this.processedFirstLetter = false;
        this.processedFullPinyinLetter = false;
        this.processedOriginal = false;
        this.processedSortCandidate = false;
        this.firstLetters.setLength(0);
        this.fullPinyinLetters.setLength(0);
        this.termsFilter.clear();
        this.candidate.clear();
        this.source = null;
        this.lastIncrementPosition = 0;
    }
}
