package gov.nih.nlm.nls.nlp.tokenizer;

import gov.nih.nlm.nls.nlp.textfeatures.Category;
import gov.nih.nlm.nls.utils.Debug;
import gov.nih.nlm.nls.utils.GlobalBehavior;
import gov.nih.nlm.nls.utils.U;
import java.util.StringTokenizer;

/* loaded from: input_file:gov/nih/nlm/nls/nlp/tokenizer/TokenizerFactory.class */
/**
 * Factory that selects a {@link TokenizerInterface} implementation from the
 * {@code --inputType} setting of a {@link GlobalBehavior}, optionally by
 * inspecting the beginning of the input text when no usable type is configured.
 */
public class TokenizerFactory {
    private static final int DT12920 = 12920;
    private static final int DF12921 = 12921;

    /** Maximum number of leading input characters inspected when auto-detecting. */
    private static final int PEEK_LENGTH = 100;

    /**
     * Builds a tokenizer from the configured {@code --inputType} without looking
     * at the input text.
     *
     * <p>A missing type or {@code autodetect} (any letter case) yields a
     * {@code FreeTextTokenizer}; the remaining recognized types are
     * {@code medlineCitations}, {@code fieldedText}, {@code mrcon} and
     * {@code freeText}, all matched case-insensitively.
     *
     * @param globalBehavior configuration source; also handed to the tokenizer constructor
     * @return the selected tokenizer, or {@code null} when the configured type is not recognized
     * @throws Exception if a tokenizer constructor fails
     */
    public static TokenizerInterface build(GlobalBehavior globalBehavior) throws Exception {
        Debug.dfname("build:WithOutPeek");
        Debug.denter(DT12920);
        String inputType = globalBehavior.getString("--inputType");
        Debug.dpr(DF12921, "The input type = " + inputType);

        TokenizerInterface tokenizer;
        if (inputType == null) {
            // No type configured: default to free text.
            Debug.dpr(DF12921, "Setting the text Type to free text");
            tokenizer = new FreeTextTokenizer(globalBehavior);
        } else if (inputType.equalsIgnoreCase("autodetect")) {
            // Without a text sample there is nothing to detect from, so fall back to free text.
            debugForMMTx(globalBehavior, "Setting the text Type to free text");
            tokenizer = new FreeTextTokenizer(globalBehavior);
        } else {
            tokenizer = buildForDeclaredType(globalBehavior, inputType);
        }

        Debug.dexit(DT12920);
        return tokenizer;
    }

    /**
     * Builds a tokenizer, auto-detecting the input format from the start of
     * {@code str} when {@code --inputType} is unset, equal to {@code "NULL"}, or
     * {@code autodetect} (any letter case). Otherwise the configured type is used
     * exactly as in {@link #build(GlobalBehavior)}.
     *
     * <p>When detection picks MEDLINE, fielded or free text, the decision is
     * written back via {@code globalBehavior.set("--inputType=...")}.
     *
     * @param globalBehavior configuration source; may be updated with the detected type
     * @param str            the input text to peek at; may be {@code null} or empty,
     *                       in which case free text is assumed
     * @return the selected tokenizer, or {@code null} when an explicitly configured
     *         type is not recognized
     * @throws Exception if a tokenizer constructor fails
     */
    public static TokenizerInterface build(GlobalBehavior globalBehavior, String str) throws Exception {
        Debug.dfname("build:WithPeek");
        Debug.denter(DT12920);
        String inputType = globalBehavior.getString("--inputType");

        TokenizerInterface tokenizer;
        if (inputType == null || inputType.equals("NULL") || inputType.equalsIgnoreCase("autodetect")) {
            Debug.dpr(DF12921, "About to peek, The input type == " + inputType);
            tokenizer = detectFromSample(globalBehavior, str);
        } else {
            Debug.dpr(DF12921, "The input type == " + inputType);
            tokenizer = buildForDeclaredType(globalBehavior, inputType);
        }

        Debug.dexit(DT12920);
        return tokenizer;
    }

    /**
     * Chooses a tokenizer by inspecting at most {@link #PEEK_LENGTH} leading
     * characters of {@code str}. Never returns {@code null}: free text is the fallback.
     */
    private static TokenizerInterface detectFromSample(GlobalBehavior globalBehavior, String str) throws Exception {
        String sample = null;
        String firstToken = null;
        if (str != null && str.length() > 0) {
            // Take the whole text when it is shorter than the peek window
            // (previously the last character of short inputs was cut off).
            sample = str.substring(0, Math.min(PEEK_LENGTH, str.length()));
            Debug.dpr(DF12921, "The first 100 chars = |" + sample + Category.CATEGORY_BAR2);
            StringTokenizer words = new StringTokenizer(sample);
            if (words.hasMoreTokens()) {
                firstToken = words.nextToken();
            }
        }

        if (firstToken != null) {
            // Dashes are treated as spaces before the first word is looked up as a heading.
            String heading = firstToken.replace('-', ' ').trim();
            if (CitationSectionHeadings.get(heading) != null) {
                TokenizerInterface medline = new PubMedMEDLINEFormatTokenizer(globalBehavior);
                globalBehavior.set("--inputType=medlineCitations");
                Debug.dpr(DF12921, "Setting the text type medLine citations");
                return medline;
            }
            if (heading.equals("1:")) {
                // Note: this branch does not record an --inputType on globalBehavior.
                return new PubMedCitationFormatTokenizer(globalBehavior);
            }
        }

        // "> 0" is kept from the original: a separator at the very first position does not count.
        if (sample != null && sample.indexOf(Category.CATEGORY_BAR2) > 0) {
            TokenizerInterface delimited = new DelimitedTextTokenizer(globalBehavior);
            globalBehavior.set("--inputType=fieldedText");
            Debug.dpr(DF12921, "Setting the text Type to delimited text");
            return delimited;
        }

        if (sample == null || !looksLikeTermFile(sample)) {
            TokenizerInterface freeText = new FreeTextTokenizer(globalBehavior);
            globalBehavior.set("--inputType=freeText");
            Debug.dpr(DF12921, "Setting the text Type to free text");
            return freeText;
        }

        // Term file: free text tokenizer switched to interactive mode. The type is
        // set before construction here, matching the original statement order.
        Debug.dpr(DF12921, "Setting the text Type to term file text");
        globalBehavior.set("--inputType=freeText");
        TokenizerInterface termFile = new FreeTextTokenizer(globalBehavior);
        termFile.setInteractiveMode();
        return termFile;
    }

    /**
     * Maps an explicitly configured, non-null input type to its tokenizer.
     * Matching is case-insensitive.
     *
     * @return the tokenizer, or {@code null} (after a warning) for an unrecognized type
     */
    private static TokenizerInterface buildForDeclaredType(GlobalBehavior globalBehavior, String inputType)
            throws Exception {
        if (inputType.equalsIgnoreCase("medlineCitations")) {
            debugForMMTx(globalBehavior, "Setting the text type to medLine citations");
            return new PubMedMEDLINEFormatTokenizer(globalBehavior);
        }
        if (inputType.equalsIgnoreCase("fieldedText") || inputType.equalsIgnoreCase("mrcon")) {
            debugForMMTx(globalBehavior, "Setting the text Type to delimited text");
            return new DelimitedTextTokenizer(globalBehavior);
        }
        if (inputType.equalsIgnoreCase("freeText")) {
            debugForMMTx(globalBehavior, "Setting the text Type to free text");
            return new FreeTextTokenizer(globalBehavior);
        }
        // Callers have always received null for unknown types; keep that, but make it visible.
        Debug.warning("Unrecognized --inputType, no tokenizer built: " + inputType);
        return null;
    }

    /** Emits {@code message} on the debug channel only when the application name is "MMTx". */
    private static void debugForMMTx(GlobalBehavior globalBehavior, String message) {
        // Constant-first equals tolerates a null application name.
        if ("MMTx".equals(globalBehavior.getApplicationName())) {
            Debug.dpr(DF12921, message);
        }
    }

    /**
     * Reports whether the sample contains an empty line, i.e. two consecutive
     * line terminators: {@code "\r\n\r\n"} when {@code U.isWindows()} is true,
     * {@code "\n\n"} otherwise.
     *
     * @param str the text sample to inspect; must not be {@code null}
     * @return {@code true} if an empty line occurs anywhere in {@code str}
     */
    private static boolean looksLikeTermFile(String str) {
        Debug.dfname("looksLikeTermFile");
        Debug.denter(DT12920);
        String emptyLine = U.isWindows() ? "\r\n\r\n" : "\n\n";
        boolean found = str.indexOf(emptyLine) >= 0;
        Debug.dexit(DT12920);
        return found;
    }
}
