package gov.nih.nlm.nls.nlp.nlsstrings;

import gov.nih.nlm.nls.nlp.textfeatures.TokenChars;
import gov.nih.nlm.nls.utils.StringUtils;
import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;
import java.util.StringTokenizer;

/* loaded from: input_file:gov/nih/nlm/nls/nlp/nlsstrings/MetamapTokenization.class */
/**
 * Static helpers for splitting text into tokens and for stripping English
 * possessive endings from individual tokens.
 *
 * <p>All methods are stateless. None of them accept {@code null}; passing
 * {@code null} results in a {@link NullPointerException}.
 */
public final class MetamapTokenization {
    /** Delimiter characters used by {@link #tokenizeTextUtterly(String)}; includes the apostrophe. */
    public static final String UTTER_TOKEN_DELIMITERS = " \t\n\r\f$@\\{|}:;~(),.-&/'+<>*^[]=#?%\"!_";

    /** A smaller delimiter set. It is not referenced by any method of this class. */
    public static final String TOKEN_DELIMITERS = " \t\n\r\f$|~";

    /** Delimiter characters used by {@link #normalizeText(String)}. */
    public static final String WORD_DELIMITERS = " \t\n\r\f$%\\{|~(),-&/";

    /**
     * Splits {@code str} on every character of {@link #UTTER_TOKEN_DELIMITERS}.
     * Delimiters are discarded and empty tokens are never produced.
     *
     * @param str the text to tokenize
     * @return a new mutable list of {@code String} tokens, in order of appearance
     */
    public static List tokenizeTextUtterly(String str) {
        List<String> tokens = new ArrayList<String>();
        StringTokenizer tokenizer = new StringTokenizer(str, UTTER_TOKEN_DELIMITERS);
        while (tokenizer.hasMoreTokens()) {
            tokens.add(tokenizer.nextToken());
        }
        return tokens;
    }

    /**
     * Tokenizes {@code str} with {@link #tokenizeTextUtterly(String)} and then passes
     * the tokens through {@link #removePossessivesAndNonwords(List)}.
     *
     * <p>NOTE(review): the apostrophe is one of the utter-token delimiters, so the
     * tokens reaching the possessive-stripping step can never end in {@code 's}.
     * The second step therefore looks like a no-op for this input - confirm whether
     * a different delimiter set was intended.
     *
     * @param str the text to tokenize
     * @return the list of {@code String} tokens
     */
    public static List tokenizeTextMM(String str) {
        return removePossessivesAndNonwords(tokenizeTextUtterly(str));
    }

    /**
     * Rebuilds {@code str} from its {@link #WORD_DELIMITERS}-separated words, writing
     * {@code TokenChars.SPACE_s} after every word (including the last one, so a
     * non-empty result ends with that separator).
     *
     * @param str the text to normalize
     * @return the words of {@code str}, each followed by {@code TokenChars.SPACE_s};
     *         empty if {@code str} contains no words
     */
    public static String normalizeText(String str) {
        StringBuilder normalized = new StringBuilder();
        // Delimiters are not requested from the tokenizer: the only thing the
        // previous implementation did with them was to skip them.
        StringTokenizer words = new StringTokenizer(str, WORD_DELIMITERS);
        while (words.hasMoreTokens()) {
            normalized.append(words.nextToken()).append(TokenChars.SPACE_s);
        }
        return normalized.toString();
    }

    /**
     * Strips a trailing possessive marker from {@code str}.
     *
     * <ul>
     *   <li>If {@code str} ends with {@code 's} and the character before the
     *       apostrophe is a letter or digit, the {@code 's} is removed.</li>
     *   <li>Otherwise, if {@code str} ends with {@code TokenChars.SINGLE_QUOTE_S}
     *       directly preceded by {@code s}, that final character is removed.</li>
     *   <li>In every other case {@code str} is returned unchanged.</li>
     * </ul>
     *
     * @param str the token to process
     * @return {@code str} without its possessive ending, or {@code str} itself
     */
    public static String removePossessives(String str) {
        int length = str.length();
        if (str.endsWith("'s")) {
            int apostrophe = length - 2;
            // A bare "'s", or one that follows punctuation, is left alone.
            if (apostrophe > 0 && Character.isLetterOrDigit(str.charAt(apostrophe - 1))) {
                return str.substring(0, apostrophe);
            }
            return str;
        }
        int lastQuote = str.lastIndexOf(TokenChars.SINGLE_QUOTE_S);
        // lastQuote > 0 guarantees both that a quote was found and that a
        // preceding character exists to compare against 's'.
        if (lastQuote > 0 && lastQuote == length - 1 && str.charAt(lastQuote - 1) == 's') {
            return str.substring(0, lastQuote);
        }
        return str;
    }

    /**
     * Tells whether {@code str} consists solely of letters and apostrophes.
     *
     * @param str the token to test
     * @return {@code true} if every character is a letter or {@code '};
     *         also {@code true} for the empty string
     */
    public static boolean isWsWord(String str) {
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            // Digits need no separate test: they are neither letters nor apostrophes.
            if (!Character.isLetter(c) && c != '\'') {
                return false;
            }
        }
        return true;
    }

    /**
     * Applies {@link #removePossessives(String)} to {@code str} only when
     * {@link #isWsWord(String)} accepts it; any other token is returned unchanged.
     *
     * @param str the token to process
     * @return the processed token
     */
    public static String removePossessivesAndNonwords(String str) {
        return isWsWord(str) ? removePossessives(str) : str;
    }

    /**
     * Replaces every element of {@code list}, in place, with the result of
     * {@link #removePossessivesAndNonwords(String)}.
     *
     * <p>Elements are overwritten with {@link ListIterator#set(Object)}, which keeps
     * the pass linear and also works for fixed-size lists that support {@code set}.
     *
     * @param list a list whose elements are all {@code String}s; it is modified
     * @return the same {@code list} instance
     * @throws ClassCastException if an element is not a {@code String}
     */
    // The signature stays raw so existing callers keep compiling; the elements are
    // required to be Strings, exactly as the previous explicit cast demanded.
    @SuppressWarnings({"rawtypes", "unchecked"})
    public static List removePossessivesAndNonwords(List list) {
        ListIterator<String> tokens = list.listIterator();
        while (tokens.hasNext()) {
            tokens.set(removePossessivesAndNonwords(tokens.next()));
        }
        return list;
    }

    /**
     * Command-line demo: joins all arguments with {@code TokenChars.SPACE_s} and
     * prints the result of both tokenizers for that text. Prints nothing when no
     * arguments are given.
     *
     * @param strArr the words of the text to tokenize
     */
    public static void main(String[] strArr) {
        if (strArr.length == 0) {
            return;
        }
        StringBuilder joined = new StringBuilder(strArr[0]);
        for (int i = 1; i < strArr.length; i++) {
            joined.append(TokenChars.SPACE_s).append(strArr[i]);
        }
        String text = joined.toString();
        System.out.println(text + " -> tokenize_text_mm -> " + StringUtils.list(tokenizeTextMM(text)));
        System.out.println(text + " -> tokenize_text_utterly -> " + StringUtils.list(tokenizeTextUtterly(text)));
    }
}
