package edu.cmu.meteor.util;

import java.util.Locale;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/cmu/meteor/util/Normalizer.class */
/**
 * Static text-normalization helper.
 *
 * <p>{@link #normalizeLine(String, int, boolean)} cleans a line of text
 * (removes {@code <skipped>} markers, joins hyphenated line breaks, unescapes
 * a handful of SGML/XML entities, folds typographic quotes to ASCII) and, for
 * selected values of its integer argument, lowercases it and either tokenizes
 * or strips punctuation. Whitespace is always collapsed and trimmed at the end.
 *
 * <p>All patterns are compiled once; {@link Pattern} instances are immutable
 * and can be shared.
 */
public class Normalizer {
    // --- Clean-up patterns (applied to every line) ---------------------------
    private static final Pattern SKIPPED = Pattern.compile("<skipped>", Pattern.CASE_INSENSITIVE);
    private static final Pattern DASH_NEWLINE = Pattern.compile("-\\n");
    private static final Pattern NEWLINE = Pattern.compile("\\n");
    private static final Pattern ENTITY_QUOT = Pattern.compile("&quot;", Pattern.CASE_INSENSITIVE);
    private static final Pattern ENTITY_AMP = Pattern.compile("&amp;", Pattern.CASE_INSENSITIVE);
    private static final Pattern ENTITY_LT = Pattern.compile("&lt;", Pattern.CASE_INSENSITIVE);
    private static final Pattern ENTITY_GT = Pattern.compile("&gt;", Pattern.CASE_INSENSITIVE);
    private static final Pattern CURLY_SINGLE_QUOTES = Pattern.compile("[‘’‚‛]");
    private static final Pattern CURLY_DOUBLE_QUOTES = Pattern.compile("[“”„‟]");

    // --- Punctuation tokenization patterns (keep-punctuation mode) -----------
    // Symbol class; note that it includes the space character and excludes
    // apostrophe, comma, hyphen and period, which are handled separately.
    private static final Pattern SYMBOLS = Pattern.compile("([\\{-\\~\\[-\\` -\\&\\(-\\+\\:-\\@\\/])");
    // Period/comma preceded by a non-digit.
    private static final Pattern NON_DIGIT_THEN_SEPARATOR = Pattern.compile("([^0-9])([\\.,])");
    // Period/comma followed by a non-digit.
    private static final Pattern SEPARATOR_THEN_NON_DIGIT = Pattern.compile("([\\.,])([^0-9])");
    // Hyphen directly after a digit.
    private static final Pattern DIGIT_THEN_DASH = Pattern.compile("([0-9])(-)");
    private static final Pattern UNDERSCORE_OR_HASH = Pattern.compile("[_#]");

    // --- Strip-punctuation mode ---------------------------------------------
    private static final Pattern NON_ALPHANUMERIC = Pattern.compile("[^a-z0-9À-ÿ ]");

    // --- Whitespace normalization -------------------------------------------
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");
    private static final Pattern LEADING_WHITESPACE = Pattern.compile("^\\s+");
    private static final Pattern TRAILING_WHITESPACE = Pattern.compile("\\s+$");

    // --- Replacement strings -------------------------------------------------
    private static final String SPACE = " ";
    private static final String QUOTE = "\"";
    private static final String AMP = "&";
    private static final String LT = "<";
    private static final String GT = ">";
    private static final String APOS = "'";
    private static final String PAD_BOTH = " $1 ";
    private static final String SPLIT_BEFORE_SECOND = "$1 $2 ";
    private static final String SPLIT_BEFORE_FIRST = " $1 $2";

    /**
     * Normalizes a single line of text.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Clean-up: drop {@code <skipped>}, join {@code "-\n"} line breaks,
     *       turn remaining newlines into spaces, unescape {@code &quot; &amp;
     *       &lt; &gt;}, and fold typographic quotes to {@code '} / {@code "}.</li>
     *   <li>If {@code i} is in {@code 0..4}: lowercase the cleaned text
     *       (locale-independently) and either tokenize punctuation
     *       ({@code z == true}) or replace every character outside
     *       {@code [a-z0-9À-ÿ ]} with a space ({@code z == false}).</li>
     *   <li>Collapse whitespace runs to one space and trim both ends.</li>
     * </ol>
     *
     * <p>The lowercase step operates on the <em>cleaned</em> text, so entity
     * unescaping and marker removal are honoured in every mode.
     *
     * @param str the line to normalize; must not be {@code null}
     * @param i   selector for the lowercase/punctuation stage; only the values
     *            0 through 4 enable it (presumably language identifiers
     *            defined elsewhere in the project; confirm against callers)
     * @param z   {@code true} to keep punctuation as separate space-delimited
     *            tokens, {@code false} to strip it; ignored unless {@code i}
     *            is in {@code 0..4}
     * @return the normalized line, never {@code null}
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String normalizeLine(String str, int i, boolean z) {
        String line = cleanMarkup(str);
        if (i >= 0 && i <= 4) {
            // Pad with spaces so the context-sensitive punctuation patterns can
            // also match at the very start and end of the line. Locale.ROOT
            // keeps ASCII letters inside [a-z] regardless of the default locale.
            String padded = " " + line.toLowerCase(Locale.ROOT) + " ";
            line = z ? tokenizePunctuation(padded) : NON_ALPHANUMERIC.matcher(padded).replaceAll(SPACE);
        }
        return collapseWhitespace(line);
    }

    /** Removes skip markers and line breaks, unescapes entities and folds curly quotes to ASCII. */
    private static String cleanMarkup(String text) {
        String out = SKIPPED.matcher(text).replaceAll("");
        out = DASH_NEWLINE.matcher(out).replaceAll("");
        out = NEWLINE.matcher(out).replaceAll(SPACE);
        out = ENTITY_QUOT.matcher(out).replaceAll(QUOTE);
        out = ENTITY_AMP.matcher(out).replaceAll(AMP);
        out = ENTITY_LT.matcher(out).replaceAll(LT);
        out = ENTITY_GT.matcher(out).replaceAll(GT);
        out = CURLY_SINGLE_QUOTES.matcher(out).replaceAll(APOS);
        return CURLY_DOUBLE_QUOTES.matcher(out).replaceAll(QUOTE);
    }

    /** Separates punctuation into its own tokens while leaving digit-internal periods and commas (e.g. 3.14) intact. */
    private static String tokenizePunctuation(String padded) {
        String out = SYMBOLS.matcher(padded).replaceAll(PAD_BOTH);
        out = NON_DIGIT_THEN_SEPARATOR.matcher(out).replaceAll(SPLIT_BEFORE_SECOND);
        out = SEPARATOR_THEN_NON_DIGIT.matcher(out).replaceAll(SPLIT_BEFORE_FIRST);
        out = DIGIT_THEN_DASH.matcher(out).replaceAll(SPLIT_BEFORE_SECOND);
        return UNDERSCORE_OR_HASH.matcher(out).replaceAll(SPACE);
    }

    /** Collapses each whitespace run to a single space and trims both ends. */
    private static String collapseWhitespace(String text) {
        String out = WHITESPACE_RUN.matcher(text).replaceAll(SPACE);
        out = LEADING_WHITESPACE.matcher(out).replaceAll("");
        return TRAILING_WHITESPACE.matcher(out).replaceAll("");
    }
}
