package edu.cmu.meteor.util;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/cmu/meteor/util/SGMData.class */
/**
 * Accumulates the contents of one or more SGM files: the document ids,
 * system/reference ids, segment keys and segment texts found in them.
 *
 * <p>Segment keys have the form {@code docid::segid}; segment texts are stored
 * under {@code docid::segid::sysid}. All tag matching is case-insensitive.
 */
public class SGMData {
    /** Opening document tag. */
    private static final Pattern DOC_OPEN = Pattern.compile("<DOC", Pattern.CASE_INSENSITIVE);
    /** {@code docid="..."} attribute of a document tag. */
    private static final Pattern DOC_ID = Pattern.compile("docid=\"([^\"]*)\"", Pattern.CASE_INSENSITIVE);
    /** {@code sysid="..."} attribute of a document tag. */
    private static final Pattern SYS_ID = Pattern.compile("sysid=\"([^\"]*)", Pattern.CASE_INSENSITIVE);
    /** Closing document tag. */
    private static final Pattern DOC_CLOSE = Pattern.compile("<\\/DOC>", Pattern.CASE_INSENSITIVE);
    /** Opening segment tag carrying an explicit id (optionally quoted). */
    private static final Pattern SEG_WITH_ID =
            Pattern.compile("<\\s*seg\\s*id\\s*=\\s*\"?\\s*(.+?)\\s*\"?\\s*>", Pattern.CASE_INSENSITIVE);
    /** Opening segment tag without an id. */
    private static final Pattern SEG_NO_ID = Pattern.compile("<\\s*seg\\s*>", Pattern.CASE_INSENSITIVE);
    /** Opening test-set tag. */
    private static final Pattern TSTSET_OPEN = Pattern.compile("<\\s*tstset", Pattern.CASE_INSENSITIVE);
    /** {@code setid="..."} attribute of a test-set tag. */
    private static final Pattern SET_ID = Pattern.compile("setid\\s*=\\s*\"([^\"]+)\"", Pattern.CASE_INSENSITIVE);
    /** Closing segment tag. */
    private static final Pattern SEG_CLOSE = Pattern.compile("<\\s*\\/seg\\s*>", Pattern.CASE_INSENSITIVE);
    /** Any single whitespace character. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s", Pattern.CASE_INSENSITIVE);
    /** Whole segment element; group 1 is the text between the tags. */
    private static final Pattern SEG_TEXT =
            Pattern.compile("<\\s*seg.*?>(.*)<\\s*\\/seg\\s*>", Pattern.CASE_INSENSITIVE);
    private static final String SPACE = " ";

    /** Value of the {@code setid} attribute of the last test-set tag read, if any. */
    public String testSetID;
    /** System id of the first document read while {@link #systems} was still empty. */
    public String firstSysID;
    /** System ids seen in files loaded with {@code z == false}. */
    public HashSet<String> systems = new HashSet<>();
    /** System ids seen in files loaded with {@code z == true}. */
    public HashSet<String> references = new HashSet<>();
    /** Segment keys of the form {@code docid::segid}. */
    public HashSet<String> segKeys = new HashSet<>();
    /** Segment text keyed by {@code docid::segid::sysid}. */
    public Hashtable<String, String> segText = new Hashtable<>();
    /** Document ids seen so far. */
    public HashSet<String> docKeys = new HashSet<>();

    /**
     * Reads the file named {@code str} line by line and adds what it finds to
     * {@code sGMData}.
     *
     * <p>A document tag sets the current document and system id and restarts
     * the implicit segment counter; a closing document tag clears them. A
     * segment tag with an id uses that id (parsed as an integer); a segment tag
     * without one gets the next value of the per-document counter. A segment
     * may span several lines; its whitespace is normalised to single spaces
     * per character and the text is trimmed. Lines that are neither document
     * nor segment tags are ignored, except that a test-set tag with a
     * {@code setid} attribute updates {@link #testSetID}.
     *
     * <p>The file is read with the platform default charset.
     *
     * @param sGMData the container to fill
     * @param str     path of the file to read
     * @param z       {@code true} to record system ids in {@link #references},
     *                {@code false} to record them in {@link #systems}
     * @throws IOException if the file cannot be read, a document tag lacks a
     *         document or system id, a segment is never closed, a segment's
     *         text cannot be extracted, or a segment appears outside a document
     * @throws NumberFormatException if an explicit segment id is not an integer
     */
    public static void populate(SGMData sGMData, String str, boolean z) throws IOException {
        String docId = "";
        String sysId = "";
        int implicitSegId = 0;

        // try-with-resources so the file is closed even when parsing fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (DOC_OPEN.matcher(line).find()) {
                    Matcher docIdMatcher = DOC_ID.matcher(line);
                    if (!docIdMatcher.find()) {
                        throw new IOException("Couldn't read document id from line: " + line);
                    }
                    docId = docIdMatcher.group(1);
                    sGMData.docKeys.add(docId);

                    Matcher sysIdMatcher = SYS_ID.matcher(line);
                    if (!sysIdMatcher.find()) {
                        throw new IOException("Couldn't read system id from line: " + line);
                    }
                    sysId = sysIdMatcher.group(1);
                    if (z) {
                        sGMData.references.add(sysId);
                    } else {
                        if (sGMData.systems.isEmpty()) {
                            sGMData.firstSysID = sysId;
                        }
                        sGMData.systems.add(sysId);
                    }
                    implicitSegId = 0;
                    continue;
                }

                if (DOC_CLOSE.matcher(line).find()) {
                    docId = "";
                    sysId = "";
                    implicitSegId = 0;
                    continue;
                }

                int segId;
                Matcher explicitId = SEG_WITH_ID.matcher(line);
                if (explicitId.find()) {
                    segId = Integer.parseInt(explicitId.group(1));
                } else if (SEG_NO_ID.matcher(line).find()) {
                    implicitSegId++;
                    segId = implicitSegId;
                } else {
                    // Not a segment line: pick up the test-set id if present,
                    // then move on instead of treating the line as a segment.
                    if (TSTSET_OPEN.matcher(line).find()) {
                        Matcher setIdMatcher = SET_ID.matcher(line);
                        if (setIdMatcher.find()) {
                            sGMData.testSetID = setIdMatcher.group(1);
                        }
                    }
                    continue;
                }

                // Gather continuation lines until the closing tag shows up.
                String segment = line;
                while (!SEG_CLOSE.matcher(segment).find()) {
                    String next = reader.readLine();
                    if (next == null) {
                        // End of file: fail instead of appending "null" forever.
                        throw new IOException("Unterminated segment starting at line: " + line);
                    }
                    // readLine() strips the line break; restore it so words on
                    // adjacent lines stay separated once whitespace is normalised.
                    segment = segment + "\n" + next;
                }

                Matcher textMatcher = SEG_TEXT.matcher(WHITESPACE.matcher(segment).replaceAll(SPACE));
                if (!textMatcher.find()) {
                    throw new IOException("Couldn't read segment from line: " + segment);
                }
                String text = textMatcher.group(1).trim();
                if (docId.equals("")) {
                    throw new IOException("The following seems to be outside a DOC block: " + segment);
                }

                String segKey = docId + "::" + segId;
                sGMData.segKeys.add(segKey);
                sGMData.segText.put(segKey + "::" + sysId, text);
            }
        }
    }
}
