/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.process;

import edu.stanford.nlp.ling.BasicDocument;
import edu.stanford.nlp.ling.Document;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.process.AbstractListProcessor;
import edu.stanford.nlp.process.StripTagsProcessor;
import edu.stanford.nlp.util.Generics;
import java.io.File;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Groups a flat list of tokens into sentences.
 *
 * <p>Every token must be a {@link HasWord} or a {@link String}. The token's text decides how
 * it is handled, checked in this order:
 * <ol>
 *   <li>If region patterns are configured and no region is open, the token is dropped; a token
 *       matching the begin pattern opens a region (and is itself dropped).</li>
 *   <li>A <em>boundary follower</em> that arrives after a sentence has been completed and before
 *       the next sentence has collected any token is appended to the completed sentence.</li>
 *   <li>A <em>discarded boundary</em> ends the current sentence and is dropped.</li>
 *   <li>A token matching the region end pattern closes the region, ends the current sentence
 *       and is dropped.</li>
 *   <li>A <em>boundary token</em> is appended to the current sentence and ends it.</li>
 *   <li>Any other token is appended to the current sentence.</li>
 * </ol>
 *
 * <p>The sets passed to the constructors are stored by reference, not copied.
 *
 * @param <IN> type of the tokens being grouped
 * @param <L> forwarded unchanged to {@link AbstractListProcessor}
 * @param <F> forwarded unchanged to {@link AbstractListProcessor}
 */
public class WordToSentenceProcessor<IN, L, F>
        extends AbstractListProcessor<IN, List<IN>, L, F> {
    private static final boolean DEBUG = false;

    /** Token texts that end a sentence and stay in it. */
    private final Set<String> sentenceBoundaryTokens;
    /** Token texts that are attached to the previous sentence when they directly follow it. */
    private final Set<String> sentenceBoundaryFollowers;
    /** Token texts that end a sentence and are removed from the output. */
    private final Set<String> sentenceBoundaryToDiscard;
    /** Opens a region of interest; {@code null} means every token is inside a region. */
    private final Pattern sentenceRegionBeginPattern;
    /** Closes a region of interest; {@code null} disables the check. */
    private final Pattern sentenceRegionEndPattern;

    /** Creates a processor that ends sentences on {@code "."}, {@code "?"} and {@code "!"}. */
    public WordToSentenceProcessor() {
        this(new HashSet<String>(Arrays.asList(".", "?", "!")));
    }

    /**
     * Creates a processor with custom boundary tokens and the default boundary followers
     * (closing brackets and quotes, including their {@code -RRB-}-style escapes).
     *
     * @param boundaryTokens token texts that end a sentence; must not be {@code null}
     */
    public WordToSentenceProcessor(Set<String> boundaryTokens) {
        this(boundaryTokens, Generics.newHashSet(Arrays.asList(")", "]", "\"", "'", "''", "-RRB-", "-RSB-", "-RCB-")));
    }

    /**
     * Creates a processor with custom boundary tokens and followers; the newline token
     * {@code "\n"} is the only discarded boundary.
     *
     * @param boundaryTokens token texts that end a sentence; must not be {@code null}
     * @param boundaryFollowers token texts attached to the preceding sentence; must not be {@code null}
     */
    public WordToSentenceProcessor(Set<String> boundaryTokens, Set<String> boundaryFollowers) {
        this(boundaryTokens, boundaryFollowers, Collections.singleton("\n"));
    }

    /**
     * Creates a processor with fully custom token sets and no region patterns.
     *
     * @param boundaryTokens token texts that end a sentence; must not be {@code null}
     * @param boundaryFollowers token texts attached to the preceding sentence; must not be {@code null}
     * @param boundaryToDiscard token texts that end a sentence and are dropped; must not be {@code null}
     */
    public WordToSentenceProcessor(Set<String> boundaryTokens, Set<String> boundaryFollowers, Set<String> boundaryToDiscard) {
        this(boundaryTokens, boundaryFollowers, boundaryToDiscard, null, null);
    }

    /**
     * Creates a processor that only keeps tokens lying between region delimiters. All three
     * token sets are empty, so sentences are delimited by the region end pattern alone.
     *
     * @param regionBeginPattern matched against a whole token to open a region
     * @param regionEndPattern matched against a whole token to close a region
     */
    public WordToSentenceProcessor(Pattern regionBeginPattern, Pattern regionEndPattern) {
        // Explicit type witness: without it, compilers older than Java 8 infer Set<Object> here.
        this(Collections.<String>emptySet(), Collections.<String>emptySet(), Collections.<String>emptySet(), regionBeginPattern, regionEndPattern);
    }

    private WordToSentenceProcessor(Set<String> boundaryTokens, Set<String> boundaryFollowers, Set<String> boundaryToDiscard, Pattern regionBeginPattern, Pattern regionEndPattern) {
        // Fail at construction time instead of with an anonymous NPE in the middle of process().
        this.sentenceBoundaryTokens = requireNonNullSet(boundaryTokens, "boundaryTokens");
        this.sentenceBoundaryFollowers = requireNonNullSet(boundaryFollowers, "boundaryFollowers");
        this.sentenceBoundaryToDiscard = requireNonNullSet(boundaryToDiscard, "boundaryToDiscard");
        this.sentenceRegionBeginPattern = regionBeginPattern;
        this.sentenceRegionEndPattern = regionEndPattern;
    }

    /** Returns {@code set}, or throws a {@link NullPointerException} naming the offending argument. */
    private static Set<String> requireNonNullSet(Set<String> set, String name) {
        if (set == null) {
            throw new NullPointerException(name + " must not be null");
        }
        return set;
    }

    /**
     * Splits {@code words} into sentences according to the configured token sets and region
     * patterns.
     *
     * @param words tokens in document order; each must be a {@link HasWord} or a {@link String}
     * @return the sentences in order of appearance; no sentence in the result is empty, and
     *         trailing tokens that were not closed by a boundary form a final sentence
     * @throws RuntimeException if a token is neither a {@link HasWord} nor a {@link String}
     */
    @Override
    public List<List<IN>> process(List<IN> words) {
        List<List<IN>> sentences = Generics.newArrayList();
        List<IN> currentSentence = null;
        // Most recently completed sentence; boundary followers are appended to it.
        List<IN> lastSentence = null;
        boolean insideRegion = false;
        for (IN o : words) {
            String word;
            if (o instanceof HasWord) {
                word = ((HasWord) o).word();
            } else if (o instanceof String) {
                word = (String) o;
            } else {
                throw new RuntimeException("Expected token to be either Word or String.");
            }
            if (currentSentence == null) {
                currentSentence = new ArrayList<IN>();
            }
            if (this.sentenceRegionBeginPattern != null && !insideRegion) {
                // Outside a region every token is dropped, including the delimiter that opens one.
                if (this.sentenceRegionBeginPattern.matcher(word).matches()) {
                    insideRegion = true;
                }
                continue;
            }
            if (this.sentenceBoundaryFollowers.contains(word) && lastSentence != null && currentSentence.isEmpty()) {
                // E.g. a closing quote right after the full stop belongs to the finished sentence.
                lastSentence.add(o);
                continue;
            }
            boolean newSent = false;
            if (this.sentenceBoundaryToDiscard.contains(word)) {
                newSent = true;
            } else if (this.sentenceRegionEndPattern != null && this.sentenceRegionEndPattern.matcher(word).matches()) {
                insideRegion = false;
                newSent = true;
            } else if (this.sentenceBoundaryTokens.contains(word)) {
                currentSentence.add(o);
                newSent = true;
            } else {
                currentSentence.add(o);
            }
            // A boundary seen while the current sentence is still empty produces no sentence.
            if (newSent && !currentSentence.isEmpty()) {
                sentences.add(currentSentence);
                lastSentence = currentSentence;
                currentSentence = null;
            }
        }
        if (currentSentence != null && !currentSentence.isEmpty()) {
            sentences.add(currentSentence);
        }
        return sentences;
    }

    /**
     * Command-line entry point: reads each argument as a file or an {@code http://} /
     * {@code https://} URL, splits it into sentences with the default processor and prints one
     * sentence per line. Documents loaded from a URL are first passed through a
     * {@link StripTagsProcessor}. The first failure prints a stack trace and stops processing
     * of the remaining arguments.
     *
     * @param args files or URLs to process; prints a usage line and exits when empty
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            System.out.println("usage: java edu.stanford.nlp.process.WordToSentenceProcessor fileOrUrl");
            System.exit(0);
        }
        try {
            for (String filename : args) {
                // NOTE(review): raw types are kept below because the generic signatures of
                // Document / BasicDocument / StripTagsProcessor are not visible from this file.
                Document d;
                if (filename.startsWith("http://") || filename.startsWith("https://")) {
                    BasicDocument dpre = new BasicDocument().init(new URL(filename));
                    StripTagsProcessor notags = new StripTagsProcessor();
                    d = notags.processDocument(dpre);
                } else {
                    d = new BasicDocument().init(new File(filename));
                }
                WordToSentenceProcessor proc = new WordToSentenceProcessor();
                Document sentd = proc.processDocument(d);
                for (Sentence sent : sentd) {
                    System.out.println(sent);
                }
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }
}

