package org.apache.uima.examples.tagger.trainAndTest;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

/* loaded from: input_file:org/apache/uima/examples/tagger/trainAndTest/BrownReader.class */
/**
 * {@link CorpusReader} that loads every file of a directory whose lines consist of
 * space-separated {@code text/label} items and turns each item into a {@link Token}.
 */
public class BrownReader implements CorpusReader {
    /** Separator between the items of one line: one or more plain spaces. */
    private static final Pattern ITEM_SEPARATOR = Pattern.compile("[ ]+");

    /**
     * Reads all files directly inside the given directory and returns their items as tokens.
     *
     * <p>Every non-blank line is counted (the total is printed as the number of sentences),
     * split on runs of spaces, and each piece is normalized to a single {@code text/label}
     * pair. The part before the slash and the part after it are passed, in that order, to the
     * {@link Token} constructor. Empty pieces are ignored; non-empty pieces that still have no
     * label after normalization are reported on {@code System.err} and skipped.
     *
     * @param str path of the directory containing the corpus files
     * @param mappingInterface optional tag mapping applied to the complete token list via
     *        {@code map_tags}; may be {@code null}, in which case the tokens are returned as read
     * @return the tokens of all files (after mapping, if any), or {@code null} if the directory
     *         cannot be listed or any file cannot be read
     */
    @Override // org.apache.uima.examples.tagger.trainAndTest.CorpusReader
    public List<Token> read_corpus(String str, MappingInterface mappingInterface) {
        // File.list() yields null (not an empty array) when the path is not a listable directory.
        String[] fileNames = new File(str).list();
        if (fileNames == null) {
            System.out.println("Cannot list corpus directory: " + str);
            return null;
        }

        List<Token> tokens = new ArrayList<Token>();
        int lineCount = 0;
        for (String fileName : fileNames) {
            String path = str + "/" + fileName;
            try {
                BufferedReader reader = new BufferedReader(new FileReader(path));
                // try/finally guarantees the reader is released even when readLine() fails.
                try {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        if (line.trim().length() == 0) {
                            continue;
                        }
                        lineCount++;
                        for (String rawItem : ITEM_SEPARATOR.split(line)) {
                            String item = normalizeItem(rawItem);
                            String[] textAndLabel = item.split("/");
                            if (textAndLabel.length < 2) {
                                // A line starting with spaces produces an empty first piece;
                                // that is harmless. Anything else without a label is malformed.
                                if (item.length() > 0) {
                                    System.err.println(
                                            "Skipping item without tag '" + item + "' in " + path);
                                }
                                continue;
                            }
                            tokens.add(new Token(textAndLabel[0], textAndLabel[1]));
                        }
                    }
                } finally {
                    reader.close();
                }
            } catch (IOException e) {
                System.out.println(e);
                return null;
            }
        }

        System.out.println(lineCount + " sentences in the corpus");
        if (mappingInterface != null) {
            tokens = mappingInterface.map_tags(tokens);
        }
        return tokens;
    }

    /**
     * Rewrites one raw line item so that it contains at most one {@code '/'} separator.
     *
     * <p>Steps, applied in order: strip newlines and tabs; if the item starts with
     * {@code "//"}, replace every {@code "//"} with {@code "per/"}; drop a single leading
     * slash; collapse the first remaining {@code "//"} to one slash; finally, if more than one
     * slash is left, concatenate everything before the last slash-delimited segment and keep
     * only the final slash.
     */
    private static String normalizeItem(String rawItem) {
        String item = rawItem.replaceAll("[\\n\\t]+", "");
        if (item.startsWith("//")) {
            item = item.replace("//", "per/");
        }
        if (item.startsWith("/")) {
            item = item.substring(1);
        }
        int doubledSlash = item.indexOf("//");
        if (doubledSlash >= 0) {
            item = item.substring(0, doubledSlash) + item.substring(doubledSlash + 1);
        }
        if (item.indexOf("/") != item.lastIndexOf("/")) {
            String[] segments = item.split("/");
            StringBuilder merged = new StringBuilder();
            for (int i = 0; i < segments.length - 1; i++) {
                merged.append(segments[i]);
            }
            merged.append("/");
            merged.append(segments[segments.length - 1]);
            item = merged.toString();
        }
        return item;
    }
}
