package org.wikidata.wdtk.dumpfiles.wmf;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.Marker;
import org.wikidata.wdtk.dumpfiles.DumpContentType;
import org.wikidata.wdtk.dumpfiles.MwDumpFile;
import org.wikidata.wdtk.util.DirectoryManager;
import org.wikidata.wdtk.util.WebResourceFetcher;

/* loaded from: input_file:wdtk-dumpfiles-0.3.0.jar:org/wikidata/wdtk/dumpfiles/wmf/WmfDumpFileManager.class */
/**
 * Locates the dump files of one Wikimedia project, both in a local download
 * directory and (when a {@link WebResourceFetcher} is supplied) on the web.
 *
 * <p>Local dumps are looked up below
 * {@code <base>/}{@value #DOWNLOAD_DIRECTORY_NAME}{@code /<projectName>}.
 * Lists of dumps returned by this class are sorted with the reverse of
 * {@link MwDumpFile.DateComparator} (presumably newest first, which is what
 * {@link #findMostRecentDump(DumpContentType)} relies on).
 */
public class WmfDumpFileManager {
    static final Logger logger = LoggerFactory.getLogger(WmfDumpFileManager.class);

    /** Regular expression a string must fully match to be accepted as a date stamp (eight digits). */
    static final String DATE_STAMP_PATTERN = "\\d\\d\\d\\d\\d\\d\\d\\d";

    /** Name of the subdirectory (below the base directory) that holds all downloaded dumps. */
    public static final String DOWNLOAD_DIRECTORY_NAME = "dumpfiles";

    /**
     * Date stamp placeholder used to build the directory name pattern for the
     * local lookup. Same value as the previously used SLF4J constant
     * {@code Marker.ANY_MARKER}, which is a logging constant and unrelated to
     * directory matching. NOTE(review): presumably interpreted as a wildcard by
     * {@code DirectoryManager.getSubdirectories} - confirm.
     */
    private static final String ANY_DATE_STAMP = "*";

    /** Prefix of the lines in the online directory listing that are inspected for date stamps. */
    private static final String LISTING_ROW_PREFIX = "<tr><td class=\"n\">";

    /**
     * Start (inclusive) and end (exclusive) index of the date stamp within a
     * listing row. NOTE(review): 27 equals the length of the row prefix plus
     * the length of {@code <a href="}, so rows presumably look like
     * {@code <tr><td class="n"><a href="YYYYMMDD...} - confirm against the server.
     */
    private static final int DATE_STAMP_START = 27;
    private static final int DATE_STAMP_END = 35;

    final String projectName;
    final DirectoryManager dumpfileDirectoryManager;
    final WebResourceFetcher webResourceFetcher;

    /**
     * Creates a manager for the dumps of one project.
     *
     * @param projectName
     *            name of the project; also used as the name of the
     *            project-specific download subdirectory
     * @param directoryManager
     *            manager of the base directory below which the download
     *            directory is resolved
     * @param webResourceFetcher
     *            fetcher used for online lookups, or {@code null} to consider
     *            local dumps only
     * @throws IOException
     *             if resolving the download subdirectories fails
     */
    public WmfDumpFileManager(String projectName, DirectoryManager directoryManager,
            WebResourceFetcher webResourceFetcher) throws IOException {
        this.projectName = projectName;
        this.dumpfileDirectoryManager = directoryManager
                .getSubdirectoryManager(DOWNLOAD_DIRECTORY_NAME)
                .getSubdirectoryManager(projectName);
        this.webResourceFetcher = webResourceFetcher;
        logger.info("Using download directory {}", this.dumpfileDirectoryManager);
    }

    /**
     * Finds the dumps needed to cover all revisions: the most recent available
     * main dump plus every daily dump whose date stamp is greater than the
     * main dump's date stamp.
     *
     * @param preferCurrent
     *            if {@code true}, the main dump is of type
     *            {@link DumpContentType#CURRENT}; otherwise of type
     *            {@link DumpContentType#FULL}
     * @return the newer daily dumps followed by the main dump as last element;
     *         if no main dump is available, all daily dumps
     */
    public List<MwDumpFile> findAllRelevantRevisionDumps(boolean preferCurrent) {
        MwDumpFile mainDump = findMostRecentDump(
                preferCurrent ? DumpContentType.CURRENT : DumpContentType.FULL);
        List<MwDumpFile> dailyDumps = findAllDumps(DumpContentType.DAILY);
        if (mainDump == null) {
            return dailyDumps;
        }

        List<MwDumpFile> relevantDumps = new ArrayList<>();
        for (MwDumpFile dailyDump : dailyDumps) {
            // Only dailies strictly newer than the main dump add information.
            if (dailyDump.getDateStamp().compareTo(mainDump.getDateStamp()) > 0) {
                relevantDumps.add(dailyDump);
            }
        }
        relevantDumps.add(mainDump);

        if (logger.isInfoEnabled()) {
            StringBuilder message = new StringBuilder();
            message.append("Found ").append(relevantDumps.size())
                    .append(" relevant dumps to process:");
            for (MwDumpFile dump : relevantDumps) {
                message.append("\n * ").append(dump);
            }
            logger.info(message.toString());
        }
        return relevantDumps;
    }

    /**
     * Finds the first dump of the given type, in the order produced by
     * {@link #findAllDumps(DumpContentType)}, that reports itself as available.
     *
     * @param dumpContentType
     *            the type of dump to look for
     * @return the first available dump, or {@code null} if there is none
     */
    public MwDumpFile findMostRecentDump(DumpContentType dumpContentType) {
        for (MwDumpFile dump : findAllDumps(dumpContentType)) {
            if (dump.isAvailable()) {
                return dump;
            }
        }
        return null;
    }

    /**
     * Finds all dumps of the given type. Online dumps are only considered if a
     * {@link WebResourceFetcher} was supplied at construction time.
     *
     * @param dumpContentType
     *            the type of dump to look for
     * @return the local dumps, merged with the online dumps if applicable
     */
    public List<MwDumpFile> findAllDumps(DumpContentType dumpContentType) {
        List<MwDumpFile> localDumps = findDumpsLocally(dumpContentType);
        if (this.webResourceFetcher == null) {
            return localDumps;
        }
        return mergeDumpLists(localDumps, findDumpsOnline(dumpContentType));
    }

    /**
     * Merges two lists of dumps into a new sorted list. When both lists contain
     * a dump with the same date stamp, the one from {@code localDumps} wins.
     *
     * @param localDumps
     *            dumps that take precedence
     * @param onlineDumps
     *            dumps that are added only if their date stamp does not occur
     *            in {@code localDumps}
     * @return a new list, sorted with the reverse of
     *         {@link MwDumpFile.DateComparator}
     */
    List<MwDumpFile> mergeDumpLists(List<MwDumpFile> localDumps, List<MwDumpFile> onlineDumps) {
        List<MwDumpFile> merged = new ArrayList<>(localDumps);

        HashSet<String> localDateStamps = new HashSet<>();
        for (MwDumpFile localDump : localDumps) {
            localDateStamps.add(localDump.getDateStamp());
        }
        for (MwDumpFile onlineDump : onlineDumps) {
            if (!localDateStamps.contains(onlineDump.getDateStamp())) {
                merged.add(onlineDump);
            }
        }

        Collections.sort(merged, Collections.reverseOrder(new MwDumpFile.DateComparator()));
        return merged;
    }

    /**
     * Finds the dumps of the given type that are available in the local
     * download directory. Directories whose name does not yield a valid date
     * stamp are ignored; directories with incomplete dump data are reported in
     * the log and skipped.
     *
     * @param dumpContentType
     *            the type of dump to look for
     * @return the available local dumps, sorted with the reverse of
     *         {@link MwDumpFile.DateComparator}; an empty list if the dump
     *         directory cannot be accessed
     */
    List<MwDumpFile> findDumpsLocally(DumpContentType dumpContentType) {
        try {
            String directoryPattern =
                    WmfDumpFile.getDumpFileDirectoryName(dumpContentType, ANY_DATE_STAMP);
            List<String> dumpDirectories =
                    this.dumpfileDirectoryManager.getSubdirectories(directoryPattern);

            List<MwDumpFile> localDumps = new ArrayList<>();
            for (String directoryName : dumpDirectories) {
                String dateStamp = WmfDumpFile.getDateStampFromDumpFileDirectoryName(
                        dumpContentType, directoryName);
                if (!dateStamp.matches(DATE_STAMP_PATTERN)) {
                    continue;
                }
                WmfLocalDumpFile localDump = new WmfLocalDumpFile(dateStamp, this.projectName,
                        this.dumpfileDirectoryManager, dumpContentType);
                if (localDump.isAvailable()) {
                    localDumps.add(localDump);
                } else {
                    logger.error(
                            "Incomplete local dump file data. Maybe delete {} to attempt fresh download.",
                            localDump.getDumpfileDirectory());
                }
            }

            Collections.sort(localDumps,
                    Collections.reverseOrder(new MwDumpFile.DateComparator()));
            return localDumps;
        } catch (IOException e) {
            // Best effort: a missing or unreadable directory means "no local dumps".
            logger.error("Unable to access dump directory: " + e, e);
            return Collections.emptyList();
        }
    }

    /**
     * Creates a dump file object for every date stamp found online for the
     * given type. The result is in the order returned by
     * {@link #findDumpDatesOnline(DumpContentType)}.
     *
     * @param dumpContentType
     *            the type of dump to look for
     * @return one online dump file object per date stamp
     */
    List<MwDumpFile> findDumpsOnline(DumpContentType dumpContentType) {
        List<MwDumpFile> onlineDumps = new ArrayList<>();
        for (String dateStamp : findDumpDatesOnline(dumpContentType)) {
            if (dumpContentType == DumpContentType.DAILY) {
                onlineDumps.add(new WmfOnlineDailyDumpFile(dateStamp, this.projectName,
                        this.webResourceFetcher, this.dumpfileDirectoryManager));
            } else if (dumpContentType == DumpContentType.JSON) {
                onlineDumps.add(new JsonOnlineDumpFile(dateStamp, this.projectName,
                        this.webResourceFetcher, this.dumpfileDirectoryManager));
            } else {
                onlineDumps.add(new WmfOnlineStandardDumpFile(dateStamp, this.projectName,
                        this.webResourceFetcher, this.dumpfileDirectoryManager,
                        dumpContentType));
            }
        }
        return onlineDumps;
    }

    /**
     * Fetches the online directory listing for the given dump type and extracts
     * the date stamps it contains. Requires a non-null
     * {@link WebResourceFetcher}.
     *
     * @param dumpContentType
     *            the type of dump to look for
     * @return the date stamps found, in descending order; if fetching fails, the
     *         failure is logged and the stamps read so far (possibly none) are
     *         returned
     */
    List<String> findDumpDatesOnline(DumpContentType dumpContentType) {
        List<String> dateStamps = new ArrayList<>();
        // try-with-resources closes stream and reader on every path, including
        // when readLine() fails half way through the listing.
        try (InputStream in = this.webResourceFetcher.getInputStreamForUrl(
                     WmfDumpFile.getDumpFileWebDirectory(dumpContentType, this.projectName));
             BufferedReader reader = new BufferedReader(
                     new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // The length check keeps a truncated or unexpected row from
                // aborting the whole lookup with an unchecked exception.
                if (line.startsWith(LISTING_ROW_PREFIX) && line.length() >= DATE_STAMP_END) {
                    String candidate = line.substring(DATE_STAMP_START, DATE_STAMP_END);
                    if (candidate.matches(DATE_STAMP_PATTERN)) {
                        dateStamps.add(candidate);
                    }
                }
            }
        } catch (IOException e) {
            logger.error("Failed to fetch available dump dates online.", e);
        }
        Collections.sort(dateStamps, Collections.reverseOrder());
        return dateStamps;
    }
}
