/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.net.urlnormalizer.basic;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.lang.invoke.MethodHandles;
import java.net.IDN;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.net.URLNormalizer;
import org.apache.nutch.util.NutchConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * URL normalizer performing basic, purely syntactic normalizations: host name
 * normalization, default-port removal, fragment removal, path dot-segment
 * normalization and canonical percent-encoding of the file part (see
 * {@link #normalize(String, String)}).
 *
 * <p>Host handling is configured through {@link #NORM_HOST_IDN} and
 * {@link #NORM_HOST_TRIM_TRAILING_DOT}, both read in
 * {@link #setConf(Configuration)}.
 */
public class BasicURLNormalizer
implements URLNormalizer {
    /** Class logger; assigned in the static initializer. */
    private static final Logger LOG;
    /**
     * Configuration key selecting the IDN conversion applied to host names:
     * {@code toAscii} or {@code toUnicode} (compared case-insensitively).
     * Any other value, including the default empty string, enables neither.
     */
    public static final String NORM_HOST_IDN = "urlnormalizer.basic.host.idn";
    /**
     * Configuration key (boolean, default {@code false}) enabling removal of a
     * single trailing dot from host names.
     */
    public static final String NORM_HOST_TRIM_TRAILING_DOT = "urlnormalizer.basic.host.trim-trailing-dot";
    /** Matches paths containing "/.", "//" or "./", i.e. paths that may need normalization. */
    private static final Pattern hasNormalizablePathPattern;
    /** Matches one percent-escape triplet; group 1 holds the two hex digits. */
    private static final Pattern unescapeRulePattern;
    /** Charset used to encode paths to bytes and to read standard input in {@code main}. */
    private static final Charset utf8;
    /**
     * Indexed by ASCII code: {@code true} for characters whose percent-escapes
     * are decoded by {@code unescapePath} (alphanumerics, '-', '.', '_', '~').
     */
    private static final boolean[] unescapedCharacters;
    /**
     * Indexed by ASCII code: {@code true} for characters that
     * {@code escapePath} always percent-encodes.
     */
    private static final boolean[] escapedCharacters;
    /** Configuration most recently passed to {@link #setConf(Configuration)}. */
    private Configuration conf;
    /** When set, non-ASCII host names are converted with {@code IDN.toASCII}. */
    private boolean hostIDNtoASCII;
    /** When set, host names containing "xn--" are converted with {@code IDN.toUnicode}. */
    private boolean hostASCIItoIDN;
    /** When set, a trailing dot is stripped from host names. */
    private boolean hostTrimTrailingDot;

    /**
     * Tells whether a code point is an ASCII letter or an ASCII decimal digit.
     *
     * @param c the code point to test
     * @return {@code true} for 'A'-'Z', 'a'-'z' and '0'-'9', {@code false} otherwise
     */
    private static boolean isAlphaNumeric(int c) {
        if ('A' <= c && c <= 'Z') {
            return true;
        }
        if ('a' <= c && c <= 'z') {
            return true;
        }
        return '0' <= c && c <= '9';
    }

    /**
     * Tells whether a code point is an ASCII hexadecimal digit in either case.
     *
     * @param c the code point to test
     * @return {@code true} for '0'-'9', 'A'-'F' and 'a'-'f', {@code false} otherwise
     */
    private static boolean isHexCharacter(int c) {
        if ('0' <= c && c <= '9') {
            return true;
        }
        if ('A' <= c && c <= 'F') {
            return true;
        }
        return 'a' <= c && c <= 'f';
    }

    /**
     * Tells whether every character of a string lies in the 7-bit ASCII range.
     *
     * @param str the string to inspect; must not be {@code null}
     * @return {@code true} if no character is above U+007F (also for the empty string)
     */
    private static boolean isAscii(String str) {
        final int length = str.length();
        for (int i = 0; i < length; i++) {
            if (str.charAt(i) > 0x7F) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the configuration last supplied to {@code setConf}.
     *
     * @return the stored configuration, or {@code null} if none has been set
     */
    public Configuration getConf() {
        return conf;
    }

    /**
     * Stores the configuration and derives the host-normalization switches from it.
     *
     * <p>{@link #NORM_HOST_IDN} selects the IDN conversion: {@code toAscii} or
     * {@code toUnicode} (case-insensitive); any other value disables both.
     * {@link #NORM_HOST_TRIM_TRAILING_DOT} (default {@code false}) enables
     * trailing-dot removal.
     *
     * <p>All switches are assigned on every call, so re-configuring an instance
     * never leaves a switch enabled by an earlier configuration.
     *
     * @param conf the configuration to read; must not be {@code null}
     */
    public void setConf(Configuration conf) {
        this.conf = conf;
        String normIdn = conf.get(NORM_HOST_IDN, "");
        // Assign both flags unconditionally: the two modes are mutually
        // exclusive and a previous setConf() call must not leak its mode.
        this.hostIDNtoASCII = "toAscii".equalsIgnoreCase(normIdn);
        this.hostASCIItoIDN = "toUnicode".equalsIgnoreCase(normIdn);
        this.hostTrimTrailingDot = conf.getBoolean(NORM_HOST_TRIM_TRAILING_DOT, false);
    }

    /**
     * Normalizes a URL string.
     *
     * <p>The input is trimmed and parsed. For {@code http}, {@code https} and
     * {@code ftp} URLs the host name is normalized, a port equal to the
     * protocol's default port is dropped, an empty file part becomes "/", a
     * file part not starting with "/" gets one prepended, and a fragment is
     * removed. For every protocol the file part is run through
     * {@code unescapePath} and {@code escapePath}. Path normalization is
     * applied to {@code file} URLs and to {@code http}/{@code https}/{@code ftp}
     * URLs whose file part already started with "/".
     *
     * <p>If nothing had to be changed the trimmed input is returned as is.
     * When rebuilding, user info is kept only for protocols other than
     * {@code http} and {@code https}.
     *
     * @param urlString the URL to normalize; the empty string is returned unchanged
     * @param scope the normalization scope; not consulted by this implementation
     * @return the normalized URL string
     * @throws MalformedURLException if the URL cannot be parsed or its host
     *         name cannot be normalized
     */
    public String normalize(String urlString, String scope) throws MalformedURLException {
        if ("".equals(urlString)) {
            return urlString;
        }

        final String trimmed = urlString.trim();
        final URL parsed = new URL(trimmed);

        final String protocol = parsed.getProtocol();
        final boolean http = "http".equals(protocol);
        final boolean https = "https".equals(protocol);

        String host = parsed.getHost();
        int port = parsed.getPort();
        String file = parsed.getFile();

        // The parsed protocol differing from the literal prefix of the input
        // (presumably in letter case) already counts as a change.
        boolean changed = !trimmed.startsWith(protocol);
        boolean normalizePath = false;

        if (http || https || "ftp".equals(protocol)) {
            final String authority = parsed.getAuthority();
            if (host == null || authority == null) {
                changed = true;
            } else {
                final String newHost = normalizeHostName(host);
                if (!host.equals(newHost)) {
                    host = newHost;
                    changed = true;
                } else if (!authority.equals(newHost)) {
                    // authority carries more than the bare host (user info, port, ...)
                    changed = true;
                }
            }

            if (port == parsed.getDefaultPort()) {
                port = -1;
                changed = true;
            }

            if (file == null || "".equals(file)) {
                file = "/";
                changed = true;
            } else if (!file.startsWith("/")) {
                file = "/" + file;
                changed = true;
            } else {
                // only a file part that was already rooted gets its path normalized
                normalizePath = true;
            }

            if (parsed.getRef() != null) {
                // the fragment is dropped when the URL is rebuilt
                changed = true;
            }
        } else if ("file".equals(protocol)) {
            normalizePath = true;
        }

        // Bring percent-encoding into canonical form: decode what need not be
        // escaped, then encode what must be.
        final String reescaped = escapePath(unescapePath(file));
        if (!file.equals(reescaped)) {
            changed = true;
            file = reescaped;
        }

        if (normalizePath) {
            final URL source = changed ? new URL(protocol, host, port, file) : parsed;
            final String normalizedFile = getFileWithNormalizedPath(source);
            if (!file.equals(normalizedFile)) {
                changed = true;
                file = normalizedFile;
            }
        }

        if (!changed) {
            return trimmed;
        }

        if (http || https || parsed.getUserInfo() == null) {
            return new URL(protocol, host, port, file).toString();
        }

        // Remaining protocols with user info: assemble by hand so it is kept.
        final StringBuilder rebuilt = new StringBuilder();
        rebuilt.append(protocol).append("://").append(parsed.getUserInfo()).append('@').append(host);
        if (port != -1) {
            rebuilt.append(':').append(port);
        }
        rebuilt.append(file);
        return rebuilt.toString();
    }

    /**
     * Returns the file part (path plus query, if any) of a URL with its path
     * normalized.
     *
     * <p>Normalization via {@code URI.normalize()} is attempted only when the
     * path contains "/.", "//" or "./". Leading "/.." segments left over by
     * that call are then stripped. If the URL cannot be converted to a URI,
     * the file part is used as is. The result always starts with "/"; an empty
     * file part yields "/".
     *
     * @param url the URL whose file part is normalized
     * @return the normalized file part, never empty
     * @throws MalformedURLException if the normalized URI cannot be converted
     *         back to a URL
     */
    private String getFileWithNormalizedPath(URL url) throws MalformedURLException {
        String file;
        if (hasNormalizablePathPattern.matcher(url.getPath()).find()) {
            try {
                file = url.toURI().normalize().toURL().getFile();
                // URI.normalize() keeps leading ".." segments of an absolute
                // path; drop every complete leading "/.." segment. The
                // character after the candidate segment must be inspected
                // relative to 'start', otherwise a later segment that merely
                // begins with ".." (e.g. "/..x") would be stripped as well.
                int start = 0;
                while (file.startsWith("/..", start)
                        && (start + 3 == file.length() || file.charAt(start + 3) == '/')) {
                    start += 3;
                }
                if (start > 0) {
                    file = file.substring(start);
                }
            } catch (URISyntaxException e) {
                // not representable as a URI: fall back to the unnormalized file part
                file = url.getFile();
            }
        } else {
            file = url.getFile();
        }

        if (file.isEmpty()) {
            file = "/";
        } else if (!file.startsWith("/")) {
            file = "/" + file;
        }
        return file;
    }

    /**
     * Decodes percent-escapes that do not need to be escaped and upper-cases
     * the hex digits of all remaining escapes.
     *
     * <p>An escape is decoded when its value is below 128 and flagged in
     * {@code unescapedCharacters}; every other escape is kept with its hex
     * digits converted to upper case. Text outside escapes is copied verbatim.
     *
     * @param path the file part of a URL
     * @return the path with unnecessary escapes decoded
     */
    private String unescapePath(String path) {
        final Matcher escapes = unescapeRulePattern.matcher(path);
        final StringBuilder out = new StringBuilder();
        int copiedUpTo = 0;
        while (escapes.find()) {
            // literal text between the previous escape and this one
            out.append(path.substring(copiedUpTo, escapes.start()));
            final int code = Integer.parseInt(escapes.group(1), 16);
            if (code < 128 && unescapedCharacters[code]) {
                out.append((char) code);
            } else {
                out.append(escapes.group().toUpperCase(Locale.ROOT));
            }
            copiedUpTo = escapes.end();
        }
        // trailing text after the last escape (the whole path if there was none)
        out.append(path.substring(copiedUpTo));
        return out.toString();
    }

    /**
     * Percent-encodes every byte of the UTF-8 form of a path that must be
     * escaped.
     *
     * <p>Non-ASCII bytes and bytes flagged in {@code escapedCharacters} are
     * written as "%" followed by two upper-case hex digits. A "%" that starts
     * a well-formed escape (two hex digits follow) is copied together with
     * its digits; any other "%" is written as "%25". All remaining bytes are
     * copied as characters.
     *
     * @param path the file part of a URL
     * @return the escaped path
     */
    private String escapePath(String path) {
        final byte[] raw = path.getBytes(utf8);
        final StringBuilder out = new StringBuilder(path.length());
        int pos = 0;
        while (pos < raw.length) {
            final byte current = raw[pos];
            if (current < 0 || escapedCharacters[current]) {
                final String hex = Integer.toHexString(current & 0xFF).toUpperCase(Locale.ROOT);
                out.append('%');
                if (hex.length() % 2 != 0) {
                    // single digit: pad to two
                    out.append('0');
                }
                out.append(hex);
                pos++;
            } else if (current != '%') {
                out.append((char) current);
                pos++;
            } else if (pos + 2 < raw.length
                    && isHexCharacter(raw[pos + 1])
                    && isHexCharacter(raw[pos + 2])) {
                // already a valid escape: keep the triplet untouched
                out.append((char) current);
                out.append((char) raw[pos + 1]);
                out.append((char) raw[pos + 2]);
                pos += 3;
            } else {
                // stray percent sign
                out.append("%25");
                pos++;
            }
        }
        return out.toString();
    }

    /**
     * Normalizes a host name.
     *
     * <p>Steps, in order: percent-decoding as UTF-8 (only if the host contains
     * "%"), lower-casing with {@code Locale.ROOT}, optional IDN conversion
     * (to ASCII for non-ASCII hosts, or to Unicode for hosts containing
     * "xn--", depending on configuration), and optional removal of one
     * trailing dot.
     *
     * @param host the host name as found in the URL
     * @return the normalized host name
     * @throws MalformedURLException if percent-decoding or the conversion to
     *         ASCII fails; the original exception is attached as cause
     */
    private String normalizeHostName(String host) throws MalformedURLException {
        String normalized = host;

        if (normalized.indexOf('%') >= 0) {
            try {
                normalized = URLDecoder.decode(normalized, StandardCharsets.UTF_8.toString());
            } catch (UnsupportedEncodingException | IllegalArgumentException e) {
                LOG.debug("Failed to convert percent-encoded host name {}: ", (Object) normalized, (Object) e);
                MalformedURLException failure = new MalformedURLException(
                        "Invalid percent-encoded host name " + normalized + ": " + e.getMessage());
                failure.initCause(e);
                throw failure;
            }
        }

        normalized = normalized.toLowerCase(Locale.ROOT);

        if (hostIDNtoASCII && !isAscii(normalized)) {
            try {
                normalized = IDN.toASCII(normalized);
            } catch (IllegalArgumentException | IndexOutOfBoundsException e) {
                LOG.debug("Failed to convert IDN host {}: ", (Object) normalized, (Object) e);
                MalformedURLException failure = new MalformedURLException(
                        "Invalid IDN " + normalized + ": " + e.getMessage());
                failure.initCause(e);
                throw failure;
            }
        } else if (hostASCIItoIDN && normalized.contains("xn--")) {
            normalized = IDN.toUnicode(normalized);
        }

        if (hostTrimTrailingDot && normalized.endsWith(".")) {
            normalized = normalized.substring(0, normalized.length() - 1);
        }
        return normalized;
    }

    /**
     * Command-line entry point: reads URLs from standard input (UTF-8), one
     * per line, and prints the normalized form of each to standard output.
     * A URL that cannot be normalized is reported as {@code failed: <line>}.
     *
     * @param args optional; the first argument is used as the normalization
     *        scope (default {@code "default"}) and echoed when given
     * @throws IOException if reading standard input fails
     */
    public static void main(String[] args) throws IOException {
        final BasicURLNormalizer normalizer = new BasicURLNormalizer();
        normalizer.setConf(NutchConfiguration.create());

        String scope = "default";
        if (args.length > 0) {
            scope = args[0];
            System.out.println("Scope: " + scope);
        }

        final BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in, utf8));
        for (String url = stdin.readLine(); url != null; url = stdin.readLine()) {
            String output;
            try {
                output = normalizer.normalize(url, scope);
            } catch (MalformedURLException e) {
                output = "failed: " + url;
            }
            System.out.println(output);
        }
        System.exit(0);
    }

    /*
     * Initializes the logger, the precompiled patterns, the charset and the
     * two ASCII lookup tables.
     *
     * unescapedCharacters: ASCII letters, digits, '-', '.', '_' and '~'.
     * escapedCharacters:   codes 0-32, '"', '<', '>', '[', ']', '^', '`',
     *                      '{', '|', '}' and 127 (DEL).
     * Codes in neither table are reported at debug level and left unflagged.
     */
    static {
        LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
        hasNormalizablePathPattern = Pattern.compile("/[./]|[.]/");
        unescapeRulePattern = Pattern.compile("%([0-9A-Fa-f]{2})");
        utf8 = StandardCharsets.UTF_8;

        unescapedCharacters = new boolean[128];
        for (int code = 0; code < 128; code++) {
            switch (code) {
                case '-':
                case '.':
                case '_':
                case '~':
                    unescapedCharacters[code] = true;
                    break;
                default:
                    unescapedCharacters[code] = isAlphaNumeric(code);
                    break;
            }
        }

        escapedCharacters = new boolean[128];
        for (int code = 0; code < 128; code++) {
            if (unescapedCharacters[code]) {
                // never escaped; the entry keeps its default value false
                continue;
            }
            boolean mustEscape;
            switch (code) {
                case '"':
                case '<':
                case '>':
                case '[':
                case ']':
                case '^':
                case '`':
                case '{':
                case '|':
                case '}':
                case 0x7F:
                    mustEscape = true;
                    break;
                default:
                    // control characters and the space
                    mustEscape = code < 33;
                    break;
            }
            if (mustEscape) {
                escapedCharacters[code] = true;
            } else {
                LOG.debug("Character {} ({}) not handled as escaped or unescaped", (Object) Integer.valueOf(code), (Object) Character.valueOf((char) code));
            }
        }
    }
}

