package com.qianjiang.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:com/qianjiang/util/HtmlParser.class */
/**
 * Downloads the HTML page at a given URL and collects the links found in it.
 *
 * <p>The page is scanned line by line. At most one link is taken from each line: either the
 * double-quoted {@code href} of an {@code <a href=...>...</a>} element or, when the line has no
 * such element, the first text matching {@code list.*html}. Relative links are turned into
 * absolute ones by prefixing the scheme-and-host part of the page URL.
 */
public class HtmlParser {
    /** Charset used to decode the page when the response does not declare a usable one. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /** Captures the value of the {@code charset} parameter of a Content-Type header. */
    private static final Pattern CHARSET_PATTERN =
            Pattern.compile("charset\\s*=\\s*[\"']?([^\"';\\s]+)", Pattern.CASE_INSENSITIVE);

    /** Matches an anchor element that starts with {@code <a href=} and is closed on the same line. */
    private static final Pattern ANCHOR_PATTERN = Pattern.compile("<a href=.*</a>");

    /** Fallback for lines without an anchor: any text running from "list" to "html". */
    private static final Pattern LIST_PAGE_PATTERN = Pattern.compile("list.*html");

    /**
     * Full-string URL syntax check used by {@link #checkLink(String)}; compiled once because the
     * expression is large.
     *
     * NOTE(review): the expression only lists lowercase ASCII letters and is compiled without
     * CASE_INSENSITIVE, so links containing uppercase letters are rejected - confirm whether that
     * is intended.
     */
    private static final Pattern LINK_PATTERN = Pattern.compile("(https?|ftp):\\/\\/(((([a-z]|\\d|-|\\.|_|~|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])|(%[\\da-f]{2})|[!\\$&'\\(\\)\\*\\+,;=]|:)*@)?(((\\d|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-5]))|((([a-z]|\\d|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])|(([a-z]|\\d|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])([a-z]|\\d|-|\\.|_|~|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])*([a-z]|\\d|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])))\\.)+(([a-z]|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])|(([a-z]|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])([a-z]|\\d|-|\\.|_|~|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])*([a-z]|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])))\\.?)(:\\d*)?)(\\/((([a-z]|\\d|-|\\.|_|~|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])|(%[\\da-f]{2})|[!\\$&'\\(\\)\\*\\+,;=]|:|@)+(\\/(([a-z]|\\d|-|\\.|_|~|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])|(%[\\da-f]{2})|[!\\$&'\\(\\)\\*\\+,;=]|:|@)*)*)?)?(\\?((([a-z]|\\d|-|\\.|_|~|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])|(%[\\da-f]{2})|[!\\$&'\\(\\)\\*\\+,;=]|:|@)|[\\uE000-\\uF8FF]|\\/|\\?)*)?(\\#((([a-z]|\\d|-|\\.|_|~|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])|(%[\\da-f]{2})|[!\\$&'\\(\\)\\*\\+,;=]|:|@)|\\/|\\?)*)?");

    /** URL of the page to download. */
    String htmlUrl;

    /** Links collected by the most recent successful download. */
    Collection<Url> hrefList = new ArrayList<Url>();

    /** Charset name used to decode the most recently downloaded page. */
    String charSet;

    /**
     * Creates a parser for the given page.
     *
     * @param str URL of the HTML page to download
     */
    public HtmlParser(String str) {
        this.htmlUrl = str;
    }

    /**
     * Downloads the page and returns the distinct links found in it.
     *
     * <p>Every call downloads the page again; the returned collection then holds only the links
     * of that latest download, so repeated calls do not accumulate duplicates.
     *
     * @return the collected links, in the order they appear in the page
     * @throws IOException if the page cannot be fetched or decoded
     */
    public Collection<Url> getHrefList() throws IOException {
        parser();
        return this.hrefList;
    }

    /**
     * Fetches the page, scans every line for a link and stores the distinct results in
     * {@link #hrefList}. The list is only replaced once the whole page has been read.
     *
     * @throws IOException if the connection fails or the declared charset is not supported
     */
    private void parser() throws IOException {
        // No request body is ever written, so the connection stays in its default input-only mode.
        HttpURLConnection connection = (HttpURLConnection) new URL(this.htmlUrl).openConnection();
        try {
            String declaredCharset = getCharset(connection.getContentType());
            this.charSet = declaredCharset != null ? declaredCharset : DEFAULT_CHARSET;

            Collection<Url> found = new ArrayList<Url>();
            Set<String> seen = new HashSet<String>();
            BufferedReader reader =
                    new BufferedReader(new InputStreamReader(connection.getInputStream(), this.charSet));
            try {
                // Read each line exactly once so that no line is skipped.
                String line;
                while ((line = reader.readLine()) != null) {
                    String href = getHref(line);
                    if (href != null && seen.add(href)) {
                        found.add(new Url(href));
                    }
                }
            } finally {
                reader.close();
            }

            this.hrefList.clear();
            this.hrefList.addAll(found);
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Extracts the charset name from a Content-Type header value.
     *
     * @param str the header value, for example {@code text/html; charset=UTF-8}; may be null
     * @return the charset name without quotes or trailing parameters, or null when the header is
     *         null or declares no charset
     */
    private String getCharset(String str) {
        if (str == null) {
            return null;
        }
        Matcher matcher = CHARSET_PATTERN.matcher(str);
        return matcher.find() ? matcher.group(1) : null;
    }

    /**
     * Extracts the link contained in one line of HTML and makes it absolute.
     *
     * <p>Links containing {@code http://} or {@code https://} are returned unchanged. Other links
     * that do not already contain the site root are prefixed with it and get {@code .html}
     * appended when they do not contain it; the result must pass {@link #checkLink(String)}.
     *
     * @param str one line of the page
     * @return the absolute link, or null when the line has no usable link
     */
    private String getHref(String str) {
        String link = extractLink(str);
        if (link == null || "".equals(link)) {
            return null;
        }
        // Already absolute: must not be prefixed with this site's root.
        if (link.contains("http://") || link.contains("https://")) {
            return link;
        }
        if (link.contains("javascript:;")) {
            return null;
        }
        String root = siteRoot();
        if (!link.contains(root)) {
            String prefix = link.startsWith("/") ? root : root + "/";
            link = link.contains(".html") ? prefix + link : prefix + link + ".html";
        }
        return checkLink(link) ? link : null;
    }

    /**
     * Pulls the raw link text out of a line.
     *
     * @param line one line of the page
     * @return the text between {@code href="} and the next double quote of the anchor on this
     *         line, or the {@code list.*html} match when the line has no anchor; null when
     *         neither is present or the anchor's href is not double-quoted
     */
    private String extractLink(String line) {
        Matcher anchor = ANCHOR_PATTERN.matcher(line);
        if (anchor.find()) {
            String[] parts = anchor.group().split("href=\"");
            if (parts.length < 2) {
                // href is unquoted or single-quoted; there is nothing to cut out.
                return null;
            }
            String value = parts[1];
            int closingQuote = value.indexOf('"');
            return closingQuote == -1 ? value : value.substring(0, closingQuote);
        }
        Matcher listPage = LIST_PAGE_PATTERN.matcher(line);
        return listPage.find() ? listPage.group() : null;
    }

    /**
     * Returns the part of {@link #htmlUrl} that precedes its third {@code /}, e.g.
     * {@code http://host} for {@code http://host/path}; the whole URL when it has fewer than
     * three slashes.
     */
    private String siteRoot() {
        String url = this.htmlUrl;
        int slash = -1;
        for (int count = 0; count < 3; count++) {
            slash = url.indexOf('/', slash + 1);
            if (slash < 0) {
                return url;
            }
        }
        return url.substring(0, slash);
    }

    /**
     * Checks whether the whole string is a syntactically valid http, https or ftp URL.
     *
     * @param str the link to validate
     * @return true when the entire string matches the URL pattern
     */
    private boolean checkLink(String str) {
        return LINK_PATTERN.matcher(str).matches();
    }
}
