/*
 * Decompiled with CFR 0.152.
 */
package com.qianjiang.util;

import com.qianjiang.util.Url;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Fetches an HTML page over HTTP and extracts the links found in it.
 *
 * <p>The page is scanned line by line. A line contributes at most one link: either the
 * {@code href} of the first {@code <a href="...">...</a>} anchor on the line or, when the line
 * has no such anchor, the first substring matching {@code list.*html}. Relative links are
 * resolved against the scheme-and-host prefix of the page URL.
 */
public class HtmlParser {
    /** Charset used when the response does not declare a usable one. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /** Captures the charset value of a Content-Type header, tolerating quotes and extra parameters. */
    private static final Pattern CHARSET_PATTERN =
            Pattern.compile("charset\\s*=\\s*[\"']?([^\\s;\"']+)", Pattern.CASE_INSENSITIVE);

    /** Matches an anchor element contained on a single line. */
    private static final Pattern ANCHOR_PATTERN = Pattern.compile("<a href=.*</a>");

    /** Fallback match for bare "list...html" page names on lines without an anchor. */
    private static final Pattern LIST_PAGE_PATTERN = Pattern.compile("list.*html");

    /** URL validation pattern used by {@link #checkLink(String)}; compiled once because it is large. */
    private static final Pattern LINK_PATTERN = Pattern.compile("(https?|ftp):\\/\\/(((([a-z]|\\d|-|\\.|_|~|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])|(%[\\da-f]{2})|[!\\$&'\\(\\)\\*\\+,;=]|:)*@)?(((\\d|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-5]))|((([a-z]|\\d|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])|(([a-z]|\\d|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])([a-z]|\\d|-|\\.|_|~|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])*([a-z]|\\d|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])))\\.)+(([a-z]|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])|(([a-z]|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])([a-z]|\\d|-|\\.|_|~|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])*([a-z]|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])))\\.?)(:\\d*)?)(\\/((([a-z]|\\d|-|\\.|_|~|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])|(%[\\da-f]{2})|[!\\$&'\\(\\)\\*\\+,;=]|:|@)+(\\/(([a-z]|\\d|-|\\.|_|~|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])|(%[\\da-f]{2})|[!\\$&'\\(\\)\\*\\+,;=]|:|@)*)*)?)?(\\?((([a-z]|\\d|-|\\.|_|~|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])|(%[\\da-f]{2})|[!\\$&'\\(\\)\\*\\+,;=]|:|@)|[\\uE000-\\uF8FF]|\\/|\\?)*)?(\\#((([a-z]|\\d|-|\\.|_|~|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])|(%[\\da-f]{2})|[!\\$&'\\(\\)\\*\\+,;=]|:|@)|\\/|\\?)*)?");

    /** Address of the page to fetch. */
    String htmlUrl;
    /** Links collected so far; appended to by every call to {@link #getHrefList()}. */
    Collection<Url> hrefList = new ArrayList<Url>();
    /** Charset used to decode the most recently fetched page. */
    String charSet;

    /**
     * Creates a parser for the given page.
     *
     * @param htmlUrl absolute URL of the page to fetch
     */
    public HtmlParser(String htmlUrl) {
        this.htmlUrl = htmlUrl;
    }

    /**
     * Fetches the page and returns the links found in it.
     *
     * <p>Every call re-fetches the page and appends to the same collection, so calling this
     * method more than once on one instance yields repeated entries.
     *
     * @return the collection of links gathered by this instance
     * @throws IOException if the URL is malformed or the page cannot be read
     */
    public Collection<Url> getHrefList() throws IOException {
        this.parser();
        return this.hrefList;
    }

    /**
     * Downloads the page and adds each distinct link found on it to {@link #hrefList}.
     *
     * @throws IOException if the URL is malformed or the page cannot be read
     */
    private void parser() throws IOException {
        URL url = new URL(this.htmlUrl);
        // The page is only read, so the connection is not configured for output.
        HttpURLConnection connection = (HttpURLConnection)url.openConnection();
        try {
            String declared = this.getCharset(connection.getContentType());
            // A missing, malformed or unknown charset must not abort the fetch.
            this.charSet = HtmlParser.isUsableCharset(declared) ? declared : DEFAULT_CHARSET;
            BufferedReader reader =
                    new BufferedReader(new InputStreamReader(connection.getInputStream(), this.charSet));
            try {
                Set<String> seen = new HashSet<String>();
                String line;
                // Read exactly one line per iteration so that no line is skipped.
                while ((line = reader.readLine()) != null) {
                    String href = this.getHref(line);
                    if (href == null || !seen.add(href)) {
                        continue;
                    }
                    this.hrefList.add(new Url(href));
                }
            } finally {
                reader.close();
            }
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Tells whether the given charset name can be handed to an {@link InputStreamReader}.
     *
     * @param name candidate charset name, may be {@code null}
     * @return {@code true} if the name is non-empty, legal and supported by this JVM
     */
    private static boolean isUsableCharset(String name) {
        if (name == null || name.length() == 0) {
            return false;
        }
        try {
            return Charset.isSupported(name);
        } catch (IllegalArgumentException illegalName) {
            // Thrown for syntactically illegal charset names; treat them as unusable.
            return false;
        }
    }

    /**
     * Extracts the charset name from a Content-Type header value.
     *
     * @param str header value such as {@code text/html; charset=utf-8}; may be {@code null}
     * @return the declared charset name without quotes or trailing parameters, or {@code null}
     *         when the header is absent or declares no charset
     */
    private String getCharset(String str) {
        if (str == null) {
            return null;
        }
        Matcher matcher = CHARSET_PATTERN.matcher(str);
        return matcher.find() ? matcher.group(1) : null;
    }

    /**
     * Extracts a link from one line of HTML and turns it into an absolute URL.
     *
     * @param str a single line of the page
     * @return the absolute link, or {@code null} when the line holds no link, the link is a
     *         {@code javascript:;} placeholder, or the resolved link fails {@link #checkLink(String)}
     */
    private String getHref(String str) {
        String term = "";
        Matcher anchor = ANCHOR_PATTERN.matcher(str);
        if (anchor.find()) {
            String[] parts = anchor.group().split("href=\"");
            if (parts.length < 2) {
                // The href is not double-quoted (e.g. href='x'), so there is nothing to cut out.
                return null;
            }
            term = parts[1];
            int closingQuote = term.indexOf("\"");
            if (closingQuote != -1) {
                term = term.substring(0, closingQuote);
            }
        } else {
            Matcher listPage = LIST_PAGE_PATTERN.matcher(str);
            if (listPage.find()) {
                term = listPage.group();
            }
        }
        if ("".equals(term)) {
            return null;
        }

        // Absolute links are returned untouched; https is included so that such links are not
        // mistaken for relative paths and glued onto the site root.
        if (term.contains("http://") || term.contains("https://")) {
            return term;
        }

        boolean placeholder = term.contains("javascript:;");
        String root = this.siteRoot();
        if (!placeholder && !term.contains(root)) {
            String prefix = term.startsWith("/") ? root : root + "/";
            // Links without an ".html" part get the extension appended.
            term = term.contains(".html") ? prefix + term : prefix + term + ".html";
        }
        if (!placeholder && this.checkLink(term)) {
            return term;
        }
        return null;
    }

    /**
     * Returns the part of {@link #htmlUrl} before its third {@code '/'} (for
     * {@code http://host/path} that is {@code http://host}), or the whole URL when it contains
     * fewer than three slashes.
     */
    private String siteRoot() {
        int slash = -1;
        for (int i = 0; i < 3; i++) {
            slash = this.htmlUrl.indexOf('/', slash + 1);
            if (slash == -1) {
                return this.htmlUrl;
            }
        }
        return this.htmlUrl.substring(0, slash);
    }

    /**
     * Checks whether the whole string is a well-formed {@code http}, {@code https} or
     * {@code ftp} URL according to {@link #LINK_PATTERN}.
     *
     * @param a candidate URL
     * @return {@code true} if the entire string matches the pattern
     */
    private boolean checkLink(String a) {
        return LINK_PATTERN.matcher(a).matches();
    }
}

