001////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code for adherence to a set of rules.
003// Copyright (C) 2001-2017 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.javadoc;
021
022import java.util.ArrayDeque;
023import java.util.Arrays;
024import java.util.Collections;
025import java.util.Deque;
026import java.util.List;
027import java.util.Locale;
028import java.util.Set;
029import java.util.TreeSet;
030import java.util.regex.Pattern;
031import java.util.stream.Collectors;
032
033import com.puppycrawl.tools.checkstyle.JavadocDetailNodeParser;
034import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
035import com.puppycrawl.tools.checkstyle.api.DetailAST;
036import com.puppycrawl.tools.checkstyle.api.FileContents;
037import com.puppycrawl.tools.checkstyle.api.Scope;
038import com.puppycrawl.tools.checkstyle.api.TextBlock;
039import com.puppycrawl.tools.checkstyle.api.TokenTypes;
040import com.puppycrawl.tools.checkstyle.utils.CheckUtils;
041import com.puppycrawl.tools.checkstyle.utils.CommonUtils;
042import com.puppycrawl.tools.checkstyle.utils.ScopeUtils;
043
044/**
045 * Custom Checkstyle Check to validate Javadoc.
046 *
047 * @author Chris Stillwell
048 * @author Daniel Grenner
049 * @author Travis Schneeberger
050 */
051public class JavadocStyleCheck
052    extends AbstractCheck {
053
054    /** Message property key for the Unclosed HTML message. */
055    public static final String MSG_JAVADOC_MISSING = "javadoc.missing";
056
057    /** Message property key for the Unclosed HTML message. */
058    public static final String MSG_EMPTY = "javadoc.empty";
059
060    /** Message property key for the Unclosed HTML message. */
061    public static final String MSG_NO_PERIOD = "javadoc.noPeriod";
062
063    /** Message property key for the Unclosed HTML message. */
064    public static final String MSG_INCOMPLETE_TAG = "javadoc.incompleteTag";
065
066    /** Message property key for the Unclosed HTML message. */
067    public static final String MSG_UNCLOSED_HTML = JavadocDetailNodeParser.MSG_UNCLOSED_HTML_TAG;
068
069    /** Message property key for the Extra HTML message. */
070    public static final String MSG_EXTRA_HTML = "javadoc.extraHtml";
071
072    /** HTML tags that do not require a close tag. */
073    private static final Set<String> SINGLE_TAGS = Collections.unmodifiableSortedSet(
074        Arrays.stream(new String[] {"br", "li", "dt", "dd", "hr", "img", "p", "td", "tr", "th", })
075            .collect(Collectors.toCollection(TreeSet::new)));
076
077    /** HTML tags that are allowed in java docs.
078     * From https://www.w3schools.com/tags/default.asp
079     * The forms and structure tags are not allowed
080     */
081    private static final Set<String> ALLOWED_TAGS = Collections.unmodifiableSortedSet(
082        Arrays.stream(new String[] {
083            "a", "abbr", "acronym", "address", "area", "b", "bdo", "big",
084            "blockquote", "br", "caption", "cite", "code", "colgroup", "dd",
085            "del", "div", "dfn", "dl", "dt", "em", "fieldset", "font", "h1",
086            "h2", "h3", "h4", "h5", "h6", "hr", "i", "img", "ins", "kbd",
087            "li", "ol", "p", "pre", "q", "samp", "small", "span", "strong",
088            "style", "sub", "sup", "table", "tbody", "td", "tfoot", "th",
089            "thead", "tr", "tt", "u", "ul", "var", })
090        .collect(Collectors.toCollection(TreeSet::new)));
091
092    /** The scope to check. */
093    private Scope scope = Scope.PRIVATE;
094
095    /** The visibility scope where Javadoc comments shouldn't be checked. **/
096    private Scope excludeScope;
097
098    /** Format for matching the end of a sentence. */
099    private Pattern endOfSentenceFormat = Pattern.compile("([.?!][ \t\n\r\f<])|([.?!]$)");
100
101    /**
102     * Indicates if the first sentence should be checked for proper end of
103     * sentence punctuation.
104     */
105    private boolean checkFirstSentence = true;
106
107    /**
108     * Indicates if the HTML within the comment should be checked.
109     */
110    private boolean checkHtml = true;
111
112    /**
113     * Indicates if empty javadoc statements should be checked.
114     */
115    private boolean checkEmptyJavadoc;
116
117    @Override
118    public int[] getDefaultTokens() {
119        return getAcceptableTokens();
120    }
121
122    @Override
123    public int[] getAcceptableTokens() {
124        return new int[] {
125            TokenTypes.ANNOTATION_DEF,
126            TokenTypes.ANNOTATION_FIELD_DEF,
127            TokenTypes.CLASS_DEF,
128            TokenTypes.CTOR_DEF,
129            TokenTypes.ENUM_CONSTANT_DEF,
130            TokenTypes.ENUM_DEF,
131            TokenTypes.INTERFACE_DEF,
132            TokenTypes.METHOD_DEF,
133            TokenTypes.PACKAGE_DEF,
134            TokenTypes.VARIABLE_DEF,
135        };
136    }
137
138    @Override
139    public int[] getRequiredTokens() {
140        return CommonUtils.EMPTY_INT_ARRAY;
141    }
142
143    @Override
144    public void visitToken(DetailAST ast) {
145        if (shouldCheck(ast)) {
146            final FileContents contents = getFileContents();
147            // Need to start searching for the comment before the annotations
148            // that may exist. Even if annotations are not defined on the
149            // package, the ANNOTATIONS AST is defined.
150            final TextBlock textBlock =
151                contents.getJavadocBefore(ast.getFirstChild().getLineNo());
152
153            checkComment(ast, textBlock);
154        }
155    }
156
157    /**
158     * Whether we should check this node.
159     * @param ast a given node.
160     * @return whether we should check a given node.
161     */
162    private boolean shouldCheck(final DetailAST ast) {
163        boolean check = false;
164
165        if (ast.getType() == TokenTypes.PACKAGE_DEF) {
166            check = getFileContents().inPackageInfo();
167        }
168        else if (!ScopeUtils.isInCodeBlock(ast)) {
169            final Scope customScope;
170
171            if (ScopeUtils.isInInterfaceOrAnnotationBlock(ast)
172                    || ast.getType() == TokenTypes.ENUM_CONSTANT_DEF) {
173                customScope = Scope.PUBLIC;
174            }
175            else {
176                customScope = ScopeUtils.getScopeFromMods(ast.findFirstToken(TokenTypes.MODIFIERS));
177            }
178            final Scope surroundingScope = ScopeUtils.getSurroundingScope(ast);
179
180            check = customScope.isIn(scope)
181                    && (surroundingScope == null || surroundingScope.isIn(scope))
182                    && (excludeScope == null
183                        || !customScope.isIn(excludeScope)
184                        || surroundingScope != null
185                            && !surroundingScope.isIn(excludeScope));
186        }
187        return check;
188    }
189
190    /**
191     * Performs the various checks against the Javadoc comment.
192     *
193     * @param ast the AST of the element being documented
194     * @param comment the source lines that make up the Javadoc comment.
195     *
196     * @see #checkFirstSentenceEnding(DetailAST, TextBlock)
197     * @see #checkHtmlTags(DetailAST, TextBlock)
198     */
199    private void checkComment(final DetailAST ast, final TextBlock comment) {
200        if (comment == null) {
201            // checking for missing docs in JavadocStyleCheck is not consistent
202            // with the rest of CheckStyle...  Even though, I didn't think it
203            // made sense to make another check just to ensure that the
204            // package-info.java file actually contains package Javadocs.
205            if (getFileContents().inPackageInfo()) {
206                log(ast.getLineNo(), MSG_JAVADOC_MISSING);
207            }
208        }
209        else {
210            if (checkFirstSentence) {
211                checkFirstSentenceEnding(ast, comment);
212            }
213
214            if (checkHtml) {
215                checkHtmlTags(ast, comment);
216            }
217
218            if (checkEmptyJavadoc) {
219                checkJavadocIsNotEmpty(comment);
220            }
221        }
222    }
223
224    /**
225     * Checks that the first sentence ends with proper punctuation.  This method
226     * uses a regular expression that checks for the presence of a period,
227     * question mark, or exclamation mark followed either by whitespace, an
228     * HTML element, or the end of string. This method ignores {_AT_inheritDoc}
229     * comments for TokenTypes that are valid for {_AT_inheritDoc}.
230     *
231     * @param ast the current node
232     * @param comment the source lines that make up the Javadoc comment.
233     */
234    private void checkFirstSentenceEnding(final DetailAST ast, TextBlock comment) {
235        final String commentText = getCommentText(comment.getText());
236
237        if (!commentText.isEmpty()
238            && !endOfSentenceFormat.matcher(commentText).find()
239            && !(commentText.startsWith("{@inheritDoc}")
240            && JavadocTagInfo.INHERIT_DOC.isValidOn(ast))) {
241            log(comment.getStartLineNo(), MSG_NO_PERIOD);
242        }
243    }
244
245    /**
246     * Checks that the Javadoc is not empty.
247     *
248     * @param comment the source lines that make up the Javadoc comment.
249     */
250    private void checkJavadocIsNotEmpty(TextBlock comment) {
251        final String commentText = getCommentText(comment.getText());
252
253        if (commentText.isEmpty()) {
254            log(comment.getStartLineNo(), MSG_EMPTY);
255        }
256    }
257
258    /**
259     * Returns the comment text from the Javadoc.
260     * @param comments the lines of Javadoc.
261     * @return a comment text String.
262     */
263    private static String getCommentText(String... comments) {
264        final StringBuilder builder = new StringBuilder(1024);
265        for (final String line : comments) {
266            final int textStart = findTextStart(line);
267
268            if (textStart != -1) {
269                if (line.charAt(textStart) == '@') {
270                    //we have found the tag section
271                    break;
272                }
273                builder.append(line.substring(textStart));
274                trimTail(builder);
275                builder.append('\n');
276            }
277        }
278
279        return builder.toString().trim();
280    }
281
282    /**
283     * Finds the index of the first non-whitespace character ignoring the
284     * Javadoc comment start and end strings (&#47** and *&#47) as well as any
285     * leading asterisk.
286     * @param line the Javadoc comment line of text to scan.
287     * @return the int index relative to 0 for the start of text
288     *         or -1 if not found.
289     */
290    private static int findTextStart(String line) {
291        int textStart = -1;
292        int index = 0;
293        while (index < line.length()) {
294            if (!Character.isWhitespace(line.charAt(index))) {
295                if (line.regionMatches(index, "/**", 0, "/**".length())) {
296                    index += 2;
297                }
298                else if (line.regionMatches(index, "*/", 0, 2)) {
299                    index++;
300                }
301                else if (line.charAt(index) != '*') {
302                    textStart = index;
303                    break;
304                }
305            }
306            index++;
307        }
308        return textStart;
309    }
310
311    /**
312     * Trims any trailing whitespace or the end of Javadoc comment string.
313     * @param builder the StringBuilder to trim.
314     */
315    private static void trimTail(StringBuilder builder) {
316        int index = builder.length() - 1;
317        while (true) {
318            if (Character.isWhitespace(builder.charAt(index))) {
319                builder.deleteCharAt(index);
320            }
321            else if (index > 0 && builder.charAt(index) == '/'
322                    && builder.charAt(index - 1) == '*') {
323                builder.deleteCharAt(index);
324                builder.deleteCharAt(index - 1);
325                index--;
326                while (builder.charAt(index - 1) == '*') {
327                    builder.deleteCharAt(index - 1);
328                    index--;
329                }
330            }
331            else {
332                break;
333            }
334            index--;
335        }
336    }
337
338    /**
339     * Checks the comment for HTML tags that do not have a corresponding close
340     * tag or a close tag that has no previous open tag.  This code was
341     * primarily copied from the DocCheck checkHtml method.
342     *
343     * @param ast the node with the Javadoc
344     * @param comment the {@code TextBlock} which represents
345     *                 the Javadoc comment.
346     * @noinspection MethodWithMultipleReturnPoints
347     */
348    // -@cs[ReturnCount] Too complex to break apart.
349    private void checkHtmlTags(final DetailAST ast, final TextBlock comment) {
350        final int lineNo = comment.getStartLineNo();
351        final Deque<HtmlTag> htmlStack = new ArrayDeque<>();
352        final String[] text = comment.getText();
353
354        final TagParser parser = new TagParser(text, lineNo);
355
356        while (parser.hasNextTag()) {
357            final HtmlTag tag = parser.nextTag();
358
359            if (tag.isIncompleteTag()) {
360                log(tag.getLineNo(), MSG_INCOMPLETE_TAG,
361                    text[tag.getLineNo() - lineNo]);
362                return;
363            }
364            if (tag.isClosedTag()) {
365                //do nothing
366                continue;
367            }
368            if (tag.isCloseTag()) {
369                // We have found a close tag.
370                if (isExtraHtml(tag.getId(), htmlStack)) {
371                    // No corresponding open tag was found on the stack.
372                    log(tag.getLineNo(),
373                        tag.getPosition(),
374                        MSG_EXTRA_HTML,
375                        tag.getText());
376                }
377                else {
378                    // See if there are any unclosed tags that were opened
379                    // after this one.
380                    checkUnclosedTags(htmlStack, tag.getId());
381                }
382            }
383            else {
384                //We only push html tags that are allowed
385                if (isAllowedTag(tag)) {
386                    htmlStack.push(tag);
387                }
388            }
389        }
390
391        // Identify any tags left on the stack.
392        // Skip multiples, like <b>...<b>
393        String lastFound = "";
394        final List<String> typeParameters = CheckUtils.getTypeParameterNames(ast);
395        for (final HtmlTag htmlTag : htmlStack) {
396            if (!isSingleTag(htmlTag)
397                && !htmlTag.getId().equals(lastFound)
398                && !typeParameters.contains(htmlTag.getId())) {
399                log(htmlTag.getLineNo(), htmlTag.getPosition(),
400                        MSG_UNCLOSED_HTML, htmlTag.getText());
401                lastFound = htmlTag.getId();
402            }
403        }
404    }
405
406    /**
407     * Checks to see if there are any unclosed tags on the stack.  The token
408     * represents a html tag that has been closed and has a corresponding open
409     * tag on the stack.  Any tags, except single tags, that were opened
410     * (pushed on the stack) after the token are missing a close.
411     *
412     * @param htmlStack the stack of opened HTML tags.
413     * @param token the current HTML tag name that has been closed.
414     */
415    private void checkUnclosedTags(Deque<HtmlTag> htmlStack, String token) {
416        final Deque<HtmlTag> unclosedTags = new ArrayDeque<>();
417        HtmlTag lastOpenTag = htmlStack.pop();
418        while (!token.equalsIgnoreCase(lastOpenTag.getId())) {
419            // Find unclosed elements. Put them on a stack so the
420            // output order won't be back-to-front.
421            if (isSingleTag(lastOpenTag)) {
422                lastOpenTag = htmlStack.pop();
423            }
424            else {
425                unclosedTags.push(lastOpenTag);
426                lastOpenTag = htmlStack.pop();
427            }
428        }
429
430        // Output the unterminated tags, if any
431        // Skip multiples, like <b>..<b>
432        String lastFound = "";
433        for (final HtmlTag htag : unclosedTags) {
434            lastOpenTag = htag;
435            if (lastOpenTag.getId().equals(lastFound)) {
436                continue;
437            }
438            lastFound = lastOpenTag.getId();
439            log(lastOpenTag.getLineNo(),
440                lastOpenTag.getPosition(),
441                MSG_UNCLOSED_HTML,
442                lastOpenTag.getText());
443        }
444    }
445
446    /**
447     * Determines if the HtmlTag is one which does not require a close tag.
448     *
449     * @param tag the HtmlTag to check.
450     * @return {@code true} if the HtmlTag is a single tag.
451     */
452    private static boolean isSingleTag(HtmlTag tag) {
453        // If its a singleton tag (<p>, <br>, etc.), ignore it
454        // Can't simply not put them on the stack, since singletons
455        // like <dt> and <dd> (unhappily) may either be terminated
456        // or not terminated. Both options are legal.
457        return SINGLE_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
458    }
459
460    /**
461     * Determines if the HtmlTag is one which is allowed in a javadoc.
462     *
463     * @param tag the HtmlTag to check.
464     * @return {@code true} if the HtmlTag is an allowed html tag.
465     */
466    private static boolean isAllowedTag(HtmlTag tag) {
467        return ALLOWED_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
468    }
469
470    /**
471     * Determines if the given token is an extra HTML tag. This indicates that
472     * a close tag was found that does not have a corresponding open tag.
473     *
474     * @param token an HTML tag id for which a close was found.
475     * @param htmlStack a Stack of previous open HTML tags.
476     * @return {@code false} if a previous open tag was found
477     *         for the token.
478     */
479    private static boolean isExtraHtml(String token, Deque<HtmlTag> htmlStack) {
480        boolean isExtra = true;
481        for (final HtmlTag tag : htmlStack) {
482            // Loop, looking for tags that are closed.
483            // The loop is needed in case there are unclosed
484            // tags on the stack. In that case, the stack would
485            // not be empty, but this tag would still be extra.
486            if (token.equalsIgnoreCase(tag.getId())) {
487                isExtra = false;
488                break;
489            }
490        }
491
492        return isExtra;
493    }
494
495    /**
496     * Sets the scope to check.
497     * @param scope a scope.
498     */
499    public void setScope(Scope scope) {
500        this.scope = scope;
501    }
502
503    /**
504     * Set the excludeScope.
505     * @param excludeScope a scope.
506     */
507    public void setExcludeScope(Scope excludeScope) {
508        this.excludeScope = excludeScope;
509    }
510
511    /**
512     * Set the format for matching the end of a sentence.
513     * @param pattern a pattern.
514     */
515    public void setEndOfSentenceFormat(Pattern pattern) {
516        endOfSentenceFormat = pattern;
517    }
518
519    /**
520     * Sets the flag that determines if the first sentence is checked for
521     * proper end of sentence punctuation.
522     * @param flag {@code true} if the first sentence is to be checked
523     */
524    public void setCheckFirstSentence(boolean flag) {
525        checkFirstSentence = flag;
526    }
527
528    /**
529     * Sets the flag that determines if HTML checking is to be performed.
530     * @param flag {@code true} if HTML checking is to be performed.
531     */
532    public void setCheckHtml(boolean flag) {
533        checkHtml = flag;
534    }
535
536    /**
537     * Sets the flag that determines if empty Javadoc checking should be done.
538     * @param flag {@code true} if empty Javadoc checking should be done.
539     */
540    public void setCheckEmptyJavadoc(boolean flag) {
541        checkEmptyJavadoc = flag;
542    }
543}