// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/autofill/content/renderer/form_autofill_util.h"

#include <algorithm>
#include <limits>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "base/check_op.h"
#include "base/command_line.h"
#include "base/i18n/case_conversion.h"
#include "base/metrics/field_trial.h"
#include "base/metrics/histogram_macros.h"
#include "base/no_destructor.h"
#include "base/notreached.h"
#include "base/stl_util.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "build/build_config.h"
#include "components/autofill/core/common/autofill_data_validation.h"
#include "components/autofill/core/common/autofill_features.h"
#include "components/autofill/core/common/autofill_regexes.h"
#include "components/autofill/core/common/autofill_switches.h"
#include "components/autofill/core/common/autofill_util.h"
#include "components/autofill/core/common/field_data_manager.h"
#include "components/autofill/core/common/form_data.h"
#include "components/autofill/core/common/form_field_data.h"
#include "content/public/renderer/render_frame.h"
#include "third_party/blink/public/platform/url_conversion.h"
#include "third_party/blink/public/platform/web_string.h"
#include "third_party/blink/public/platform/web_vector.h"
#include "third_party/blink/public/web/web_document.h"
#include "third_party/blink/public/web/web_element.h"
#include "third_party/blink/public/web/web_element_collection.h"
#include "third_party/blink/public/web/web_form_control_element.h"
#include "third_party/blink/public/web/web_form_element.h"
#include "third_party/blink/public/web/web_input_element.h"
#include "third_party/blink/public/web/web_label_element.h"
#include "third_party/blink/public/web/web_local_frame.h"
#include "third_party/blink/public/web/web_node.h"
#include "third_party/blink/public/web/web_option_element.h"
#include "third_party/blink/public/web/web_select_element.h"

using blink::WebAutofillState;
using blink::WebDocument;
using blink::WebElement;
using blink::WebElementCollection;
using blink::WebFormControlElement;
using blink::WebFormElement;
using blink::WebInputElement;
using blink::WebLabelElement;
using blink::WebLocalFrame;
using blink::WebNode;
using blink::WebOptionElement;
using blink::WebSelectElement;
using blink::WebString;
using blink::WebVector;

namespace autofill {

using mojom::ButtonTitleType;

namespace form_util {

namespace {

// Maximum length of a single button's title.
constexpr int kMaxLengthForSingleButtonTitle = 30;
// Maximum length of all button titles.
constexpr int kMaxLengthForAllButtonTitles = 200;

// Substrings used as text features to detect form submission buttons. The
// features were selected based on analysis of real forms and their buttons.
// TODO(crbug.com/910546): Consider to add more features (e.g. non-English
// features).
constexpr const char* kButtonFeatures[] = {
    "button",
    "btn",
    "submit",
    "boton",  // "button" in Spanish.
};

// Bit flags that tell the FillForm functions which fields must not be filled.
// Individual flags can be OR-ed together.
enum FieldFilterMask {
  // No field is filtered out.
  FILTER_NONE = 0x0,
  // Skip disabled elements.
  FILTER_DISABLED_ELEMENTS = 0x1,
  // Skip read-only elements.
  FILTER_READONLY_ELEMENTS = 0x2,
  // Skip non-focusable elements, except for select elements. Those are
  // sometimes made non-focusable because they are only present for
  // accessibility while a prettier, non-<select> dropdown is shown; the
  // non-focusable <select> should still be autofilled.
  FILTER_NON_FOCUSABLE_ELEMENTS = 0x4,
  // Union of all the flags above.
  FILTER_ALL_NON_EDITABLE_ELEMENTS = FILTER_DISABLED_ELEMENTS |
                                     FILTER_READONLY_ELEMENTS |
                                     FILTER_NON_FOCUSABLE_ELEMENTS,
};

// Returns whether sending autofill field metadata to the server is enabled.
// TODO(crbug.com/938804): Remove this when button titles are crowdsourced in
// all channels.
bool IsAutofillFieldMetadataEnabled() {
  static base::NoDestructor<std::string> kGroupName(
      base::FieldTrialList::FindFullName("AutofillFieldMetadata"));
  return base::StartsWith(*kGroupName, "Enabled", base::CompareCase::SENSITIVE);
}

// Shortens |*str| in place to at most |max_length| characters. Strings that
// already fit are left untouched.
void TruncateString(base::string16* str, size_t max_length) {
  const bool already_fits = str->length() <= max_length;
  if (already_fits)
    return;
  str->resize(max_length);
}

bool IsOptionElement(const WebElement& element) {
  static base::NoDestructor<WebString> kOption("option");
  return element.HasHTMLTagName(*kOption);
}

bool IsScriptElement(const WebElement& element) {
  static base::NoDestructor<WebString> kScript("script");
  return element.HasHTMLTagName(*kScript);
}

bool IsNoScriptElement(const WebElement& element) {
  static base::NoDestructor<WebString> kNoScript("noscript");
  return element.HasHTMLTagName(*kNoScript);
}

// Returns true if |node| is an element node whose HTML tag name is |tag|.
// Non-element nodes never match.
bool HasTagName(const WebNode& node, const blink::WebString& tag) {
  if (!node.IsElementNode())
    return false;
  return node.ToConst<WebElement>().HasHTMLTagName(tag);
}

bool IsElementInControlElementSet(
    const WebElement& element,
    const std::vector<WebFormControlElement>& control_elements) {
  if (!element.IsFormControlElement())
    return false;
  const WebFormControlElement form_control_element =
      element.ToConst<WebFormControlElement>();
  return base::Contains(control_elements, form_control_element);
}

bool IsElementInsideFormOrFieldSet(const WebElement& element,
                                   bool consider_fieldset_tags) {
  for (WebNode parent_node = element.ParentNode(); !parent_node.IsNull();
       parent_node = parent_node.ParentNode()) {
    if (!parent_node.IsElementNode())
      continue;

    WebElement cur_element = parent_node.To<WebElement>();
    if (cur_element.HasHTMLTagName("form") ||
        (consider_fieldset_tags && cur_element.HasHTMLTagName("fieldset"))) {
      return true;
    }
  }
  return false;
}

// Returns true if |node| is an element and it is a container type that
// InferLabelForElement() can traverse.
bool IsTraversableContainerElement(const WebNode& node) {
  if (!node.IsElementNode())
    return false;

  const WebElement element = node.ToConst<WebElement>();
  return element.HasHTMLTagName("dd") || element.HasHTMLTagName("div") ||
         element.HasHTMLTagName("fieldset") || element.HasHTMLTagName("li") ||
         element.HasHTMLTagName("td") || element.HasHTMLTagName("table");
}

// Returns the colspan for a <td> / <th>. Defaults to 1.
size_t CalculateTableCellColumnSpan(const WebElement& element) {
  DCHECK(element.HasHTMLTagName("td") || element.HasHTMLTagName("th"));

  size_t span = 1;
  if (element.HasAttribute("colspan")) {
    base::string16 colspan = element.GetAttribute("colspan").Utf16();
    // Do not check return value to accept imperfect conversions.
    base::StringToSizeT(colspan, &span);
    // Handle overflow.
    if (span == std::numeric_limits<size_t>::max())
      span = 1;
    span = std::max(span, static_cast<size_t>(1));
  }

  return span;
}

// Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed
// to a single space.  If |force_whitespace| is true, then the resulting string
// is guaranteed to have a space between |prefix| and |suffix|.  Otherwise, the
// result includes a space only if |prefix| has trailing whitespace or |suffix|
// has leading whitespace.  Leading whitespace of |prefix| and trailing
// whitespace of |suffix| are left untouched.
// A few examples:
//  * CombineAndCollapseWhitespace("foo", "bar", false)       -> "foobar"
//  * CombineAndCollapseWhitespace("foo", "bar", true)        -> "foo bar"
//  * CombineAndCollapseWhitespace("foo ", "bar", false)      -> "foo bar"
//  * CombineAndCollapseWhitespace("foo", " bar", false)      -> "foo bar"
//  * CombineAndCollapseWhitespace("foo", " bar", true)       -> "foo bar"
//  * CombineAndCollapseWhitespace("foo   ", "   bar", false) -> "foo bar"
//  * CombineAndCollapseWhitespace(" foo", "bar ", false)     -> " foobar "
//  * CombineAndCollapseWhitespace(" foo", "bar ", true)      -> " foo bar "
//
// The result is returned as a non-const value so that callers can move from
// it instead of copying.
base::string16 CombineAndCollapseWhitespace(const base::string16& prefix,
                                            const base::string16& suffix,
                                            bool force_whitespace) {
  // Strip the whitespace at the end of |prefix|. The returned trim positions
  // are non-zero iff something was actually stripped. |combined| starts out as
  // the trimmed prefix and is extended in place below.
  base::string16 combined;
  const base::TrimPositions prefix_trailing_whitespace =
      base::TrimWhitespace(prefix, base::TRIM_TRAILING, &combined);

  // Strip the whitespace at the beginning of |suffix|.
  base::string16 suffix_trimmed;
  const base::TrimPositions suffix_leading_whitespace =
      base::TrimWhitespace(suffix, base::TRIM_LEADING, &suffix_trimmed);

  // Re-insert exactly one space if there was whitespace at the junction or if
  // the caller demands a separator.
  if (prefix_trailing_whitespace || suffix_leading_whitespace ||
      force_whitespace) {
    combined.push_back(' ');
  }
  combined += suffix_trimmed;
  return combined;
}

// Helper for the FindChildText() function (see below). Collects the text of
// |node|, of its descendants and of its following siblings into one string.
// |depth| bounds the search: every recursive step, whether into the first
// child or on to the next sibling, consumes one unit of it.
// |divs_to_skip| is a list of <div> tags to ignore if encountered.
base::string16 FindChildTextInner(const WebNode& node,
                                  int depth,
                                  const std::set<WebNode>& divs_to_skip) {
  const bool search_exhausted = depth <= 0 || node.IsNull();
  if (search_exhausted)
    return base::string16();

  const int remaining_depth = depth - 1;

  // Comments contribute nothing; continue with whatever follows them.
  if (node.IsCommentNode()) {
    return FindChildTextInner(node.NextSibling(), remaining_depth,
                              divs_to_skip);
  }

  const bool is_element = node.IsElementNode();
  const bool is_text = node.IsTextNode();
  if (!is_element && !is_text)
    return base::string16();

  if (is_element) {
    const WebElement element = node.ToConst<WebElement>();

    // Elements known not to contain inferable labels are ignored.
    if (IsOptionElement(element) || IsScriptElement(element) ||
        IsNoScriptElement(element)) {
      return base::string16();
    }
    if (element.IsFormControlElement() &&
        IsAutofillableElement(element.ToConst<WebFormControlElement>())) {
      return base::string16();
    }

    // So are the <div>s the caller asked to skip.
    if (element.HasHTMLTagName("div") && base::Contains(divs_to_skip, node))
      return base::string16();
  }

  // Start with the text found exactly at this node.
  base::string16 collected_text = node.NodeValue().Utf16();

  // Add the text of the children. A separating space is forced when this is a
  // text node whose own text is empty, to preserve inter-element whitespace
  // separation.
  const base::string16 descendant_text =
      FindChildTextInner(node.FirstChild(), remaining_depth, divs_to_skip);
  collected_text = CombineAndCollapseWhitespace(
      collected_text, descendant_text, is_text && collected_text.empty());

  // Add the text of the following siblings, again preserving inter-element
  // whitespace separation.
  const base::string16 following_text =
      FindChildTextInner(node.NextSibling(), remaining_depth, divs_to_skip);
  collected_text = CombineAndCollapseWhitespace(
      collected_text, following_text, is_text && collected_text.empty());

  return collected_text;
}

// Same as FindChildText() below, but with a list of div nodes to skip.
// A text node yields its own value unchanged. For any other node, the text
// gathered from its children (searched to a limited depth) is returned with
// leading and trailing whitespace removed.
base::string16 FindChildTextWithIgnoreList(
    const WebNode& node,
    const std::set<WebNode>& divs_to_skip) {
  if (node.IsTextNode())
    return node.NodeValue().Utf16();

  // Depth budget handed to FindChildTextInner().
  constexpr int kChildSearchDepth = 10;

  base::string16 gathered_text = FindChildTextInner(
      node.FirstChild(), kChildSearchDepth, divs_to_skip);
  base::TrimWhitespace(gathered_text, base::TRIM_ALL, &gathered_text);
  return gathered_text;
}

// Returns true if |inferred_label| contains at least one character that is
// not listed in |stop_words|. An empty label is therefore never valid.
bool IsLabelValid(base::StringPiece16 inferred_label,
                  const std::vector<base::char16>& stop_words) {
  const auto is_not_stop_word = [&stop_words](base::char16 c) {
    return !base::Contains(stop_words, c);
  };
  return std::any_of(inferred_label.begin(), inferred_label.end(),
                     is_not_stop_word);
}

// Shared function for InferLabelFromPrevious() and InferLabelFromNext().
bool InferLabelFromSibling(const WebFormControlElement& element,
                           const std::vector<base::char16>& stop_words,
                           bool forward,
                           base::string16* label,
                           FormFieldData::LabelSource* label_source) {
  base::string16 inferred_label;
  FormFieldData::LabelSource inferred_label_source =
      FormFieldData::LabelSource::kUnknown;
  WebNode sibling = element;
  while (true) {
    sibling = forward ? sibling.NextSibling() : sibling.PreviousSibling();
    if (sibling.IsNull())
      break;

    // Skip over comments.
    if (sibling.IsCommentNode())
      continue;

    // Otherwise, only consider normal HTML elements and their contents.
    if (!sibling.IsElementNode() && !sibling.IsTextNode())
      break;

    // A label might be split across multiple "lightweight" nodes.
    // Coalesce any text contained in multiple consecutive
    //  (a) plain text nodes or
    //  (b) inline HTML elements that are essentially equivalent to text nodes.
    static base::NoDestructor<WebString> kBold("b");
    static base::NoDestructor<WebString> kStrong("strong");
    static base::NoDestructor<WebString> kSpan("span");
    static base::NoDestructor<WebString> kFont("font");
    if (sibling.IsTextNode() || HasTagName(sibling, *kBold) ||
        HasTagName(sibling, *kStrong) || HasTagName(sibling, *kSpan) ||
        HasTagName(sibli