有没有一种简单的方法可以在JavaScript中获取一个html字符串并去掉html?


当前回答

下面的代码允许您保留一些html标记,同时剥离所有其他标记

function strip_tags(input, allowed) {

  allowed = (((allowed || '') + '')
    .toLowerCase()
    .match(/<[a-z][a-z0-9]*>/g) || [])
    .join(''); // making sure the allowed arg is a string containing only tags in lowercase (<a><b><c>)

  var tags = /<\/?([a-z][a-z0-9]*)\b[^>]*>/gi,
      commentsAndPhpTags = /<!--[\s\S]*?-->|<\?(?:php)?[\s\S]*?\?>/gi;

  return input.replace(commentsAndPhpTags, '')
      .replace(tags, function($0, $1) {
          return allowed.indexOf('<' + $1.toLowerCase() + '>') > -1 ? $0 : '';
      });
}

其他回答

简单的2行jquery去掉html。

 var content = "<p>checking the html source&nbsp;</p><p>&nbsp;
  </p><p>with&nbsp;</p><p>all</p><p>the html&nbsp;</p><p>content</p>";

 var text = $(content).text();//It gets you the plain text
 console.log(text);//check the data in your console

 cj("#text_area_id").val(text);//set your content to text area using text_area_id
function strip_html_tags(str)
{
   if ((str===null) || (str===''))
       return false;
  else
   str = str.toString();
  return str.replace(/<[^>]*>/g, '');
}

我只需要去掉<a>标签,并用链接的文本替换它们。

这似乎很有效。

htmlContent= htmlContent.replace(/<a.*href="(.*?)">/g, '');
htmlContent= htmlContent.replace(/<\/a>/g, '');
const getTextFromHtml = (t) =>
  t
    ?.split('>')
    ?.map((i) => i.split('<')[0])
    .filter((i) => !i.includes('=') && i.trim())
    .join('');

const test = '<p>This <strong>one</strong> <em>time</em>,</p><br /><blockquote>I went to</blockquote><ul><li>band <a href="https://workingclasshistory.com" rel="noopener noreferrer" target="_blank">camp</a>…</li></ul><p>I edited this as a reviewer just to double check</p>'

getTextFromHtml(test)
  // 'This onetime,I went toband camp…I edited this as a reviewer just to double check'

另一个公认不如nickf或Shog9优雅的解决方案是从<body>标记开始递归遍历DOM并附加每个文本节点。

var bodyContent = document.getElementsByTagName('body')[0];
var result = appendTextNodes(bodyContent);

function appendTextNodes(element) {
    var text = '';

    // Loop through the childNodes of the passed in element
    for (var i = 0, len = element.childNodes.length; i < len; i++) {
        // Get a reference to the current child
        var node = element.childNodes[i];
        // Append the node's value if it's a text node
        if (node.nodeType == 3) {
            text += node.nodeValue;
        }
        // Recurse through the node's children, if there are any
        if (node.childNodes.length > 0) {
            appendTextNodes(node);
        }
    }
    // Return the final result
    return text;
}