从文本JavaScript中删除HTML

有没有一种简单的方法可以在JavaScript中获取一个html字符串并去掉html？

当前回答

const-htmlParser=new DOMParser（）.parseFromString（“<h6>用户<p>名称</p></h6>”，'text/html'）；const textString=htmlParser.body.textContent；console.log（textString）

2022-07-14 06:25:29

其他回答

使用jQuery，您可以使用

$('#elementID').text()

2012-09-03 15:03:35

var text = html.replace(/<\/?("[^"]*"|'[^']*'|[^>])*(>|$)/g, "");

这是一个正则表达式版本，对格式错误的HTML更具弹性，例如：

未闭合的标记

某些文本<img

标记属性内的“<”，“>”

某些文本<img alt=“x>y”>

换行符

一些<ahref=“http://google.com">

代码

var html = '<br>This <img alt="a>b" \r\n src="a_b.gif" />is > \nmy<>< > <a>"text"</a'
var text = html.replace(/<\/?("[^"]*"|'[^']*'|[^>])*(>|$)/g, "");

2018-07-06 10:39:57

另一个公认不如nickf或Shog9优雅的解决方案是从＜body＞标记开始递归遍历DOM并附加每个文本节点。

var bodyContent = document.getElementsByTagName('body')[0];
var result = appendTextNodes(bodyContent);

function appendTextNodes(element) {
    var text = '';

    // Loop through the childNodes of the passed in element
    for (var i = 0, len = element.childNodes.length; i < len; i++) {
        // Get a reference to the current child
        var node = element.childNodes[i];
        // Append the node's value if it's a text node
        if (node.nodeType == 3) {
            text += node.nodeValue;
        }
        // Recurse through the node's children, if there are any
        if (node.childNodes.length > 0) {
            appendTextNodes(node);
        }
    }
    // Return the final result
    return text;
}

2009-05-04 23:14:30

在尝试了所有提到的答案后，如果不是所有答案都有边缘案例，也不能完全支持我的需求。

我开始探索php是如何做到这一点的，并在这里遇到了复制strip_tags方法的php.js库：http://phpjs.org/functions/strip_tags/

2015-06-11 22:06:11

我修改了Jibberboy2000的答案，加入了几种<BR/>标记格式，删除了<SCRIPT>和<STYLE>标记中的所有内容，通过删除多个换行符和空格来格式化生成的HTML，并将一些HTML编码代码转换为普通代码。经过一些测试后，您似乎可以将大部分完整网页转换为保留页面标题和内容的简单文本。

在简单示例中，

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<!--comment-->

<head>

<title>This is my title</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<style>

    body {margin-top: 15px;}
    a { color: #D80C1F; font-weight:bold; text-decoration:none; }

</style>
</head>

<body>
    <center>
        This string has <i>html</i> code i want to <b>remove</b><br>
        In this line <a href="http://www.bbc.co.uk">BBC</a> with link is mentioned.<br/>Now back to &quot;normal text&quot; and stuff using &lt;html encoding&gt;                 
    </center>
</body>
</html>

变成

这是我的头衔此字符串包含我要删除的html代码在这一行中，BBC(http://www.bbc.co.uk)其中提到了链接。现在回到“普通文本”和使用

JavaScript函数和测试页面如下所示：

function convertHtmlToText() {
    var inputText = document.getElementById("input").value;
    var returnText = "" + inputText;

    //-- remove BR tags and replace them with line break
    returnText=returnText.replace(/<br>/gi, "\n");
    returnText=returnText.replace(/<br\s\/>/gi, "\n");
    returnText=returnText.replace(/<br\/>/gi, "\n");

    //-- remove P and A tags but preserve what's inside of them
    returnText=returnText.replace(/<p.*>/gi, "\n");
    returnText=returnText.replace(/<a.*href="(.*?)".*>(.*?)<\/a>/gi, " $2 ($1)");

    //-- remove all inside SCRIPT and STYLE tags
    returnText=returnText.replace(/<script.*>[\w\W]{1,}(.*?)[\w\W]{1,}<\/script>/gi, "");
    returnText=returnText.replace(/<style.*>[\w\W]{1,}(.*?)[\w\W]{1,}<\/style>/gi, "");
    //-- remove all else
    returnText=returnText.replace(/<(?:.|\s)*?>/g, "");

    //-- get rid of more than 2 multiple line breaks:
    returnText=returnText.replace(/(?:(?:\r\n|\r|\n)\s*){2,}/gim, "\n\n");

    //-- get rid of more than 2 spaces:
    returnText = returnText.replace(/ +(?= )/g,'');

    //-- get rid of html-encoded characters:
    returnText=returnText.replace(/&nbsp;/gi," ");
    returnText=returnText.replace(/&amp;/gi,"&");
    returnText=returnText.replace(/&quot;/gi,'"');
    returnText=returnText.replace(/&lt;/gi,'<');
    returnText=returnText.replace(/&gt;/gi,'>');

    //-- return
    document.getElementById("output").value = returnText;
}

它与此HTML一起使用：

<textarea id="input" style="width: 400px; height: 300px;"></textarea><br />
<button onclick="convertHtmlToText()">CONVERT</button><br />
<textarea id="output" style="width: 400px; height: 300px;"></textarea><br />

2012-01-10 12:59:46

从文本JavaScript中删除HTML

推荐文章

最新文章

标签