如何从字符串中删除所有非字母的字符?

非字母数字呢?

这必须是一个自定义函数还是也有更通用的解决方案?


当前回答

这是另一个递归CTE解决方案,基于@Gerhard Weiss的回答。您应该能够将整个代码块复制并粘贴到SSMS中,并在那里使用它。结果包括一些额外的列,以帮助我们理解发生了什么。我花了一段时间才理解了PATINDEX (RegEx)和递归CTE的全部原理。

DECLARE @DefineBadCharPattern varchar(30)
SET @DefineBadCharPattern = '%[^A-z]%'  --Means anything NOT between A and z characters (according to ascii char value) is "bad"
SET @DefineBadCharPattern = '%[^a-z0-9]%'  --Means anything NOT between a and z characters or numbers 0 through 9 (according to ascii char value) are "bad"
SET @DefineBadCharPattern = '%[^ -~]%'  --Means anything NOT between space and ~ characters (all non-printable characters) is "bad"
--Change @ReplaceBadCharWith to '' to strip "bad" characters from string
--Change to some character if you want to 'see' what's being replaced. NOTE: It must be allowed accoring to @DefineBadCharPattern above
DECLARE @ReplaceBadCharWith varchar(1) = '#'  --Change this to whatever you want to replace non-printable chars with 
IF patindex(@DefineBadCharPattern COLLATE Latin1_General_BIN, @ReplaceBadCharWith) > 0
    BEGIN
        RAISERROR('@ReplaceBadCharWith value (%s) must be a character allowed by PATINDEX pattern of %s',16,1,@ReplaceBadCharWith, @DefineBadCharPattern)
        RETURN
    END
--A table of values to play with:
DECLARE @temp TABLE (OriginalString varchar(100))
INSERT @temp SELECT ' 1hello' + char(13) + char(10) + 'there' + char(30) + char(9) + char(13) + char(10)
INSERT @temp SELECT '2hello' + char(30) + 'there' + char(30)
INSERT @temp SELECT ' 3hello there'
INSERT @temp SELECT ' tab' + char(9) + ' character'
INSERT @temp SELECT 'good bye'

--Let the magic begin:
;WITH recurse AS (
    select
    OriginalString,
    OriginalString as CleanString,
    patindex(@DefineBadCharPattern COLLATE Latin1_General_BIN, OriginalString) as [Position],
    substring(OriginalString,patindex(@DefineBadCharPattern COLLATE Latin1_General_BIN, OriginalString),1) as [InvalidCharacter],
    ascii(substring(OriginalString,patindex(@DefineBadCharPattern COLLATE Latin1_General_BIN, OriginalString),1)) as [ASCIICode]
    from @temp
   UNION ALL
    select
    OriginalString,
    CONVERT(varchar(100),REPLACE(CleanString,InvalidCharacter,@ReplaceBadCharWith)),
    patindex(@DefineBadCharPattern COLLATE Latin1_General_BIN,CleanString) as [Position],
    substring(CleanString,patindex(@DefineBadCharPattern COLLATE Latin1_General_BIN,CleanString),1),
    ascii(substring(CleanString,patindex(@DefineBadCharPattern COLLATE Latin1_General_BIN,CleanString),1))
    from recurse
    where patindex(@DefineBadCharPattern COLLATE Latin1_General_BIN,CleanString) > 0
)
SELECT * FROM recurse
--optionally comment out this last WHERE clause to see more of what the recursion is doing:
WHERE patindex(@DefineBadCharPattern COLLATE Latin1_General_BIN,CleanString) = 0

其他回答

信不信由你,在我的系统中,这个丑陋的函数比G masters的优雅函数表现得更好。

CREATE FUNCTION dbo.RemoveSpecialChar (@s VARCHAR(256)) 
RETURNS VARCHAR(256) 
WITH SCHEMABINDING
    BEGIN
        IF @s IS NULL
            RETURN NULL
        DECLARE @s2 VARCHAR(256) = '',
                @l INT = LEN(@s),
                @p INT = 1

        WHILE @p <= @l
            BEGIN
                DECLARE @c INT
                SET @c = ASCII(SUBSTRING(@s, @p, 1))
                IF @c BETWEEN 48 AND 57
                   OR  @c BETWEEN 65 AND 90
                   OR  @c BETWEEN 97 AND 122
                    SET @s2 = @s2 + CHAR(@c)
                SET @p = @p + 1
            END

        IF LEN(@s2) = 0
            RETURN NULL

        RETURN @s2

这是另一个递归CTE解决方案,基于@Gerhard Weiss的回答。您应该能够将整个代码块复制并粘贴到SSMS中,并在那里使用它。结果包括一些额外的列,以帮助我们理解发生了什么。我花了一段时间才理解了PATINDEX (RegEx)和递归CTE的全部原理。

DECLARE @DefineBadCharPattern varchar(30)
SET @DefineBadCharPattern = '%[^A-z]%'  --Means anything NOT between A and z characters (according to ascii char value) is "bad"
SET @DefineBadCharPattern = '%[^a-z0-9]%'  --Means anything NOT between a and z characters or numbers 0 through 9 (according to ascii char value) are "bad"
SET @DefineBadCharPattern = '%[^ -~]%'  --Means anything NOT between space and ~ characters (all non-printable characters) is "bad"
--Change @ReplaceBadCharWith to '' to strip "bad" characters from string
--Change to some character if you want to 'see' what's being replaced. NOTE: It must be allowed accoring to @DefineBadCharPattern above
DECLARE @ReplaceBadCharWith varchar(1) = '#'  --Change this to whatever you want to replace non-printable chars with 
IF patindex(@DefineBadCharPattern COLLATE Latin1_General_BIN, @ReplaceBadCharWith) > 0
    BEGIN
        RAISERROR('@ReplaceBadCharWith value (%s) must be a character allowed by PATINDEX pattern of %s',16,1,@ReplaceBadCharWith, @DefineBadCharPattern)
        RETURN
    END
--A table of values to play with:
DECLARE @temp TABLE (OriginalString varchar(100))
INSERT @temp SELECT ' 1hello' + char(13) + char(10) + 'there' + char(30) + char(9) + char(13) + char(10)
INSERT @temp SELECT '2hello' + char(30) + 'there' + char(30)
INSERT @temp SELECT ' 3hello there'
INSERT @temp SELECT ' tab' + char(9) + ' character'
INSERT @temp SELECT 'good bye'

--Let the magic begin:
;WITH recurse AS (
    select
    OriginalString,
    OriginalString as CleanString,
    patindex(@DefineBadCharPattern COLLATE Latin1_General_BIN, OriginalString) as [Position],
    substring(OriginalString,patindex(@DefineBadCharPattern COLLATE Latin1_General_BIN, OriginalString),1) as [InvalidCharacter],
    ascii(substring(OriginalString,patindex(@DefineBadCharPattern COLLATE Latin1_General_BIN, OriginalString),1)) as [ASCIICode]
    from @temp
   UNION ALL
    select
    OriginalString,
    CONVERT(varchar(100),REPLACE(CleanString,InvalidCharacter,@ReplaceBadCharWith)),
    patindex(@DefineBadCharPattern COLLATE Latin1_General_BIN,CleanString) as [Position],
    substring(CleanString,patindex(@DefineBadCharPattern COLLATE Latin1_General_BIN,CleanString),1),
    ascii(substring(CleanString,patindex(@DefineBadCharPattern COLLATE Latin1_General_BIN,CleanString),1))
    from recurse
    where patindex(@DefineBadCharPattern COLLATE Latin1_General_BIN,CleanString) > 0
)
SELECT * FROM recurse
--optionally comment out this last WHERE clause to see more of what the recursion is doing:
WHERE patindex(@DefineBadCharPattern COLLATE Latin1_General_BIN,CleanString) = 0

使用CTE生成的数字表来检查每个字符,然后FOR XML连接到一个保留值的字符串,您可以…

CREATE FUNCTION [dbo].[PatRemove](
    @pattern varchar(50),
    @expression varchar(8000) 
    )
RETURNS varchar(8000)
AS
BEGIN
    WITH 
        d(d) AS (SELECT d FROM (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) digits(d)),
        nums(n) AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM d d1, d d2, d d3, d d4),
        chars(c) AS (SELECT SUBSTRING(@expression, n, 1) FROM nums WHERE n <= LEN(@expression))
    SELECT 
        @expression = (SELECT c AS [text()] FROM chars WHERE c NOT LIKE @pattern FOR XML PATH(''));

    RETURN @expression;
END

SQL Server 2017+的另一个可能的选项,没有循环和/或递归,是使用TRANSLATE()和REPLACE()的基于字符串的方法。

t - sql声明:

DECLARE @pattern varchar(52) = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

SELECT 
   v.[Text], 
   REPLACE(
      TRANSLATE(
         v.[Text],
         REPLACE(TRANSLATE(v.[Text], @pattern, REPLICATE('a', LEN(@pattern))), 'a', ''),
         REPLICATE('0', LEN(REPLACE(TRANSLATE(v.[Text], @pattern, REPLICATE('a', LEN(@pattern))), 'a', '')))
      ),
      '0',
      ''
   ) AS AlphabeticCharacters
FROM (VALUES
   ('abc1234def5678ghi90jkl#@$&'),
   ('1234567890'),
   ('JAHDBESBN%*#*@*($E*sd55bn')
) v ([Text])

或作为一个函数:

CREATE FUNCTION dbo.RemoveNonAlphabeticCharacters (@Text varchar(1000)) 
RETURNS varchar(1000)
AS BEGIN

   DECLARE @pattern varchar(52) = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
   SET @text = REPLACE(
      TRANSLATE(
         @Text,
         REPLACE(TRANSLATE(@Text, @pattern, REPLICATE('a', LEN(@pattern))), 'a', ''),
         REPLICATE('0', LEN(REPLACE(TRANSLATE(@Text, @pattern, REPLICATE('a', LEN(@pattern))), 'a', '')))
      ),
      '0',
      ''
   )
   
   RETURN @Text
END

虽然这篇文章有点老了,但我想说以下几点。 我有上述解决方案的问题是,它没有过滤出字符,如ç, ë, ï等。我调整了一个函数如下(我只使用80 varchar字符串来节省内存):

create FUNCTION dbo.udf_Cleanchars (@InputString varchar(80)) 
RETURNS varchar(80) 
AS 

BEGIN 
declare @return varchar(80) , @length int , @counter int , @cur_char char(1) 
SET @return = '' 
SET @length = 0 
SET @counter = 1 
SET @length = LEN(@InputString) 
IF @length > 0 
BEGIN WHILE @counter <= @length 

BEGIN SET @cur_char = SUBSTRING(@InputString, @counter, 1) IF ((ascii(@cur_char) in (32,44,46)) or (ascii(@cur_char) between 48 and 57) or (ascii(@cur_char) between 65 and 90) or (ascii(@cur_char) between 97 and 122))
BEGIN SET @return = @return + @cur_char END 
SET @counter = @counter + 1 
END END 

RETURN @return END