代码之家  ›  专栏  ›  技术社区  ›  Hacker

在字符串中分隔整数和文本

  •  9
  • Hacker  · 技术社区  · 15 年前

    我有类似于fulldata1到fulldata10的字符串,在这里我需要分离出整数和文本部分。我该如何使用javascript呢?

    3 回复  |  直到 15 年前
        1
  •  10
  •   laggingreflex    7 年前

    将字符串按整数拆分为数组:

    myArray = datastring.split(/([0-9]+)/)

    这样 myArray 的第一个元素会是类似 fullData 的文本,第二个元素则是数字部分,比如 1 或 10 。

    如果你的字符串是 fullData10foo ,那么你会得到数组 ['fullData', '10', 'foo'] (split 返回的每个元素都是字符串)。

    你也可以:

    • .split(/(?=\d+)/) 它会得到 ["fullData", "1", "0"]

    • .split(/(\d+)/) 它会得到 ["fullData", "10", ""]

    • 另外 .filter(Boolean) 去掉任何空字符串( "" )

        2
  •  1
  •   Kangkan    15 年前

    如果字符部分的长度是常量,则可以使用子字符串方法很好地删除它们。

        3
  •  1
  •   CodeManX    9 年前

    TL;DR

    如果 RegExp sticky flag 在您的JS环境中受支持,请使用它以获得最佳性能。

    基准

    以下是8种不同的实现,用于从其他字符中分离数字:

    /**
     * Splits a string into alternating text and number tokens.
     *
     * Runs of decimal digits are converted to numbers with unary plus; all other
     * characters are kept as strings. Empty fragments produced by the split
     * (e.g. when the input starts or ends with digits) are dropped.
     *
     * @param {string} str - Input to tokenize.
     * @returns {Array<string|number>} Tokens in their original order.
     */
    function naturalSplit(str) {
        'use strict';
        const arr = [];
        // Because the pattern has a capturing group, split() keeps the digit runs:
        // they land at odd indices, the surrounding text at even indices.
        const split = str.split(/(\d+)/);
        // Index loop rather than for...in: for...in yields string keys and also
        // visits enumerable properties added to Array.prototype, which would
        // leak unrelated values into the result.
        for (let i = 0; i < split.length; i++) {
            const s = split[i];
            if (s !== "") {
                arr.push(i % 2 ? +s : s);
            }
        }
        return arr;
    }
    
    /**
     * Tokenizes a string into text and number parts using split + map + filter.
     *
     * @param {string} str - Input to tokenize.
     * @returns {Array<string|number>} Text runs as strings and digit runs as
     *     numbers, in input order, with empty fragments removed.
     */
    function naturalSplit2(str) {
        'use strict';
        // The capturing group makes split() emit the digit runs at odd positions.
        const pieces = str.split(/(\d+)/);
        const converted = pieces.map((piece, position) => {
            if (position % 2 === 0) {
                return piece;
            }
            return +piece;
        });
        return converted.filter(token => token !== "");
    }
    
    /**
     * Tokenizes a string into text and number parts; digit runs are converted
     * with the unary plus operator.
     *
     * @param {string} str - Input to tokenize.
     * @returns {Array<string|number>} Tokens in input order, empty fragments removed.
     */
    function naturalSplitMapFilterUnaryPlus(str) {
        'use strict';
        // Even positions hold text, odd positions hold the captured digit runs.
        const toToken = (fragment, idx) => (idx % 2 === 0 ? fragment : +fragment);
        const isNonEmpty = token => token !== "";
        return str.split(/(\d+)/).map(toToken).filter(isNonEmpty);
    }
    
    /**
     * Tokenizes a string into text and number parts; digit runs are converted
     * with the Number() function.
     *
     * @param {string} str - Input to tokenize.
     * @returns {Array<string|number>} Tokens in input order, empty fragments removed.
     */
    function naturalSplitMapFilterNumber(str) {
        'use strict';
        const fragments = str.split(/(\d+)/);
        // Odd positions are the digit runs captured by the split pattern.
        const tokens = fragments.map((fragment, idx) => (idx % 2 !== 0 ? Number(fragment) : fragment));
        return tokens.filter(token => token !== "");
    }
    
    /**
     * Tokenizes a string by scanning it character by character and building each
     * token through string concatenation.
     *
     * @param {string} str - Input to tokenize.
     * @returns {Array<string|number>} Digit runs as numbers and all other runs
     *     as strings, in input order.
     */
    function naturalConcat(str) {
        'use strict';
        const isDigit = ch => ch >= "0" && ch <= "9";
        const isNonDigit = ch => ch < "0" || ch > "9";
        const tokens = [];
        let pos = 0;
        while (pos < str.length) {
            // Collect a (possibly empty) run of digits first...
            let digits = "";
            for (; pos < str.length && isDigit(str[pos]); pos++) {
                digits += str[pos];
            }
            if (digits) {
                tokens.push(Number(digits));
            }
            // ...then the (possibly empty) run of non-digits that follows it.
            let text = "";
            for (; pos < str.length && isNonDigit(str[pos]); pos++) {
                text += str[pos];
            }
            if (text) {
                tokens.push(text);
            }
        }
        return tokens;
    }
    
    /**
     * Tokenizes a string by repeatedly matching its head with a regular
     * expression: each pass peels off one optional run of non-digits and one
     * optional run of digits, then continues with the remainder.
     *
     * @param {string} str - Input to tokenize.
     * @returns {Array<string|number>} Digit runs as numbers and all other runs
     *     as strings, in input order.
     */
    function naturalMatch(str) {
        'use strict';
        const arr = [];
        // The tail group uses [\s\S]* rather than .* because "." does not match
        // line terminators; with .* an input such as "1\n2" fails to match at
        // all and the function would return an empty array.
        const num_re = /^(\D+)?(\d+)?([\s\S]*)$/;
        let s = str;
        while (s) {
            const match = s.match(num_re);
            if (!match) {
                break;
            }
            if (match[1]) {
                arr.push(match[1]);
            }
            if (match[2]) {
                arr.push(Number(match[2]));
            }
            // Whatever was not consumed becomes the input of the next pass.
            s = match[3];
        }
        return arr;
    }
    
    /**
     * Tokenizes a string with a sticky regular expression, letting the regex's
     * lastIndex walk through the input instead of slicing it.
     *
     * @param {string} str - Input to tokenize.
     * @returns {Array<string|number>} Digit runs as numbers and all other runs
     *     as strings, in input order.
     */
    function naturalExecSticky(str) {
        'use strict';
        // A fresh regex per call, so lastIndex always starts at 0.
        const pattern = /(\D+)?(\d+)?/y;
        const tokens = [];
        for (;;) {
            const [whole, text, digits] = pattern.exec(str);
            if (text !== undefined) {
                tokens.push(text);
            }
            if (digits !== undefined) {
                tokens.push(Number(digits));
            }
            // Both groups are optional, so an empty overall match means the end
            // of the input has been reached.
            if (!whole) {
                return tokens;
            }
        }
    }
    
    /**
     * Tokenizes a string by measuring each run of digits or non-digits and
     * cutting it out of the input in one piece.
     *
     * @param {string} str - Input to tokenize.
     * @returns {Array<string|number>} Digit runs as numbers and all other runs
     *     as strings, in input order.
     */
    function naturalSlice(str) {
        'use strict';
        const arr = [];
        let i = 0;
        while (i < str.length) {
            // j counts the length of the run starting at i.
            let j = 0;
            while ((i + j) < str.length && str[i + j] >= "0" && str[i + j] <= "9") {
                j++;
            }
            if (j) {
                // slice() replaces the deprecated substr(); the end index is i + j.
                arr.push(Number(str.slice(i, i + j)));
                i += j;
                j = 0;
            }
            while ((i + j) < str.length && (str[i + j] < "0" || str[i + j] > "9")) {
                j++;
            }
            if (j) {
                arr.push(str.slice(i, i + j));
                i += j;
            }
        }
        return arr;
    }
    
    // Implementations exercised by the benchmark. They are run, and their
    // timings printed, in this order; note that naturalSlice is listed before
    // naturalMatch, which differs from the order of the definitions.
    const algorithms = [
        naturalSplit,
        naturalSplit2,
        naturalSplitMapFilterUnaryPlus,
        naturalSplitMapFilterNumber,
        naturalConcat,
        naturalSlice,
        naturalMatch,
        naturalExecSticky
    ];
    
    // Benchmark driver: times every implementation in `algorithms` against a set
    // of fixed inputs (1000 calls each) and against 100000 random strings,
    // printing the timings through console.time / console.timeEnd. For the
    // fixed inputs the result is also compared with the declared expectation.
    (function(){
        'use strict';

        // Order-sensitive comparison with strict equality, so that the number 11
        // and the string "11" are not considered the same token.
        const sameTokens = (actual, expected) =>
            actual.length === expected.length &&
            actual.every((token, idx) => token === expected[idx]);

        const randomTests = [];
        for (let i = 0; i < 100000; i++) {
            // Base-36 rendering of a random fraction gives a short mix of digits
            // and lowercase letters; slice(2) drops the leading "0.".
            randomTests.push({str: Math.random().toString(36).slice(2)});
        }

        const tests = [
            {str: "112233", expect: [112233]},
            {str: "foo bar baz", expect: ["foo bar baz"]},
            {str: "foo11bar22baz", expect: ["foo", 11, "bar", 22, "baz"]},
            {str: "11foo22bar33baz", expect: [11, "foo", 22, "bar", 33, "baz"]},
            {str: "foo11bar22baz33", expect: ["foo", 11, "bar", 22, "baz", 33]},
            {str: "11foo22bar33baz44", expect: [11, "foo", 22, "bar", 33, "baz", 44]},
            {str: "", expect: []},
            //{str: "99999999999999999999999999999999999999999999999999999999999999999999999999999999999", expect: ""}, // number too large for JS = ?
            {str: "Li Europan 0234 lingues es membres del sam familie. Lor separat existentie es un myth. Por scientie, musica, sport etc, litot Europa usa li sam vocabular. Li lingues differe solmen in li 0.00 grammatica, -1e5 li pronunciation e li plu commun vocabules. Omnicos directe al desirabilite de un nov lingua franca: On refusa continuar payar custosi traductores. At solmen va 8esser necessi far uniform grammatica, pronunciation 025.35 e plu sommun paroles. Ma +234234 quande lingues coalesce, li grammatica del resultant lingue es plu simplic e 432 regulari quam ti del coalescent9 lingues. Li nov 90548 lingua franca va esser plu simplic e 23453 regulari quam li existent 234898234 Europan lingues. It va esser tam simplic23423452349819879234quam Occidental in fact, it va esser Occidental. A un Angleso it va semblar un simplificat Angles, quam un skeptic 89723894 Cambridge amico dit me que Occidental es.Li Europan lingues es membres del sam familie. Lor separat existentie es un myth. Por scientie, musica, sport etc, litot Europa usa li sam vocabular. Li 3,4,5,6,7,8 lingues differe solmen in li grammatica, li 495 pronunciation e li plu commun -45345 vocabules. Omnicos directe al desirabilite de un nov lingua franca: On refusa continuar payar custosi traductores. At solmen va esser necessi far uniform grammatica, pronunciation e plu sommun paroles.",
            expect: ["Li Europan ", 234, " lingues es membres del sam familie. Lor separat existentie es un myth. Por scientie, musica, sport etc, litot Europa usa li sam vocabular. Li lingues differe solmen in li ", 0, ".", 0, " grammatica, -", 1, "e", 5, " li pronunciation e li plu commun vocabules. Omnicos directe al desirabilite de un nov lingua franca: On refusa continuar payar custosi traductores. At solmen va ", 8, "esser necessi far uniform grammatica, pronunciation ", 25, ".", 35, " e plu sommun paroles. Ma +", 234234, " quande lingues coalesce, li grammatica del resultant lingue es plu simplic e ", 432, " regulari quam ti del coalescent", 9, " lingues. Li nov ", 90548, " lingua franca va esser plu simplic e ", 23453, " regulari quam li existent ", 234898234, " Europan lingues. It va esser tam simplic", 23423452349819879234, "quam Occidental in fact, it va esser Occidental. A un Angleso it va semblar un simplificat Angles, quam un skeptic ", 89723894, " Cambridge amico dit me que Occidental es.Li Europan lingues es membres del sam familie. Lor separat existentie es un myth. Por scientie, musica, sport etc, litot Europa usa li sam vocabular. Li ", 3, ",", 4, ",", 5, ",", 6, ",", 7, ",", 8, " lingues differe solmen in li grammatica, li ", 495, " pronunciation e li plu commun -", 45345, " vocabules. Omnicos directe al desirabilite de un nov lingua franca: On refusa continuar payar custosi traductores. At solmen va esser necessi far uniform grammatica, pronunciation e plu sommun paroles."]}
        ];

        for (const t of tests) {
            const label = t.str.slice(0, 20);
            console.log('\nTest "' + label + '"');
            for (const f of algorithms) {
                let result;
                console.time(f.name);
                for (let i = 0; i < 1000; i++) {
                    result = f(t.str);
                }
                console.timeEnd(f.name);
                // Checked after the timer stops so the comparison does not
                // count towards the measurement. A mismatch is reported rather
                // than thrown so the remaining timings are still produced.
                if (!sameTokens(result, t.expect)) {
                    console.error(f.name + ' produced an unexpected result for test "' + label + '"');
                }
            }
        }
        console.log('\nRandom tests');
        for (const f of algorithms) {
            console.time(f.name);
            for (const r of randomTests) {
                // Random inputs have no declared expectation; only timing matters.
                f(r.str);
            }
            console.timeEnd(f.name);
        }
    })();
    

    我的测试结果

    使用Nodejs 5.11.0 --harmony_regexps --regexp-optimization :

    Test "112233"
    naturalSplit: 2.817ms
    naturalSplit2: 3.033ms
    naturalSplitMapFilterUnaryPlus: 3.199ms
    naturalSplitMapFilterNumber: 1.910ms
    naturalConcat: 0.876ms
    naturalSlice: 1.274ms
    naturalMatch: 0.960ms
    naturalExecSticky: 0.863ms
    
    Test "foo bar baz"
    naturalSplit: 1.072ms
    naturalSplit2: 0.839ms
    naturalSplitMapFilterUnaryPlus: 0.800ms
    naturalSplitMapFilterNumber: 0.802ms
    naturalConcat: 0.952ms
    naturalSlice: 0.697ms
    naturalMatch: 0.577ms
    naturalExecSticky: 1.329ms
    
    Test "foo11bar22baz"
    naturalSplit: 3.410ms
    naturalSplit2: 2.398ms
    naturalSplitMapFilterUnaryPlus: 2.083ms
    naturalSplitMapFilterNumber: 6.107ms
    naturalConcat: 1.627ms
    naturalSlice: 1.633ms
    naturalMatch: 2.070ms
    naturalExecSticky: 1.697ms
    
    Test "11foo22bar33baz"
    naturalSplit: 3.572ms
    naturalSplit2: 2.805ms
    naturalSplitMapFilterUnaryPlus: 2.691ms
    naturalSplitMapFilterNumber: 2.570ms
    naturalConcat: 1.990ms
    naturalSlice: 1.983ms
    naturalMatch: 2.474ms
    naturalExecSticky: 1.591ms
    
    Test "foo11bar22baz33"
    naturalSplit: 3.439ms
    naturalSplit2: 2.637ms
    naturalSplitMapFilterUnaryPlus: 2.613ms
    naturalSplitMapFilterNumber: 4.554ms
    naturalConcat: 1.958ms
    naturalSlice: 2.002ms
    naturalMatch: 0.686ms
    naturalExecSticky: 0.792ms
    
    Test "11foo22bar33baz44"
    naturalSplit: 3.916ms
    naturalSplit2: 2.824ms
    naturalSplitMapFilterUnaryPlus: 2.843ms
    naturalSplitMapFilterNumber: 2.685ms
    naturalConcat: 2.164ms
    naturalSlice: 2.246ms
    naturalMatch: 0.981ms
    naturalExecSticky: 0.961ms
    
    Test ""
    naturalSplit: 1.579ms
    naturalSplit2: 2.993ms
    naturalSplitMapFilterUnaryPlus: 1.356ms
    naturalSplitMapFilterNumber: 1.201ms
    naturalConcat: 0.029ms
    naturalSlice: 0.029ms
    naturalMatch: 0.025ms
    naturalExecSticky: 0.186ms
    
    Test "Li Europan 0234 ling"
    naturalSplit: 25.771ms
    naturalSplit2: 14.735ms
    naturalSplitMapFilterUnaryPlus: 14.905ms
    naturalSplitMapFilterNumber: 13.707ms
    naturalConcat: 90.956ms
    naturalSlice: 54.905ms
    naturalMatch: 20.436ms
    naturalExecSticky: 5.915ms
    
    Random tests
    naturalSplit: 376.622ms
    naturalSplit2: 293.722ms
    naturalSplitMapFilterUnaryPlus: 286.914ms
    naturalSplitMapFilterNumber: 281.534ms
    naturalConcat: 234.996ms
    naturalSlice: 233.745ms
    naturalMatch: 100.181ms
    naturalExecSticky: 100.647ms
    

    naturalMatch 明显比其他实现快——只有 naturalExecSticky 例外:后者与它不相上下,有时甚至更快(在较长的输入字符串上约快 4 倍)。

    顺便一提:这些函数之所以命名为 natural... ,是因为其结果可用于自然排序(“file10”排在“file2”之后,而不是像字母顺序那样紧跟在“file1”之后)。