代码之家  ›  专栏  ›  技术社区  ›  Andreas Grech

仅当分隔符不在引号内时才拆分字符串

  •  4
  • Andreas Grech  · 技术社区  · 16 年前

    我需要用 JavaScript 编写一个拆分函数,按逗号把字符串拆分成数组……但该逗号不能位于引号( ' 和 " )之内。

    "peanut, butter, jelly"
      -> ["peanut", "butter", "jelly"]
    
    "peanut, 'butter, bread', 'jelly'"
      -> ["peanut", "butter, bread", "jelly"]
    
    'peanut, "butter, bread", "jelly"'
      -> ["peanut", 'butter, bread', "jelly"]
    

    我不能直接使用 JavaScript 的 split 方法,原因是当分隔符位于引号内时,它同样会进行拆分。

    我怎样才能做到这一点,也许是用正则表达式?


    关于上下文:在扩展 jQuery 的 $.expr[':'] 时,你编写的函数会收到第三个参数,我要拆分的正是这个参数的第三个元素中传入的内容。通常,这个参数被命名为 meta ,它是一个数组,包含有关筛选器的特定信息。

    无论如何,这个数组的第三个元素是一个字符串,它包含了过滤器传递的参数;由于参数是字符串格式的,我需要能够正确地拆分它们进行解析。

    4 回复  |  直到 16 年前
        1
  •  3
  •   Community Mohan Dere    8 年前

    您需要的基本上是一个Javascript CSV解析器。在“JavaScriptCSV解析器”上进行谷歌搜索,你会得到大量的点击,其中许多是完整的脚本。另见 Javascript code to parse CSV data

        2
  •  1
  •   Thomas Eding    16 年前

    嗯,我手头已经有一个“大锤”式的解决方案(为其他用途编写的通用代码),所以纯粹为了好玩,贴在这里。

    /**
     * Creates a lexer: an ordered list of (regex, lexeme) rules that `lex`
     * applies to a string.
     *
     * Accepts any number of option flags, in any order; unrecognised arguments
     * are ignored:
     *   - Lexer.USE_NEW           function lexemes are invoked with `new`.
     *   - Lexer.SET_INDEX         (the function itself) each object lexeme gets
     *                             its position stored under Lexer.DEFAULT_INDEX.
     *   - Lexer.SET_INDEX (name)  same, but stored under the property `name`.
     */
    function Lexer () {
      this.setIndex = false;
      this.useNew = false;
      for (var i = 0; i < arguments.length; ++i) {
        var arg = arguments [i];
        if (arg === Lexer.USE_NEW) {
          this.useNew = true;
        }
        else if (arg === Lexer.SET_INDEX) {
          this.setIndex = Lexer.DEFAULT_INDEX;
        }
        else if (arg instanceof Lexer.SET_INDEX) {
          this.setIndex = arg.indexProp;
        }
      }
      this.rules = [];
      this.errorLexeme = null;
    }

    // Sentinel lexeme: rules that yield it consume input but add nothing to
    // the array returned by `lex`.
    Lexer.NULL_LEXEME = {};

    // Ready-made error marker for use with `setErrorLexeme`.
    Lexer.ERROR_LEXEME = {
      toString: function () {
        return "[object Lexer.ERROR_LEXEME]";
      }
    };

    // Property name used for positions when SET_INDEX is given without a name.
    Lexer.DEFAULT_INDEX = "index";

    // Option flag, compared by identity in the constructor.
    Lexer.USE_NEW = {};

    /**
     * Option requesting that lexeme positions be recorded. Usable three ways:
     * passed as-is (default property name), called as `Lexer.SET_INDEX (name)`,
     * or constructed with `new Lexer.SET_INDEX (name)`.
     *
     * @param {string} [indexProp] Property name to store the position under;
     *   defaults to Lexer.DEFAULT_INDEX.
     */
    Lexer.SET_INDEX = function SetIndex (indexProp) {
      // Called without `new`: build the option object explicitly. The earlier
      // `new arguments.callee.apply (...)` attempted to construct
      // Function.prototype.apply (a TypeError), and arguments.callee is not
      // available in strict mode.
      if ( !(this instanceof SetIndex)) {
        return new SetIndex (indexProp);
      }
      if (indexProp === undefined) {
        indexProp = Lexer.DEFAULT_INDEX;
      }
      this.indexProp = indexProp;
    };

    (function () {
      // Invokes `this` as a constructor with a variable number of arguments.
      // One forwarding function is generated per argument count and cached
      // (up to 99 arguments). The generated source is built from integer
      // indices only, never from caller-supplied text.
      var New = (function () {
        var fs = [];
        return function () {
          var f = fs [arguments.length];
          if (!f) {
            var argStrs = [];
            for (var i = 0; i < arguments.length; ++i) {
              argStrs.push ("a[" + i + "]");
            }
            f = new Function ("var a=arguments;return new this(" + argStrs.join () + ");");
            if (arguments.length < 100) {
              fs [arguments.length] = f;
            }
          }
          return f.apply (this, arguments);
        };
      }) ();

      // RegExp boolean property -> flag letter. Properties an engine does not
      // know read as undefined and are skipped, so the newer entries are
      // harmless on older engines.
      var flagMap = [
          ["global", "g"]
        , ["ignoreCase", "i"]
        , ["multiline", "m"]
        , ["sticky", "y"]
        , ["unicode", "u"]
        , ["dotAll", "s"]
        , ["hasIndices", "d"]
        , ["unicodeSets", "v"]
        ];

      // Rebuilds the flag string of `regex` from its boolean properties.
      function getFlags (regex) {
        var flags = "";
        for (var i = 0; i < flagMap.length; ++i) {
          if (regex [flagMap [i] [0]]) {
            flags += flagMap [i] [1];
          }
        }
        return flags;
      }

      // Returns a predicate that is true for every value except `x`.
      function not (x) {
        return function (y) {
          return x !== y;
        };
      }

      // A single lexer rule. `lex` steers matching through `lastIndex`, which
      // `exec` only honours on global (or sticky) regexes, so a non-global
      // regex is rebuilt with "g" added and its other flags preserved.
      function Rule (regex, lexeme) {
        if (!regex.global) {
          var flags = "g" + getFlags (regex);
          regex = new RegExp (regex.source, flags);
        }
        this.regex = regex;
        this.lexeme = lexeme;
      }

      Lexer.prototype = {
          constructor: Lexer

          /**
           * Appends a rule. Earlier rules win ties (same start, same length).
           *
           * @param {RegExp} regex Pattern to search for.
           * @param {*} lexeme Value to emit, or a function that produces it.
           */
        , addRule: function (regex, lexeme) {
            var rule = new Rule (regex, lexeme);
            this.rules.push (rule);
          }

          /**
           * Sets the lexeme emitted for input that no rule covers. While it is
           * unset (null), uncovered input is skipped silently.
           */
        , setErrorLexeme: function (lexeme) {
            this.errorLexeme = lexeme;
          }

          /**
           * Turns a rule's lexeme into the emitted value. Non-functions are
           * returned as-is. Functions receive the match, its capture groups,
           * the match index and the input string, and are invoked with `new`
           * when the lexer was created with Lexer.USE_NEW.
           */
        , runLexeme: function (lexeme, exec) {
            if (typeof lexeme !== "function") {
              return lexeme;
            }
            var args = exec.concat (exec.index, exec.input);
            if (this.useNew) {
              return New.apply (lexeme, args);
            }
            return lexeme.apply (null, args);
          }

          /**
           * Tokenises `str`. At each position the rule whose match starts
           * earliest wins, with the longest match breaking ties.
           *
           * @param {string} str Input text.
           * @returns {Array} Emitted lexemes, minus Lexer.NULL_LEXEME entries.
           */
        , lex: function (str) {
            var index = 0;
            var lexemes = [];
            // Read the option here: inside a helper `this` would no longer
            // be the lexer.
            var indexProp = this.setIndex;

            // Appends a lexeme, tagging objects/functions with their position
            // when SET_INDEX is active. Primitives cannot carry properties
            // (assigning one throws in strict mode), so they are left alone.
            function emit (lexeme, position) {
              var taggable = lexeme !== null
                && (typeof lexeme === "object" || typeof lexeme === "function");
              if (indexProp && taggable) {
                lexeme [indexProp] = position;
              }
              lexemes.push (lexeme);
            }

            while (index < str.length) {
              var bestExec = null;
              var bestRule = null;
              for (var i = 0; i < this.rules.length; ++i) {
                var rule = this.rules [i];
                rule.regex.lastIndex = index;
                var exec = rule.regex.exec (str);
                if (exec) {
                  var doUpdate = !bestExec
                    || (exec.index < bestExec.index)
                    || (exec.index === bestExec.index && exec [0].length > bestExec [0].length)
                    ;
                  if (doUpdate) {
                    bestExec = exec;
                    bestRule = rule;
                  }
                }
              }
              if (!bestExec) {
                if (this.errorLexeme) {
                  // Nothing matches from here on: report once and stop.
                  emit (this.errorLexeme, index);
                  break;
                }
                // No error reporting requested: skip one character. There is
                // no winning rule here, so its lastIndex must not be read.
                ++index;
                continue;
              }
              if (this.errorLexeme && index !== bestExec.index) {
                // Text between the scan position and the match went unmatched.
                emit (this.errorLexeme, index);
              }
              emit (this.runLexeme (bestRule.lexeme, bestExec), bestExec.index);
              // Always move forward, even after a zero-length match, so the
              // loop cannot spin forever.
              var next = bestExec.index + bestExec [0].length;
              index = next > index ? next : index + 1;
            }
            return lexemes.filter (not (Lexer.NULL_LEXEME));
          }
      };
    }) ();
    
    // Fallback for engines without a native Array.prototype.filter: returns a
    // new array holding the elements for which `fun` returns a truthy value.
    // The optional second argument is used as `this` inside `fun`; holes in
    // sparse arrays are skipped.
    if (!Array.prototype.filter) {
      Array.prototype.filter = function (fun) {
        var count = this.length >>> 0;
        var kept = [];
        var context = arguments [1];
        var pos = 0;
        while (pos < count) {
          if (pos in this) {
            // Read the element before calling back, in case `fun` mutates the array.
            var item = this [pos];
            if (fun.call (context, item, pos, this)) {
              kept.push (item);
            }
          }
          ++pos;
        }
        return kept;
      };
    }
    

    // Returns `str` with leading and trailing whitespace removed.
    function trim (str) {
      return str.replace (/^\s+/, "").replace (/\s+$/, "");
    }
    
    // Lexer configured to split on commas, except commas inside a quoted section.
    var splitter = new Lexer ();
    // Input that no rule covers is reported as Lexer.ERROR_LEXEME.
    splitter.setErrorLexeme (Lexer.ERROR_LEXEME);
    // A field containing one double-quoted section; commas are allowed between
    // the quotes. trim only strips surrounding whitespace, so the quote
    // characters themselves stay in the emitted string.
    splitter.addRule (/[^,"]*"[^"]*"[^,"]*/g, trim);
    // Same as above for a single-quoted section.
    splitter.addRule (/[^,']*'[^']*'[^,']*/g, trim);
    // A plain field: a run of characters with no comma and no quote. When it
    // starts at the same position as a quoted match, the longer match wins.
    splitter.addRule (/[^,"']+/g, trim);
    // Separators yield NULL_LEXEME, which lex() filters out of its result.
    splitter.addRule (/,/g, Lexer.NULL_LEXEME);
    
    // Sample inputs taken from the question.
    var strs = [
        "peanut, butter, jelly"
      , "peanut, 'butter, bread', 'jelly'"
      , 'peanut, "butter, bread", "jelly"'
      ];
    
    // NOTE: I'm lazy here, so I'm using Array.prototype.map, 
    //       which isn't supported in all browsers.
    // splitStrs holds one array of fields per input string.
    var splitStrs = strs.map (function (str) {
      return splitter.lex (str);
    });
    
        3
  •  1
  •   watain    16 年前
    // Sample input: comma-separated words, one of them a quoted section that
    // itself contains a comma.
    var str = 'text, foo, "haha, dude", bar';
    // Collects either a run of lowercase letters or a quoted section. (['"])
    // captures the opening quote, \1 requires the same character to close it,
    // and .*? takes as little as possible in between. With the g flag, match
    // returns every matched substring; quoted matches keep their quotes.
    // Only a-z is recognised outside quotes.
    var fragments = str.match(/[a-z]+|(['"]).*?\1/g);
    

    更好的版本(支持字符串内部被转义的 " 或 ' ):

    // Sample input. The double-quoted section contains an escaped quote (\");
    // the backslash is doubled so that it survives the JavaScript string
    // literal and is really present in the data.
    var str = 'text_123 space, foo, "text, here\\", dude", bar, \'one, two\', blob';
    // Alternative 1 - unquoted token: starts and ends on a character that is
    //   not a quote, comma or space; may contain spaces in between; may be a
    //   single character.
    // Alternative 2 - quoted section: opening quote captured in group 1, then
    //   any mix of backslash escapes (\\.) and characters that are neither a
    //   backslash nor the opening quote, then the same quote again. A
    //   backreference has no meaning inside a character class, hence the
    //   (?!\1) lookahead.
    var fragments = str.match(/[^"', ](?:[^"',]*[^"', ])?|(["'])(?:\\.|(?!\1)[^\\])*\1/g);
    
    // Result:
    0: text_123 space
    1: foo
    2: "text, here\", dude"
    3: bar
    4: 'one, two'
    5: blob
    
        4
  •  -1
  •   Community Mohan Dere    8 年前

    如果您可以控制输入,强制整个字符串用双引号 " 括起来、字符串内的所有元素都用单引号 ' 括起来,并且任何元素都不能包含单引号,则可以按 , ' 进行拆分。另见: RegEx match open tags except XHTML self-contained tags