代码之家  ›  专栏  ›  技术社区  ›  JtR

如何编写统一diff语法的解析器

  •  4
  • JtR  · 技术社区  · 14 年前

    here .

    4 回复  |  直到 14 年前
        1
  •  3
  •   michael.kebe    14 年前

    下面是一个使用 RegexParsers 的解决方案。

    import scala.util.parsing.combinator.RegexParsers
    
    /**
     * Parser-combinator grammar for a single-file unified diff: a `---` header,
     * a `+++` header and one or more `@@` hunks.
     *
     * Whitespace skipping is switched off because leading blanks and line breaks
     * carry meaning in the format.
     */
    object UnifiedDiffParser extends RegexParsers {

      // case classes representing the data of the diff
      case class UnifiedDiff(oldFile: File, newFile: File, changeChunks: List[ChangeChunk])
      case class File(name: String, timeStamp: String)
      case class ChangeChunk(rangeInformation: RangeInformation, changeLines: List[String])
      case class RangeInformation(oldOffset: Int, oldLength: Int, newOffset: Int, newLength: Int)

      override def skipWhitespace = false

      /** A whole diff: both file headers followed by at least one hunk. */
      def unifiedDiff: Parser[UnifiedDiff] = oldFile ~ newFile ~ rep1(changeChunk) ^^ {
        case before ~ after ~ chunks => UnifiedDiff(before, after, chunks)
      }

      def oldFile: Parser[File] = fileHeader("--- ")
      def newFile: Parser[File] = fileHeader("+++ ")

      /**
       * Parses one header line: marker, file name and an optional timestamp.
       * A header without a timestamp yields an empty `timeStamp`.
       */
      private def fileHeader(marker: String): Parser[File] =
        (literal(marker) ~> filename) ~ (opt(blanks ~> timestamp) <~ newline) ^^ {
          case name ~ stamp => File(name, stamp getOrElse "")
        }

      def filename: Parser[String] = """[\S]+""".r
      def timestamp: Parser[String] = """.*""".r

      // Only horizontal blanks: `\s+` would also consume the line break and pull
      // the following line into the timestamp.
      private def blanks: Parser[String] = """[ \t]+""".r

      /** A hunk: range header (plus optional trailing text) and its change lines. */
      def changeChunk: Parser[ChangeChunk] =
        (rangeInformation <~ opt(sectionHeading) <~ newline) ~ rep1(lineChange <~ opt(noNewlineMarker)) ^^ {
          case ri ~ l => ChangeChunk(ri, l)
        }

      /**
       * The `@@ -a,b +c,d @@` header. The format allows a length to be left out,
       * in which case it is 1.
       */
      def rangeInformation: Parser[RangeInformation] =
        ("@@ " ~> "-" ~> number) ~ opt("," ~> number) ~ (" +" ~> number) ~ opt("," ~> number) <~ " @@" ^^ {
          case a ~ b ~ c ~ d => RangeInformation(a, b getOrElse 1, c, d getOrElse 1)
        }

      // Free text some tools append after the closing "@@"; it is not part of the hunk body.
      private def sectionHeading: Parser[String] = """ .*""".r

      // Change lines keep their leading marker character (' ', '+' or '-').
      def lineChange: Parser[String] = contextLine | addedLine | deletedLine
      def contextLine: Parser[String] = """ .*""".r <~ newline
      def addedLine: Parser[String] = """\+.*""".r <~ newline
      def deletedLine: Parser[String] = """-.*""".r <~ newline

      // "\ No newline at end of file" annotates the preceding line and is skipped.
      private def noNewlineMarker: Parser[String] = """\\.*""".r <~ newline

      // Accepts both LF and CRLF line ends.
      def newline: Parser[String] = """\r?\n""".r
      def number: Parser[Int] = """\d+""".r ^^ {_.toInt}

      /**
       * Parses the file named by the first argument, or standard input when no
       * argument is given, and prints the parse result.
       */
      def main(args: Array[String]): Unit = {
        val fromStdin = args.length == 0
        val reader: java.io.Reader =
          if (fromStdin) Console.in // read from stdin
          else new java.io.FileReader(args(0))
        try {
          println(parseAll(unifiedDiff, reader))
        } finally {
          // Only close what was opened here; stdin is left alone.
          if (!fromStdin) reader.close()
        }
      }
    }
    
        2
  •  5
  •   Daniel C. Sobral    14 年前

    我会用正则表达式。它简化了一些事情,并使其余的标准化。

    /**
     * Reads a unified diff line by line and prints a one-line description of
     * every line to standard output.
     *
     * File headers are expected in the form `--- path ''timestamp''` and
     * `+++ path ''timestamp''`; hunk headers in the form `@@ -l,s +l,s @@`.
     * Lines that fit none of the known shapes are reported as ignored instead
     * of aborting with a `MatchError`.
     *
     * @param src the source whose lines are the diff text
     */
    def process(src: scala.io.Source): Unit = {
      import scala.util.matching.Regex

      val FilePattern = """(.*) ''(.*)''"""
      val OriginalFile = new Regex("--- "+FilePattern, "path", "timestamp")
      // '+' is a regex quantifier, so every literal plus sign has to be escaped.
      val NewFile = new Regex("""\+\+\+ """+FilePattern, "path", "timestamp")
      val Chunk = new Regex("""@@ -(\d+),(\d+) \+(\d+),(\d+) @@""", "orgStarting", "orgSize", "newStarting", "newSize")
      val AddedLine = """\+(.*)""".r
      val RemovedLine = """-(.*)""".r
      val UnchangedLine = """ (.*)""".r

      // Order matters: the header patterns must be tried before the one-character
      // line patterns, which would otherwise claim "---" and "+++" lines.
      src.getLines() foreach {
        case OriginalFile(path, _) => println("Original file: "+path)
        case NewFile(path, _) => println("New file: "+path)
        // The captured groups are strings, hence %s rather than %d.
        case Chunk(l1, s1, l2, s2) => println("Modifying %s lines at line %s, to %s lines at %s" format (s1, l1, s2, l2))
        case AddedLine(line) => println("Adding line "+line)
        case RemovedLine(line) => println("Removing line "+line)
        case UnchangedLine(line) => println("Keeping line "+line)
        case other => println("Ignoring line "+other)
      }
    }
    
        3
  •  4
  •   Radomir Dopieralski    14 年前

    当然,如果你想学习如何使用一些解析库,那就去学吧。

        4
  •  1
  •   mycroft    13 年前

    我在为 git diff 构建 Scala 解析器时偶然发现了这个问题,这些 diff 是通过运行 git diff-tree 生成的。

    我在很大程度上依赖于上面的答案,并最终编写了这里包含的解析器。当然,这并不完全是原提问者想要的,但我认为它可能对其他人有用。

    import util.parsing.combinator._
    
    /** Companion offering a factory that normalises the file names of a diff header. */
    object GitDiff {
      /**
       * Builds a [[GitDiff]] from the raw pair of file names.
       *
       * Each name loses everything up to and including its first '/', which
       * removes prefixes such as "a/" and "b/" so both sides can be compared.
       * A name without any '/' becomes the empty string.
       */
      def apply (files: (String,String), op: FileOperation, chunks: List[ChangeChunk]) = {
        val (rawOld, rawNew) = files

        // Keep only what follows the first slash.
        def afterFirstSlash(path: String) = {
          val slashAt = path.indexOf('/')
          if (slashAt < 0) "" else path.substring(slashAt + 1)
        }

        new GitDiff(afterFirstSlash(rawOld), afterFirstSlash(rawNew), op, chunks)
      }
    }
    
    /**
     * The changes made to one file.
     *
     * @param oldFile name of the file before the change
     * @param newFile name of the file after the change
     * @param op      whether the file was created, deleted or updated
     * @param chunks  the hunks that make up the change
     */
    case class GitDiff(oldFile: String, newFile: String, op: FileOperation, chunks: List[ChangeChunk]) {
      // True whenever the two names are not equal.
      val isRename = !(oldFile == newFile)
    }
    
    /** What happened to a file as a whole; closed set of three cases. */
    sealed trait FileOperation
    /** The file was created. `mode` is the six-digit value of the "new file mode" line read as a decimal Int. */
    case class NewFile(mode: Int) extends FileOperation
    /** The file was removed. `mode` is the six-digit value of the "deleted file mode" line read as a decimal Int. */
    case class DeletedFile(mode: Int) extends FileOperation
    /** Used by GitDiffParser when neither a "new file mode" nor a "deleted file mode" line is present. */
    case object UpdatedFile extends FileOperation
    
    /** One line inside a hunk; `line` is the text that follows the one-character marker. */
    sealed trait LineChange { def line: String }
    /** A line introduced by a blank (' ') marker. */
    case class ContextLine(line: String) extends LineChange
    /** A line introduced by a '-' marker. */
    case class LineRemoved(line: String) extends LineChange
    /** A line introduced by a '+' marker. */
    case class LineAdded(line: String) extends LineChange
    /** The four numbers of a hunk header of the form "@@ -oldOffset,oldLength +newOffset,newLength @@". */
    case class RangeInformation(oldOffset: Int, oldLength: Int, newOffset: Int, newLength: Int)
    /** One hunk: its header numbers together with the lines listed under it, in order. */
    case class ChangeChunk(rangeInformation: RangeInformation, changeLines: List[LineChange])
    
    // Code taken from http://stackoverflow.com/questions/3560073/how-to-write-parser-for-unified-diff-syntax
    /**
     * Parser-combinator grammar for `git diff` style patches covering one or
     * more files.
     *
     * Every file section must consist of a "diff --git" line, an optional
     * "new file mode" / "deleted file mode" line, an "index" line, the "---" and
     * "+++" headers and at least one hunk. Other extended headers are not
     * recognised by this grammar.
     */
    object GitDiffParser extends RegexParsers {

      // Leading blanks and line breaks are significant, so nothing may be skipped.
      override def skipWhitespace = false

      def allDiffs: Parser[List[GitDiff]] = rep1(gitDiff)

      def gitDiff: Parser[GitDiff] = filesChanged ~ fileOperation ~ diffChunks ^^ {
        case files ~ op ~ chunks => GitDiff(files, op, chunks)
      }

      /** The "diff --git old new" line, yielding both names with their prefixes intact. */
      def filesChanged: Parser[(String, String)] =
        "diff --git " ~> filename ~ (" " ~> filename) <~ newline ^^ { case f1 ~ f2 => (f1,f2) }

      /** The optional mode line plus the mandatory index line; no mode line means an update. */
      def fileOperation: Parser[FileOperation] =
        opt(deletedFileMode | newFileMode) <~ index ^^ { _ getOrElse UpdatedFile }

      def index: Parser[Any] = ( "index " ~ hash ~ ".." ~ hash ) ~> opt(" " ~> mode) <~ newline
      def deletedFileMode: Parser[DeletedFile] = "deleted file mode " ~> mode <~ newline ^^ { m => DeletedFile(m) }
      def newFileMode: Parser[NewFile] = "new file mode " ~> mode <~ newline ^^ { m => NewFile(m) }
      // Object names are abbreviated to 7 hex digits or more, up to the full id,
      // so a fixed width of 7 would reject valid index lines.
      def hash: Parser[String] = """[0-9a-f]{7,}""".r
      // The six digits are read as a decimal number, e.g. "100644" becomes 100644.
      def mode: Parser[Int] = """\d{6}""".r ^^ { _.toInt }

      def diffChunks: Parser[List[ChangeChunk]] = (oldFile ~ newFile) ~> rep1(changeChunk)

      def oldFile: Parser[String] = "--- " ~> filename <~ newline
      def newFile: Parser[String] = "+++ " ~> filename <~ newline
      def filename: Parser[String] = """[\S]+""".r

      /**
       * A hunk: range header, optional section heading, then the change lines.
       * The heading that may follow the closing "@@" belongs to the header, not
       * to the file content, so it is dropped rather than stored as a context line.
       */
      def changeChunk: Parser[ChangeChunk] =
        (rangeInformation <~ opt(sectionHeading) <~ newline) ~ rep1(lineChange <~ opt(noNewlineMarker)) ^^ {
          case ri ~ lines => ChangeChunk(ri, lines)
        }

      // Text after the closing "@@" on the hunk header line.
      private def sectionHeading: Parser[String] = """ .*""".r

      /**
       * The `@@ -a,b +c,d @@` header. The format allows a length to be left out,
       * in which case it is 1.
       */
      def rangeInformation: Parser[RangeInformation] =
        ("@@ " ~> "-" ~> number) ~ opt("," ~> number) ~ (" +" ~> number) ~ opt("," ~> number) <~ " @@" ^^ {
          case a ~ b ~ c ~ d => RangeInformation(a, b getOrElse 1, c, d getOrElse 1)
        }

      // The marker character is stripped; only the remaining text is kept.
      def lineChange: Parser[LineChange] = contextLine | addedLine | deletedLine
      def contextLine: Parser[ContextLine] = " " ~> """.*""".r <~ newline ^^ { l => ContextLine(l) }
      def addedLine: Parser[LineAdded] = "+" ~> """.*""".r <~ newline ^^ { l => LineAdded(l) }
      def deletedLine: Parser[LineRemoved] = "-" ~> """.*""".r <~ newline ^^ { l => LineRemoved(l) }

      // "\ No newline at end of file" annotates the preceding line and is skipped.
      private def noNewlineMarker: Parser[String] = """\\.*""".r <~ newline

      // Accepts both LF and CRLF line ends.
      def newline: Parser[String] = """\r?\n""".r
      def number: Parser[Int] = """\d+""".r ^^ { _.toInt }

      /** Parses a complete patch held in memory; the whole string must be consumed. */
      def parse(str: String): ParseResult[List[GitDiff]] = parseAll(allDiffs, str)

      /**
       * Parses the file named by the first argument, or standard input when no
       * argument is given. Prints the parsed diffs, or fails via `sys.error`
       * with the parser's message.
       */
      def main(args: Array[String]): Unit = {
        val fromStdin = args.length == 0
        val reader: java.io.Reader =
          if (fromStdin) Console.in // read from stdin
          else new java.io.FileReader(args(0))
        try {
          parseAll(allDiffs, reader) match {
            case Success(s,_) => println( s )
            case NoSuccess(msg,_) => sys.error("ERROR: " + msg)
          }
        } finally {
          // Only close what was opened here; stdin is left alone.
          if (!fromStdin) reader.close()
        }
      }
    }