代码之家  ›  专栏  ›  技术社区  ›  moodymudskipper

在sqlite中使用子查询更新表

  •  8
  • moodymudskipper  · 技术社区  · 7 年前

    我想使用 ALTER TABLE UPDATE 语句不重新创建完整表。

    在My中使用子查询时 更新 语句我没有得到预期的输出。

    建立可复制的数据

    library(dplyr)
    library(dbplyr)
    library(DBI)
    con <- DBI::dbConnect(RSQLite::SQLite(), path = ":memory:")
    copy_to(con, iris[c(1,2,51),],"iris")
    
    tbl(con,"iris")
    # # Source:   table<iris> [?? x 5]
    # # Database: sqlite 3.19.3 []
    #   Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
    #          <dbl>       <dbl>        <dbl>       <dbl>      <chr>
    # 1          5.1         3.5          1.4         0.2     setosa
    # 2          4.9         3.0          1.4         0.2     setosa
    # 3          7.0         3.2          4.7         1.4 versicolor
    

    在单独的表中创建新列

    DBI::dbSendQuery(con, "CREATE TABLE new_table AS SELECT t2.new_col from
                     iris t1 inner join 
                     (SELECT Species, sum(`Sepal.Width`) as new_col FROM iris GROUP BY Species) t2
                     on t1.Species = t2.Species")
    
    tbl(con,"new_table")
    # # Source:   table<new_table> [?? x 1]
    # # Database: sqlite 3.19.3 []
    #   new_col
    #     <dbl>
    # 1     6.5
    # 2     6.5
    # 3     3.2
    

    在旧表中创建新列

    DBI::dbSendQuery(con, "ALTER TABLE iris ADD COLUMN new_col DOUBLE")
    

    尝试从中插入新列 new_table 那里

    DBI::dbSendQuery(con, "UPDATE iris SET new_col = (SELECT new_col FROM new_table)")
    
    tbl(con,"iris")
    # # Source:   table<iris> [?? x 6]
    # # Database: sqlite 3.19.3 []
    #   Sepal.Length Sepal.Width Petal.Length Petal.Width    Species new_col
    #          <dbl>       <dbl>        <dbl>       <dbl>      <chr>   <dbl>
    # 1          5.1         3.5          1.4         0.2     setosa     6.5
    # 2          4.9         3.0          1.4         0.2     setosa     6.5
    # 3          7.0         3.2          4.7         1.4 versicolor     6.5
    

    正如你所看到的 new_col 仅包含值 6.5 在我期望的地方 3.2 在最后一排。我该怎么解决?

    1 回复  |  直到 6 年前
        1
  •  5
  •   Ralf Stubner    7 年前

    SQL数据库中表中的行没有固有的顺序。因此,不能像在r中那样分配值的“向量”。但是,可以稍微修改查询:

    library(dplyr)
    library(DBI)
    con <- DBI::dbConnect(RSQLite::SQLite(), path = ":memory:")
    copy_to(con, iris[c(1,2,51),],"iris")
    

    用聚合数据创建单独的表

    DBI::dbSendQuery(con, "CREATE TABLE new_table AS 
                           SELECT Species, sum(`Sepal.Width`) as new_col FROM iris GROUP BY Species")
    
    tbl(con,"new_table")
    #> # Source:   table<new_table> [?? x 2]
    #> # Database: sqlite 3.22.0 []
    #>   Species    new_col
    #>   <chr>        <dbl>
    #> 1 setosa         6.5
    #> 2 versicolor     3.2
    

    在旧表中创建新列

    DBI::dbSendQuery(con, "ALTER TABLE iris ADD COLUMN new_col DOUBLE")
    

    使用相关子查询将数据移动到原始表

    DBI::dbSendQuery(con, "UPDATE iris SET new_col = (SELECT new_col FROM new_table t2
                                   WHERE iris.Species = t2.Species)")
    
    tbl(con,"iris")
    #> # Source:   table<iris> [?? x 6]
    #> # Database: sqlite 3.22.0 []
    #>   Sepal.Length Sepal.Width Petal.Length Petal.Width Species    new_col
    #>          <dbl>       <dbl>        <dbl>       <dbl> <chr>        <dbl>
    #> 1          5.1         3.5          1.4         0.2 setosa         6.5
    #> 2          4.9         3            1.4         0.2 setosa         6.5
    #> 3          7           3.2          4.7         1.4 versicolor     3.2
    

    如果有多个计算列,可以使用 UPDATE ... SET (c1, c2, ...) = (...) 这样地:

    library(dplyr)
    library(dbplyr)
    library(DBI)
    con <- DBI::dbConnect(RSQLite::SQLite(), path = ":memory:")
    copy_to(con, iris[c(1,2,51),],"iris")
    
    DBI::dbSendQuery(con, "CREATE TABLE aggs AS 
                           SELECT Species, 
                                  SUM(`Sepal.Width`) AS sw_sum,
                                  AVG(`Sepal.Width`) AS sw_avg 
                           FROM iris GROUP BY Species")
    tbl(con,"aggs")
    #> # Source:   table<aggs> [?? x 3]
    #> # Database: sqlite 3.22.0 []
    #>   Species    sw_sum sw_avg
    #>   <chr>       <dbl>  <dbl>
    #> 1 setosa        6.5   3.25
    #> 2 versicolor    3.2   3.2
    
    DBI::dbSendQuery(con, "ALTER TABLE iris ADD COLUMN sw_sum DOUBLE")
    DBI::dbSendQuery(con, "ALTER TABLE iris ADD COLUMN sw_avg DOUBLE")
    
    DBI::dbSendQuery(con, "UPDATE iris 
                           SET (sw_sum, sw_avg) = (SELECT sw_sum, sw_avg 
                               FROM aggs WHERE iris.Species = aggs.Species)")
    
    tbl(con,"iris")
    #> # Source:   table<iris> [?? x 7]
    #> # Database: sqlite 3.22.0 []
    #>   Sepal.Length Sepal.Width Petal.Length Petal.Width Species  sw_sum sw_avg
    #>          <dbl>       <dbl>        <dbl>       <dbl> <chr>     <dbl>  <dbl>
    #> 1          5.1         3.5          1.4         0.2 setosa      6.5   3.25
    #> 2          4.9         3            1.4         0.2 setosa      6.5   3.25
    #> 3          7           3.2          4.7         1.4 versico…    3.2   3.2
    

    这也适用于postgres,但可能不适用于sql server。

    实际上,在这种情况下不需要中间表:

    library(dplyr)
    library(dbplyr)
    library(DBI)
    con <- DBI::dbConnect(RSQLite::SQLite(), path = ":memory:")
    copy_to(con, iris[c(1,2,51),],"iris")
    
    DBI::dbSendQuery(con, "ALTER TABLE iris ADD COLUMN sw_sum DOUBLE")
    DBI::dbSendQuery(con, "ALTER TABLE iris ADD COLUMN sw_avg DOUBLE")
    
    DBI::dbSendQuery(con, "UPDATE iris 
                           SET (sw_sum, sw_avg) = 
                                  (SELECT sw_sum, sw_avg FROM 
                                        (SELECT Species, 
                                                SUM(`Sepal.Width`) AS sw_sum, 
                                                AVG(`Sepal.Width`) AS sw_avg 
                                         FROM iris GROUP BY Species) aggs 
                                   WHERE iris.Species = aggs.Species)")
    
    tbl(con,"iris")
    #> # Source:   table<iris> [?? x 7]
    #> # Database: sqlite 3.22.0 []
    #>   Sepal.Length Sepal.Width Petal.Length Petal.Width Species  sw_sum sw_avg
    #>          <dbl>       <dbl>        <dbl>       <dbl> <chr>     <dbl>  <dbl>
    #> 1          5.1         3.5          1.4         0.2 setosa      6.5   3.25
    #> 2          4.9         3            1.4         0.2 setosa      6.5   3.25
    #> 3          7           3.2          4.7         1.4 versico…    3.2   3.2
    

    不过,中间表在其他情况下可能会有所帮助。例如,在链接问题中使用r创建时。