代码之家  ›  专栏  ›  技术社区  ›  Homer Jay Simpson

添加列出现次数

  •  1
  • Homer Jay Simpson  · 技术社区  · 1 年前

    我在SQL Server中找到了名为df的表 here :

    -- Parameters
    DECLARE @Year INT = 2020; --, @Country varchar(50)= 'Brazil';
    
    WITH ModeData AS (
        SELECT country, 
               a.Mode
        FROM df
        CROSS APPLY (
            SELECT TOP 1 Mode, COUNT(*) AS cnt
            FROM (VALUES (val1), (val2), (val3)) AS t(Mode)
            GROUP BY Mode
            ORDER BY COUNT(*) DESC
        ) a
      where year=@year --and  country=@country 
    )
    
    -- Calculate proportions and map modes to labels
    , Proportions AS (
        SELECT country, 
               CASE 
                   WHEN Mode = 1 THEN 'Very Dissatisfied'
                   WHEN Mode = 2 THEN 'Dissatisfied'
                   WHEN Mode = 3 THEN 'Neutral'
                   WHEN Mode = 4 THEN 'Satisfied'
                   WHEN Mode = 5 THEN 'Very Satisfied'
               END AS SatisfactionLevel,
               COUNT(*) * 1.0 / SUM(COUNT(*)) OVER (PARTITION BY country) AS Proportion
        FROM ModeData
        GROUP BY country, Mode
    )
    
    -- Pivot the results to get each satisfaction level as a column
    SELECT country, 
           [Very Dissatisfied], 
           [Dissatisfied], 
           [Neutral], 
           [Satisfied], 
           [Very Satisfied]
    FROM Proportions
    PIVOT (
        MAX(Proportion)
        FOR SatisfactionLevel IN ([Very Dissatisfied], [Dissatisfied], [Neutral], [Satisfied], [Very Satisfied])
    ) AS p
    ORDER BY country;
    

    结果表为:

    国家 非常不满意 不满意的 中立 满意的 非常满意
    巴西 0.285714285714 0.142857142857 0.142857142857 0.142857142857 0.285714285714
    加拿大 0.111111111111 0.111111111111 0.333333333333 0.222222222222 0.222222222222
    法国 0.250000000000 0.125000000000 0.250000000000 0.250000000000 0.125000000000
    意大利 0.166666666666 0.166666666666 0.166666666666 0.166666666666 0.333333333333
    美国 0.222222222222 0.111111111111 0.111111111111 0.333333333333 0.222222222222

    我想计算每个国家的数量。df表中每个国家有多少行,并将此计数作为结果表中的额外列。理想情况下,基于玩具示例数据,我想要的结果如下:

    国家 非常不满意 不满意的 中立 满意的 非常满意 计数
    巴西 0.285714285714 0.142857142857 0.142857142857 0.142857142857 0.285714285714 7.
    加拿大 0.111111111111 0.111111111111 0.333333333333 0.222222222222 0.222222222222 9
    法国 0.250000000000 0.125000000000 0.250000000000 0.250000000000 0.125000000000 8.
    意大利 0.166666666666 0.166666666666 0.166666666666 0.166666666666 0.333333333333 6.
    美国 0.222222222222 0.111111111111 0.111111111111 0.333333333333 0.222222222222 9
    2 回复  |  直到 1 年前
        1
  •  2
  •   Charlieface    1 年前

    你把事情复杂化了,而不是逐一计算 SatisfactionLevel 作为一个单独的组,然后必须将其转回,只需使用条件聚合。

    -- Parameters
    DECLARE @Year INT = 2020; --, @Country varchar(50)= 'Brazil';
    
    WITH ModeData AS (
        SELECT country, 
               a.Mode
        FROM df
        CROSS APPLY (
            SELECT TOP 1 Mode, COUNT(*) AS cnt
            FROM (VALUES (val1), (val2), (val3)) AS t(Mode)
            GROUP BY Mode
            ORDER BY COUNT(*) DESC
        ) a
        where year = @year --and  country=@country 
    )
    
    SELECT
      country, 
      COUNT(CASE WHEN Mode = 1 THEN 1 END) * 1.0 / COUNT(*) AS [Very Dissatisfied], 
      COUNT(CASE WHEN Mode = 2 THEN 1 END) * 1.0 / COUNT(*) AS [Dissatisfied], 
      COUNT(CASE WHEN Mode = 3 THEN 1 END) * 1.0 / COUNT(*) AS [Neutral],
      COUNT(CASE WHEN Mode = 4 THEN 1 END) * 1.0 / COUNT(*) AS [Satisfied], 
      COUNT(CASE WHEN Mode = 5 THEN 1 END) * 1.0 / COUNT(*) AS [Very Satisfied],
      COUNT(*) AS Count
    FROM ModeData
    GROUP BY
      country
    ORDER BY
      country;
    

    db<>fiddle

        2
  •  1
  •   topsail    1 年前

    添加额外的CTE来解决国家计数,然后将其纳入最终结果。

    样品表

    -- Create the table
    CREATE TABLE df (
        country VARCHAR(50),
        year INT,
        val1 INT,
        val2 INT,
        val3 INT
    );
    
    -- Insert 10 rows of data
    INSERT INTO df (country, year, val1, val2, val3) VALUES
    ('USA', 2020, 4, 4, 5),
      ('USA', 2020, 4, 4, 5),
      ('USA', 2020, 5, 5, 5),
      ('USA', 2020, 5, 5, 5),
      ('USA', 2020, 1, 1, 5),
      ('USA', 2020, 3, 3, 5),
      ('USA', 2020, 4, 2, 5),
      ('USA', 2020, 1, 1, 5),
      ('USA', 2020, 2, 2, 5),
      ('Canada', 2020, 1, 1, 3),
      ('Canada', 2020, 2, 2, 3),
      ('Canada', 2020, 4, 4, 3),
      ('Canada', 2020, 3, 4, 3),
      ('Canada', 2020, 3, 4, 3),
      ('Canada', 2020, 3, 4, 3),
      ('Canada', 2020, 5, 4, 3),
      ('Canada', 2020, 5, 4, 5),
      ('Canada', 2020, 5, 4, 5),
    ('Germany', 2022, 5, 5, 4),
    ('France', 2020, 1,1, 2),
      ('France', 2020, 1,1, 2),
      ('France', 2020, 3, 2, 2),
      ('France', 2020, 3, 4, 2),
      ('France', 2020, 3, 5, 5),
      ('France', 2020, 3, 4, 4),
      ('France', 2020, 3, 4, 4),
      ('France', 2020, 3, 4, 3),
    ('UK', 2021, 4, 2, 3),
    ('Australia', 2022, 3, 3, 4),
    ('Italy', 2020, 5, 5, 5),
      ('Italy', 2020, 5, 5, 5),
      ('Italy', 2020, 5, 1,1),
       ('Italy', 2020, 4, 4,1),
       ('Italy', 2020, 2, 1,2),
      ('Italy', 2020, 3, 5, 3),
    ('Spain', 2021, 1, 2, 3),
    ('Mexico', 2022, 4, 4, 4),
        ('Brazil', 2020, 4, 1, 1),
      ('Brazil', 2020, 4, 1, 1),
      ('Brazil', 2020, 4, 3, 4),
      ('Brazil', 2020, 5, 3, 5),
        ('Brazil', 2020, 5, 3, 5),
      ('Brazil', 2020, 3, 3, 1),
    ('Brazil', 2020, 2, 3, 1);
    -- Add the mode column to the table
    select * from df;
    

    查询

    -- Parameters
    DECLARE @Year INT = 2020; --, @Country varchar(50)= 'Brazil';
    
    WITH ModeData AS (
        SELECT country, 
               a.Mode
        FROM df
        CROSS APPLY (
            SELECT TOP 1 Mode, COUNT(*) AS cnt
            FROM (VALUES (val1), (val2), (val3)) AS t(Mode)
            GROUP BY Mode
            ORDER BY COUNT(*) DESC
        ) a
      where year=@year --and  country=@country 
    )
    
    -- Calculate proportions and map modes to labels
    , Proportions AS (
        SELECT country, 
               CASE 
                   WHEN Mode = 1 THEN 'Very Dissatisfied'
                   WHEN Mode = 2 THEN 'Dissatisfied'
                   WHEN Mode = 3 THEN 'Neutral'
                   WHEN Mode = 4 THEN 'Satisfied'
                   WHEN Mode = 5 THEN 'Very Satisfied'
               END AS SatisfactionLevel,
               COUNT(*) * 1.0 / SUM(COUNT(*)) OVER (PARTITION BY country) AS Proportion
        FROM ModeData
        GROUP BY country, Mode
    )
    
    , Pivoted AS (
    -- Pivot the results to get each satisfaction level as a column
    SELECT Country, 
           [Very Dissatisfied], 
           [Dissatisfied], 
           [Neutral], 
           [Satisfied], 
           [Very Satisfied]
    FROM Proportions
    PIVOT (
        MAX(Proportion)
        FOR SatisfactionLevel IN ([Very Dissatisfied], [Dissatisfied], [Neutral], [Satisfied], [Very Satisfied])
    ) AS p
    ),
    
    CountryCounts AS (
    -- Count of countries from original df table
    SELECT Country,
           COUNT(Country) AS Total
    FROM df
    GROUP BY Country)
    
    SELECT Pivoted.Country,
        [Very Dissatisfied],
        Dissatisfied,
        Neutral,
        Satisfied,
        [Very Satisfied],
        Total
    FROM Pivoted
    INNER JOIN CountryCounts
    ON Pivoted.Country = CountryCounts.Country
    ORDER BY Pivoted.Country;