TPC-DS (2012)
Similar to the Star Schema Benchmark (SSB), TPC-DS is based on TPC-H, but it took the opposite route, i.e. it expanded the number of joins needed by storing the data in a complex snowflake schema (24 instead of 8 tables). The data distribution is skewed (e.g. normal and Poisson distributions). It includes 99 reporting and ad-hoc queries with random substitutions.
References
- The Making of TPC-DS (Nambiar), 2006
Data Generation and Import
First, checkout the TPC-DS repository and compile the data generator:
git clone https://github.com/gregrahn/tpcds-kit.git
cd tpcds-kit/tools
make
Then, generate the data. Parameter -scale specifies the scale factor.
./dsdgen -scale 1
Now create tables in ClickHouse.
We stick as closely as possible to the rules of the TPC-DS specification. The abstract datatypes from the specification are mapped to ClickHouse's native datatypes as follows:
| TPC-DS Type | ClickHouse Type |
|---|---|
identifier | Int64 (or UInt32 for date/time dimension keys) |
integer | Int32 |
decimal(P,S) | Decimal(P,S) |
char(N) | LowCardinality(FixedString(N)) |
varchar(N) | LowCardinality(String) |
date | Date |
For nullability, columns marked with "N" (not null) in the specification have no Nullable wrapper as they never contain null values.
All other columns are wrapped in Nullable().
For string types, Nullable is placed inside LowCardinality, e.g. LowCardinality(Nullable(String)).
CREATE TABLE call_center(
cc_call_center_sk Int64,
cc_call_center_id LowCardinality(FixedString(16)),
cc_rec_start_date Nullable(Date),
cc_rec_end_date Nullable(Date),
cc_closed_date_sk Nullable(UInt32),
cc_open_date_sk Nullable(UInt32),
cc_name LowCardinality(Nullable(String)),
cc_class LowCardinality(Nullable(String)),
cc_employees Nullable(Int32),
cc_sq_ft Nullable(Int32),
cc_hours LowCardinality(Nullable(FixedString(20))),
cc_manager LowCardinality(Nullable(String)),
cc_mkt_id Nullable(Int32),
cc_mkt_class LowCardinality(Nullable(FixedString(50))),
cc_mkt_desc LowCardinality(Nullable(String)),
cc_market_manager LowCardinality(Nullable(String)),
cc_division Nullable(Int32),
cc_division_name LowCardinality(Nullable(String)),
cc_company Nullable(Int32),
cc_company_name LowCardinality(Nullable(FixedString(50))),
cc_street_number LowCardinality(Nullable(FixedString(10))),
cc_street_name LowCardinality(Nullable(String)),
cc_street_type LowCardinality(Nullable(FixedString(15))),
cc_suite_number LowCardinality(Nullable(FixedString(10))),
cc_city LowCardinality(Nullable(String)),
cc_county LowCardinality(Nullable(String)),
cc_state LowCardinality(Nullable(FixedString(2))),
cc_zip LowCardinality(Nullable(FixedString(10))),
cc_country LowCardinality(Nullable(String)),
cc_gmt_offset Nullable(Decimal(5,2)),
cc_tax_percentage Nullable(Decimal(5,2)),
PRIMARY KEY (cc_call_center_sk)
);
CREATE TABLE catalog_page(
cp_catalog_page_sk Int64,
cp_catalog_page_id LowCardinality(FixedString(16)),
cp_start_date_sk Nullable(UInt32),
cp_end_date_sk Nullable(UInt32),
cp_department LowCardinality(Nullable(String)),
cp_catalog_number Nullable(Int32),
cp_catalog_page_number Nullable(Int32),
cp_description LowCardinality(Nullable(String)),
cp_type LowCardinality(Nullable(String)),
PRIMARY KEY (cp_catalog_page_sk)
);
CREATE TABLE catalog_returns(
cr_returned_date_sk Nullable(UInt32),
cr_returned_time_sk Nullable(UInt32),
cr_item_sk Int64,
cr_refunded_customer_sk Nullable(Int64),
cr_refunded_cdemo_sk Nullable(Int64),
cr_refunded_hdemo_sk Nullable(Int64),
cr_refunded_addr_sk Nullable(Int64),
cr_returning_customer_sk Nullable(Int64),
cr_returning_cdemo_sk Nullable(Int64),
cr_returning_hdemo_sk Nullable(Int64),
cr_returning_addr_sk Nullable(Int64),
cr_call_center_sk Nullable(Int64),
cr_catalog_page_sk Nullable(Int64),
cr_ship_mode_sk Nullable(Int64),
cr_warehouse_sk Nullable(Int64),
cr_reason_sk Nullable(Int64),
cr_order_number Int64,
cr_return_quantity Nullable(Int32),
cr_return_amount Nullable(Decimal(7,2)),
cr_return_tax Nullable(Decimal(7,2)),
cr_return_amt_inc_tax Nullable(Decimal(7,2)),
cr_fee Nullable(Decimal(7,2)),
cr_return_ship_cost Nullable(Decimal(7,2)),
cr_refunded_cash Nullable(Decimal(7,2)),
cr_reversed_charge Nullable(Decimal(7,2)),
cr_store_credit Nullable(Decimal(7,2)),
cr_net_loss Nullable(Decimal(7,2)),
PRIMARY KEY (cr_item_sk, cr_order_number)
);
CREATE TABLE catalog_sales (
cs_sold_date_sk Nullable(UInt32),
cs_sold_time_sk Nullable(UInt32),
cs_ship_date_sk Nullable(UInt32),
cs_bill_customer_sk Nullable(Int64),
cs_bill_cdemo_sk Nullable(Int64),
cs_bill_hdemo_sk Nullable(Int64),
cs_bill_addr_sk Nullable(Int64),
cs_ship_customer_sk Nullable(Int64),
cs_ship_cdemo_sk Nullable(Int64),
cs_ship_hdemo_sk Nullable(Int64),
cs_ship_addr_sk Nullable(Int64),
cs_call_center_sk Nullable(Int64),
cs_catalog_page_sk Nullable(Int64),
cs_ship_mode_sk Nullable(Int64),
cs_warehouse_sk Nullable(Int64),
cs_item_sk Int64,
cs_promo_sk Nullable(Int64),
cs_order_number Int64,
cs_quantity Nullable(Int32),
cs_wholesale_cost Nullable(Decimal(7,2)),
cs_list_price Nullable(Decimal(7,2)),
cs_sales_price Nullable(Decimal(7,2)),
cs_ext_discount_amt Nullable(Decimal(7,2)),
cs_ext_sales_price Nullable(Decimal(7,2)),
cs_ext_wholesale_cost Nullable(Decimal(7,2)),
cs_ext_list_price Nullable(Decimal(7,2)),
cs_ext_tax Nullable(Decimal(7,2)),
cs_coupon_amt Nullable(Decimal(7,2)),
cs_ext_ship_cost Nullable(Decimal(7,2)),
cs_net_paid Nullable(Decimal(7,2)),
cs_net_paid_inc_tax Nullable(Decimal(7,2)),
cs_net_paid_inc_ship Nullable(Decimal(7,2)),
cs_net_paid_inc_ship_tax Nullable(Decimal(7,2)),
cs_net_profit Nullable(Decimal(7,2)),
PRIMARY KEY (cs_item_sk, cs_order_number)
);
CREATE TABLE customer_address (
ca_address_sk Int64,
ca_address_id LowCardinality(FixedString(16)),
ca_street_number LowCardinality(Nullable(FixedString(10))),
ca_street_name LowCardinality(Nullable(String)),
ca_street_type LowCardinality(Nullable(FixedString(15))),
ca_suite_number LowCardinality(Nullable(FixedString(10))),
ca_city LowCardinality(Nullable(String)),
ca_county LowCardinality(Nullable(String)),
ca_state LowCardinality(Nullable(FixedString(2))),
ca_zip LowCardinality(Nullable(FixedString(10))),
ca_country LowCardinality(Nullable(String)),
ca_gmt_offset Nullable(Decimal(5,2)),
ca_location_type LowCardinality(Nullable(FixedString(20))),
PRIMARY KEY (ca_address_sk)
);
CREATE TABLE customer_demographics (
cd_demo_sk Int64,
cd_gender LowCardinality(Nullable(FixedString(1))),
cd_marital_status LowCardinality(Nullable(FixedString(1))),
cd_education_status LowCardinality(Nullable(FixedString(20))),
cd_purchase_estimate Nullable(Int32),
cd_credit_rating LowCardinality(Nullable(FixedString(10))),
cd_dep_count Nullable(Int32),
cd_dep_employed_count Nullable(Int32),
cd_dep_college_count Nullable(Int32),
PRIMARY KEY (cd_demo_sk)
);
CREATE TABLE customer (
c_customer_sk Int64,
c_customer_id LowCardinality(FixedString(16)),
c_current_cdemo_sk Nullable(Int64),
c_current_hdemo_sk Nullable(Int64),
c_current_addr_sk Nullable(Int64),
c_first_shipto_date_sk Nullable(UInt32),
c_first_sales_date_sk Nullable(UInt32),
c_salutation LowCardinality(Nullable(FixedString(10))),
c_first_name LowCardinality(Nullable(FixedString(20))),
c_last_name LowCardinality(Nullable(FixedString(30))),
c_preferred_cust_flag LowCardinality(Nullable(FixedString(1))),
c_birth_day Nullable(Int32),
c_birth_month Nullable(Int32),
c_birth_year Nullable(Int32),
c_birth_country LowCardinality(Nullable(String)),
c_login LowCardinality(Nullable(FixedString(13))),
c_email_address LowCardinality(Nullable(FixedString(50))),
c_last_review_date_sk Nullable(UInt32),
PRIMARY KEY (c_customer_sk)
);
CREATE TABLE date_dim (
d_date_sk UInt32,
d_date_id LowCardinality(FixedString(16)),
d_date Date,
d_month_seq Nullable(Int32),
d_week_seq Nullable(Int32),
d_quarter_seq Nullable(Int32),
d_year Nullable(Int32),
d_dow Nullable(Int32),
d_moy Nullable(Int32),
d_dom Nullable(Int32),
d_qoy Nullable(Int32),
d_fy_year Nullable(Int32),
d_fy_quarter_seq Nullable(Int32),
d_fy_week_seq Nullable(Int32),
d_day_name LowCardinality(Nullable(FixedString(9))),
d_quarter_name LowCardinality(Nullable(FixedString(6))),
d_holiday LowCardinality(Nullable(FixedString(1))),
d_weekend LowCardinality(Nullable(FixedString(1))),
d_following_holiday LowCardinality(Nullable(FixedString(1))),
d_first_dom Nullable(Int32),
d_last_dom Nullable(Int32),
d_same_day_ly Nullable(Int32),
d_same_day_lq Nullable(Int32),
d_current_day LowCardinality(Nullable(FixedString(1))),
d_current_week LowCardinality(Nullable(FixedString(1))),
d_current_month LowCardinality(Nullable(FixedString(1))),
d_current_quarter LowCardinality(Nullable(FixedString(1))),
d_current_year LowCardinality(Nullable(FixedString(1))),
PRIMARY KEY (d_date_sk)
);
CREATE TABLE household_demographics (
hd_demo_sk Int64,
hd_income_band_sk Nullable(Int64),
hd_buy_potential LowCardinality(Nullable(FixedString(15))),
hd_dep_count Nullable(Int32),
hd_vehicle_count Nullable(Int32),
PRIMARY KEY (hd_demo_sk)
);
CREATE TABLE income_band(
ib_income_band_sk Int64,
ib_lower_bound Nullable(Int32),
ib_upper_bound Nullable(Int32),
PRIMARY KEY (ib_income_band_sk),
);
CREATE TABLE inventory (
inv_date_sk UInt32,
inv_item_sk Int64,
inv_warehouse_sk Int64,
inv_quantity_on_hand Nullable(Int32),
PRIMARY KEY (inv_date_sk, inv_item_sk, inv_warehouse_sk),
);
CREATE TABLE item (
i_item_sk Int64,
i_item_id LowCardinality(FixedString(16)),
i_rec_start_date Nullable(Date),
i_rec_end_date Nullable(Date),
i_item_desc LowCardinality(Nullable(String)),
i_current_price Nullable(Decimal(7,2)),
i_wholesale_cost Nullable(Decimal(7,2)),
i_brand_id Nullable(Int32),
i_brand LowCardinality(Nullable(FixedString(50))),
i_class_id Nullable(Int32),
i_class LowCardinality(Nullable(FixedString(50))),
i_category_id Nullable(Int32),
i_category LowCardinality(Nullable(FixedString(50))),
i_manufact_id Nullable(Int32),
i_manufact LowCardinality(Nullable(FixedString(50))),
i_size LowCardinality(Nullable(FixedString(20))),
i_formulation LowCardinality(Nullable(FixedString(20))),
i_color LowCardinality(Nullable(FixedString(20))),
i_units LowCardinality(Nullable(FixedString(10))),
i_container LowCardinality(Nullable(FixedString(10))),
i_manager_id Nullable(Int32),
i_product_name LowCardinality(Nullable(FixedString(50))),
PRIMARY KEY (i_item_sk)
);
CREATE TABLE promotion (
p_promo_sk Int64,
p_promo_id LowCardinality(FixedString(16)),
p_start_date_sk Nullable(UInt32),
p_end_date_sk Nullable(UInt32),
p_item_sk Nullable(Int64),
p_cost Nullable(Decimal(15,2)),
p_response_target Nullable(Int32),
p_promo_name LowCardinality(Nullable(FixedString(50))),
p_channel_dmail LowCardinality(Nullable(FixedString(1))),
p_channel_email LowCardinality(Nullable(FixedString(1))),
p_channel_catalog LowCardinality(Nullable(FixedString(1))),
p_channel_tv LowCardinality(Nullable(FixedString(1))),
p_channel_radio LowCardinality(Nullable(FixedString(1))),
p_channel_press LowCardinality(Nullable(FixedString(1))),
p_channel_event LowCardinality(Nullable(FixedString(1))),
p_channel_demo LowCardinality(Nullable(FixedString(1))),
p_channel_details LowCardinality(Nullable(String)),
p_purpose LowCardinality(Nullable(FixedString(15))),
p_discount_active LowCardinality(Nullable(FixedString(1))),
PRIMARY KEY (p_promo_sk)
);
CREATE TABLE reason(
r_reason_sk Int64,
r_reason_id LowCardinality(FixedString(16)),
r_reason_desc LowCardinality(Nullable(FixedString(100))),
PRIMARY KEY (r_reason_sk)
);
CREATE TABLE ship_mode(
sm_ship_mode_sk Int64,
sm_ship_mode_id LowCardinality(FixedString(16)),
sm_type LowCardinality(Nullable(FixedString(30))),
sm_code LowCardinality(Nullable(FixedString(10))),
sm_carrier LowCardinality(Nullable(FixedString(20))),
sm_contract LowCardinality(Nullable(FixedString(20))),
PRIMARY KEY (sm_ship_mode_sk)
);
CREATE TABLE store_returns (
sr_returned_date_sk Nullable(UInt32),
sr_return_time_sk Nullable(UInt32),
sr_item_sk Int64,
sr_customer_sk Nullable(Int64),
sr_cdemo_sk Nullable(Int64),
sr_hdemo_sk Nullable(Int64),
sr_addr_sk Nullable(Int64),
sr_store_sk Nullable(Int64),
sr_reason_sk Nullable(Int64),
sr_ticket_number Int64,
sr_return_quantity Nullable(Int32),
sr_return_amt Nullable(Decimal(7,2)),
sr_return_tax Nullable(Decimal(7,2)),
sr_return_amt_inc_tax Nullable(Decimal(7,2)),
sr_fee Nullable(Decimal(7,2)),
sr_return_ship_cost Nullable(Decimal(7,2)),
sr_refunded_cash Nullable(Decimal(7,2)),
sr_reversed_charge Nullable(Decimal(7,2)),
sr_store_credit Nullable(Decimal(7,2)),
sr_net_loss Nullable(Decimal(7,2)),
PRIMARY KEY (sr_item_sk, sr_ticket_number)
);
CREATE TABLE store_sales (
ss_sold_date_sk Nullable(UInt32),
ss_sold_time_sk Nullable(UInt32),
ss_item_sk Int64,
ss_customer_sk Nullable(Int64),
ss_cdemo_sk Nullable(Int64),
ss_hdemo_sk Nullable(Int64),
ss_addr_sk Nullable(Int64),
ss_store_sk Nullable(Int64),
ss_promo_sk Nullable(Int64),
ss_ticket_number Int64,
ss_quantity Nullable(Int32),
ss_wholesale_cost Nullable(Decimal(7,2)),
ss_list_price Nullable(Decimal(7,2)),
ss_sales_price Nullable(Decimal(7,2)),
ss_ext_discount_amt Nullable(Decimal(7,2)),
ss_ext_sales_price Nullable(Decimal(7,2)),
ss_ext_wholesale_cost Nullable(Decimal(7,2)),
ss_ext_list_price Nullable(Decimal(7,2)),
ss_ext_tax Nullable(Decimal(7,2)),
ss_coupon_amt Nullable(Decimal(7,2)),
ss_net_paid Nullable(Decimal(7,2)),
ss_net_paid_inc_tax Nullable(Decimal(7,2)),
ss_net_profit Nullable(Decimal(7,2)),
PRIMARY KEY (ss_item_sk, ss_ticket_number)
);
CREATE TABLE store (
s_store_sk Int64,
s_store_id LowCardinality(FixedString(16)),
s_rec_start_date Nullable(Date),
s_rec_end_date Nullable(Date),
s_closed_date_sk Nullable(UInt32),
s_store_name LowCardinality(Nullable(String)),
s_number_employees Nullable(Int32),
s_floor_space Nullable(Int32),
s_hours LowCardinality(Nullable(FixedString(20))),
s_manager LowCardinality(Nullable(String)),
s_market_id Nullable(Int32),
s_geography_class LowCardinality(Nullable(String)),
s_market_desc LowCardinality(Nullable(String)),
s_market_manager LowCardinality(Nullable(String)),
s_division_id Nullable(Int32),
s_division_name LowCardinality(Nullable(String)),
s_company_id Nullable(Int32),
s_company_name LowCardinality(Nullable(String)),
s_street_number LowCardinality(Nullable(String)),
s_street_name LowCardinality(Nullable(String)),
s_street_type LowCardinality(Nullable(FixedString(15))),
s_suite_number LowCardinality(Nullable(FixedString(10))),
s_city LowCardinality(Nullable(String)),
s_county LowCardinality(Nullable(String)),
s_state LowCardinality(Nullable(FixedString(2))),
s_zip LowCardinality(Nullable(FixedString(10))),
s_country LowCardinality(Nullable(String)),
s_gmt_offset Nullable(Decimal(5,2)),
s_tax_percentage Nullable(Decimal(5,2)),
PRIMARY KEY (s_store_sk)
);
CREATE TABLE time_dim (
t_time_sk UInt32,
t_time_id LowCardinality(FixedString(16)),
t_time Int32,
t_hour Nullable(Int32),
t_minute Nullable(Int32),
t_second Nullable(Int32),
t_am_pm LowCardinality(Nullable(FixedString(2))),
t_shift LowCardinality(Nullable(FixedString(20))),
t_sub_shift LowCardinality(Nullable(FixedString(20))),
t_meal_time LowCardinality(Nullable(FixedString(20))),
PRIMARY KEY (t_time_sk)
);
CREATE TABLE warehouse(
w_warehouse_sk Int64,
w_warehouse_id LowCardinality(FixedString(16)),
w_warehouse_name LowCardinality(Nullable(String)),
w_warehouse_sq_ft Nullable(Int32),
w_street_number LowCardinality(Nullable(FixedString(10))),
w_street_name LowCardinality(Nullable(String)),
w_street_type LowCardinality(Nullable(FixedString(15))),
w_suite_number LowCardinality(Nullable(FixedString(10))),
w_city LowCardinality(Nullable(String)),
w_county LowCardinality(Nullable(String)),
w_state LowCardinality(Nullable(FixedString(2))),
w_zip LowCardinality(Nullable(FixedString(10))),
w_country LowCardinality(Nullable(String)),
w_gmt_offset Nullable(Decimal(5,2)),
PRIMARY KEY (w_warehouse_sk)
);
CREATE TABLE web_page(
wp_web_page_sk Int64,
wp_web_page_id LowCardinality(FixedString(16)),
wp_rec_start_date Nullable(Date),
wp_rec_end_date Nullable(Date),
wp_creation_date_sk Nullable(UInt32),
wp_access_date_sk Nullable(UInt32),
wp_autogen_flag LowCardinality(Nullable(FixedString(1))),
wp_customer_sk Nullable(Int64),
wp_url LowCardinality(Nullable(String)),
wp_type LowCardinality(Nullable(FixedString(50))),
wp_char_count Nullable(Int32),
wp_link_count Nullable(Int32),
wp_image_count Nullable(Int32),
wp_max_ad_count Nullable(Int32),
PRIMARY KEY (wp_web_page_sk)
);
CREATE TABLE web_returns (
wr_returned_date_sk Nullable(UInt32),
wr_returned_time_sk Nullable(UInt32),
wr_item_sk Int64,
wr_refunded_customer_sk Nullable(Int64),
wr_refunded_cdemo_sk Nullable(Int64),
wr_refunded_hdemo_sk Nullable(Int64),
wr_refunded_addr_sk Nullable(Int64),
wr_returning_customer_sk Nullable(Int64),
wr_returning_cdemo_sk Nullable(Int64),
wr_returning_hdemo_sk Nullable(Int64),
wr_returning_addr_sk Nullable(Int64),
wr_web_page_sk Nullable(Int64),
wr_reason_sk Nullable(Int64),
wr_order_number Int64,
wr_return_quantity Nullable(Int32),
wr_return_amt Nullable(Decimal(7,2)),
wr_return_tax Nullable(Decimal(7,2)),
wr_return_amt_inc_tax Nullable(Decimal(7,2)),
wr_fee Nullable(Decimal(7,2)),
wr_return_ship_cost Nullable(Decimal(7,2)),
wr_refunded_cash Nullable(Decimal(7,2)),
wr_reversed_charge Nullable(Decimal(7,2)),
wr_account_credit Nullable(Decimal(7,2)),
wr_net_loss Nullable(Decimal(7,2)),
PRIMARY KEY (wr_item_sk, wr_order_number)
);
CREATE TABLE web_sales (
ws_sold_date_sk Nullable(UInt32),
ws_sold_time_sk Nullable(UInt32),
ws_ship_date_sk Nullable(UInt32),
ws_item_sk Int64,
ws_bill_customer_sk Nullable(Int64),
ws_bill_cdemo_sk Nullable(Int64),
ws_bill_hdemo_sk Nullable(Int64),
ws_bill_addr_sk Nullable(Int64),
ws_ship_customer_sk Nullable(Int64),
ws_ship_cdemo_sk Nullable(Int64),
ws_ship_hdemo_sk Nullable(Int64),
ws_ship_addr_sk Nullable(Int64),
ws_web_page_sk Nullable(Int64),
ws_web_site_sk Nullable(Int64),
ws_ship_mode_sk Nullable(Int64),
ws_warehouse_sk Nullable(Int64),
ws_promo_sk Nullable(Int64),
ws_order_number Int64,
ws_quantity Nullable(Int32),
ws_wholesale_cost Nullable(Decimal(7,2)),
ws_list_price Nullable(Decimal(7,2)),
ws_sales_price Nullable(Decimal(7,2)),
ws_ext_discount_amt Nullable(Decimal(7,2)),
ws_ext_sales_price Nullable(Decimal(7,2)),
ws_ext_wholesale_cost Nullable(Decimal(7,2)),
ws_ext_list_price Nullable(Decimal(7,2)),
ws_ext_tax Nullable(Decimal(7,2)),
ws_coupon_amt Nullable(Decimal(7,2)),
ws_ext_ship_cost Nullable(Decimal(7,2)),
ws_net_paid Nullable(Decimal(7,2)),
ws_net_paid_inc_tax Nullable(Decimal(7,2)),
ws_net_paid_inc_ship Nullable(Decimal(7,2)),
ws_net_paid_inc_ship_tax Nullable(Decimal(7,2)),
ws_net_profit Nullable(Decimal(7,2)),
PRIMARY KEY (ws_item_sk, ws_order_number)
);
CREATE TABLE web_site (
web_site_sk Int64,
web_site_id LowCardinality(FixedString(16)),
web_rec_start_date Nullable(Date),
web_rec_end_date Nullable(Date),
web_name LowCardinality(Nullable(String)),
web_open_date_sk Nullable(UInt32),
web_close_date_sk Nullable(UInt32),
web_class LowCardinality(Nullable(String)),
web_manager LowCardinality(Nullable(String)),
web_mkt_id Nullable(Int32),
web_mkt_class LowCardinality(Nullable(String)),
web_mkt_desc LowCardinality(Nullable(String)),
web_market_manager LowCardinality(Nullable(String)),
web_company_id Nullable(Int32),
web_company_name LowCardinality(Nullable(FixedString(50))),
web_street_number LowCardinality(Nullable(FixedString(10))),
web_street_name LowCardinality(Nullable(String)),
web_street_type LowCardinality(Nullable(FixedString(15))),
web_suite_number LowCardinality(Nullable(FixedString(10))),
web_city LowCardinality(Nullable(String)),
web_county LowCardinality(Nullable(String)),
web_state LowCardinality(Nullable(FixedString(2))),
web_zip LowCardinality(Nullable(FixedString(10))),
web_country LowCardinality(Nullable(String)),
web_gmt_offset Nullable(Decimal(5,2)),
web_tax_percentage Nullable(Decimal(5,2)),
PRIMARY KEY (web_site_sk)
);
The data can be imported as follows:
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO call_center FORMAT CSV" < call_center.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO catalog_page FORMAT CSV" < catalog_page.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO catalog_returns FORMAT CSV" < catalog_returns.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO catalog_sales FORMAT CSV" < catalog_sales.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO customer FORMAT CSV" < customer.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO customer_address FORMAT CSV" < customer_address.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO customer_demographics FORMAT CSV" < customer_demographics.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO date_dim FORMAT CSV" < date_dim.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO household_demographics FORMAT CSV" < household_demographics.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO income_band FORMAT CSV" < income_band.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO inventory FORMAT CSV" < inventory.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO item FORMAT CSV" < item.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO promotion FORMAT CSV" < promotion.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO reason FORMAT CSV" < reason.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO ship_mode FORMAT CSV" < ship_mode.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO store FORMAT CSV" < store.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO store_returns FORMAT CSV" < store_returns.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO store_sales FORMAT CSV" < store_sales.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO time_dim FORMAT CSV" < time_dim.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO warehouse FORMAT CSV" < warehouse.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO web_page FORMAT CSV" < web_page.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO web_returns FORMAT CSV" < web_returns.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO web_sales FORMAT CSV" < web_sales.dat
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO web_site FORMAT CSV" < web_site.dat
Then run the generated queries.
Queries
The following settings should be enabled to produce correct results according to SQL standard:
SET group_by_use_nulls = 1;
SET intersect_default_mode = 'DISTINCT';
SET joined_subquery_requires_alias = 0;
SET union_default_mode = 'DISTINCT';
SET join_use_nulls = 1;
Correctness
The result of the queries agrees with the official results unless mentioned otherwise. There may be minor precision differences, which are permitted by the TPC-DS specification.
Q1
WITH customer_total_return AS
(
SELECT
sr_customer_sk AS ctr_customer_sk,
sr_store_sk AS ctr_store_sk,
sum(sr_return_amt) AS ctr_total_return
FROM store_returns, date_dim
WHERE (sr_returned_date_sk = d_date_sk) AND (d_year = 2000)
GROUP BY
sr_customer_sk,
sr_store_sk
)
SELECT c_customer_id
FROM customer_total_return AS ctr1, store, customer
WHERE (ctr1.ctr_total_return > (
SELECT avg(ctr_total_return) * 1.2
FROM customer_total_return AS ctr2
WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk
)) AND (s_store_sk = ctr1.ctr_store_sk) AND (s_state = 'TN') AND (ctr1.ctr_customer_sk = c_customer_sk)
ORDER BY c_customer_id
LIMIT 100;
Q2
WITH
wscs AS
(
SELECT
sold_date_sk,
sales_price
FROM
(
SELECT
ws_sold_date_sk AS sold_date_sk,
ws_ext_sales_price AS sales_price
FROM web_sales
UNION ALL
SELECT
cs_sold_date_sk AS sold_date_sk,
cs_ext_sales_price AS sales_price
FROM catalog_sales
)
),
wswscs AS
(
SELECT
d_week_seq,
sum(multiIf(d_day_name = 'Sunday', sales_price, NULL)) AS sun_sales,
sum(multiIf(d_day_name = 'Monday', sales_price, NULL)) AS mon_sales,
sum(multiIf(d_day_name = 'Tuesday', sales_price, NULL)) AS tue_sales,
sum(multiIf(d_day_name = 'Wednesday', sales_price, NULL)) AS wed_sales,
sum(multiIf(d_day_name = 'Thursday', sales_price, NULL)) AS thu_sales,
sum(multiIf(d_day_name = 'Friday', sales_price, NULL)) AS fri_sales,
sum(multiIf(d_day_name = 'Saturday', sales_price, NULL)) AS sat_sales
FROM wscs, date_dim
WHERE d_date_sk = sold_date_sk
GROUP BY d_week_seq
)
SELECT
d_week_seq1,
round(sun_sales1 / sun_sales2, 2),
round(mon_sales1 / mon_sales2, 2),
round(tue_sales1 / tue_sales2, 2),
round(wed_sales1 / wed_sales2, 2),
round(thu_sales1 / thu_sales2, 2),
round(fri_sales1 / fri_sales2, 2),
round(sat_sales1 / sat_sales2, 2)
FROM
(
SELECT
wswscs.d_week_seq AS d_week_seq1,
sun_sales AS sun_sales1,
mon_sales AS mon_sales1,
tue_sales AS tue_sales1,
wed_sales AS wed_sales1,
thu_sales AS thu_sales1,
fri_sales AS fri_sales1,
sat_sales AS sat_sales1
FROM wswscs, date_dim
WHERE (date_dim.d_week_seq = wswscs.d_week_seq) AND (d_year = 2001)
) AS y,
(
SELECT
wswscs.d_week_seq AS d_week_seq2,
sun_sales AS sun_sales2,
mon_sales AS mon_sales2,
tue_sales AS tue_sales2,
wed_sales AS wed_sales2,
thu_sales AS thu_sales2,
fri_sales AS fri_sales2,
sat_sales AS sat_sales2
FROM wswscs, date_dim
WHERE (date_dim.d_week_seq = wswscs.d_week_seq) AND (d_year = (2001 + 1))
) AS z
WHERE d_week_seq1 = (d_week_seq2 - 53)
ORDER BY d_week_seq1;
Q3
SELECT
dt.d_year,
item.i_brand_id AS brand_id,
item.i_brand AS brand,
sum(ss_ext_sales_price) AS sum_agg
FROM date_dim AS dt, store_sales, item
WHERE (dt.d_date_sk = store_sales.ss_sold_date_sk) AND (store_sales.ss_item_sk = item.i_item_sk) AND (item.i_manufact_id = 128) AND (dt.d_moy = 11)
GROUP BY
dt.d_year,
item.i_brand,
item.i_brand_id
ORDER BY
dt.d_year,
sum_agg DESC,
brand_id
LIMIT 100;
Q4
WITH year_total AS
(
SELECT
c_customer_id AS customer_id,
c_first_name AS customer_first_name,
c_last_name AS customer_last_name,
c_preferred_cust_flag AS customer_preferred_cust_flag,
c_birth_country AS customer_birth_country,
c_login AS customer_login,
c_email_address AS customer_email_address,
d_year AS dyear,
sum((((ss_ext_list_price - ss_ext_wholesale_cost) - ss_ext_discount_amt) + ss_ext_sales_price) / 2) AS year_total,
's' AS sale_type
FROM customer, store_sales, date_dim
WHERE (c_customer_sk = ss_customer_sk) AND (ss_sold_date_sk = d_date_sk)
GROUP BY
c_customer_id,
c_first_name,
c_last_name,
c_preferred_cust_flag,
c_birth_country,
c_login,
c_email_address,
d_year
UNION ALL
SELECT
c_customer_id AS customer_id,
c_first_name AS customer_first_name,
c_last_name AS customer_last_name,
c_preferred_cust_flag AS customer_preferred_cust_flag,
c_birth_country AS customer_birth_country,
c_login AS customer_login,
c_email_address AS customer_email_address,
d_year AS dyear,
sum((((cs_ext_list_price - cs_ext_wholesale_cost) - cs_ext_discount_amt) + cs_ext_sales_price) / 2) AS year_total,
'c' AS sale_type
FROM customer, catalog_sales, date_dim
WHERE (c_customer_sk = cs_bill_customer_sk) AND (cs_sold_date_sk = d_date_sk)
GROUP BY
c_customer_id,
c_first_name,
c_last_name,
c_preferred_cust_flag,
c_birth_country,
c_login,
c_email_address,
d_year
UNION ALL
SELECT
c_customer_id AS customer_id,
c_first_name AS customer_first_name,
c_last_name AS customer_last_name,
c_preferred_cust_flag AS customer_preferred_cust_flag,
c_birth_country AS customer_birth_country,
c_login AS customer_login,
c_email_address AS customer_email_address,
d_year AS dyear,
sum((((ws_ext_list_price - ws_ext_wholesale_cost) - ws_ext_discount_amt) + ws_ext_sales_price) / 2) AS year_total,
'w' AS sale_type
FROM customer, web_sales, date_dim
WHERE (c_customer_sk = ws_bill_customer_sk) AND (ws_sold_date_sk = d_date_sk)
GROUP BY
c_customer_id,
c_first_name,
c_last_name,
c_preferred_cust_flag,
c_birth_country,
c_login,
c_email_address,
d_year
)
SELECT
t_s_secyear.customer_id,
t_s_secyear.customer_first_name,
t_s_secyear.customer_last_name,
t_s_secyear.customer_preferred_cust_flag
FROM year_total AS t_s_firstyear, year_total AS t_s_secyear, year_total AS t_c_firstyear, year_total AS t_c_secyear, year_total AS t_w_firstyear, year_total AS t_w_secyear
WHERE (t_s_secyear.customer_id = t_s_firstyear.customer_id) AND (t_s_firstyear.customer_id = t_c_secyear.customer_id) AND (t_s_firstyear.customer_id = t_c_firstyear.customer_id) AND (t_s_firstyear.customer_id = t_w_firstyear.customer_id) AND (t_s_firstyear.customer_id = t_w_secyear.customer_id) AND (t_s_firstyear.sale_type = 's') AND (t_c_firstyear.sale_type = 'c') AND (t_w_firstyear.sale_type = 'w') AND (t_s_secyear.sale_type = 's') AND (t_c_secyear.sale_type = 'c') AND (t_w_secyear.sale_type = 'w') AND (t_s_firstyear.dyear = 2001) AND (t_s_secyear.dyear = (2001 + 1)) AND (t_c_firstyear.dyear = 2001) AND (t_c_secyear.dyear = (2001 + 1)) AND (t_w_firstyear.dyear = 2001) AND (t_w_secyear.dyear = (2001 + 1)) AND (t_s_firstyear.year_total > 0) AND (t_c_firstyear.year_total > 0) AND (t_w_firstyear.year_total > 0) AND (multiIf(t_c_firstyear.year_total > 0, t_c_secyear.year_total / t_c_firstyear.year_total, NULL) > multiIf(t_s_firstyear.year_total > 0, t_s_secyear.year_total / t_s_firstyear.year_total, NULL)) AND (multiIf(t_c_firstyear.year_total > 0, t_c_secyear.year_total / t_c_firstyear.year_total, NULL) > multiIf(t_w_firstyear.year_total > 0, t_w_secyear.year_total / t_w_firstyear.year_total, NULL))
ORDER BY
t_s_secyear.customer_id,
t_s_secyear.customer_first_name,
t_s_secyear.customer_last_name,
t_s_secyear.customer_preferred_cust_flag
LIMIT 100;
Q5
WITH
ssr AS
(
SELECT
s_store_id,
sum(sales_price) AS sales,
sum(profit) AS profit,
sum(return_amt) AS returns,
sum(net_loss) AS profit_loss
FROM
(
SELECT
ss_store_sk AS store_sk,
ss_sold_date_sk AS date_sk,
ss_ext_sales_price AS sales_price,
ss_net_profit AS profit,
CAST(0, 'decimal(7, 2)') AS return_amt,
CAST(0, 'decimal(7, 2)') AS net_loss
FROM store_sales
UNION ALL
SELECT
sr_store_sk AS store_sk,
sr_returned_date_sk AS date_sk,
CAST(0, 'decimal(7, 2)') AS sales_price,
CAST(0, 'decimal(7, 2)') AS profit,
sr_return_amt AS return_amt,
sr_net_loss AS net_loss
FROM store_returns
) AS salesreturns, date_dim, store
WHERE (date_sk = d_date_sk) AND ((d_date >= CAST('2000-08-23', 'date')) AND (d_date <= (CAST('2000-08-23', 'date') + INTERVAL 14 day))) AND (store_sk = s_store_sk)
GROUP BY s_store_id
),
csr AS
(
SELECT
cp_catalog_page_id,
sum(sales_price) AS sales,
sum(profit) AS profit,
sum(return_amt) AS returns,
sum(net_loss) AS profit_loss
FROM
(
SELECT
cs_catalog_page_sk AS page_sk,
cs_sold_date_sk AS date_sk,
cs_ext_sales_price AS sales_price,
cs_net_profit AS profit,
CAST(0, 'decimal(7, 2)') AS return_amt,
CAST(0, 'decimal(7, 2)') AS net_loss
FROM catalog_sales
UNION ALL
SELECT
cr_catalog_page_sk AS page_sk,
cr_returned_date_sk AS date_sk,
CAST(0, 'decimal(7, 2)') AS sales_price,
CAST(0, 'decimal(7, 2)') AS profit,
cr_return_amount AS return_amt,
cr_net_loss AS net_loss
FROM catalog_returns
) AS salesreturns, date_dim, catalog_page
WHERE (date_sk = d_date_sk) AND (d_date BETWEEN CAST('2000-08-23', 'date') AND CAST('2000-08-23', 'date') + INTERVAL 14 day) AND (page_sk = cp_catalog_page_sk)
GROUP BY cp_catalog_page_id
),
wsr AS
(
SELECT
web_site_id,
sum(sales_price) AS sales,
sum(profit) AS profit,
sum(return_amt) AS returns,
sum(net_loss) AS profit_loss
FROM
(
SELECT
ws_web_site_sk AS wsr_web_site_sk,
ws_sold_date_sk AS date_sk,
ws_ext_sales_price AS sales_price,
ws_net_profit AS profit,
CAST(0, 'decimal(7, 2)') AS return_amt,
CAST(0, 'decimal(7, 2)') AS net_loss
FROM web_sales
UNION ALL
SELECT
ws_web_site_sk AS wsr_web_site_sk,
wr_returned_date_sk AS date_sk,
CAST(0, 'decimal(7, 2)') AS sales_price,
CAST(0, 'decimal(7, 2)') AS profit,
wr_return_amt AS return_amt,
wr_net_loss AS net_loss
FROM web_returns
LEFT JOIN web_sales ON (wr_item_sk = ws_item_sk) AND (wr_order_number = ws_order_number)
) AS salesreturns, date_dim, web_site
WHERE (date_sk = d_date_sk) AND ((d_date >= CAST('2000-08-23', 'date')) AND (d_date <= (CAST('2000-08-23', 'date') + INTERVAL 14 day))) AND (wsr_web_site_sk = web_site_sk)
GROUP BY web_site_id
)
SELECT
channel,
id,
sum(sales) AS sales,
sum(returns) AS returns,
sum(profit) AS profit
FROM
(
SELECT
'store channel' AS channel,
concat('store', s_store_id) AS id,
sales,
returns,
profit - profit_loss AS profit
FROM ssr
UNION ALL
SELECT
'catalog channel' AS channel,
concat('catalog_page', cp_catalog_page_id) AS id,
sales,
returns,
profit - profit_loss AS profit
FROM csr
UNION ALL
SELECT
'web channel' AS channel,
concat('web_site', web_site_id) AS id,
sales,
returns,
profit - profit_loss AS profit
FROM wsr
) AS x
GROUP BY
channel,
id
WITH ROLLUP
ORDER BY
channel,
id
LIMIT 100;
The query does not work out-of-the-box due to https://github.com/ClickHouse/ClickHouse/issues/95299. This alternative formulation with a minor fix works:
WITH
ssr AS
(
SELECT
s_store_id,
sum(sales_price) AS sales,
sum(profit) AS profit,
sum(return_amt) AS returns,
sum(net_loss) AS profit_loss
FROM
(
SELECT
ss_store_sk AS store_sk,
ss_sold_date_sk AS date_sk,
ss_ext_sales_price AS sales_price,
ss_net_profit AS profit,
CAST(0, 'decimal(7, 2)') AS return_amt,
CAST(0, 'decimal(7, 2)') AS net_loss
FROM store_sales
UNION ALL
SELECT
sr_store_sk AS store_sk,
sr_returned_date_sk AS date_sk,
CAST(0, 'decimal(7, 2)') AS sales_price,
CAST(0, 'decimal(7, 2)') AS profit,
sr_return_amt AS return_amt,
sr_net_loss AS net_loss
FROM store_returns
) AS salesreturns, date_dim, store
WHERE (date_sk = d_date_sk) AND ((d_date >= CAST('2000-08-23', 'date')) AND (d_date <= (CAST('2000-08-23', 'date') + INTERVAL 14 day))) AND (store_sk = s_store_sk)
GROUP BY s_store_id
),
csr AS
(
SELECT
cp_catalog_page_id,
sum(sales_price) AS sales,
sum(profit) AS profit,
sum(return_amt) AS returns,
sum(net_loss) AS profit_loss
FROM
(
SELECT
cs_catalog_page_sk AS page_sk,
cs_sold_date_sk AS date_sk,
cs_ext_sales_price AS sales_price,
cs_net_profit AS profit,
CAST(0, 'decimal(7, 2)') AS return_amt,
CAST(0, 'decimal(7, 2)') AS net_loss
FROM catalog_sales
UNION ALL
SELECT
cr_catalog_page_sk AS page_sk,
cr_returned_date_sk AS date_sk,
CAST(0, 'decimal(7, 2)') AS sales_price,
CAST(0, 'decimal(7, 2)') AS profit,
cr_return_amount AS return_amt,
cr_net_loss AS net_loss
FROM catalog_returns
) AS salesreturns, date_dim, catalog_page
WHERE (date_sk = d_date_sk) AND (d_date BETWEEN CAST('2000-08-23', 'date') AND CAST('2000-08-23', 'date') + INTERVAL 14 day) AND (page_sk = cp_catalog_page_sk)
GROUP BY cp_catalog_page_id
),
wsr AS
(
SELECT
web_site_id,
sum(sales_price) AS sales,
sum(profit) AS profit,
sum(return_amt) AS returns,
sum(net_loss) AS profit_loss
FROM
(
SELECT
ws_web_site_sk AS wsr_web_site_sk,
ws_sold_date_sk AS date_sk,
ws_ext_sales_price AS sales_price,
ws_net_profit AS profit,
CAST(0, 'decimal(7, 2)') AS return_amt,
CAST(0, 'decimal(7, 2)') AS net_loss
FROM web_sales
UNION ALL
SELECT
ws_web_site_sk AS wsr_web_site_sk,
wr_returned_date_sk AS date_sk,
CAST(0, 'decimal(7, 2)') AS sales_price,
CAST(0, 'decimal(7, 2)') AS profit,
wr_return_amt AS return_amt,
wr_net_loss AS net_loss
FROM web_returns
LEFT JOIN web_sales ON (wr_item_sk = ws_item_sk) AND (wr_order_number = ws_order_number)
) AS salesreturns, date_dim, web_site
WHERE (date_sk = d_date_sk) AND ((d_date >= CAST('2000-08-23', 'date')) AND (d_date <= (CAST('2000-08-23', 'date') + INTERVAL 14 day))) AND (wsr_web_site_sk = web_site_sk)
GROUP BY web_site_id
)
SELECT
channel,
id,
sum(sales) AS sales,
sum(returns) AS returns,
sum(profit) AS profit
FROM
(
SELECT
'store channel' AS channel,
CAST(concat('store', s_store_id) AS String) AS id,
sales,
returns,
profit - profit_loss AS profit
FROM ssr
UNION ALL
SELECT
'catalog channel' AS channel,
concat('catalog_page', cp_catalog_page_id) AS id,
sales,
returns,
profit - profit_loss AS profit
FROM csr
UNION ALL
SELECT
'web channel' AS channel,
concat('web_site', web_site_id) AS id,
sales,
returns,
profit - profit_loss AS profit
FROM wsr
) AS x
GROUP BY
channel,
id
WITH ROLLUP
ORDER BY
channel,
id
LIMIT 100;
Q6
SELECT
a.ca_state AS state,
count(*) AS cnt
FROM customer_address AS a, customer AS c, store_sales AS s, date_dim AS d, item AS i
WHERE (a.ca_address_sk = c.c_current_addr_sk)
AND (c.c_customer_sk = s.ss_customer_sk)
AND (s.ss_sold_date_sk = d.d_date_sk)
AND (s.ss_item_sk = i.i_item_sk)
AND (d.d_month_seq = (
SELECT DISTINCT d_month_seq
FROM date_dim
WHERE (d_year = 2001) AND (d_moy = 1)
))
AND (i.i_current_price > 1.2 * (
SELECT avg(j.i_current_price)
FROM item AS j
WHERE (j.i_category = i.i_category)
))
GROUP BY a.ca_state
HAVING count(*) >= 10
ORDER BY cnt, a.ca_state
LIMIT 100;
Q7
SELECT
i_item_id,
avg(ss_quantity) AS agg1,
avg(ss_list_price) AS agg2,
avg(ss_coupon_amt) AS agg3,
avg(ss_sales_price) AS agg4
FROM store_sales, customer_demographics, date_dim, item, promotion
WHERE (ss_sold_date_sk = d_date_sk)
AND (ss_item_sk = i_item_sk)
AND (ss_cdemo_sk = cd_demo_sk)
AND (ss_promo_sk = p_promo_sk)
AND (cd_gender = 'M')
AND (cd_marital_status = 'S')
AND (cd_education_status = 'College')
AND ((p_channel_email = 'N') OR (p_channel_event = 'N'))
AND (d_year = 2000)
GROUP BY i_item_id
ORDER BY i_item_id
LIMIT 100;
Q8
SELECT
s_store_name,
sum(ss_net_profit)
FROM store_sales, date_dim, store,
(
SELECT substr(ca_zip, 1, 2) AS ca_zip
FROM
(
SELECT substr(ca_zip, 1, 5) AS ca_zip
FROM customer_address
WHERE substr(ca_zip, 1, 5) IN ('24128', '76232', '65084', '87816', '83926', '77556', '20548', '26231', '43848', '15126', '91137', '61265', '98294', '25782', '17920', '18426', '98235', '40081', '84093', '28577', '55565', '17183', '54601', '67897', '22752', '86284', '18376', '38607', '45200', '21756', '29741', '96765', '23932', '89360', '29839', '25989', '28898', '91068', '72550', '10390', '18845', '47770', '82636', '41367', '76638', '86198', '81312', '37126', '39192', '88424', '72175', '81426', '53672', '10445', '42666', '66864', '66708', '41248', '48583', '82276', '18842', '78890', '49448', '14089', '38122', '34425', '79077', '19849', '43285', '39861', '66162', '77610', '13695', '99543', '83444', '83041', '12305', '57665', '68341', '25003', '57834', '62878', '49130', '81096', '18840', '27700', '23470', '50412', '21195', '16021', '76107', '71954', '68309', '18119', '98359', '64544', '10336', '86379', '27068', '39736', '98569', '28915', '24206', '56529', '57647', '54917', '42961', '91110', '63981', '14922', '36420', '23006', '67467', '32754', '30903', '20260', '31671', '51798', '72325', '85816', '68621', '13955', '36446', '41766', '68806', '16725', '15146', '22744', '35850', '88086', '51649', '18270', '52867', '39972', '96976', '63792', '11376', '94898', '13595', '10516', '90225', '58943', '39371', '94945', '28587', '96576', '57855', '28488', '26105', '83933', '25858', '34322', '44438', '73171', '30122', '34102', '22685', '71256', '78451', '54364', '13354', '45375', '40558', '56458', '28286', '45266', '47305', '69399', '83921', '26233', '11101', '15371', '69913', '35942', '15882', '25631', '24610', '44165', '99076', '33786', '70738', '26653', '14328', '72305', '62496', '22152', '10144', '64147', '48425', '14663', '21076', '18799', '30450', '63089', '81019', '68893', '24996', '51200', '51211', '45692', '92712', '70466', '79994', '22437', '25280', '38935', '71791', '73134', '56571', '14060', '19505', '72425', '56575', '74351', '68786', '51650', '20004', '18383', '76614', '11634', '18906', '15765', '41368', '73241', '76698', '78567', '97189', '28545', '76231', '75691', '22246', '51061', '90578', '56691', '68014', '51103', '94167', '57047', '14867', '73520', '15734', '63435', '25733', '35474', '24676', '94627', '53535', '17879', '15559', '53268', '59166', '11928', '59402', '33282', '45721', '43933', '68101', '33515', '36634', '71286', '19736', '58058', '55253', '67473', '41918', '19515', '36495', '19430', '22351', '77191', '91393', '49156', '50298', '87501', '18652', '53179', '18767', '63193', '23968', '65164', '68880', '21286', '72823', '58470', '67301', '13394', '31016', '70372', '67030', '40604', '24317', '45748', '39127', '26065', '77721', '31029', '31880', '60576', '24671', '45549', '13376', '50016', '33123', '19769', '22927', '97789', '46081', '72151', '15723', '46136', '51949', '68100', '96888', '64528', '14171', '79777', '28709', '11489', '25103', '32213', '78668', '22245', '15798', '27156', '37930', '62971', '21337', '51622', '67853', '10567', '38415', '15455', '58263', '42029', '60279', '37125', '56240', '88190', '50308', '26859', '64457', '89091', '82136', '62377', '36233', '63837', '58078', '17043', '30010', '60099', '28810', '98025', '29178', '87343', '73273', '30469', '64034', '39516', '86057', '21309', '90257', '67875', '40162', '11356', '73650', '61810', '72013', '30431', '22461', '19512', '13375', '55307', '30625', '83849', '68908', '26689', '96451', '38193', '46820', '88885', '84935', '69035', '83144', '47537', '56616', '94983', '48033', '69952', '25486', '61547', '27385', '61860', '58048', '56910', '16807', '17871', '35258', '31387', '35458', '35576')
INTERSECT
SELECT ca_zip
FROM
(
SELECT
substr(ca_zip, 1, 5) AS ca_zip,
count(*) AS cnt
FROM customer_address, customer
WHERE (ca_address_sk = c_current_addr_sk) AND (c_preferred_cust_flag = 'Y')
GROUP BY ca_zip
HAVING count(*) > 10
) AS A1
) AS A2
) AS V1
WHERE (ss_store_sk = s_store_sk) AND (ss_sold_date_sk = d_date_sk) AND (d_qoy = 2) AND (d_year = 1998) AND (substr(s_zip, 1, 2) = substr(V1.ca_zip, 1, 2))
GROUP BY s_store_name
ORDER BY s_store_name
LIMIT 100;
Q9
SELECT
multiIf((
SELECT count(*)
FROM store_sales
WHERE (ss_quantity >= 1) AND (ss_quantity <= 20)
) > 74129, (
SELECT avg(ss_ext_discount_amt)
FROM store_sales
WHERE (ss_quantity >= 1) AND (ss_quantity <= 20)
), (
SELECT avg(ss_net_paid)
FROM store_sales
WHERE (ss_quantity >= 1) AND (ss_quantity <= 20)
)) AS bucket1,
multiIf((
SELECT count(*)
FROM store_sales
WHERE (ss_quantity >= 21) AND (ss_quantity <= 40)
) > 122840, (
SELECT avg(ss_ext_discount_amt)
FROM store_sales
WHERE (ss_quantity >= 21) AND (ss_quantity <= 40)
), (
SELECT avg(ss_net_paid)
FROM store_sales
WHERE (ss_quantity >= 21) AND (ss_quantity <= 40)
)) AS bucket2,
multiIf((
SELECT count(*)
FROM store_sales
WHERE (ss_quantity >= 41) AND (ss_quantity <= 60)
) > 56580, (
SELECT avg(ss_ext_discount_amt)
FROM store_sales
WHERE (ss_quantity >= 41) AND (ss_quantity <= 60)
), (
SELECT avg(ss_net_paid)
FROM store_sales
WHERE (ss_quantity >= 41) AND (ss_quantity <= 60)
)) AS bucket3,
multiIf((
SELECT count(*)
FROM store_sales
WHERE (ss_quantity >= 61) AND (ss_quantity <= 80)
) > 10097, (
SELECT avg(ss_ext_discount_amt)
FROM store_sales
WHERE (ss_quantity >= 61) AND (ss_quantity <= 80)
), (
SELECT avg(ss_net_paid)
FROM store_sales
WHERE (ss_quantity >= 61) AND (ss_quantity <= 80)
)) AS bucket4,
multiIf((
SELECT count(*)
FROM store_sales
WHERE (ss_quantity >= 81) AND (ss_quantity <= 100)
) > 165306, (
SELECT avg(ss_ext_discount_amt)
FROM store_sales
WHERE (ss_quantity >= 81) AND (ss_quantity <= 100)
), (
SELECT avg(ss_net_paid)
FROM store_sales
WHERE (ss_quantity >= 81) AND (ss_quantity <= 100)
)) AS bucket5
FROM reason
WHERE r_reason_sk = 1;
Q10
SELECT
cd_gender,
cd_marital_status,
cd_education_status,
count(*) AS cnt1,
cd_purchase_estimate,
count(*) AS cnt2,
cd_credit_rating,
count(*) AS cnt3,
cd_dep_count,
count(*) AS cnt4,
cd_dep_employed_count,
count(*) AS cnt5,
cd_dep_college_count,
count(*) AS cnt6
FROM customer AS c, customer_address AS ca, customer_demographics
WHERE (c.c_current_addr_sk = ca.ca_address_sk) AND (ca_county IN ('Rush County', 'Toole County', 'Jefferson County', 'Dona Ana County', 'La Porte County')) AND (cd_demo_sk = c.c_current_cdemo_sk) AND exists((
SELECT *
FROM store_sales, date_dim
WHERE (c.c_customer_sk = ss_customer_sk) AND (ss_sold_date_sk = d_date_sk) AND (d_year = 2002) AND ((d_moy >= 1) AND (d_moy <= (1 + 3)))
)) AND (exists((
SELECT *
FROM web_sales, date_dim
WHERE (c.c_customer_sk = ws_bill_customer_sk) AND (ws_sold_date_sk = d_date_sk) AND (d_year = 2002) AND ((d_moy >= 1) AND (d_moy <= (1 + 3)))
)) OR exists((
SELECT *
FROM catalog_sales, date_dim
WHERE (c.c_customer_sk = cs_ship_customer_sk) AND (cs_sold_date_sk = d_date_sk) AND (d_year = 2002) AND ((d_moy >= 1) AND (d_moy <= (1 + 3)))
)))
GROUP BY
cd_gender,
cd_marital_status,
cd_education_status,
cd_purchase_estimate,
cd_credit_rating,
cd_dep_count,
cd_dep_employed_count,
cd_dep_college_count
ORDER BY
cd_gender,
cd_marital_status,
cd_education_status,
cd_purchase_estimate,
cd_credit_rating,
cd_dep_count,
cd_dep_employed_count,
cd_dep_college_count
LIMIT 100;
Q11
WITH year_total AS
(
SELECT
c_customer_id AS customer_id,
c_first_name AS customer_first_name,
c_last_name AS customer_last_name,
c_preferred_cust_flag AS customer_preferred_cust_flag,
c_birth_country AS customer_birth_country,
c_login AS customer_login,
c_email_address AS customer_email_address,
d_year AS dyear,
sum(ss_ext_list_price - ss_ext_discount_amt) AS year_total,
's' AS sale_type
FROM customer, store_sales, date_dim
WHERE (c_customer_sk = ss_customer_sk) AND (ss_sold_date_sk = d_date_sk)
GROUP BY
c_customer_id,
c_first_name,
c_last_name,
c_preferred_cust_flag,
c_birth_country,
c_login,
c_email_address,
d_year
UNION ALL
SELECT
c_customer_id AS customer_id,
c_first_name AS customer_first_name,
c_last_name AS customer_last_name,
c_preferred_cust_flag AS customer_preferred_cust_flag,
c_birth_country AS customer_birth_country,
c_login AS customer_login,
c_email_address AS customer_email_address,
d_year AS dyear,
sum(ws_ext_list_price - ws_ext_discount_amt) AS year_total,
'w' AS sale_type
FROM customer, web_sales, date_dim
WHERE (c_customer_sk = ws_bill_customer_sk) AND (ws_sold_date_sk = d_date_sk)
GROUP BY
c_customer_id,
c_first_name,
c_last_name,
c_preferred_cust_flag,
c_birth_country,
c_login,
c_email_address,
d_year
)
SELECT
t_s_secyear.customer_id,
t_s_secyear.customer_first_name,
t_s_secyear.customer_last_name,
t_s_secyear.customer_preferred_cust_flag
FROM year_total AS t_s_firstyear, year_total AS t_s_secyear, year_total AS t_w_firstyear, year_total AS t_w_secyear
WHERE (t_s_secyear.customer_id = t_s_firstyear.customer_id) AND (t_s_firstyear.customer_id = t_w_secyear.customer_id) AND (t_s_firstyear.customer_id = t_w_firstyear.customer_id) AND (t_s_firstyear.sale_type = 's') AND (t_w_firstyear.sale_type = 'w') AND (t_s_secyear.sale_type = 's') AND (t_w_secyear.sale_type = 'w') AND (t_s_firstyear.dyear = 2001) AND (t_s_secyear.dyear = (2001 + 1)) AND (t_w_firstyear.dyear = 2001) AND (t_w_secyear.dyear = (2001 + 1)) AND (t_s_firstyear.year_total > 0) AND (t_w_firstyear.year_total > 0) AND (multiIf(t_w_firstyear.year_total > 0, CAST(t_w_secyear.year_total AS Float32) / t_w_firstyear.year_total, 0.) > multiIf(t_s_firstyear.year_total > 0, CAST(t_s_secyear.year_total AS Float32) / t_s_firstyear.year_total, 0.))
ORDER BY
t_s_secyear.customer_id,
t_s_secyear.customer_first_name,
t_s_secyear.customer_last_name,
t_s_secyear.customer_preferred_cust_flag
LIMIT 100;
Q12
SELECT
i_item_id,
i_item_desc,
i_category,
i_class,
i_current_price,
sum(ws_ext_sales_price) AS itemrevenue,
(sum(ws_ext_sales_price) * 100) / sum(sum(ws_ext_sales_price)) OVER (PARTITION BY i_class) AS revenueratio
FROM web_sales, item, date_dim
WHERE (ws_item_sk = i_item_sk) AND (i_category IN ('Sports', 'Books', 'Home')) AND (ws_sold_date_sk = d_date_sk) AND ((d_date >= CAST('1999-02-22', 'date')) AND (d_date <= (CAST('1999-02-22', 'date') + INTERVAL 30 day)))
GROUP BY
i_item_id,
i_item_desc,
i_category,
i_class,
i_current_price
ORDER BY
i_category,
i_class,
i_item_id,
i_item_desc,
revenueratio
LIMIT 100;
Q13
SELECT
avg(ss_quantity),
avg(ss_ext_sales_price),
avg(ss_ext_wholesale_cost),
sum(ss_ext_wholesale_cost)
FROM store_sales, store, customer_demographics, household_demographics, customer_address, date_dim
WHERE (s_store_sk = ss_store_sk)
AND (ss_sold_date_sk = d_date_sk)
AND (d_year = 2001)
AND (
(
(ss_hdemo_sk = hd_demo_sk)
AND (cd_demo_sk = ss_cdemo_sk)
AND (cd_marital_status = 'M')
AND (cd_education_status = 'Advanced Degree')
AND (ss_sales_price BETWEEN 100.00 AND 150.00)
AND (hd_dep_count = 3)
)
OR (
(ss_hdemo_sk = hd_demo_sk)
AND (cd_demo_sk = ss_cdemo_sk)
AND (cd_marital_status = 'S')
AND (cd_education_status = 'College')
AND (ss_sales_price BETWEEN 50.00 AND 100.00)
AND (hd_dep_count = 1)
)
OR (
(ss_hdemo_sk = hd_demo_sk)
AND (cd_demo_sk = ss_cdemo_sk)
AND (cd_marital_status = 'W')
AND (cd_education_status = '2 yr Degree')
AND (ss_sales_price BETWEEN 150.00 AND 200.)
AND (hd_dep_count = 1)
)
)
AND (
(
(ss_addr_sk = ca_address_sk)
AND (ca_country = 'United States')
AND (ca_state IN ('TX', 'OH', 'TX'))
AND (ss_net_profit BETWEEN 100 AND 200)
)
OR (
(ss_addr_sk = ca_address_sk)
AND (ca_country = 'United States')
AND (ca_state IN ('OR', 'NM', 'KY'))
AND (ss_net_profit BETWEEN 150 AND 300)
)
OR (
(ss_addr_sk = ca_address_sk)
AND (ca_country = 'United States')
AND (ca_state IN ('VA', 'TX', 'MS'))
AND (ss_net_profit BETWEEN 50 AND 250)
)
);
Q14
WITH
cross_items AS
(
SELECT i_item_sk AS ss_item_sk
FROM item,
(
SELECT
iss.i_brand_id AS brand_id,
iss.i_class_id AS class_id,
iss.i_category_id AS category_id
FROM store_sales, item AS iss, date_dim AS d1
WHERE (ss_item_sk = iss.i_item_sk) AND (ss_sold_date_sk = d1.d_date_sk) AND (d1.d_year BETWEEN 1999 AND 1999 + 2)
INTERSECT
SELECT
ics.i_brand_id,
ics.i_class_id,
ics.i_category_id
FROM catalog_sales, item AS ics, date_dim AS d2
WHERE (cs_item_sk = ics.i_item_sk) AND (cs_sold_date_sk = d2.d_date_sk) AND (d2.d_year BETWEEN 1999 AND 1999 + 2)
INTERSECT
SELECT
iws.i_brand_id,
iws.i_class_id,
iws.i_category_id
FROM web_sales, item AS iws, date_dim AS d3
WHERE (ws_item_sk = iws.i_item_sk) AND (ws_sold_date_sk = d3.d_date_sk) AND (d3.d_year BETWEEN 1999 AND 1999 + 2)
)
WHERE (i_brand_id = brand_id) AND (i_class_id = class_id) AND (i_category_id = category_id)
),
avg_sales AS
(
SELECT avg(quantity * list_price) AS average_sales
FROM
(
SELECT
ss_quantity AS quantity,
ss_list_price AS list_price
FROM store_sales, date_dim
WHERE (ss_sold_date_sk = d_date_sk) AND (d_year BETWEEN 1999 AND 1999 + 2)
UNION ALL
SELECT
cs_quantity AS quantity,
cs_list_price AS list_price
FROM catalog_sales, date_dim
WHERE (cs_sold_date_sk = d_date_sk) AND (d_year BETWEEN 1999 AND 1999 + 2)
UNION ALL
SELECT
ws_quantity AS quantity,
ws_list_price AS list_price
FROM web_sales, date_dim
WHERE (ws_sold_date_sk = d_date_sk) AND (d_year BETWEEN 1999 AND 1999 + 2)
) AS x
)
SELECT
channel,
i_brand_id,
i_class_id,
i_category_id,
sum(sales),
sum(number_sales)
FROM
(
SELECT
'store' AS channel,
i_brand_id,
i_class_id,
i_category_id,
sum(ss_quantity * ss_list_price) AS sales,
count(*) AS number_sales
FROM store_sales, item, date_dim
WHERE (ss_item_sk IN (
SELECT ss_item_sk
FROM cross_items
)) AND (ss_item_sk = i_item_sk) AND (ss_sold_date_sk = d_date_sk) AND (d_year = (1999 + 2)) AND (d_moy = 11)
GROUP BY
i_brand_id,
i_class_id,
i_category_id
HAVING sum(ss_quantity * ss_list_price) > (
SELECT average_sales
FROM avg_sales
)
UNION ALL
SELECT
'catalog' AS channel,
i_brand_id,
i_class_id,
i_category_id,
sum(cs_quantity * cs_list_price) AS sales,
count(*) AS number_sales
FROM catalog_sales, item, date_dim
WHERE (cs_item_sk IN (
SELECT ss_item_sk
FROM cross_items
)) AND (cs_item_sk = i_item_sk) AND (cs_sold_date_sk = d_date_sk) AND (d_year = (1999 + 2)) AND (d_moy = 11)
GROUP BY
i_brand_id,
i_class_id,
i_category_id
HAVING sum(cs_quantity * cs_list_price) > (
SELECT average_sales
FROM avg_sales
)
UNION ALL
SELECT
'web' AS channel,
i_brand_id,
i_class_id,
i_category_id,
sum(ws_quantity * ws_list_price) AS sales,
count(*) AS number_sales
FROM web_sales, item, date_dim
WHERE (ws_item_sk IN (
SELECT ss_item_sk
FROM cross_items
)) AND (ws_item_sk = i_item_sk) AND (ws_sold_date_sk = d_date_sk) AND (d_year = (1999 + 2)) AND (d_moy = 11)
GROUP BY
i_brand_id,
i_class_id,
i_category_id
HAVING sum(ws_quantity * ws_list_price) > (
SELECT average_sales
FROM avg_sales
)
) AS y
GROUP BY
channel,
i_brand_id,
i_class_id,
i_category_id
WITH ROLLUP
ORDER BY
channel,
i_brand_id,
i_class_id,
i_category_id
LIMIT 100;
WITH
cross_items AS
(
SELECT i_item_sk AS ss_item_sk
FROM item,
(
SELECT
iss.i_brand_id AS brand_id,
iss.i_class_id AS class_id,
iss.i_category_id AS category_id
FROM store_sales, item AS iss, date_dim AS d1
WHERE (ss_item_sk = iss.i_item_sk) AND (ss_sold_date_sk = d1.d_date_sk) AND (d1.d_year BETWEEN 1999 AND 1999 + 2)
INTERSECT
SELECT
ics.i_brand_id,
ics.i_class_id,
ics.i_category_id
FROM catalog_sales, item AS ics, date_dim AS d2
WHERE (cs_item_sk = ics.i_item_sk) AND (cs_sold_date_sk = d2.d_date_sk) AND (d2.d_year BETWEEN 1999 AND 1999 + 2)
INTERSECT
SELECT
iws.i_brand_id,
iws.i_class_id,
iws.i_category_id
FROM web_sales, item AS iws, date_dim AS d3
WHERE (ws_item_sk = iws.i_item_sk) AND (ws_sold_date_sk = d3.d_date_sk) AND (d3.d_year BETWEEN 1999 AND 1999 + 2)
)
WHERE (i_brand_id = brand_id) AND (i_class_id = class_id) AND (i_category_id = category_id)
),
avg_sales AS
(
SELECT avg(quantity * list_price) AS average_sales
FROM
(
SELECT
ss_quantity AS quantity,
ss_list_price AS list_price
FROM store_sales, date_dim
WHERE (ss_sold_date_sk = d_date_sk) AND (d_year BETWEEN 1999 AND 1999 + 2)
UNION ALL
SELECT
cs_quantity AS quantity,
cs_list_price AS list_price
FROM catalog_sales, date_dim
WHERE (cs_sold_date_sk = d_date_sk) AND (d_year BETWEEN 1999 AND 1999 + 2)
UNION ALL
SELECT
ws_quantity AS quantity,
ws_list_price AS list_price
FROM web_sales, date_dim
WHERE (ws_sold_date_sk = d_date_sk) AND (d_year BETWEEN 1999 AND 1999 + 2)
) AS x
)
SELECT
this_year.channel AS ty_channel,
this_year.i_brand_id AS ty_brand,
this_year.i_class_id AS ty_class,
this_year.i_category_id AS ty_category,
this_year.sales AS ty_sales,
this_year.number_sales AS ty_number_sales,
last_year.channel AS ly_channel,
last_year.i_brand_id AS ly_brand,
last_year.i_class_id AS ly_class,
last_year.i_category_id AS ly_category,
last_year.sales AS ly_sales,
last_year.number_sales AS ly_number_sales
FROM
(
SELECT
'store' AS channel,
i_brand_id,
i_class_id,
i_category_id,
sum(ss_quantity * ss_list_price) AS sales,
count(*) AS number_sales
FROM store_sales, item, date_dim
WHERE (ss_item_sk IN (
SELECT ss_item_sk
FROM cross_items
)) AND (ss_item_sk = i_item_sk) AND (ss_sold_date_sk = d_date_sk) AND (d_week_seq = (
SELECT d_week_seq
FROM date_dim
WHERE (d_year = 1999 + 1) AND (d_moy = 12) AND (d_dom = 11)
))
GROUP BY
i_brand_id,
i_class_id,
i_category_id
HAVING sum(ss_quantity * ss_list_price) > (
SELECT average_sales
FROM avg_sales
)
) AS this_year,
(
SELECT
'store' AS channel,
i_brand_id,
i_class_id,
i_category_id,
sum(ss_quantity * ss_list_price) AS sales,
count(*) AS number_sales
FROM store_sales, item, date_dim
WHERE (ss_item_sk IN (
SELECT ss_item_sk
FROM cross_items
)) AND (ss_item_sk = i_item_sk) AND (ss_sold_date_sk = d_date_sk) AND (d_week_seq = (
SELECT d_week_seq
FROM date_dim
WHERE (d_year = 1999) AND (d_moy = 12) AND (d_dom = 11)
))
GROUP BY
i_brand_id,
i_class_id,
i_category_id
HAVING sum(ss_quantity * ss_list_price) > (
SELECT average_sales
FROM avg_sales
)
) AS last_year
WHERE (this_year.i_brand_id = last_year.i_brand_id)
AND (this_year.i_class_id = last_year.i_class_id)
AND (this_year.i_category_id = last_year.i_category_id)
ORDER BY
this_year.channel,
this_year.i_brand_id,
this_year.i_class_id,
this_year.i_category_id
LIMIT 100;
Q15
SELECT
ca_zip,
sum(cs_sales_price)
FROM catalog_sales, customer, customer_address, date_dim
WHERE (cs_bill_customer_sk = c_customer_sk)
AND (c_current_addr_sk = ca_address_sk)
AND ((substr(ca_zip, 1, 5) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792'))
OR (ca_state IN ('CA', 'WA', 'GA'))
OR (cs_sales_price > 500))
AND (cs_sold_date_sk = d_date_sk)
AND (d_qoy = 2)
AND (d_year = 2001)
GROUP BY ca_zip
ORDER BY ca_zip
LIMIT 100;
Q16
SELECT
count(DISTINCT cs_order_number) AS "order count",
sum(cs_ext_ship_cost) AS "total shipping cost",
sum(cs_net_profit) AS "total net profit"
FROM catalog_sales AS cs1, date_dim, customer_address, call_center
WHERE (d_date BETWEEN '2002-2-01' AND (CAST('2002-2-01' AS date) + INTERVAL 60 DAY))
AND (cs1.cs_ship_date_sk = d_date_sk)
AND (cs1.cs_ship_addr_sk = ca_address_sk)
AND (ca_state = 'GA')
AND (cs1.cs_call_center_sk = cc_call_center_sk)
AND (cc_county IN ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County'))
AND EXISTS (
SELECT *
FROM catalog_sales AS cs2
WHERE (cs1.cs_order_number = cs2.cs_order_number) AND (cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk)
)
AND NOT EXISTS (
SELECT *
FROM catalog_returns AS cr1
WHERE (cs1.cs_order_number = cr1.cr_order_number)
)
ORDER BY count(DISTINCT cs_order_number)
LIMIT 100;
Q17
The query returns nan instead of NULL when stddev_samp is called on a single value. Corresponding issue: https://github.com/ClickHouse/ClickHouse/issues/94683. Otherwise, the result is correct.
SELECT
i_item_id,
i_item_desc,
s_state,
count(ss_quantity) AS store_sales_quantitycount,
avg(ss_quantity) AS store_sales_quantityave,
stddev_samp(ss_quantity) AS store_sales_quantitystdev,
stddev_samp(ss_quantity) / avg(ss_quantity) AS store_sales_quantitycov,
count(sr_return_quantity) AS store_returns_quantitycount,
avg(sr_return_quantity) AS store_returns_quantityave,
stddev_samp(sr_return_quantity) AS store_returns_quantitystdev,
stddev_samp(sr_return_quantity) / avg(sr_return_quantity) AS store_returns_quantitycov,
count(cs_quantity) AS catalog_sales_quantitycount,
avg(cs_quantity) AS catalog_sales_quantityave,
stddev_samp(cs_quantity) AS catalog_sales_quantitystdev,
stddev_samp(cs_quantity) / avg(cs_quantity) AS catalog_sales_quantitycov
FROM store_sales, store_returns, catalog_sales, date_dim AS d1, date_dim AS d2, date_dim AS d3, store, item
WHERE (d1.d_quarter_name = '2001Q1')
AND (d1.d_date_sk = ss_sold_date_sk)
AND (i_item_sk = ss_item_sk)
AND (s_store_sk = ss_store_sk)
AND (ss_customer_sk = sr_customer_sk)
AND (ss_item_sk = sr_item_sk)
AND (ss_ticket_number = sr_ticket_number)
AND (sr_returned_date_sk = d2.d_date_sk)
AND (d2.d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3'))
AND (sr_customer_sk = cs_bill_customer_sk)
AND (sr_item_sk = cs_item_sk)
AND (cs_sold_date_sk = d3.d_date_sk)
AND (d3.d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3'))
GROUP BY
i_item_id,
i_item_desc,
s_state
ORDER BY
i_item_id,
i_item_desc,
s_state
LIMIT 100;
Q18
SELECT
i_item_id,
ca_country,
ca_state,
ca_county,
avg(CAST(cs_quantity AS Nullable(decimal(12, 2)))) AS agg1,
avg(CAST(cs_list_price AS Nullable(decimal(12, 2)))) AS agg2,
avg(CAST(cs_coupon_amt AS Nullable(decimal(12, 2)))) AS agg3,
avg(CAST(cs_sales_price AS Nullable(decimal(12, 2)))) AS agg4,
avg(CAST(cs_net_profit AS Nullable(decimal(12, 2)))) AS agg5,
avg(CAST(c_birth_year AS Nullable(decimal(12, 2)))) AS agg6,
avg(CAST(cd1.cd_dep_count AS Nullable(decimal(12, 2)))) AS agg7
FROM catalog_sales, customer_demographics AS cd1, customer_demographics AS cd2, customer, customer_address, date_dim, item
WHERE (cs_sold_date_sk = d_date_sk)
AND (cs_item_sk = i_item_sk)
AND (cs_bill_cdemo_sk = cd1.cd_demo_sk)
AND (cs_bill_customer_sk = c_customer_sk)
AND (cd1.cd_gender = 'F')
AND (cd1.cd_education_status = 'Unknown')
AND (c_current_cdemo_sk = cd2.cd_demo_sk)
AND (c_current_addr_sk = ca_address_sk)
AND (c_birth_month IN (1, 6, 8, 9, 12, 2))
AND (d_year = 1998)
AND (ca_state IN ('MS', 'IN', 'ND', 'OK', 'NM', 'VA', 'MS'))
GROUP BY ROLLUP (i_item_id, ca_country, ca_state, ca_county)
ORDER BY
ca_country,
ca_state,
ca_county,
i_item_id
LIMIT 100;
Q19
SELECT
i_brand_id AS brand_id,
i_brand AS brand,
i_manufact_id,
i_manufact,
sum(ss_ext_sales_price) AS ext_price
FROM date_dim, store_sales, item, customer, customer_address, store
WHERE (d_date_sk = ss_sold_date_sk)
AND (ss_item_sk = i_item_sk)
AND (i_manager_id = 8)
AND (d_moy = 11)
AND (d_year = 1998)
AND (ss_customer_sk = c_customer_sk)
AND (c_current_addr_sk = ca_address_sk)
AND (substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5))
AND (ss_store_sk = s_store_sk)
GROUP BY
i_brand,
i_brand_id,
i_manufact_id,
i_manufact
ORDER BY
ext_price DESC,
i_brand,
i_brand_id,
i_manufact_id,
i_manufact
LIMIT 100;
Q20
SELECT
i_item_id,
i_item_desc,
i_category,
i_class,
i_current_price,
sum(cs_ext_sales_price) AS itemrevenue,
sum(cs_ext_sales_price) * 100 / sum(sum(cs_ext_sales_price)) OVER (PARTITION BY i_class) AS revenueratio
FROM catalog_sales, item, date_dim
WHERE (cs_item_sk = i_item_sk)
AND (i_category IN ('Sports', 'Books', 'Home'))
AND (cs_sold_date_sk = d_date_sk)
AND (d_date BETWEEN CAST('1999-02-22' AS date) AND (CAST('1999-02-22' AS date) + INTERVAL 30 DAY))
GROUP BY
i_item_id,
i_item_desc,
i_category,
i_class,
i_current_price
ORDER BY
i_category,
i_class,
i_item_id,
i_item_desc,
revenueratio
LIMIT 100;
Q21
SELECT *
FROM
(
SELECT
w_warehouse_name,
i_item_id,
sum(CASE WHEN (CAST(d_date AS date) < CAST('2000-03-11' AS date)) THEN inv_quantity_on_hand ELSE 0 END) AS inv_before,
sum(CASE WHEN (CAST(d_date AS date) >= CAST('2000-03-11' AS date)) THEN inv_quantity_on_hand ELSE 0 END) AS inv_after
FROM inventory, warehouse, item, date_dim
WHERE (i_current_price BETWEEN 0.99 AND 1.49)
AND (i_item_sk = inv_item_sk)
AND (inv_warehouse_sk = w_warehouse_sk)
AND (inv_date_sk = d_date_sk)
AND (d_date BETWEEN (CAST('2000-03-11' AS date) - INTERVAL 30 DAY) AND (CAST('2000-03-11' AS date) + INTERVAL 30 DAY))
GROUP BY w_warehouse_name, i_item_id
) AS x
WHERE (CASE WHEN inv_before > 0 THEN inv_after / inv_before ELSE NULL END) BETWEEN 2.0 / 3.0 AND 3.0 / 2.0
ORDER BY
w_warehouse_name,
i_item_id
LIMIT 100;
Q22
SELECT
i_product_name,
i_brand,
i_class,
i_category,
avg(inv_quantity_on_hand) AS qoh
FROM inventory, date_dim, item
WHERE (inv_date_sk = d_date_sk)
AND (inv_item_sk = i_item_sk)
AND (d_month_seq BETWEEN 1200 AND 1200 + 11)
GROUP BY ROLLUP(i_product_name, i_brand, i_class, i_category)
ORDER BY qoh, i_product_name, i_brand, i_class, i_category
LIMIT 100;
Q23
WITH
frequent_ss_items AS
(
SELECT
substr(i_item_desc, 1, 30) AS itemdesc,
i_item_sk AS item_sk,
d_date AS solddate,
count(*) AS cnt
FROM store_sales, date_dim, item
WHERE (ss_sold_date_sk = d_date_sk)
AND (ss_item_sk = i_item_sk)
AND (d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3))
GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date
HAVING count(*) > 4
),
max_store_sales AS
(
SELECT max(csales) AS tpcds_cmax
FROM
(
SELECT
c_customer_sk,
sum(ss_quantity * ss_sales_price) AS csales
FROM store_sales, customer, date_dim
WHERE (ss_customer_sk = c_customer_sk)
AND (ss_sold_date_sk = d_date_sk)
AND (d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3))
GROUP BY c_customer_sk
)
),
best_ss_customer AS
(
SELECT
c_customer_sk,
sum(ss_quantity * ss_sales_price) AS ssales
FROM store_sales, customer
WHERE (ss_customer_sk = c_customer_sk)
GROUP BY c_customer_sk
HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) * (
SELECT *
FROM max_store_sales
)
)
SELECT sum(sales)
FROM
(
SELECT cs_quantity * cs_list_price AS sales
FROM catalog_sales, date_dim
WHERE (d_year = 2000)
AND (d_moy = 2)
AND (cs_sold_date_sk = d_date_sk)
AND (cs_item_sk IN (SELECT item_sk FROM frequent_ss_items))
AND (cs_bill_customer_sk IN (SELECT c_customer_sk FROM best_ss_customer))
UNION ALL
SELECT ws_quantity * ws_list_price AS sales
FROM web_sales, date_dim
WHERE (d_year = 2000)
AND (d_moy = 2)
AND (ws_sold_date_sk = d_date_sk)
AND (ws_item_sk IN (SELECT item_sk FROM frequent_ss_items))
AND (ws_bill_customer_sk IN (SELECT c_customer_sk FROM best_ss_customer))
)
LIMIT 100;
WITH
frequent_ss_items AS
(
SELECT
substr(i_item_desc, 1, 30) AS itemdesc,
i_item_sk AS item_sk,
d_date AS solddate,
count(*) AS cnt
FROM store_sales, date_dim, item
WHERE (ss_sold_date_sk = d_date_sk)
AND (ss_item_sk = i_item_sk)
AND (d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3))
GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date
HAVING count(*) > 4
),
max_store_sales AS
(
SELECT max(csales) AS tpcds_cmax
FROM
(
SELECT
c_customer_sk,
sum(ss_quantity * ss_sales_price) AS csales
FROM store_sales, customer, date_dim
WHERE (ss_customer_sk = c_customer_sk)
AND (ss_sold_date_sk = d_date_sk)
AND (d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3))
GROUP BY c_customer_sk
)
),
best_ss_customer AS
(
SELECT
c_customer_sk,
sum(ss_quantity * ss_sales_price) AS ssales
FROM store_sales, customer
WHERE (ss_customer_sk = c_customer_sk)
GROUP BY c_customer_sk
HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) * (
SELECT *
FROM max_store_sales
)
)
SELECT
c_last_name,
c_first_name,
sales
FROM
(
SELECT
c_last_name,
c_first_name,
sum(cs_quantity * cs_list_price) AS sales
FROM catalog_sales, customer, date_dim
WHERE (d_year = 2000)
AND (d_moy = 2)
AND (cs_sold_date_sk = d_date_sk)
AND (cs_item_sk IN (SELECT item_sk FROM frequent_ss_items))
AND (cs_bill_customer_sk IN (SELECT c_customer_sk FROM best_ss_customer))
AND (cs_bill_customer_sk = c_customer_sk)
GROUP BY c_last_name, c_first_name
UNION ALL
SELECT
c_last_name,
c_first_name,
sum(ws_quantity * ws_list_price) AS sales
FROM web_sales, customer, date_dim
WHERE (d_year = 2000)
AND (d_moy = 2)
AND (ws_sold_date_sk = d_date_sk)
AND (ws_item_sk IN (SELECT item_sk FROM frequent_ss_items))
AND (ws_bill_customer_sk IN (SELECT c_customer_sk FROM best_ss_customer))
AND (ws_bill_customer_sk = c_customer_sk)
GROUP BY c_last_name, c_first_name
)
ORDER BY c_last_name, c_first_name, sales
LIMIT 100;
Q24
WITH
ssales AS
(
SELECT
c_last_name,
c_first_name,
s_store_name,
ca_state,
s_state,
i_color,
i_current_price,
i_manager_id,
i_units,
i_size,
sum(ss_net_paid) AS netpaid
FROM store_sales, store_returns, store, item, customer, customer_address
WHERE (ss_ticket_number = sr_ticket_number)
AND (ss_item_sk = sr_item_sk)
AND (ss_customer_sk = c_customer_sk)
AND (ss_item_sk = i_item_sk)
AND (ss_store_sk = s_store_sk)
AND (c_current_addr_sk = ca_address_sk)
AND (c_birth_country <> upper(ca_country))
AND (s_zip = ca_zip)
AND (s_market_id = 8)
GROUP BY
c_last_name,
c_first_name,
s_store_name,
ca_state,
s_state,
i_color,
i_current_price,
i_manager_id,
i_units,
i_size
)
SELECT
c_last_name,
c_first_name,
s_store_name,
sum(netpaid) AS paid
FROM ssales
WHERE (i_color = 'peach')
GROUP BY
c_last_name,
c_first_name,
s_store_name
HAVING sum(netpaid) > (
SELECT 0.05 * avg(netpaid)
FROM ssales
)
ORDER BY
c_last_name,
c_first_name,
s_store_name;
WITH
ssales AS
(
SELECT
c_last_name,
c_first_name,
s_store_name,
ca_state,
s_state,
i_color,
i_current_price,
i_manager_id,
i_units,
i_size,
sum(ss_net_paid) AS netpaid
FROM store_sales, store_returns, store, item, customer, customer_address
WHERE (ss_ticket_number = sr_ticket_number)
AND (ss_item_sk = sr_item_sk)
AND (ss_customer_sk = c_customer_sk)
AND (ss_item_sk = i_item_sk)
AND (ss_store_sk = s_store_sk)
AND (c_current_addr_sk = ca_address_sk)
AND (c_birth_country <> upper(ca_country))
AND (s_zip = ca_zip)
AND (s_market_id = 8)
GROUP BY
c_last_name,
c_first_name,
s_store_name,
ca_state,
s_state,
i_color,
i_current_price,
i_manager_id,
i_units,
i_size
)
SELECT
c_last_name,
c_first_name,
s_store_name,
sum(netpaid) AS paid
FROM ssales
WHERE (i_color = 'saddle')
GROUP BY
c_last_name,
c_first_name,
s_store_name
HAVING sum(netpaid) > (
SELECT 0.05 * avg(netpaid)
FROM ssales
)
ORDER BY
c_last_name,
c_first_name,
s_store_name;
Q25
SELECT
i_item_id,
i_item_desc,
s_store_id,
s_store_name,
sum(ss_net_profit) AS store_sales_profit,
sum(sr_net_loss) AS store_returns_loss,
sum(cs_net_profit) AS catalog_sales_profit
FROM store_sales, store_returns, catalog_sales, date_dim AS d1, date_dim AS d2, date_dim AS d3, store, item
WHERE (d1.d_moy = 4)
AND (d1.d_year = 2001)
AND (d1.d_date_sk = ss_sold_date_sk)
AND (i_item_sk = ss_item_sk)
AND (s_store_sk = ss_store_sk)
AND (ss_customer_sk = sr_customer_sk)
AND (ss_item_sk = sr_item_sk)
AND (ss_ticket_number = sr_ticket_number)
AND (sr_returned_date_sk = d2.d_date_sk)
AND (d2.d_moy BETWEEN 4 AND 10)
AND (d2.d_year = 2001)
AND (sr_customer_sk = cs_bill_customer_sk)
AND (sr_item_sk = cs_item_sk)
AND (cs_sold_date_sk = d3.d_date_sk)
AND (d3.d_moy BETWEEN 4 AND 10)
AND (d3.d_year = 2001)
GROUP BY
i_item_id,
i_item_desc,
s_store_id,
s_store_name
ORDER BY
i_item_id,
i_item_desc,
s_store_id,
s_store_name
LIMIT 100;
Q26
SELECT
i_item_id,
avg(cs_quantity) AS agg1,
avg(cs_list_price) AS agg2,
avg(cs_coupon_amt) AS agg3,
avg(cs_sales_price) AS agg4
FROM catalog_sales, customer_demographics, date_dim, item, promotion
WHERE (cs_sold_date_sk = d_date_sk)
AND (cs_item_sk = i_item_sk)
AND (cs_bill_cdemo_sk = cd_demo_sk)
AND (cs_promo_sk = p_promo_sk)
AND (cd_gender = 'M')
AND (cd_marital_status = 'S')
AND (cd_education_status = 'College')
AND ((p_channel_email = 'N') OR (p_channel_event = 'N'))
AND (d_year = 2000)
GROUP BY i_item_id
ORDER BY i_item_id
LIMIT 100;
Q27
SELECT
i_item_id,
s_state,
grouping(s_state) AS g_state,
avg(ss_quantity) AS agg1,
avg(ss_list_price) AS agg2,
avg(ss_coupon_amt) AS agg3,
avg(ss_sales_price) AS agg4
FROM store_sales, customer_demographics, date_dim, store, item
WHERE (ss_sold_date_sk = d_date_sk)
AND (ss_item_sk = i_item_sk)
AND (ss_store_sk = s_store_sk)
AND (ss_cdemo_sk = cd_demo_sk)
AND (cd_gender = 'M')
AND (cd_marital_status = 'S')
AND (cd_education_status = 'College')
AND (d_year = 2002)
AND (s_state IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN'))
GROUP BY ROLLUP(i_item_id, s_state)
ORDER BY
i_item_id,
s_state
LIMIT 100;
The query does not work out-of-the-box due to https://github.com/ClickHouse/ClickHouse/issues/95299. This alternative formulation with a minor fix works:
SELECT
CAST(i_item_id AS String),
s_state,
grouping(s_state) AS g_state,
avg(ss_quantity) AS agg1,
avg(ss_list_price) AS agg2,
avg(ss_coupon_amt) AS agg3,
avg(ss_sales_price) AS agg4
FROM store_sales, customer_demographics, date_dim, store, item
WHERE (ss_sold_date_sk = d_date_sk)
AND (ss_item_sk = i_item_sk)
AND (ss_store_sk = s_store_sk)
AND (ss_cdemo_sk = cd_demo_sk)
AND (cd_gender = 'M')
AND (cd_marital_status = 'S')
AND (cd_education_status = 'College')
AND (d_year = 2002)
AND (s_state IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN'))
GROUP BY ROLLUP(CAST(i_item_id AS String), s_state)
ORDER BY
CAST(i_item_id AS String),
s_state
LIMIT 100;
Q28
SELECT *
FROM
(
SELECT
avg(ss_list_price) AS B1_LP,
count(ss_list_price) AS B1_CNT,
count(DISTINCT ss_list_price) AS B1_CNTD
FROM store_sales
WHERE (ss_quantity BETWEEN 0 AND 5)
AND ((ss_list_price BETWEEN 8 AND 8 + 10)
OR (ss_coupon_amt BETWEEN 459 AND 459 + 1000)
OR (ss_wholesale_cost BETWEEN 57 AND 57 + 20))
) AS B1,
(
SELECT
avg(ss_list_price) AS B2_LP,
count(ss_list_price) AS B2_CNT,
count(DISTINCT ss_list_price) AS B2_CNTD
FROM store_sales
WHERE (ss_quantity BETWEEN 6 AND 10)
AND ((ss_list_price BETWEEN 90 AND 90 + 10)
OR (ss_coupon_amt BETWEEN 2323 AND 2323 + 1000)
OR (ss_wholesale_cost BETWEEN 31 AND 31 + 20))
) AS B2,
(
SELECT
avg(ss_list_price) AS B3_LP,
count(ss_list_price) AS B3_CNT,
count(DISTINCT ss_list_price) AS B3_CNTD
FROM store_sales
WHERE (ss_quantity BETWEEN 11 AND 15)
AND ((ss_list_price BETWEEN 142 AND 142 + 10)
OR (ss_coupon_amt BETWEEN 12214 AND 12214 + 1000)
OR (ss_wholesale_cost BETWEEN 79 AND 79 + 20))
) AS B3,
(
SELECT
avg(ss_list_price) AS B4_LP,
count(ss_list_price) AS B4_CNT,
count(DISTINCT ss_list_price) AS B4_CNTD
FROM store_sales
WHERE (ss_quantity BETWEEN 16 AND 20)
AND ((ss_list_price BETWEEN 135 AND 135 + 10)
OR (ss_coupon_amt BETWEEN 6071 AND 6071 + 1000)
OR (ss_wholesale_cost BETWEEN 38 AND 38 + 20))
) AS B4,
(
SELECT
avg(ss_list_price) AS B5_LP,
count(ss_list_price) AS B5_CNT,
count(DISTINCT ss_list_price) AS B5_CNTD
FROM store_sales
WHERE (ss_quantity BETWEEN 21 AND 25)
AND ((ss_list_price BETWEEN 122 AND 122 + 10)
OR (ss_coupon_amt BETWEEN 836 AND 836 + 1000)
OR (ss_wholesale_cost BETWEEN 17 AND 17 + 20))
) AS B5,
(
SELECT
avg(ss_list_price) AS B6_LP,
count(ss_list_price) AS B6_CNT,
count(DISTINCT ss_list_price) AS B6_CNTD
FROM store_sales
WHERE (ss_quantity BETWEEN 26 AND 30)
AND ((ss_list_price BETWEEN 154 AND 154 + 10)
OR (ss_coupon_amt BETWEEN 7326 AND 7326 + 1000)
OR (ss_wholesale_cost BETWEEN 7 AND 7 + 20))
) AS B6
LIMIT 100;
Q29
SELECT
i_item_id,
i_item_desc,
s_store_id,
s_store_name,
sum(ss_quantity) AS store_sales_quantity,
sum(sr_return_quantity) AS store_returns_quantity,
sum(cs_quantity) AS catalog_sales_quantity
FROM store_sales, store_returns, catalog_sales, date_dim AS d1, date_dim AS d2, date_dim AS d3, store, item
WHERE (d1.d_moy = 9)
AND (d1.d_year = 1999)
AND (d1.d_date_sk = ss_sold_date_sk)
AND (i_item_sk = ss_item_sk)
AND (s_store_sk = ss_store_sk)
AND (ss_customer_sk = sr_customer_sk)
AND (ss_item_sk = sr_item_sk)
AND (ss_ticket_number = sr_ticket_number)
AND (sr_returned_date_sk = d2.d_date_sk)
AND (d2.d_moy BETWEEN 9 AND 9 + 3)
AND (d2.d_year = 1999)
AND (sr_customer_sk = cs_bill_customer_sk)
AND (sr_item_sk = cs_item_sk)
AND (cs_sold_date_sk = d3.d_date_sk)
AND (d3.d_year IN (1999, 1999 + 1, 1999 + 2))
GROUP BY
i_item_id,
i_item_desc,
s_store_id,
s_store_name
ORDER BY
i_item_id,
i_item_desc,
s_store_id,
s_store_name
LIMIT 100;
Q30
WITH
customer_total_return AS
(
SELECT
wr_returning_customer_sk AS ctr_customer_sk,
ca_state AS ctr_state,
sum(wr_return_amt) AS ctr_total_return
FROM web_returns, date_dim, customer_address
WHERE (wr_returned_date_sk = d_date_sk)
AND (d_year = 2002)
AND (wr_returning_addr_sk = ca_address_sk)
GROUP BY
wr_returning_customer_sk,
ca_state
)
SELECT
c_customer_id,
c_salutation,
c_first_name,
c_last_name,
c_preferred_cust_flag,
c_birth_day,
c_birth_month,
c_birth_year,
c_birth_country,
c_login,
c_email_address,
c_last_review_date_sk,
ctr_total_return
FROM customer_total_return AS ctr1, customer_address, customer
WHERE (ctr1.ctr_total_return > (
SELECT avg(ctr_total_return) * 1.2
FROM customer_total_return AS ctr2
WHERE (ctr1.ctr_state = ctr2.ctr_state)
))
AND (ca_address_sk = c_current_addr_sk)
AND (ca_state = 'GA')
AND (ctr1.ctr_customer_sk = c_customer_sk)
ORDER BY
c_customer_id,
c_salutation,
c_first_name,
c_last_name,
c_preferred_cust_flag,
c_birth_day,
c_birth_month,
c_birth_year,
c_birth_country,
c_login,
c_email_address,
c_last_review_date_sk,
ctr_total_return
LIMIT 100;
Q31
WITH
ss AS
(
SELECT
ca_county,
d_qoy,
d_year,
sum(ss_ext_sales_price) AS store_sales
FROM store_sales, date_dim, customer_address
WHERE (ss_sold_date_sk = d_date_sk)
AND (ss_addr_sk = ca_address_sk)
GROUP BY ca_county, d_qoy, d_year
),
ws AS
(
SELECT
ca_county,
d_qoy,
d_year,
sum(ws_ext_sales_price) AS web_sales
FROM web_sales, date_dim, customer_address
WHERE (ws_sold_date_sk = d_date_sk)
AND (ws_bill_addr_sk = ca_address_sk)
GROUP BY ca_county, d_qoy, d_year
)
SELECT
ss1.ca_county,
ss1.d_year,
CAST(ws2.web_sales AS Float64) / ws1.web_sales AS web_q1_q2_increase,
CAST(ss2.store_sales AS Float64) / ss1.store_sales AS store_q1_q2_increase,
CAST(ws3.web_sales AS Float64) / ws2.web_sales AS web_q2_q3_increase,
CAST(ss3.store_sales AS Float64) / ss2.store_sales AS store_q2_q3_increase
FROM ss AS ss1, ss AS ss2, ss AS ss3, ws AS ws1, ws AS ws2, ws AS ws3
WHERE (ss1.d_qoy = 1)
AND (ss1.d_year = 2000)
AND (ss1.ca_county = ss2.ca_county)
AND (ss2.d_qoy = 2)
AND (ss2.d_year = 2000)
AND (ss2.ca_county = ss3.ca_county)
AND (ss3.d_qoy = 3)
AND (ss3.d_year = 2000)
AND (ss1.ca_county = ws1.ca_county)
AND (ws1.d_qoy = 1)
AND (ws1.d_year = 2000)
AND (ws1.ca_county = ws2.ca_county)
AND (ws2.d_qoy = 2)
AND (ws2.d_year = 2000)
AND (ws1.ca_county = ws3.ca_county)
AND (ws3.d_qoy = 3)
AND (ws3.d_year = 2000)
AND (CASE WHEN ws1.web_sales > 0 THEN CAST(ws2.web_sales AS Float64) / ws1.web_sales ELSE NULL END
> CASE WHEN ss1.store_sales > 0 THEN CAST(ss2.store_sales AS Float64) / ss1.store_sales ELSE NULL END)
AND (CASE WHEN ws2.web_sales > 0 THEN CAST(ws3.web_sales AS Float64) / ws2.web_sales ELSE NULL END
> CASE WHEN ss2.store_sales > 0 THEN CAST(ss3.store_sales AS Float64) / ss2.store_sales ELSE NULL END)
ORDER BY ss1.ca_county;
Q32
SELECT sum(cs_ext_discount_amt) AS "excess discount amount"
FROM catalog_sales, item, date_dim
WHERE (i_manufact_id = 977)
AND (i_item_sk = cs_item_sk)
AND (d_date BETWEEN '2000-01-27' AND (CAST('2000-01-27' AS date) + INTERVAL 90 DAY))
AND (d_date_sk = cs_sold_date_sk)
AND (cs_ext_discount_amt > (
SELECT 1.3 * avg(cs_ext_discount_amt)
FROM catalog_sales, date_dim
WHERE (cs_item_sk = i_item_sk)
AND (d_date BETWEEN '2000-01-27' AND (CAST('2000-01-27' AS date) + INTERVAL 90 DAY))
AND (d_date_sk = cs_sold_date_sk)
))
LIMIT 100;
Q33
WITH
ss AS
(
SELECT
i_manufact_id,
sum(ss_ext_sales_price) AS total_sales
FROM store_sales, date_dim, customer_address, item
WHERE (i_manufact_id IN (
SELECT i_manufact_id
FROM item
WHERE (i_category IN ('Electronics'))
))
AND (ss_item_sk = i_item_sk)
AND (ss_sold_date_sk = d_date_sk)
AND (d_year = 1998)
AND (d_moy = 5)
AND (ss_addr_sk = ca_address_sk)
AND (ca_gmt_offset = -5)
GROUP BY i_manufact_id
),
cs AS
(
SELECT
i_manufact_id,
sum(cs_ext_sales_price) AS total_sales
FROM catalog_sales, date_dim, customer_address, item
WHERE (i_manufact_id IN (
SELECT i_manufact_id
FROM item
WHERE (i_category IN ('Electronics'))
))
AND (cs_item_sk = i_item_sk)
AND (cs_sold_date_sk = d_date_sk)
AND (d_year = 1998)
AND (d_moy = 5)
AND (cs_bill_addr_sk = ca_address_sk)
AND (ca_gmt_offset = -5)
GROUP BY i_manufact_id
),
ws AS
(
SELECT
i_manufact_id,
sum(ws_ext_sales_price) AS total_sales
FROM web_sales, date_dim, customer_address, item
WHERE (i_manufact_id IN (
SELECT i_manufact_id
FROM item
WHERE (i_category IN ('Electronics'))
))
AND (ws_item_sk = i_item_sk)
AND (ws_sold_date_sk = d_date_sk)
AND (d_year = 1998)
AND (d_moy = 5)
AND (ws_bill_addr_sk = ca_address_sk)
AND (ca_gmt_offset = -5)
GROUP BY i_manufact_id
)
SELECT
i_manufact_id,
sum(total_sales) AS total_sales
FROM
(
SELECT * FROM ss
UNION ALL
SELECT * FROM cs
UNION ALL
SELECT * FROM ws
) AS tmp1
GROUP BY i_manufact_id
ORDER BY total_sales
LIMIT 100;
Q34
SELECT
c_last_name,
c_first_name,
c_salutation,
c_preferred_cust_flag,
ss_ticket_number,
cnt
FROM
(
SELECT
ss_ticket_number,
ss_customer_sk,
count(*) AS cnt
FROM store_sales, date_dim, store, household_demographics
WHERE (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
AND (store_sales.ss_store_sk = store.s_store_sk)
AND (store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)
AND ((date_dim.d_dom BETWEEN 1 AND 3) OR (date_dim.d_dom BETWEEN 25 AND 28))
AND ((household_demographics.hd_buy_potential = '>10000') OR (household_demographics.hd_buy_potential = 'Unknown'))
AND (household_demographics.hd_vehicle_count > 0)
AND ((CASE WHEN household_demographics.hd_vehicle_count > 0 THEN (household_demographics.hd_dep_count) / household_demographics.hd_vehicle_count ELSE NULL END) > 1.2)
AND (date_dim.d_year IN (1999, 1999 + 1, 1999 + 2))
AND (store.s_county IN ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County'))
GROUP BY ss_ticket_number, ss_customer_sk
) AS dn, customer
WHERE (ss_customer_sk = c_customer_sk)
AND (cnt BETWEEN 15 AND 20)
ORDER BY c_last_name, c_first_name, c_salutation, c_preferred_cust_flag DESC, ss_ticket_number;
Q35
SELECT
ca_state,
cd_gender,
cd_marital_status,
cd_dep_count,
count(*) AS cnt1,
avg(cd_dep_count),
min(cd_dep_count),
min(cd_dep_count),
cd_dep_employed_count,
count(*) AS cnt2,
avg(cd_dep_employed_count),
min(cd_dep_employed_count),
min(cd_dep_employed_count),
cd_dep_college_count,
count(*) AS cnt3,
avg(cd_dep_college_count),
min(cd_dep_college_count),
min(cd_dep_college_count)
FROM customer AS c, customer_address AS ca, customer_demographics
WHERE (c.c_current_addr_sk = ca.ca_address_sk)
AND (cd_demo_sk = c.c_current_cdemo_sk)
AND EXISTS (
SELECT *
FROM store_sales, date_dim
WHERE (c.c_customer_sk = ss_customer_sk)
AND (ss_sold_date_sk = d_date_sk)
AND (d_year = 2002)
AND (d_qoy < 4)
)
AND (
EXISTS (
SELECT *
FROM web_sales, date_dim
WHERE (c.c_customer_sk = ws_bill_customer_sk)
AND (ws_sold_date_sk = d_date_sk)
AND (d_year = 2002)
AND (d_qoy < 4)
)
OR EXISTS (
SELECT *
FROM catalog_sales, date_dim
WHERE (c.c_customer_sk = cs_ship_customer_sk)
AND (cs_sold_date_sk = d_date_sk)
AND (d_year = 2002)
AND (d_qoy < 4)
)
)
GROUP BY
ca_state,
cd_gender,
cd_marital_status,
cd_dep_count,
cd_dep_employed_count,
cd_dep_college_count
ORDER BY
ca_state,
cd_gender,
cd_marital_status,
cd_dep_count,
cd_dep_employed_count,
cd_dep_college_count
LIMIT 100;
Q36
SELECT
sum(ss_net_profit) / sum(ss_ext_sales_price) AS gross_margin,
i_category,
i_class,
grouping(i_category) + grouping(i_class) AS lochierarchy,
rank() OVER (
PARTITION BY grouping(i_category) + grouping(i_class),
CASE WHEN grouping(i_class) = 0 THEN i_category END
ORDER BY (sum(ss_net_profit) * 1.) / sum(ss_ext_sales_price) ASC
) AS rank_within_parent
FROM store_sales, date_dim AS d1, item, store
WHERE (d1.d_year = 2001)
AND (d1.d_date_sk = ss_sold_date_sk)
AND (i_item_sk = ss_item_sk)
AND (s_store_sk = ss_store_sk)
AND (s_state IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN'))
GROUP BY ROLLUP(i_category, i_class)
ORDER BY
lochierarchy DESC,
CASE WHEN lochierarchy = 0 THEN i_category END,
rank_within_parent
LIMIT 100;
Q37
SELECT
i_item_id,
i_item_desc,
i_current_price
FROM item, inventory, date_dim, catalog_sales
WHERE (i_current_price BETWEEN 68 AND 68 + 30)
AND (inv_item_sk = i_item_sk)
AND (d_date_sk = inv_date_sk)
AND (d_date BETWEEN CAST('2000-02-01' AS date) AND (CAST('2000-02-01' AS date) + INTERVAL 60 DAY))
AND (i_manufact_id IN (677, 940, 694, 808))
AND (inv_quantity_on_hand BETWEEN 100 AND 500)
AND (cs_item_sk = i_item_sk)
GROUP BY i_item_id, i_item_desc, i_current_price
ORDER BY i_item_id
LIMIT 100;
Q38
SELECT count(*)
FROM
(
SELECT DISTINCT c_last_name, c_first_name, d_date
FROM store_sales, date_dim, customer
WHERE (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
AND (store_sales.ss_customer_sk = customer.c_customer_sk)
AND (d_month_seq BETWEEN 1200 AND 1200 + 11)
INTERSECT
SELECT DISTINCT c_last_name, c_first_name, d_date
FROM catalog_sales, date_dim, customer
WHERE (catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
AND (catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)
AND (d_month_seq BETWEEN 1200 AND 1200 + 11)
INTERSECT
SELECT DISTINCT c_last_name, c_first_name, d_date
FROM web_sales, date_dim, customer
WHERE (web_sales.ws_sold_date_sk = date_dim.d_date_sk)
AND (web_sales.ws_bill_customer_sk = customer.c_customer_sk)
AND (d_month_seq BETWEEN 1200 AND 1200 + 11)
) AS hot_cust
LIMIT 100;
Q39
WITH
inv AS
(
SELECT
w_warehouse_name,
w_warehouse_sk,
i_item_sk,
d_moy,
stdev,
mean,
CASE mean WHEN 0 THEN NULL ELSE stdev / mean END AS cov
FROM
(
SELECT
w_warehouse_name,
w_warehouse_sk,
i_item_sk,
d_moy,
stddev_samp(inv_quantity_on_hand) AS stdev,
avg(inv_quantity_on_hand) AS mean
FROM inventory, item, warehouse, date_dim
WHERE (inv_item_sk = i_item_sk)
AND (inv_warehouse_sk = w_warehouse_sk)
AND (inv_date_sk = d_date_sk)
AND (d_year = 2001)
GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy
) AS foo
WHERE (CASE mean WHEN 0 THEN 0 ELSE stdev / mean END > 1)
)
SELECT
inv1.w_warehouse_sk,
inv1.i_item_sk,
inv1.d_moy,
inv1.mean,
inv1.cov,
inv2.w_warehouse_sk,
inv2.i_item_sk,
inv2.d_moy,
inv2.mean,
inv2.cov
FROM inv AS inv1, inv AS inv2
WHERE (inv1.i_item_sk = inv2.i_item_sk)
AND (inv1.w_warehouse_sk = inv2.w_warehouse_sk)
AND (inv1.d_moy = 1)
AND (inv2.d_moy = 1 + 1)
ORDER BY inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, inv1.mean, inv1.cov, inv2.d_moy, inv2.mean, inv2.cov;
WITH
inv AS
(
SELECT
w_warehouse_name,
w_warehouse_sk,
i_item_sk,
d_moy,
stdev,
mean,
CASE mean WHEN 0 THEN NULL ELSE stdev / mean END AS cov
FROM
(
SELECT
w_warehouse_name,
w_warehouse_sk,
i_item_sk,
d_moy,
stddev_samp(inv_quantity_on_hand) AS stdev,
avg(inv_quantity_on_hand) AS mean
FROM inventory, item, warehouse, date_dim
WHERE (inv_item_sk = i_item_sk)
AND (inv_warehouse_sk = w_warehouse_sk)
AND (inv_date_sk = d_date_sk)
AND (d_year = 2001)
GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy
) AS foo
WHERE (CASE mean WHEN 0 THEN 0 ELSE stdev / mean END > 1)
)
SELECT
inv1.w_warehouse_sk,
inv1.i_item_sk,
inv1.d_moy,
inv1.mean,
inv1.cov,
inv2.w_warehouse_sk,
inv2.i_item_sk,
inv2.d_moy,
inv2.mean,
inv2.cov
FROM inv AS inv1, inv AS inv2
WHERE (inv1.i_item_sk = inv2.i_item_sk)
AND (inv1.w_warehouse_sk = inv2.w_warehouse_sk)
AND (inv1.d_moy = 1)
AND (inv2.d_moy = 1 + 1)
AND (inv1.cov > 1.5)
ORDER BY inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, inv1.mean, inv1.cov, inv2.d_moy, inv2.mean, inv2.cov;
Q40
SELECT
w_state,
i_item_id,
sum(CASE WHEN (CAST(d_date AS date) < CAST('2000-03-11' AS date)) THEN cs_sales_price - coalesce(cr_refunded_cash, 0) ELSE 0 END) AS sales_before,
sum(CASE WHEN (CAST(d_date AS date) >= CAST('2000-03-11' AS date)) THEN cs_sales_price - coalesce(cr_refunded_cash, 0) ELSE 0 END) AS sales_after
FROM catalog_sales
LEFT OUTER JOIN catalog_returns ON (cs_order_number = cr_order_number) AND (cs_item_sk = cr_item_sk),
warehouse, item, date_dim
WHERE (i_current_price BETWEEN 0.99 AND 1.49)
AND (i_item_sk = cs_item_sk)
AND (cs_warehouse_sk = w_warehouse_sk)
AND (cs_sold_date_sk = d_date_sk)
AND (d_date BETWEEN (CAST('2000-03-11' AS date) - INTERVAL 30 DAY) AND (CAST('2000-03-11' AS date) + INTERVAL 30 DAY))
GROUP BY w_state, i_item_id
ORDER BY w_state, i_item_id
LIMIT 100;
Q41
SELECT DISTINCT(i_product_name)
FROM item AS i1
WHERE (i_manufact_id BETWEEN 738 AND 738 + 40)
AND (
SELECT count(*) AS item_cnt
FROM item
WHERE (
(i_manufact = i1.i_manufact)
AND (
(
(i_category = 'Women')
AND (i_color = 'powder' OR i_color = 'khaki')
AND (i_units = 'Ounce' OR i_units = 'Oz')
AND (i_size = 'medium' OR i_size = 'extra large')
)
OR (
(i_category = 'Women')
AND (i_color = 'brown' OR i_color = 'honeydew')
AND (i_units = 'Bunch' OR i_units = 'Ton')
AND (i_size = 'N/A' OR i_size = 'small')
)
OR (
(i_category = 'Men')
AND (i_color = 'floral' OR i_color = 'deep')
AND (i_units = 'N/A' OR i_units = 'Dozen')
AND (i_size = 'petite' OR i_size = 'large')
)
OR (
(i_category = 'Men')
AND (i_color = 'light' OR i_color = 'cornflower')
AND (i_units = 'Box' OR i_units = 'Pound')
AND (i_size = 'medium' OR i_size = 'extra large')
)
)
)
OR (
(i_manufact = i1.i_manufact)
AND (
(
(i_category = 'Women')
AND (i_color = 'midnight' OR i_color = 'snow')
AND (i_units = 'Pallet' OR i_units = 'Gross')
AND (i_size = 'medium' OR i_size = 'extra large')
)
OR (
(i_category = 'Women')
AND (i_color = 'cyan' OR i_color = 'papaya')
AND (i_units = 'Cup' OR i_units = 'Dram')
AND (i_size = 'N/A' OR i_size = 'small')
)
OR (
(i_category = 'Men')
AND (i_color = 'orange' OR i_color = 'frosted')
AND (i_units = 'Each' OR i_units = 'Tbl')
AND (i_size = 'petite' OR i_size = 'large')
)
OR (
(i_category = 'Men')
AND (i_color = 'forest' OR i_color = 'ghost')
AND (i_units = 'Lb' OR i_units = 'Bundle')
AND (i_size = 'medium' OR i_size = 'extra large')
)
)
)
) > 0
ORDER BY i_product_name
LIMIT 100;
Q42
SELECT
dt.d_year,
item.i_category_id,
item.i_category,
sum(ss_ext_sales_price)
FROM date_dim AS dt, store_sales, item
WHERE (dt.d_date_sk = store_sales.ss_sold_date_sk)
AND (store_sales.ss_item_sk = item.i_item_sk)
AND (item.i_manager_id = 1)
AND (dt.d_moy = 11)
AND (dt.d_year = 2000)
GROUP BY
dt.d_year,
item.i_category_id,
item.i_category
ORDER BY
sum(ss_ext_sales_price) DESC,
dt.d_year,
item.i_category_id,
item.i_category
LIMIT 100;
Q43
SELECT
s_store_name,
s_store_id,
sum(CASE WHEN (d_day_name = 'Sunday') THEN ss_sales_price ELSE NULL END) AS sun_sales,
sum(CASE WHEN (d_day_name = 'Monday') THEN ss_sales_price ELSE NULL END) AS mon_sales,
sum(CASE WHEN (d_day_name = 'Tuesday') THEN ss_sales_price ELSE NULL END) AS tue_sales,
sum(CASE WHEN (d_day_name = 'Wednesday') THEN ss_sales_price ELSE NULL END) AS wed_sales,
sum(CASE WHEN (d_day_name = 'Thursday') THEN ss_sales_price ELSE NULL END) AS thu_sales,
sum(CASE WHEN (d_day_name = 'Friday') THEN ss_sales_price ELSE NULL END) AS fri_sales,
sum(CASE WHEN (d_day_name = 'Saturday') THEN ss_sales_price ELSE NULL END) AS sat_sales
FROM date_dim, store_sales, store
WHERE (d_date_sk = ss_sold_date_sk)
AND (s_store_sk = ss_store_sk)
AND (s_gmt_offset = -5)
AND (d_year = 2000)
GROUP BY s_store_name, s_store_id
ORDER BY s_store_name, s_store_id, sun_sales, mon_sales, tue_sales, wed_sales, thu_sales, fri_sales, sat_sales
LIMIT 100;
Q44
SELECT
asceding.rnk,
i1.i_product_name AS best_performing,
i2.i_product_name AS worst_performing
FROM
(
SELECT *
FROM
(
SELECT
item_sk,
rank() OVER (ORDER BY rank_col ASC) AS rnk
FROM
(
SELECT
ss_item_sk AS item_sk,
avg(ss_net_profit) AS rank_col
FROM store_sales AS ss1
WHERE (ss_store_sk = 4)
GROUP BY ss_item_sk
HAVING avg(ss_net_profit) > 0.9 * (
SELECT avg(ss_net_profit) AS rank_col
FROM store_sales
WHERE (ss_store_sk = 4) AND (ss_addr_sk IS NULL)
GROUP BY ss_store_sk
)
) AS V1
) AS V11
WHERE (rnk < 11)
) AS asceding,
(
SELECT *
FROM
(
SELECT
item_sk,
rank() OVER (ORDER BY rank_col DESC) AS rnk
FROM
(
SELECT
ss_item_sk AS item_sk,
avg(ss_net_profit) AS rank_col
FROM store_sales AS ss1
WHERE (ss_store_sk = 4)
GROUP BY ss_item_sk
HAVING avg(ss_net_profit) > 0.9 * (
SELECT avg(ss_net_profit) AS rank_col
FROM store_sales
WHERE (ss_store_sk = 4) AND (ss_addr_sk IS NULL)
GROUP BY ss_store_sk
)
) AS V2
) AS V21
WHERE (rnk < 11)
) AS descending,
item AS i1,
item AS i2
WHERE (asceding.rnk = descending.rnk)
AND (i1.i_item_sk = asceding.item_sk)
AND (i2.i_item_sk = descending.item_sk)
ORDER BY asceding.rnk
LIMIT 100;
Q45
SELECT
ca_zip,
ca_city,
sum(ws_sales_price)
FROM web_sales, customer, customer_address, date_dim, item
WHERE (ws_bill_customer_sk = c_customer_sk)
AND (c_current_addr_sk = ca_address_sk)
AND (ws_item_sk = i_item_sk)
AND (
(substr(ca_zip, 1, 5) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792'))
OR (i_item_id IN (
SELECT i_item_id
FROM item
WHERE (i_item_sk IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29))
))
)
AND (ws_sold_date_sk = d_date_sk)
AND (d_qoy = 2)
AND (d_year = 2001)
GROUP BY ca_zip, ca_city
ORDER BY ca_zip, ca_city
LIMIT 100;
Q46
SELECT
c_last_name,
c_first_name,
ca_city,
bought_city,
ss_ticket_number,
amt,
profit
FROM
(
SELECT
ss_ticket_number,
ss_customer_sk,
ca_city AS bought_city,
sum(ss_coupon_amt) AS amt,
sum(ss_net_profit) AS profit
FROM store_sales, date_dim, store, household_demographics, customer_address
WHERE (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
AND (store_sales.ss_store_sk = store.s_store_sk)
AND (store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)
AND (store_sales.ss_addr_sk = customer_address.ca_address_sk)
AND ((household_demographics.hd_dep_count = 4) OR (household_demographics.hd_vehicle_count = 3))
AND (date_dim.d_dow IN (6, 0))
AND (date_dim.d_year IN (1999, 1999 + 1, 1999 + 2))
AND (store.s_city IN ('Fairview', 'Midway', 'Fairview', 'Fairview', 'Fairview'))
GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, ca_city
) AS dn, customer, customer_address AS current_addr
WHERE (ss_customer_sk = c_customer_sk)
AND (customer.c_current_addr_sk = current_addr.ca_address_sk)
AND (current_addr.ca_city <> bought_city)
ORDER BY
c_last_name,
c_first_name,
ca_city,
bought_city,
ss_ticket_number
LIMIT 100;
Q47
WITH
v1 AS
(
SELECT
i_category,
i_brand,
s_store_name,
s_company_name,
d_year,
d_moy,
sum(ss_sales_price) AS sum_sales,
avg(sum(ss_sales_price)) OVER (PARTITION BY i_category, i_brand, s_store_name, s_company_name, d_year) AS avg_monthly_sales,
rank() OVER (PARTITION BY i_category, i_brand, s_store_name, s_company_name ORDER BY d_year, d_moy) AS rn
FROM item, store_sales, date_dim, store
WHERE (ss_item_sk = i_item_sk)
AND (ss_sold_date_sk = d_date_sk)
AND (ss_store_sk = s_store_sk)
AND (
(d_year = 1999)
OR ((d_year = 1999 - 1) AND (d_moy = 12))
OR ((d_year = 1999 + 1) AND (d_moy = 1))
)
GROUP BY i_category, i_brand, s_store_name, s_company_name, d_year, d_moy
),
v2 AS
(
SELECT
v1.i_category,
v1.i_brand,
v1.s_store_name,
v1.s_company_name,
v1.d_year,
v1.d_moy,
v1.avg_monthly_sales,
v1.sum_sales,
v1_lag.sum_sales AS psum,
v1_lead.sum_sales AS nsum
FROM v1, v1 AS v1_lag, v1 AS v1_lead
WHERE (v1.i_category = v1_lag.i_category)
AND (v1.i_category = v1_lead.i_category)
AND (v1.i_brand = v1_lag.i_brand)
AND (v1.i_brand = v1_lead.i_brand)
AND (v1.s_store_name = v1_lag.s_store_name)
AND (v1.s_store_name = v1_lead.s_store_name)
AND (v1.s_company_name = v1_lag.s_company_name)
AND (v1.s_company_name = v1_lead.s_company_name)
AND (v1.rn = v1_lag.rn + 1)
AND (v1.rn = v1_lead.rn - 1)
)
SELECT *
FROM v2
WHERE (d_year = 1999)
AND (avg_monthly_sales > 0)
AND (CASE WHEN avg_monthly_sales > 0 THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales ELSE NULL END > 0.1)
ORDER BY sum_sales - avg_monthly_sales, s_store_name
LIMIT 100;
The query does not work out-of-the-box due to https://github.com/ClickHouse/ClickHouse/issues/94858. This alternative formulation with a minor fix works:
WITH
v1 AS
(
SELECT
i_category,
i_brand,
s_store_name,
s_company_name,
d_year,
d_moy,
sum(ss_sales_price) AS sum_sales,
avg(sum(ss_sales_price)) OVER (PARTITION BY i_category, i_brand, s_store_name, s_company_name, d_year) AS avg_monthly_sales,
rank() OVER (PARTITION BY i_category, i_brand, s_store_name, s_company_name ORDER BY d_year, d_moy) AS rn
FROM item, store_sales, date_dim, store
WHERE (ss_item_sk = i_item_sk)
AND (ss_sold_date_sk = d_date_sk)
AND (ss_store_sk = s_store_sk)
AND (
(d_year = 1999)
OR ((d_year = 1999 - 1) AND (d_moy = 12))
OR ((d_year = 1999 + 1) AND (d_moy = 1))
)
GROUP BY i_category, i_brand, s_store_name, s_company_name, d_year, d_moy
),
v2 AS
(
SELECT
v1.i_category,
v1.i_brand,
v1.s_store_name,
v1.s_company_name,
v1.d_year,
v1.d_moy,
v1.avg_monthly_sales,
v1.sum_sales,
v1_lag.sum_sales AS psum,
v1_lead.sum_sales AS nsum
FROM v1, v1 AS v1_lag, v1 AS v1_lead
WHERE (v1.i_category = v1_lag.i_category)
AND (v1.i_category = v1_lead.i_category)
AND (v1.i_brand = v1_lag.i_brand)
AND (v1.i_brand = v1_lead.i_brand)
AND (v1.s_store_name = v1_lag.s_store_name)
AND (v1.s_store_name = v1_lead.s_store_name)
AND (v1.s_company_name = v1_lag.s_company_name)
AND (v1.s_company_name = v1_lead.s_company_name)
AND (v1.rn = v1_lag.rn + 1)
AND (v1.rn = v1_lead.rn - 1)
)
SELECT *
FROM v2
WHERE (v1.d_year = 1999)
AND (v1.avg_monthly_sales > 0)
AND (CASE WHEN v1.avg_monthly_sales > 0 THEN abs(v1.sum_sales - v1.avg_monthly_sales) / v1.avg_monthly_sales ELSE NULL END > 0.1)
ORDER BY v1.sum_sales - v1.avg_monthly_sales, v1.s_store_name
LIMIT 100;
Q48
SELECT sum(ss_quantity)
FROM store_sales, store, customer_demographics, customer_address, date_dim
WHERE (s_store_sk = ss_store_sk)
AND (ss_sold_date_sk = d_date_sk)
AND (d_year = 2000)
AND (
(
(cd_demo_sk = ss_cdemo_sk)
AND (cd_marital_status = 'M')
AND (cd_education_status = '4 yr Degree')
AND (ss_sales_price BETWEEN 100.00 AND 150.00)
)
OR (
(cd_demo_sk = ss_cdemo_sk)
AND (cd_marital_status = 'D')
AND (cd_education_status = '2 yr Degree')
AND (ss_sales_price BETWEEN 50.00 AND 100.00)
)
OR (
(cd_demo_sk = ss_cdemo_sk)
AND (cd_marital_status = 'S')
AND (cd_education_status = 'College')
AND (ss_sales_price BETWEEN 150.00 AND 200.00)
)
)
AND (
(
(ss_addr_sk = ca_address_sk)
AND (ca_country = 'United States')
AND (ca_state IN ('CO', 'OH', 'TX'))
AND (ss_net_profit BETWEEN 0 AND 2000)
)
OR (
(ss_addr_sk = ca_address_sk)
AND (ca_country = 'United States')
AND (ca_state IN ('OR', 'MN', 'KY'))
AND (ss_net_profit BETWEEN 150 AND 3000)
)
OR (
(ss_addr_sk = ca_address_sk)
AND (ca_country = 'United States')
AND (ca_state IN ('VA', 'CA', 'MS'))
AND (ss_net_profit BETWEEN 50 AND 25000)
)
);
Q49
SELECT
channel,
item,
return_ratio,
return_rank,
currency_rank
FROM
(
SELECT
'web' AS channel,
web.item,
web.return_ratio,
web.return_rank,
web.currency_rank
FROM
(
SELECT
item,
return_ratio,
currency_ratio,
rank() OVER (ORDER BY return_ratio) AS return_rank,
rank() OVER (ORDER BY currency_ratio) AS currency_rank
FROM
(
SELECT
ws.ws_item_sk AS item,
(CAST(sum(coalesce(wr.wr_return_quantity, 0)) AS decimal(15, 4)) / CAST(sum(coalesce(ws.ws_quantity, 0)) AS decimal(15, 4))) AS return_ratio,
(CAST(sum(coalesce(wr.wr_return_amt, 0)) AS decimal(15, 4)) / CAST(sum(coalesce(ws.ws_net_paid, 0)) AS decimal(15, 4))) AS currency_ratio
FROM web_sales AS ws
LEFT OUTER JOIN web_returns AS wr ON (ws.ws_order_number = wr.wr_order_number) AND (ws.ws_item_sk = wr.wr_item_sk),
date_dim
WHERE (wr.wr_return_amt > 10000)
AND (ws.ws_net_profit > 1)
AND (ws.ws_net_paid > 0)
AND (ws.ws_quantity > 0)
AND (ws_sold_date_sk = d_date_sk)
AND (d_year = 2001)
AND (d_moy = 12)
GROUP BY ws.ws_item_sk
) AS in_web
) AS web
WHERE (web.return_rank <= 10) OR (web.currency_rank <= 10)
UNION
SELECT
'catalog' AS channel,
catalog.item,
catalog.return_ratio,
catalog.return_rank,
catalog.currency_rank
FROM
(
SELECT
item,
return_ratio,
currency_ratio,
rank() OVER (ORDER BY return_ratio) AS return_rank,
rank() OVER (ORDER BY currency_ratio) AS currency_rank
FROM
(
SELECT
cs.cs_item_sk AS item,
(CAST(sum(coalesce(cr.cr_return_quantity, 0)) AS decimal(15, 4)) / CAST(sum(coalesce(cs.cs_quantity, 0)) AS decimal(15, 4))) AS return_ratio,
(CAST(sum(coalesce(cr.cr_return_amount, 0)) AS decimal(15, 4)) / CAST(sum(coalesce(cs.cs_net_paid, 0)) AS decimal(15, 4))) AS currency_ratio
FROM catalog_sales AS cs
LEFT OUTER JOIN catalog_returns AS cr ON (cs.cs_order_number = cr.cr_order_number) AND (cs.cs_item_sk = cr.cr_item_sk),
date_dim
WHERE (cr.cr_return_amount > 10000)
AND (cs.cs_net_profit > 1)
AND (cs.cs_net_paid > 0)
AND (cs.cs_quantity > 0)
AND (cs_sold_date_sk = d_date_sk)
AND (d_year = 2001)
AND (d_moy = 12)
GROUP BY cs.cs_item_sk
) AS in_cat
) AS catalog
WHERE (catalog.return_rank <= 10) OR (catalog.currency_rank <= 10)
UNION
SELECT
'store' AS channel,
store.item,
store.return_ratio,
store.return_rank,
store.currency_rank
FROM
(
SELECT
item,
return_ratio,
currency_ratio,
rank() OVER (ORDER BY return_ratio) AS return_rank,
rank() OVER (ORDER BY currency_ratio) AS currency_rank
FROM
(
SELECT
sts.ss_item_sk AS item,
(CAST(sum(coalesce(sr.sr_return_quantity, 0)) AS decimal(15, 4)) / CAST(sum(coalesce(sts.ss_quantity, 0)) AS decimal(15, 4))) AS return_ratio,
(CAST(sum(coalesce(sr.sr_return_amt, 0)) AS decimal(15, 4)) / CAST(sum(coalesce(sts.ss_net_paid, 0)) AS decimal(15, 4))) AS currency_ratio
FROM store_sales AS sts
LEFT OUTER JOIN store_returns AS sr ON (sts.ss_ticket_number = sr.sr_ticket_number) AND (sts.ss_item_sk = sr.sr_item_sk),
date_dim
WHERE (sr.sr_return_amt > 10000)
AND (sts.ss_net_profit > 1)
AND (sts.ss_net_paid > 0)
AND (sts.ss_quantity > 0)
AND (ss_sold_date_sk = d_date_sk)
AND (d_year = 2001)
AND (d_moy = 12)
GROUP BY sts.ss_item_sk
) AS in_store
) AS store
WHERE (store.return_rank <= 10) OR (store.currency_rank <= 10)
)
ORDER BY 1, 4, 5, 2
LIMIT 100;
Q50
SELECT
s_store_name,
s_company_id,
s_street_number,
s_street_name,
s_street_type,
s_suite_number,
s_city,
s_county,
s_state,
s_zip,
sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk <= 30) THEN 1 ELSE 0 END) AS "30 days",
sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 30) AND (sr_returned_date_sk - ss_sold_date_sk <= 60) THEN 1 ELSE 0 END) AS "31-60 days",
sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 60) AND (sr_returned_date_sk - ss_sold_date_sk <= 90) THEN 1 ELSE 0 END) AS "61-90 days",
sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 90) AND (sr_returned_date_sk - ss_sold_date_sk <= 120) THEN 1 ELSE 0 END) AS "91-120 days",
sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 120) THEN 1 ELSE 0 END) AS ">120 days"
FROM store_sales, store_returns, store, date_dim AS d1, date_dim AS d2
WHERE (d2.d_year = 2001)
AND (d2.d_moy = 8)
AND (ss_ticket_number = sr_ticket_number)
AND (ss_item_sk = sr_item_sk)
AND (ss_sold_date_sk = d1.d_date_sk)
AND (sr_returned_date_sk = d2.d_date_sk)
AND (ss_customer_sk = sr_customer_sk)
AND (ss_store_sk = s_store_sk)
GROUP BY
s_store_name,
s_company_id,
s_street_number,
s_street_name,
s_street_type,
s_suite_number,
s_city,
s_county,
s_state,
s_zip
ORDER BY
s_store_name,
s_company_id,
s_street_number,
s_street_name,
s_street_type,
s_suite_number,
s_city,
s_county,
s_state,
s_zip
LIMIT 100;
Q51
WITH
web_v1 AS
(
SELECT
ws_item_sk AS item_sk,
d_date,
sum(sum(ws_sales_price)) OVER (PARTITION BY ws_item_sk ORDER BY d_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cume_sales
FROM web_sales, date_dim
WHERE (ws_sold_date_sk = d_date_sk)
AND (d_month_seq BETWEEN 1200 AND 1200 + 11)
AND (ws_item_sk IS NOT NULL)
GROUP BY ws_item_sk, d_date
),
store_v1 AS
(
SELECT
ss_item_sk AS item_sk,
d_date,
sum(sum(ss_sales_price)) OVER (PARTITION BY ss_item_sk ORDER BY d_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cume_sales
FROM store_sales, date_dim
WHERE (ss_sold_date_sk = d_date_sk)
AND (d_month_seq BETWEEN 1200 AND 1200 + 11)
AND (ss_item_sk IS NOT NULL)
GROUP BY ss_item_sk, d_date
)
SELECT *
FROM
(
SELECT
item_sk,
d_date,
web_sales,
store_sales,
max(web_sales) OVER (PARTITION BY item_sk ORDER BY d_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS web_cumulative,
max(store_sales) OVER (PARTITION BY item_sk ORDER BY d_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS store_cumulative
FROM
(
SELECT
CASE WHEN web.item_sk IS NOT NULL THEN web.item_sk ELSE store.item_sk END AS item_sk,
CASE WHEN web.d_date IS NOT NULL THEN web.d_date ELSE store.d_date END AS d_date,
web.cume_sales AS web_sales,
store.cume_sales AS store_sales
FROM web_v1 AS web
FULL OUTER JOIN store_v1 AS store ON (web.item_sk = store.item_sk) AND (web.d_date = store.d_date)
) AS x
) AS y
WHERE (web_cumulative > store_cumulative)
ORDER BY item_sk, d_date
LIMIT 100;
Q52
SELECT
dt.d_year,
item.i_brand_id AS brand_id,
item.i_brand AS brand,
sum(ss_ext_sales_price) AS ext_price
FROM date_dim AS dt, store_sales, item
WHERE (dt.d_date_sk = store_sales.ss_sold_date_sk)
AND (store_sales.ss_item_sk = item.i_item_sk)
AND (item.i_manager_id = 1)
AND (dt.d_moy = 11)
AND (dt.d_year = 2000)
GROUP BY
dt.d_year,
item.i_brand,
item.i_brand_id
ORDER BY
dt.d_year,
ext_price DESC,
brand_id
LIMIT 100;
Q53
SELECT *
FROM
(
SELECT
i_manufact_id,
sum(ss_sales_price) AS sum_sales,
avg(sum(ss_sales_price)) OVER (PARTITION BY i_manufact_id) AS avg_quarterly_sales
FROM item, store_sales, date_dim, store
WHERE (ss_item_sk = i_item_sk)
AND (ss_sold_date_sk = d_date_sk)
AND (ss_store_sk = s_store_sk)
AND (d_month_seq IN (1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6, 1200 + 7, 1200 + 8, 1200 + 9, 1200 + 10, 1200 + 11))
AND (
(
(i_category IN ('Books', 'Children', 'Electronics'))
AND (i_class IN ('personal', 'portable', 'reference', 'self-help'))
AND (i_brand IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9'))
)
OR (
(i_category IN ('Women', 'Music', 'Men'))
AND (i_class IN ('accessories', 'classical', 'fragrances', 'pants'))
AND (i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))
)
)
GROUP BY i_manufact_id, d_qoy
) AS tmp1
WHERE (CASE WHEN avg_quarterly_sales > 0 THEN abs(sum_sales - avg_quarterly_sales) / avg_quarterly_sales ELSE NULL END > 0.1)
ORDER BY
avg_quarterly_sales,
sum_sales,
i_manufact_id
LIMIT 100;
Q54
WITH
my_customers AS
(
SELECT DISTINCT
c_customer_sk,
c_current_addr_sk
FROM
(
SELECT
cs_sold_date_sk AS sold_date_sk,
cs_bill_customer_sk AS customer_sk,
cs_item_sk AS item_sk
FROM catalog_sales
UNION ALL
SELECT
ws_sold_date_sk AS sold_date_sk,
ws_bill_customer_sk AS customer_sk,
ws_item_sk AS item_sk
FROM web_sales
) AS cs_or_ws_sales, item, date_dim, customer
WHERE (sold_date_sk = d_date_sk)
AND (item_sk = i_item_sk)
AND (i_category = 'Women')
AND (i_class = 'maternity')
AND (c_customer_sk = cs_or_ws_sales.customer_sk)
AND (d_moy = 12)
AND (d_year = 1998)
),
my_revenue AS
(
SELECT
c_customer_sk,
sum(ss_ext_sales_price) AS revenue
FROM my_customers, store_sales, customer_address, store, date_dim
WHERE (c_current_addr_sk = ca_address_sk)
AND (ca_county = s_county)
AND (ca_state = s_state)
AND (ss_sold_date_sk = d_date_sk)
AND (c_customer_sk = ss_customer_sk)
AND (d_month_seq BETWEEN (
SELECT DISTINCT d_month_seq + 1
FROM date_dim
WHERE (d_year = 1998) AND (d_moy = 12)
) AND (
SELECT DISTINCT d_month_seq + 3
FROM date_dim
WHERE (d_year = 1998) AND (d_moy = 12)
))
GROUP BY c_customer_sk
),
segments AS
(
SELECT CAST((revenue / 50) AS int) AS segment
FROM my_revenue
)
SELECT
segment,
count(*) AS num_customers,
segment * 50 AS segment_base
FROM segments
GROUP BY segment
ORDER BY segment, num_customers
LIMIT 100;
Q55
SELECT
i_brand_id AS brand_id,
i_brand AS brand,
sum(ss_ext_sales_price) AS ext_price
FROM date_dim, store_sales, item
WHERE (d_date_sk = ss_sold_date_sk)
AND (ss_item_sk = i_item_sk)
AND (i_manager_id = 28)
AND (d_moy = 11)
AND (d_year = 1999)
GROUP BY i_brand, i_brand_id
ORDER BY ext_price DESC, i_brand_id
LIMIT 100;
Q56
WITH
ss AS
(
SELECT
i_item_id,
sum(ss_ext_sales_price) AS total_sales
FROM store_sales, date_dim, customer_address, item
WHERE (i_item_id IN (
SELECT i_item_id
FROM item
WHERE (i_color IN ('slate', 'blanched', 'burnished'))
))
AND (ss_item_sk = i_item_sk)
AND (ss_sold_date_sk = d_date_sk)
AND (d_year = 2001)
AND (d_moy = 2)
AND (ss_addr_sk = ca_address_sk)
AND (ca_gmt_offset = -5)
GROUP BY i_item_id
),
cs AS
(
SELECT
i_item_id,
sum(cs_ext_sales_price) AS total_sales
FROM catalog_sales, date_dim, customer_address, item
WHERE (i_item_id IN (
SELECT i_item_id
FROM item
WHERE (i_color IN ('slate', 'blanched', 'burnished'))
))
AND (cs_item_sk = i_item_sk)
AND (cs_sold_date_sk = d_date_sk)
AND (d_year = 2001)
AND (d_moy = 2)
AND (cs_bill_addr_sk = ca_address_sk)
AND (ca_gmt_offset = -5)
GROUP BY i_item_id
),
ws AS
(
SELECT
i_item_id,
sum(ws_ext_sales_price) AS total_sales
FROM web_sales, date_dim, customer_address, item
WHERE (i_item_id IN (
SELECT i_item_id
FROM item
WHERE (i_color IN ('slate', 'blanched', 'burnished'))
))
AND (ws_item_sk = i_item_sk)
AND (ws_sold_date_sk = d_date_sk)
AND (d_year = 2001)
AND (d_moy = 2)
AND (ws_bill_addr_sk = ca_address_sk)
AND (ca_gmt_offset = -5)
GROUP BY i_item_id
)
SELECT
i_item_id,
sum(total_sales) AS total_sales
FROM
(
SELECT * FROM ss
UNION ALL
SELECT * FROM cs
UNION ALL
SELECT * FROM ws
) AS tmp1
GROUP BY i_item_id
ORDER BY
total_sales,
i_item_id
LIMIT 100;
Q57
WITH
v1 AS
(
SELECT
i_category,
i_brand,
cc_name,
d_year,
d_moy,
sum(cs_sales_price) AS sum_sales,
avg(sum(cs_sales_price)) OVER (PARTITION BY i_category, i_brand, cc_name, d_year) AS avg_monthly_sales,
rank() OVER (PARTITION BY i_category, i_brand, cc_name ORDER BY d_year, d_moy) AS rn
FROM item, catalog_sales, date_dim, call_center
WHERE (cs_item_sk = i_item_sk)
AND (cs_sold_date_sk = d_date_sk)
AND (cc_call_center_sk = cs_call_center_sk)
AND (
(d_year = 1999)
OR ((d_year = 1999 - 1) AND (d_moy = 12))
OR ((d_year = 1999 + 1) AND (d_moy = 1))
)
GROUP BY i_category, i_brand, cc_name, d_year, d_moy
),
v2 AS
(
SELECT
v1.i_category,
v1.i_brand,
v1.cc_name,
v1.d_year,
v1.d_moy,
v1.avg_monthly_sales,
v1.sum_sales,
v1_lag.sum_sales AS psum,
v1_lead.sum_sales AS nsum
FROM v1, v1 AS v1_lag, v1 AS v1_lead
WHERE (v1.i_category = v1_lag.i_category)
AND (v1.i_category = v1_lead.i_category)
AND (v1.i_brand = v1_lag.i_brand)
AND (v1.i_brand = v1_lead.i_brand)
AND (v1.cc_name = v1_lag.cc_name)
AND (v1.cc_name = v1_lead.cc_name)
AND (v1.rn = v1_lag.rn + 1)
AND (v1.rn = v1_lead.rn - 1)
)
SELECT *
FROM v2
WHERE (d_year = 1999)
AND (avg_monthly_sales > 0)
AND (CASE WHEN avg_monthly_sales > 0 THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales ELSE NULL END > 0.1)
ORDER BY sum_sales - avg_monthly_sales, cc_name
LIMIT 100;
The query does not work out-of-the-box due to https://github.com/ClickHouse/ClickHouse/issues/94858. This alternative formulation with a minor fix works:
WITH
v1 AS
(
SELECT
i_category,
i_brand,
cc_name,
d_year,
d_moy,
sum(cs_sales_price) AS sum_sales,
avg(sum(cs_sales_price)) OVER (PARTITION BY i_category, i_brand, cc_name, d_year) AS avg_monthly_sales,
rank() OVER (PARTITION BY i_category, i_brand, cc_name ORDER BY d_year, d_moy) AS rn
FROM item, catalog_sales, date_dim, call_center
WHERE (cs_item_sk = i_item_sk)
AND (cs_sold_date_sk = d_date_sk)
AND (cc_call_center_sk = cs_call_center_sk)
AND (
(d_year = 1999)
OR ((d_year = 1999 - 1) AND (d_moy = 12))
OR ((d_year = 1999 + 1) AND (d_moy = 1))
)
GROUP BY i_category, i_brand, cc_name, d_year, d_moy
),
v2 AS
(
SELECT
v1.i_category,
v1.i_brand,
v1.cc_name,
v1.d_year,
v1.d_moy,
v1.avg_monthly_sales,
v1.sum_sales,
v1_lag.sum_sales AS psum,
v1_lead.sum_sales AS nsum
FROM v1, v1 AS v1_lag, v1 AS v1_lead
WHERE (v1.i_category = v1_lag.i_category)
AND (v1.i_category = v1_lead.i_category)
AND (v1.i_brand = v1_lag.i_brand)
AND (v1.i_brand = v1_lead.i_brand)
AND (v1.cc_name = v1_lag.cc_name)
AND (v1.cc_name = v1_lead.cc_name)
AND (v1.rn = v1_lag.rn + 1)
AND (v1.rn = v1_lead.rn - 1)
)
SELECT *
FROM v2
WHERE (v1.d_year = 1999)
AND (v1.avg_monthly_sales > 0)
AND (CASE WHEN v1.avg_monthly_sales > 0 THEN abs(v1.sum_sales - v1.avg_monthly_sales) / v1.avg_monthly_sales ELSE NULL END > 0.1)
ORDER BY v1.sum_sales - v1.avg_monthly_sales, v1.cc_name
LIMIT 100;
Q58
WITH
ss_items AS
(
SELECT
i_item_id AS item_id,
sum(ss_ext_sales_price) AS ss_item_rev
FROM store_sales, item, date_dim
WHERE (ss_item_sk = i_item_sk)
AND (d_date IN (
SELECT d_date
FROM date_dim
WHERE (d_week_seq = (
SELECT d_week_seq
FROM date_dim
WHERE (d_date = '2000-01-03')
))
))
AND (ss_sold_date_sk = d_date_sk)
GROUP BY i_item_id
),
cs_items AS
(
SELECT
i_item_id AS item_id,
sum(cs_ext_sales_price) AS cs_item_rev
FROM catalog_sales, item, date_dim
WHERE (cs_item_sk = i_item_sk)
AND (d_date IN (
SELECT d_date
FROM date_dim
WHERE (d_week_seq = (
SELECT d_week_seq
FROM date_dim
WHERE (d_date = '2000-01-03')
))
))
AND (cs_sold_date_sk = d_date_sk)
GROUP BY i_item_id
),
ws_items AS
(
SELECT
i_item_id AS item_id,
sum(ws_ext_sales_price) AS ws_item_rev
FROM web_sales, item, date_dim
WHERE (ws_item_sk = i_item_sk)
AND (d_date IN (
SELECT d_date
FROM date_dim
WHERE (d_week_seq = (
SELECT d_week_seq
FROM date_dim
WHERE (d_date = '2000-01-03')
))
))
AND (ws_sold_date_sk = d_date_sk)
GROUP BY i_item_id
)
SELECT
ss_items.item_id,
ss_item_rev,
ss_item_rev / ((CAST(ss_item_rev AS Float64) + cs_item_rev + ws_item_rev) / 3) * 100 AS ss_dev,
cs_item_rev,
cs_item_rev / ((CAST(ss_item_rev AS Float64) + cs_item_rev + ws_item_rev) / 3) * 100 AS cs_dev,
ws_item_rev,
ws_item_rev / ((CAST(ss_item_rev AS Float64) + cs_item_rev + ws_item_rev) / 3) * 100 AS ws_dev,
(ss_item_rev + cs_item_rev + ws_item_rev) / 3 AS average
FROM ss_items, cs_items, ws_items
WHERE (ss_items.item_id = cs_items.item_id)
AND (ss_items.item_id = ws_items.item_id)
AND (ss_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev)
AND (ss_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev)
AND (cs_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev)
AND (cs_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev)
AND (ws_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev)
AND (ws_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev)
ORDER BY
item_id,
ss_item_rev
LIMIT 100;
The query does not work out-of-the-box due to https://github.com/ClickHouse/ClickHouse/issues/94976. This alternative formulation with a minor fix works:
WITH
ss_items AS
(
SELECT
i_item_id AS item_id,
sum(ss_ext_sales_price) AS ss_item_rev
FROM store_sales, item, date_dim
WHERE (ss_item_sk = i_item_sk)
AND (d_date IN (
SELECT d_date
FROM date_dim
WHERE (d_week_seq = (
SELECT d_week_seq
FROM date_dim
WHERE (d_date = '2000-01-03')
))
))
AND (ss_sold_date_sk = d_date_sk)
GROUP BY i_item_id
),
cs_items AS
(
SELECT
i_item_id AS item_id,
sum(cs_ext_sales_price) AS cs_item_rev
FROM catalog_sales, item, date_dim
WHERE (cs_item_sk = i_item_sk)
AND (d_date IN (
SELECT d_date
FROM date_dim
WHERE (d_week_seq = (
SELECT d_week_seq
FROM date_dim
WHERE (d_date = '2000-01-03')
))
))
AND (cs_sold_date_sk = d_date_sk)
GROUP BY i_item_id
),
ws_items AS
(
SELECT
i_item_id AS item_id,
sum(ws_ext_sales_price) AS ws_item_rev
FROM web_sales, item, date_dim
WHERE (ws_item_sk = i_item_sk)
AND (d_date IN (
SELECT d_date
FROM date_dim
WHERE (d_week_seq = (
SELECT d_week_seq
FROM date_dim
WHERE (d_date = '2000-01-03')
))
))
AND (ws_sold_date_sk = d_date_sk)
GROUP BY i_item_id
)
SELECT
ss_items.item_id,
ss_item_rev,
ss_item_rev / ((CAST(ss_item_rev AS Float64) + cs_item_rev + ws_item_rev) / 3) * 100 AS ss_dev,
cs_item_rev,
cs_item_rev / ((CAST(ss_item_rev AS Float64) + cs_item_rev + ws_item_rev) / 3) * 100 AS cs_dev,
ws_item_rev,
ws_item_rev / ((CAST(ss_item_rev AS Float64) + cs_item_rev + ws_item_rev) / 3) * 100 AS ws_dev,
(ss_item_rev + cs_item_rev + ws_item_rev) / 3 AS average
FROM ss_items, cs_items, ws_items
WHERE (ss_items.item_id = cs_items.item_id)
AND (ss_items.item_id = ws_items.item_id)
AND (ss_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev)
AND (ss_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev)
AND (cs_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev)
AND (cs_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev)
AND (ws_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev)
AND (ws_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev)
ORDER BY
ss_items.item_id,
ss_item_rev
LIMIT 100;
Q59
WITH
wss AS
(
SELECT
d_week_seq,
ss_store_sk,
sum(CASE WHEN (d_day_name = 'Sunday') THEN ss_sales_price ELSE NULL END) AS sun_sales,
sum(CASE WHEN (d_day_name = 'Monday') THEN ss_sales_price ELSE NULL END) AS mon_sales,
sum(CASE WHEN (d_day_name = 'Tuesday') THEN ss_sales_price ELSE NULL END) AS tue_sales,
sum(CASE WHEN (d_day_name = 'Wednesday') THEN ss_sales_price ELSE NULL END) AS wed_sales,
sum(CASE WHEN (d_day_name = 'Thursday') THEN ss_sales_price ELSE NULL END) AS thu_sales,
sum(CASE WHEN (d_day_name = 'Friday') THEN ss_sales_price ELSE NULL END) AS fri_sales,
sum(CASE WHEN (d_day_name = 'Saturday') THEN ss_sales_price ELSE NULL END) AS sat_sales
FROM store_sales, date_dim
WHERE (d_date_sk = ss_sold_date_sk)
GROUP BY d_week_seq, ss_store_sk
)
SELECT
s_store_name1,
s_store_id1,
d_week_seq1,
sun_sales1 / sun_sales2,
mon_sales1 / mon_sales2,
tue_sales1 / tue_sales2,
wed_sales1 / wed_sales2,
thu_sales1 / thu_sales2,
fri_sales1 / fri_sales2,
sat_sales1 / sat_sales2
FROM
(
SELECT
s_store_name AS s_store_name1,
wss.d_week_seq AS d_week_seq1,
s_store_id AS s_store_id1,
sun_sales AS sun_sales1,
mon_sales AS mon_sales1,
tue_sales AS tue_sales1,
wed_sales AS wed_sales1,
thu_sales AS thu_sales1,
fri_sales AS fri_sales1,
sat_sales AS sat_sales1
FROM wss, store, date_dim AS d
WHERE (d.d_week_seq = wss.d_week_seq)
AND (ss_store_sk = s_store_sk)
AND (d_month_seq BETWEEN 1212 AND 1212 + 11)
) AS y,
(
SELECT
s_store_name AS s_store_name2,
wss.d_week_seq AS d_week_seq2,
s_store_id AS s_store_id2,
sun_sales AS sun_sales2,
mon_sales AS mon_sales2,
tue_sales AS tue_sales2,
wed_sales AS wed_sales2,
thu_sales AS thu_sales2,
fri_sales AS fri_sales2,
sat_sales AS sat_sales2
FROM wss, store, date_dim AS d
WHERE (d.d_week_seq = wss.d_week_seq)
AND (ss_store_sk = s_store_sk)
AND (d_month_seq BETWEEN 1212 + 12 AND 1212 + 23)
) AS x
WHERE (s_store_id1 = s_store_id2)
AND (d_week_seq1 = d_week_seq2 - 52)
ORDER BY s_store_name1, s_store_id1, d_week_seq1
LIMIT 100;
Q60
WITH
ss AS
(
SELECT
i_item_id,
sum(ss_ext_sales_price) AS total_sales
FROM store_sales, date_dim, customer_address, item
WHERE (i_item_id IN (
SELECT i_item_id
FROM item
WHERE (i_category IN ('Music'))
))
AND (ss_item_sk = i_item_sk)
AND (ss_sold_date_sk = d_date_sk)
AND (d_year = 1998)
AND (d_moy = 9)
AND (ss_addr_sk = ca_address_sk)
AND (ca_gmt_offset = -5)
GROUP BY i_item_id
),
cs AS
(
SELECT
i_item_id,
sum(cs_ext_sales_price) AS total_sales
FROM catalog_sales, date_dim, customer_address, item
WHERE (i_item_id IN (
SELECT i_item_id
FROM item
WHERE (i_category IN ('Music'))
))
AND (cs_item_sk = i_item_sk)
AND (cs_sold_date_sk = d_date_sk)
AND (d_year = 1998)
AND (d_moy = 9)
AND (cs_bill_addr_sk = ca_address_sk)
AND (ca_gmt_offset = -5)
GROUP BY i_item_id
),
ws AS
(
SELECT
i_item_id,
sum(ws_ext_sales_price) AS total_sales
FROM web_sales, date_dim, customer_address, item
WHERE (i_item_id IN (
SELECT i_item_id
FROM item
WHERE (i_category IN ('Music'))
))
AND (ws_item_sk = i_item_sk)
AND (ws_sold_date_sk = d_date_sk)
AND (d_year = 1998)
AND (d_moy = 9)
AND (ws_bill_addr_sk = ca_address_sk)
AND (ca_gmt_offset = -5)
GROUP BY i_item_id
)
SELECT
i_item_id,
sum(total_sales) AS total_sales
FROM
(
SELECT * FROM ss
UNION ALL
SELECT * FROM cs
UNION ALL
SELECT * FROM ws
) AS tmp1
GROUP BY i_item_id
ORDER BY
i_item_id,
total_sales
LIMIT 100;
Q61
SELECT
promotions,
total,
CAST(promotions AS decimal(15, 4)) / CAST(total AS decimal(15, 4)) * 100
FROM
(
SELECT sum(ss_ext_sales_price) AS promotions
FROM store_sales, store, promotion, date_dim, customer, customer_address, item
WHERE (ss_sold_date_sk = d_date_sk)
AND (ss_store_sk = s_store_sk)
AND (ss_promo_sk = p_promo_sk)
AND (ss_customer_sk = c_customer_sk)
AND (ca_address_sk = c_current_addr_sk)
AND (ss_item_sk = i_item_sk)
AND (ca_gmt_offset = -5)
AND (i_category = 'Jewelry')
AND ((p_channel_dmail = 'Y') OR (p_channel_email = 'Y') OR (p_channel_tv = 'Y'))
AND (s_gmt_offset = -5)
AND (d_year = 1998)
AND (d_moy = 11)
) AS promotional_sales,
(
SELECT sum(ss_ext_sales_price) AS total
FROM store_sales, store, date_dim, customer, customer_address, item
WHERE (ss_sold_date_sk = d_date_sk)
AND (ss_store_sk = s_store_sk)
AND (ss_customer_sk = c_customer_sk)
AND (ca_address_sk = c_current_addr_sk)
AND (ss_item_sk = i_item_sk)
AND (ca_gmt_offset = -5)
AND (i_category = 'Jewelry')
AND (s_gmt_offset = -5)
AND (d_year = 1998)
AND (d_moy = 11)
) AS all_sales
ORDER BY promotions, total
LIMIT 100;
Q62
SELECT
substr(w_warehouse_name, 1, 20),
sm_type,
web_name,
sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk <= 30) THEN 1 ELSE 0 END) AS "30 days",
sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 30) AND (ws_ship_date_sk - ws_sold_date_sk <= 60) THEN 1 ELSE 0 END) AS "31-60 days",
sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 60) AND (ws_ship_date_sk - ws_sold_date_sk <= 90) THEN 1 ELSE 0 END) AS "61-90 days",
sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 90) AND (ws_ship_date_sk - ws_sold_date_sk <= 120) THEN 1 ELSE 0 END) AS "91-120 days",
sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 120) THEN 1 ELSE 0 END) AS ">120 days"
FROM web_sales, warehouse, ship_mode, web_site, date_dim
WHERE (d_month_seq BETWEEN 1200 AND 1200 + 11)
AND (ws_ship_date_sk = d_date_sk)
AND (ws_warehouse_sk = w_warehouse_sk)
AND (ws_ship_mode_sk = sm_ship_mode_sk)
AND (ws_web_site_sk = web_site_sk)
GROUP BY
substr(w_warehouse_name, 1, 20),
sm_type,
web_name
ORDER BY
substr(w_warehouse_name, 1, 20),
sm_type,
web_name
LIMIT 100;
Q63
SELECT *
FROM
(
SELECT
i_manager_id,
sum(ss_sales_price) AS sum_sales,
avg(sum(ss_sales_price)) OVER (PARTITION BY i_manager_id) AS avg_monthly_sales
FROM item, store_sales, date_dim, store
WHERE (ss_item_sk = i_item_sk)
AND (ss_sold_date_sk = d_date_sk)
AND (ss_store_sk = s_store_sk)
AND (d_month_seq IN (1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6, 1200 + 7, 1200 + 8, 1200 + 9, 1200 + 10, 1200 + 11))
AND (
(
(i_category IN ('Books', 'Children', 'Electronics'))
AND (i_class IN ('personal', 'portable', 'reference', 'self-help'))
AND (i_brand IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9'))
)
OR (
(i_category IN ('Women', 'Music', 'Men'))
AND (i_class IN ('accessories', 'classical', 'fragrances', 'pants'))
AND (i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))
)
)
GROUP BY i_manager_id, d_moy
) AS tmp1
WHERE (CASE WHEN avg_monthly_sales > 0 THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales ELSE NULL END > 0.1)
ORDER BY
i_manager_id,
avg_monthly_sales,
sum_sales
LIMIT 100;
Q64
WITH
cs_ui AS
(
SELECT
cs_item_sk,
sum(cs_ext_list_price) AS sale,
sum((cr_refunded_cash + cr_reversed_charge) + cr_store_credit) AS refund
FROM catalog_sales, catalog_returns
WHERE (cs_item_sk = cr_item_sk) AND (cs_order_number = cr_order_number)
GROUP BY cs_item_sk
HAVING sum(cs_ext_list_price) > (2 * sum((cr_refunded_cash + cr_reversed_charge) + cr_store_credit))
),
cross_sales AS
(
SELECT
i_product_name AS product_name,
i_item_sk AS item_sk,
s_store_name AS store_name,
s_zip AS store_zip,
ad1.ca_street_number AS b_street_number,
ad1.ca_street_name AS b_street_name,
ad1.ca_city AS b_city,
ad1.ca_zip AS b_zip,
ad2.ca_street_number AS c_street_number,
ad2.ca_street_name AS c_street_name,
ad2.ca_city AS c_city,
ad2.ca_zip AS c_zip,
d1.d_year AS syear,
d2.d_year AS fsyear,
d3.d_year AS s2year,
count(*) AS cnt,
sum(ss_wholesale_cost) AS s1,
sum(ss_list_price) AS s2,
sum(ss_coupon_amt) AS s3
FROM store_sales, store_returns, cs_ui, date_dim AS d1, date_dim AS d2, date_dim AS d3, store, customer, customer_demographics AS cd1, customer_demographics AS cd2, promotion, household_demographics AS hd1, household_demographics AS hd2, customer_address AS ad1, customer_address AS ad2, income_band AS ib1, income_band AS ib2, item
WHERE (ss_store_sk = s_store_sk) AND (ss_sold_date_sk = d1.d_date_sk) AND (ss_customer_sk = c_customer_sk) AND (ss_cdemo_sk = cd1.cd_demo_sk) AND (ss_hdemo_sk = hd1.hd_demo_sk) AND (ss_addr_sk = ad1.ca_address_sk) AND (ss_item_sk = i_item_sk) AND (ss_item_sk = sr_item_sk) AND (ss_ticket_number = sr_ticket_number) AND (ss_item_sk = cs_ui.cs_item_sk) AND (c_current_cdemo_sk = cd2.cd_demo_sk) AND (c_current_hdemo_sk = hd2.hd_demo_sk) AND (c_current_addr_sk = ad2.ca_address_sk) AND (c_first_sales_date_sk = d2.d_date_sk) AND (c_first_shipto_date_sk = d3.d_date_sk) AND (ss_promo_sk = p_promo_sk) AND (hd1.hd_income_band_sk = ib1.ib_income_band_sk) AND (hd2.hd_income_band_sk = ib2.ib_income_band_sk) AND (cd1.cd_marital_status != cd2.cd_marital_status) AND (i_color IN ('purple', 'burlywood', 'indian', 'spring', 'floral', 'medium')) AND ((i_current_price >= 64) AND (i_current_price <= (64 + 10))) AND ((i_current_price >= (64 + 1)) AND (i_current_price <= (64 + 15)))
GROUP BY
i_product_name,
i_item_sk,
s_store_name,
s_zip,
ad1.ca_street_number,
ad1.ca_street_name,
ad1.ca_city,
ad1.ca_zip,
ad2.ca_street_number,
ad2.ca_street_name,
ad2.ca_city,
ad2.ca_zip,
d1.d_year,
d2.d_year,
d3.d_year
)
SELECT
cs1.product_name,
cs1.store_name,
cs1.store_zip,
cs1.b_street_number,
cs1.b_street_name,
cs1.b_city,
cs1.b_zip,
cs1.c_street_number,
cs1.c_street_name,
cs1.c_city,
cs1.c_zip,
cs1.syear,
cs1.cnt,
cs1.s1 AS s11,
cs1.s2 AS s21,
cs1.s3 AS s31,
cs2.s1 AS s12,
cs2.s2 AS s22,
cs2.s3 AS s32,
cs2.syear,
cs2.cnt
FROM cross_sales AS cs1, cross_sales AS cs2
WHERE (cs1.item_sk = cs2.item_sk) AND (cs1.syear = 1999) AND (cs2.syear = (1999 + 1)) AND (cs2.cnt <= cs1.cnt) AND (cs1.store_name = cs2.store_name) AND (cs1.store_zip = cs2.store_zip)
ORDER BY
cs1.product_name,
cs1.store_name,
cs2.cnt,
cs1.s1,
cs2.s1;
Q65
SELECT
s_store_name,
i_item_desc,
sc.revenue,
i_current_price,
i_wholesale_cost,
i_brand
FROM store, item,
(
SELECT
ss_store_sk,
avg(revenue) AS ave
FROM
(
SELECT
ss_store_sk,
ss_item_sk,
sum(ss_sales_price) AS revenue
FROM store_sales, date_dim
WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1176 AND 1176+11
GROUP BY
ss_store_sk,
ss_item_sk
) AS sa
GROUP BY ss_store_sk
) AS sb,
(
SELECT
ss_store_sk,
ss_item_sk,
sum(ss_sales_price) AS revenue
FROM store_sales, date_dim
WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1176 AND 1176+11
GROUP BY
ss_store_sk,
ss_item_sk
) AS sc
WHERE (sb.ss_store_sk = sc.ss_store_sk) AND (sc.revenue <= (0.1 * sb.ave)) AND (s_store_sk = sc.ss_store_sk) AND (i_item_sk = sc.ss_item_sk)
ORDER BY
s_store_name,
i_item_desc
LIMIT 100;
Q66
SELECT
w_warehouse_name,
w_warehouse_sq_ft,
w_city,
w_county,
w_state,
w_country,
ship_carriers,
year,
sum(x.jan_sales) AS jan_sales,
sum(x.feb_sales) AS feb_sales,
sum(x.mar_sales) AS mar_sales,
sum(x.apr_sales) AS apr_sales,
sum(x.may_sales) AS may_sales,
sum(x.jun_sales) AS jun_sales,
sum(x.jul_sales) AS jul_sales,
sum(x.aug_sales) AS aug_sales,
sum(x.sep_sales) AS sep_sales,
sum(x.oct_sales) AS oct_sales,
sum(x.nov_sales) AS nov_sales,
sum(x.dec_sales) AS dec_sales,
sum(CAST(x.jan_sales AS Float64) / w_warehouse_sq_ft) AS jan_sales_per_sq_foot,
sum(CAST(x.feb_sales AS Float64) / w_warehouse_sq_ft) AS feb_sales_per_sq_foot,
sum(CAST(x.mar_sales AS Float64) / w_warehouse_sq_ft) AS mar_sales_per_sq_foot,
sum(CAST(x.apr_sales AS Float64) / w_warehouse_sq_ft) AS apr_sales_per_sq_foot,
sum(CAST(x.may_sales AS Float64) / w_warehouse_sq_ft) AS may_sales_per_sq_foot,
sum(CAST(x.jun_sales AS Float64) / w_warehouse_sq_ft) AS jun_sales_per_sq_foot,
sum(CAST(x.jul_sales AS Float64) / w_warehouse_sq_ft) AS jul_sales_per_sq_foot,
sum(CAST(x.aug_sales AS Float64) / w_warehouse_sq_ft) AS aug_sales_per_sq_foot,
sum(CAST(x.sep_sales AS Float64) / w_warehouse_sq_ft) AS sep_sales_per_sq_foot,
sum(CAST(x.oct_sales AS Float64) / w_warehouse_sq_ft) AS oct_sales_per_sq_foot,
sum(CAST(x.nov_sales AS Float64) / w_warehouse_sq_ft) AS nov_sales_per_sq_foot,
sum(CAST(x.dec_sales AS Float64) / w_warehouse_sq_ft) AS dec_sales_per_sq_foot,
sum(x.jan_net) AS jan_net,
sum(x.feb_net) AS feb_net,
sum(x.mar_net) AS mar_net,
sum(x.apr_net) AS apr_net,
sum(x.may_net) AS may_net,
sum(x.jun_net) AS jun_net,
sum(x.jul_net) AS jul_net,
sum(x.aug_net) AS aug_net,
sum(x.sep_net) AS sep_net,
sum(x.oct_net) AS oct_net,
sum(x.nov_net) AS nov_net,
sum(x.dec_net) AS dec_net
FROM
(
SELECT
w_warehouse_name,
w_warehouse_sq_ft,
w_city,
w_county,
w_state,
w_country,
concat('DHL', ',', 'BARIAN') AS ship_carriers,
d_year AS year,
sum(multiIf(d_moy = 1, ws_ext_sales_price * ws_quantity, 0)) AS jan_sales,
sum(multiIf(d_moy = 2, ws_ext_sales_price * ws_quantity, 0)) AS feb_sales,
sum(multiIf(d_moy = 3, ws_ext_sales_price * ws_quantity, 0)) AS mar_sales,
sum(multiIf(d_moy = 4, ws_ext_sales_price * ws_quantity, 0)) AS apr_sales,
sum(multiIf(d_moy = 5, ws_ext_sales_price * ws_quantity, 0)) AS may_sales,
sum(multiIf(d_moy = 6, ws_ext_sales_price * ws_quantity, 0)) AS jun_sales,
sum(multiIf(d_moy = 7, ws_ext_sales_price * ws_quantity, 0)) AS jul_sales,
sum(multiIf(d_moy = 8, ws_ext_sales_price * ws_quantity, 0)) AS aug_sales,
sum(multiIf(d_moy = 9, ws_ext_sales_price * ws_quantity, 0)) AS sep_sales,
sum(multiIf(d_moy = 10, ws_ext_sales_price * ws_quantity, 0)) AS oct_sales,
sum(multiIf(d_moy = 11, ws_ext_sales_price * ws_quantity, 0)) AS nov_sales,
sum(multiIf(d_moy = 12, ws_ext_sales_price * ws_quantity, 0)) AS dec_sales,
sum(multiIf(d_moy = 1, ws_net_paid * ws_quantity, 0)) AS jan_net,
sum(multiIf(d_moy = 2, ws_net_paid * ws_quantity, 0)) AS feb_net,
sum(multiIf(d_moy = 3, ws_net_paid * ws_quantity, 0)) AS mar_net,
sum(multiIf(d_moy = 4, ws_net_paid * ws_quantity, 0)) AS apr_net,
sum(multiIf(d_moy = 5, ws_net_paid * ws_quantity, 0)) AS may_net,
sum(multiIf(d_moy = 6, ws_net_paid * ws_quantity, 0)) AS jun_net,
sum(multiIf(d_moy = 7, ws_net_paid * ws_quantity, 0)) AS jul_net,
sum(multiIf(d_moy = 8, ws_net_paid * ws_quantity, 0)) AS aug_net,
sum(multiIf(d_moy = 9, ws_net_paid * ws_quantity, 0)) AS sep_net,
sum(multiIf(d_moy = 10, ws_net_paid * ws_quantity, 0)) AS oct_net,
sum(multiIf(d_moy = 11, ws_net_paid * ws_quantity, 0)) AS nov_net,
sum(multiIf(d_moy = 12, ws_net_paid * ws_quantity, 0)) AS dec_net
FROM web_sales, warehouse, date_dim, time_dim, ship_mode
WHERE (ws_warehouse_sk = w_warehouse_sk) AND (ws_sold_date_sk = d_date_sk) AND (ws_sold_time_sk = t_time_sk) AND (ws_ship_mode_sk = sm_ship_mode_sk) AND (d_year = 2001) AND (t_time BETWEEN 30838 AND 30838 + 28800) AND (sm_carrier IN ('DHL', 'BARIAN'))
GROUP BY
w_warehouse_name,
w_warehouse_sq_ft,
w_city,
w_county,
w_state,
w_country,
d_year
UNION ALL
SELECT
w_warehouse_name,
w_warehouse_sq_ft,
w_city,
w_county,
w_state,
w_country,
concat('DHL', ',', 'BARIAN') AS ship_carriers,
d_year AS year,
sum(multiIf(d_moy = 1, cs_sales_price * cs_quantity, 0)) AS jan_sales,
sum(multiIf(d_moy = 2, cs_sales_price * cs_quantity, 0)) AS feb_sales,
sum(multiIf(d_moy = 3, cs_sales_price * cs_quantity, 0)) AS mar_sales,
sum(multiIf(d_moy = 4, cs_sales_price * cs_quantity, 0)) AS apr_sales,
sum(multiIf(d_moy = 5, cs_sales_price * cs_quantity, 0)) AS may_sales,
sum(multiIf(d_moy = 6, cs_sales_price * cs_quantity, 0)) AS jun_sales,
sum(multiIf(d_moy = 7, cs_sales_price * cs_quantity, 0)) AS jul_sales,
sum(multiIf(d_moy = 8, cs_sales_price * cs_quantity, 0)) AS aug_sales,
sum(multiIf(d_moy = 9, cs_sales_price * cs_quantity, 0)) AS sep_sales,
sum(multiIf(d_moy = 10, cs_sales_price * cs_quantity, 0)) AS oct_sales,
sum(multiIf(d_moy = 11, cs_sales_price * cs_quantity, 0)) AS nov_sales,
sum(multiIf(d_moy = 12, cs_sales_price * cs_quantity, 0)) AS dec_sales,
sum(multiIf(d_moy = 1, cs_net_paid_inc_tax * cs_quantity, 0)) AS jan_net,
sum(multiIf(d_moy = 2, cs_net_paid_inc_tax * cs_quantity, 0)) AS feb_net,
sum(multiIf(d_moy = 3, cs_net_paid_inc_tax * cs_quantity, 0)) AS mar_net,
sum(multiIf(d_moy = 4, cs_net_paid_inc_tax * cs_quantity, 0)) AS apr_net,
sum(multiIf(d_moy = 5, cs_net_paid_inc_tax * cs_quantity, 0)) AS may_net,
sum(multiIf(d_moy = 6, cs_net_paid_inc_tax * cs_quantity, 0)) AS jun_net,
sum(multiIf(d_moy = 7, cs_net_paid_inc_tax * cs_quantity, 0)) AS jul_net,
sum(multiIf(d_moy = 8, cs_net_paid_inc_tax * cs_quantity, 0)) AS aug_net,
sum(multiIf(d_moy = 9, cs_net_paid_inc_tax * cs_quantity, 0)) AS sep_net,
sum(multiIf(d_moy = 10, cs_net_paid_inc_tax * cs_quantity, 0)) AS oct_net,
sum(multiIf(d_moy = 11, cs_net_paid_inc_tax * cs_quantity, 0)) AS nov_net,
sum(multiIf(d_moy = 12, cs_net_paid_inc_tax * cs_quantity, 0)) AS dec_net
FROM catalog_sales, warehouse, date_dim, time_dim, ship_mode
WHERE (cs_warehouse_sk = w_warehouse_sk) AND (cs_sold_date_sk = d_date_sk) AND (cs_sold_time_sk = t_time_sk) AND (cs_ship_mode_sk = sm_ship_mode_sk) AND (d_year = 2001) AND ((t_time >= 30838) AND (t_time <= (30838 + 28800))) AND (sm_carrier IN ('DHL', 'BARIAN'))
GROUP BY
w_warehouse_name,
w_warehouse_sq_ft,
w_city,
w_county,
w_state,
w_country,
d_year
) AS x
GROUP BY
w_warehouse_name,
w_warehouse_sq_ft,
w_city,
w_county,
w_state,
w_country,
ship_carriers,
year
ORDER BY w_warehouse_name
LIMIT 100;
Q67
SELECT *
FROM
(
SELECT
i_category,
i_class,
i_brand,
i_product_name,
d_year,
d_qoy,
d_moy,
s_store_id,
sumsales,
rank() OVER (PARTITION BY i_category ORDER BY sumsales DESC) AS rk
FROM
(
SELECT
i_category,
i_class,
i_brand,
i_product_name,
d_year,
d_qoy,
d_moy,
s_store_id,
sum(coalesce(ss_sales_price * ss_quantity, 0)) AS sumsales
FROM store_sales, date_dim, store, item
WHERE (ss_sold_date_sk = d_date_sk)
AND (ss_item_sk = i_item_sk)
AND (ss_store_sk = s_store_sk)
AND (d_month_seq BETWEEN 1200 AND 1200 + 11)
GROUP BY
i_category,
i_class,
i_brand,
i_product_name,
d_year,
d_qoy,
d_moy,
s_store_id
WITH ROLLUP
) AS dw1
) AS dw2
WHERE (rk <= 100)
ORDER BY
i_category,
i_class,
i_brand,
i_product_name,
d_year,
d_qoy,
d_moy,
s_store_id,
sumsales,
rk
LIMIT 100;
Q68
SELECT
c_last_name,
c_first_name,
ca_city,
bought_city,
ss_ticket_number,
extended_price,
extended_tax,
list_price
FROM
(
SELECT
ss_ticket_number,
ss_customer_sk,
ca_city AS bought_city,
sum(ss_ext_sales_price) AS extended_price,
sum(ss_ext_list_price) AS list_price,
sum(ss_ext_tax) AS extended_tax
FROM store_sales, date_dim, store, household_demographics, customer_address
WHERE (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
AND (store_sales.ss_store_sk = store.s_store_sk)
AND (store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)
AND (store_sales.ss_addr_sk = customer_address.ca_address_sk)
AND ((date_dim.d_dom >= 1) AND (date_dim.d_dom <= 2))
AND ((household_demographics.hd_dep_count = 4) OR (household_demographics.hd_vehicle_count = 3))
AND (date_dim.d_year IN (1999, 1999 + 1, 1999 + 2))
AND (store.s_city IN ('Midway', 'Fairview'))
GROUP BY
ss_ticket_number,
ss_customer_sk,
ss_addr_sk,
ca_city
) AS dn, customer, customer_address AS current_addr
WHERE (ss_customer_sk = c_customer_sk)
AND (customer.c_current_addr_sk = current_addr.ca_address_sk)
AND (current_addr.ca_city != bought_city)
ORDER BY
c_last_name,
ss_ticket_number
LIMIT 100;
Q69
SELECT
cd_gender,
cd_marital_status,
cd_education_status,
count(*) AS cnt1,
cd_purchase_estimate,
count(*) AS cnt2,
cd_credit_rating,
count(*) AS cnt3
FROM customer AS c, customer_address AS ca, customer_demographics
WHERE (c.c_current_addr_sk = ca.ca_address_sk)
AND (ca_state IN ('KY', 'GA', 'NM'))
AND (cd_demo_sk = c.c_current_cdemo_sk)
AND EXISTS (
SELECT *
FROM store_sales, date_dim
WHERE (c.c_customer_sk = ss_customer_sk)
AND (ss_sold_date_sk = d_date_sk)
AND (d_year = 2001)
AND (d_moy BETWEEN 4 AND 4 + 2)
)
AND (
NOT EXISTS (
SELECT *
FROM web_sales, date_dim
WHERE (c.c_customer_sk = ws_bill_customer_sk)
AND (ws_sold_date_sk = d_date_sk)
AND (d_year = 2001)
AND (d_moy BETWEEN 4 AND 4 + 2)
)
AND NOT EXISTS (
SELECT *
FROM catalog_sales, date_dim
WHERE (c.c_customer_sk = cs_ship_customer_sk)
AND (cs_sold_date_sk = d_date_sk)
AND (d_year = 2001)
AND (d_moy BETWEEN 4 AND 4 + 2)
)
)
GROUP BY
cd_gender,
cd_marital_status,
cd_education_status,
cd_purchase_estimate,
cd_credit_rating
ORDER BY
cd_gender,
cd_marital_status,
cd_education_status,
cd_purchase_estimate,
cd_credit_rating
LIMIT 100;
Q70
SELECT
sum(ss_net_profit) AS total_sum,
s_state,
s_county,
grouping(s_state) + grouping(s_county) AS lochierarchy,
rank() OVER (PARTITION BY grouping(s_state) + grouping(s_county), multiIf(grouping(s_county) = 0, s_state, NULL) ORDER BY sum(ss_net_profit) DESC) AS rank_within_parent
FROM store_sales, date_dim AS d1, store
WHERE ((d1.d_month_seq >= 1200) AND (d1.d_month_seq <= (1200 + 11)))
AND (d1.d_date_sk = ss_sold_date_sk)
AND (s_store_sk = ss_store_sk)
AND (s_state IN (
SELECT s_state
FROM
(
SELECT
s_state AS s_state,
rank() OVER (PARTITION BY s_state ORDER BY sum(ss_net_profit) DESC) AS ranking
FROM store_sales, store, date_dim
WHERE ((d_month_seq >= 1200) AND (d_month_seq <= (1200 + 11)))
AND (d_date_sk = ss_sold_date_sk)
AND (s_store_sk = ss_store_sk)
GROUP BY s_state
) AS tmp1
WHERE (ranking <= 5)
))
GROUP BY
s_state,
s_county
WITH ROLLUP
ORDER BY
lochierarchy DESC,
multiIf(lochierarchy = 0, s_state, NULL),
rank_within_parent
LIMIT 100;
Q71
SELECT
i_brand_id AS brand_id,
i_brand AS brand,
t_hour,
t_minute,
sum(ext_price) AS ext_price
FROM item,
(
SELECT
ws_ext_sales_price AS ext_price,
ws_sold_date_sk AS sold_date_sk,
ws_item_sk AS sold_item_sk,
ws_sold_time_sk AS time_sk
FROM web_sales, date_dim
WHERE (d_date_sk = ws_sold_date_sk)
AND (d_moy = 11)
AND (d_year = 1999)
UNION ALL
SELECT
cs_ext_sales_price AS ext_price,
cs_sold_date_sk AS sold_date_sk,
cs_item_sk AS sold_item_sk,
cs_sold_time_sk AS time_sk
FROM catalog_sales, date_dim
WHERE (d_date_sk = cs_sold_date_sk)
AND (d_moy = 11)
AND (d_year = 1999)
UNION ALL
SELECT
ss_ext_sales_price AS ext_price,
ss_sold_date_sk AS sold_date_sk,
ss_item_sk AS sold_item_sk,
ss_sold_time_sk AS time_sk
FROM store_sales, date_dim
WHERE (d_date_sk = ss_sold_date_sk)
AND (d_moy = 11)
AND (d_year = 1999)
) AS tmp, time_dim
WHERE (sold_item_sk = i_item_sk)
AND (i_manager_id = 1)
AND (time_sk = t_time_sk)
AND ((t_meal_time = 'breakfast') OR (t_meal_time = 'dinner'))
GROUP BY
i_brand,
i_brand_id,
t_hour,
t_minute
ORDER BY
ext_price DESC,
i_brand_id;
Q72
SELECT
i_item_desc,
w_warehouse_name,
d1.d_week_seq,
sum(multiIf(p_promo_sk IS NULL, 1, 0)) AS no_promo,
sum(multiIf(p_promo_sk IS NOT NULL, 1, 0)) AS promo,
count(*) AS total_cnt
FROM catalog_sales
INNER JOIN inventory ON cs_item_sk = inv_item_sk
INNER JOIN warehouse ON w_warehouse_sk = inv_warehouse_sk
INNER JOIN item ON i_item_sk = cs_item_sk
INNER JOIN customer_demographics ON cs_bill_cdemo_sk = cd_demo_sk
INNER JOIN household_demographics ON cs_bill_hdemo_sk = hd_demo_sk
INNER JOIN date_dim AS d1 ON cs_sold_date_sk = d1.d_date_sk
INNER JOIN date_dim AS d2 ON inv_date_sk = d2.d_date_sk
INNER JOIN date_dim AS d3 ON cs_ship_date_sk = d3.d_date_sk
LEFT JOIN promotion ON cs_promo_sk = p_promo_sk
LEFT JOIN catalog_returns ON (cr_item_sk = cs_item_sk) AND (cr_order_number = cs_order_number)
WHERE (d1.d_week_seq = d2.d_week_seq)
AND (inv_quantity_on_hand < cs_quantity)
AND (d3.d_date > (d1.d_date + 5))
AND (hd_buy_potential = '>10000')
AND (d1.d_year = 1999)
AND (cd_marital_status = 'D')
GROUP BY
i_item_desc,
w_warehouse_name,
d_week_seq
ORDER BY
total_cnt DESC,
i_item_desc,
w_warehouse_name,
d_week_seq
LIMIT 100;
Q73
SELECT
c_last_name,
c_first_name,
c_salutation,
c_preferred_cust_flag,
ss_ticket_number,
cnt
FROM
(
SELECT
ss_ticket_number,
ss_customer_sk,
count(*) AS cnt
FROM store_sales, date_dim, store, household_demographics
WHERE (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
AND (store_sales.ss_store_sk = store.s_store_sk)
AND (store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)
AND ((date_dim.d_dom >= 1) AND (date_dim.d_dom <= 2))
AND ((household_demographics.hd_buy_potential = '>10000') OR (household_demographics.hd_buy_potential = 'Unknown'))
AND (household_demographics.hd_vehicle_count > 0)
AND (multiIf(household_demographics.hd_vehicle_count > 0, household_demographics.hd_dep_count / household_demographics.hd_vehicle_count, NULL) > 1)
AND (date_dim.d_year IN (1999, 1999 + 1, 1999 + 2))
AND (store.s_county IN ('Williamson County', 'Franklin Parish', 'Bronx County', 'Orange County'))
GROUP BY
ss_ticket_number,
ss_customer_sk
) AS dj, customer
WHERE (ss_customer_sk = c_customer_sk)
AND ((cnt >= 1) AND (cnt <= 5))
ORDER BY
cnt DESC,
c_last_name;
Q74
WITH
year_total AS
(
SELECT
c_customer_id AS customer_id,
c_first_name AS customer_first_name,
c_last_name AS customer_last_name,
d_year AS year,
sum(ss_net_paid) AS year_total,
's' AS sale_type
FROM customer, store_sales, date_dim
WHERE (c_customer_sk = ss_customer_sk)
AND (ss_sold_date_sk = d_date_sk)
AND (d_year IN (2001, 2001 + 1))
GROUP BY
c_customer_id,
c_first_name,
c_last_name,
d_year
UNION ALL
SELECT
c_customer_id AS customer_id,
c_first_name AS customer_first_name,
c_last_name AS customer_last_name,
d_year AS year,
sum(ws_net_paid) AS year_total,
'w' AS sale_type
FROM customer, web_sales, date_dim
WHERE (c_customer_sk = ws_bill_customer_sk)
AND (ws_sold_date_sk = d_date_sk)
AND (d_year IN (2001, 2001 + 1))
GROUP BY
c_customer_id,
c_first_name,
c_last_name,
d_year
)
SELECT
t_s_secyear.customer_id,
t_s_secyear.customer_first_name,
t_s_secyear.customer_last_name
FROM year_total AS t_s_firstyear, year_total AS t_s_secyear, year_total AS t_w_firstyear, year_total AS t_w_secyear
WHERE (t_s_secyear.customer_id = t_s_firstyear.customer_id)
AND (t_s_firstyear.customer_id = t_w_secyear.customer_id)
AND (t_s_firstyear.customer_id = t_w_firstyear.customer_id)
AND (t_s_firstyear.sale_type = 's')
AND (t_w_firstyear.sale_type = 'w')
AND (t_s_secyear.sale_type = 's')
AND (t_w_secyear.sale_type = 'w')
AND (t_s_firstyear.year = 2001)
AND (t_s_secyear.year = (2001 + 1))
AND (t_w_firstyear.year = 2001)
AND (t_w_secyear.year = (2001 + 1))
AND (t_s_firstyear.year_total > 0)
AND (t_w_firstyear.year_total > 0)
AND (multiIf(t_w_firstyear.year_total > 0, t_w_secyear.year_total / t_w_firstyear.year_total, NULL) > multiIf(t_s_firstyear.year_total > 0, t_s_secyear.year_total / t_s_firstyear.year_total, NULL))
ORDER BY 1, 2, 3
LIMIT 100;
Q75
WITH
all_sales AS
(
SELECT
d_year,
i_brand_id,
i_class_id,
i_category_id,
i_manufact_id,
SUM(sales_cnt) AS sales_cnt,
SUM(sales_amt) AS sales_amt
FROM
(
SELECT
d_year,
i_brand_id,
i_class_id,
i_category_id,
i_manufact_id,
cs_quantity - COALESCE(cr_return_quantity, 0) AS sales_cnt,
cs_ext_sales_price - COALESCE(cr_return_amount, 0.) AS sales_amt
FROM catalog_sales
INNER JOIN item ON i_item_sk = cs_item_sk
INNER JOIN date_dim ON d_date_sk = cs_sold_date_sk
LEFT JOIN catalog_returns ON (cs_order_number = cr_order_number) AND (cs_item_sk = cr_item_sk)
WHERE (i_category = 'Books')
UNION
SELECT
d_year,
i_brand_id,
i_class_id,
i_category_id,
i_manufact_id,
ss_quantity - COALESCE(sr_return_quantity, 0) AS sales_cnt,
ss_ext_sales_price - COALESCE(sr_return_amt, 0.) AS sales_amt
FROM store_sales
INNER JOIN item ON i_item_sk = ss_item_sk
INNER JOIN date_dim ON d_date_sk = ss_sold_date_sk
LEFT JOIN store_returns ON (ss_ticket_number = sr_ticket_number) AND (ss_item_sk = sr_item_sk)
WHERE (i_category = 'Books')
UNION
SELECT
d_year,
i_brand_id,
i_class_id,
i_category_id,
i_manufact_id,
ws_quantity - COALESCE(wr_return_quantity, 0) AS sales_cnt,
ws_ext_sales_price - COALESCE(wr_return_amt, 0.) AS sales_amt
FROM web_sales
INNER JOIN item ON i_item_sk = ws_item_sk
INNER JOIN date_dim ON d_date_sk = ws_sold_date_sk
LEFT JOIN web_returns ON (ws_order_number = wr_order_number) AND (ws_item_sk = wr_item_sk)
WHERE (i_category = 'Books')
) AS sales_detail
GROUP BY
d_year,
i_brand_id,
i_class_id,
i_category_id,
i_manufact_id
)
SELECT
prev_yr.d_year AS prev_year,
curr_yr.d_year AS year,
curr_yr.i_brand_id,
curr_yr.i_class_id,
curr_yr.i_category_id,
curr_yr.i_manufact_id,
prev_yr.sales_cnt AS prev_yr_cnt,
curr_yr.sales_cnt AS curr_yr_cnt,
curr_yr.sales_cnt - prev_yr.sales_cnt AS sales_cnt_diff,
curr_yr.sales_amt - prev_yr.sales_amt AS sales_amt_diff
FROM all_sales AS curr_yr, all_sales AS prev_yr
WHERE (curr_yr.i_brand_id = prev_yr.i_brand_id)
AND (curr_yr.i_class_id = prev_yr.i_class_id)
AND (curr_yr.i_category_id = prev_yr.i_category_id)
AND (curr_yr.i_manufact_id = prev_yr.i_manufact_id)
AND (curr_yr.d_year = 2002)
AND (prev_yr.d_year = (2002 - 1))
AND ((CAST(curr_yr.sales_cnt, 'DECIMAL(17, 2)') / CAST(prev_yr.sales_cnt, 'DECIMAL(17, 2)')) < 0.9)
ORDER BY
sales_cnt_diff,
sales_amt_diff
LIMIT 100;
The query does not work out-of-the-box due to https://github.com/ClickHouse/ClickHouse/issues/94671. This alternative formulation with a minor fix works:
WITH
all_sales AS
(
SELECT
d_year,
i_brand_id,
i_class_id,
i_category_id,
i_manufact_id,
SUM(sales_cnt) AS sales_cnt,
SUM(sales_amt) AS sales_amt
FROM
(
SELECT
d_year,
i_brand_id,
i_class_id,
i_category_id,
i_manufact_id,
cs_quantity - COALESCE(cr_return_quantity, 0) AS sales_cnt,
cs_ext_sales_price - COALESCE(cr_return_amount, CAST('0.0', 'Decimal(7, 2)')) AS sales_amt
FROM catalog_sales
INNER JOIN item ON i_item_sk = cs_item_sk
INNER JOIN date_dim ON d_date_sk = cs_sold_date_sk
LEFT JOIN catalog_returns ON (cs_order_number = cr_order_number) AND (cs_item_sk = cr_item_sk)
WHERE (i_category = 'Books')
UNION
SELECT
d_year,
i_brand_id,
i_class_id,
i_category_id,
i_manufact_id,
ss_quantity - COALESCE(sr_return_quantity, 0) AS sales_cnt,
ss_ext_sales_price - COALESCE(sr_return_amt, CAST('0.0', 'Decimal(7, 2)')) AS sales_amt
FROM store_sales
INNER JOIN item ON i_item_sk = ss_item_sk
INNER JOIN date_dim ON d_date_sk = ss_sold_date_sk
LEFT JOIN store_returns ON (ss_ticket_number = sr_ticket_number) AND (ss_item_sk = sr_item_sk)
WHERE (i_category = 'Books')
UNION
SELECT
d_year,
i_brand_id,
i_class_id,
i_category_id,
i_manufact_id,
ws_quantity - COALESCE(wr_return_quantity, 0) AS sales_cnt,
ws_ext_sales_price - COALESCE(wr_return_amt, CAST('0.0', 'Decimal(7, 2)')) AS sales_amt
FROM web_sales
INNER JOIN item ON i_item_sk = ws_item_sk
INNER JOIN date_dim ON d_date_sk = ws_sold_date_sk
LEFT JOIN web_returns ON (ws_order_number = wr_order_number) AND (ws_item_sk = wr_item_sk)
WHERE (i_category = 'Books')
) AS sales_detail
GROUP BY
d_year,
i_brand_id,
i_class_id,
i_category_id,
i_manufact_id
)
SELECT
prev_yr.d_year AS prev_year,
curr_yr.d_year AS year,
curr_yr.i_brand_id,
curr_yr.i_class_id,
curr_yr.i_category_id,
curr_yr.i_manufact_id,
prev_yr.sales_cnt AS prev_yr_cnt,
curr_yr.sales_cnt AS curr_yr_cnt,
curr_yr.sales_cnt - prev_yr.sales_cnt AS sales_cnt_diff,
curr_yr.sales_amt - prev_yr.sales_amt AS sales_amt_diff
FROM all_sales AS curr_yr, all_sales AS prev_yr
WHERE (curr_yr.i_brand_id = prev_yr.i_brand_id)
AND (curr_yr.i_class_id = prev_yr.i_class_id)
AND (curr_yr.i_category_id = prev_yr.i_category_id)
AND (curr_yr.i_manufact_id = prev_yr.i_manufact_id)
AND (curr_yr.d_year = 2002)
AND (prev_yr.d_year = (2002 - 1))
AND ((CAST(curr_yr.sales_cnt, 'DECIMAL(17, 2)') / CAST(prev_yr.sales_cnt, 'DECIMAL(17, 2)')) < 0.9)
ORDER BY
sales_cnt_diff,
sales_amt_diff
LIMIT 100;
Q76
SELECT
channel,
col_name,
d_year,
d_qoy,
i_category,
COUNT(*) AS sales_cnt,
SUM(ext_sales_price) AS sales_amt
FROM
(
SELECT
'store' AS channel,
'ss_store_sk' AS col_name,
d_year,
d_qoy,
i_category,
ss_ext_sales_price AS ext_sales_price
FROM store_sales, item, date_dim
WHERE (ss_store_sk IS NULL)
AND (ss_sold_date_sk = d_date_sk)
AND (ss_item_sk = i_item_sk)
UNION ALL
SELECT
'web' AS channel,
'ws_ship_customer_sk' AS col_name,
d_year,
d_qoy,
i_category,
ws_ext_sales_price AS ext_sales_price
FROM web_sales, item, date_dim
WHERE (ws_ship_customer_sk IS NULL)
AND (ws_sold_date_sk = d_date_sk)
AND (ws_item_sk = i_item_sk)
UNION ALL
SELECT
'catalog' AS channel,
'cs_ship_addr_sk' AS col_name,
d_year,
d_qoy,
i_category,
cs_ext_sales_price AS ext_sales_price
FROM catalog_sales, item, date_dim
WHERE (cs_ship_addr_sk IS NULL)
AND (cs_sold_date_sk = d_date_sk)
AND (cs_item_sk = i_item_sk)
) AS foo
GROUP BY
channel,
col_name,
d_year,
d_qoy,
i_category
ORDER BY
channel,
col_name,
d_year,
d_qoy,
i_category
LIMIT 100;
Q77
WITH
ss AS
(
SELECT
s_store_sk,
sum(ss_ext_sales_price) AS sales,
sum(ss_net_profit) AS profit
FROM store_sales, date_dim, store
WHERE (ss_sold_date_sk = d_date_sk)
AND ((d_date >= CAST('2000-08-23', 'date')) AND (d_date <= (CAST('2000-08-23', 'date') + INTERVAL 30 DAY)))
AND (ss_store_sk = s_store_sk)
GROUP BY s_store_sk
),
sr AS
(
SELECT
s_store_sk,
sum(sr_return_amt) AS returns,
sum(sr_net_loss) AS profit_loss
FROM store_returns, date_dim, store
WHERE (sr_returned_date_sk = d_date_sk)
AND ((d_date >= CAST('2000-08-23', 'date')) AND (d_date <= (CAST('2000-08-23', 'date') + INTERVAL 30 DAY)))
AND (sr_store_sk = s_store_sk)
GROUP BY s_store_sk
),
cs AS
(
SELECT
cs_call_center_sk,
sum(cs_ext_sales_price) AS sales,
sum(cs_net_profit) AS profit
FROM catalog_sales, date_dim
WHERE (cs_sold_date_sk = d_date_sk)
AND ((d_date >= CAST('2000-08-23', 'date')) AND (d_date <= (CAST('2000-08-23', 'date') + INTERVAL 30 DAY)))
GROUP BY cs_call_center_sk
),
cr AS
(
SELECT
cr_call_center_sk,
sum(cr_return_amount) AS returns,
sum(cr_net_loss) AS profit_loss
FROM catalog_returns, date_dim
WHERE (cr_returned_date_sk = d_date_sk)
AND ((d_date >= CAST('2000-08-23', 'date')) AND (d_date <= (CAST('2000-08-23', 'date') + INTERVAL 30 DAY)))
GROUP BY cr_call_center_sk
),
ws AS
(
SELECT
wp_web_page_sk,
sum(ws_ext_sales_price) AS sales,
sum(ws_net_profit) AS profit
FROM web_sales, date_dim, web_page
WHERE (ws_sold_date_sk = d_date_sk)
AND ((d_date >= CAST('2000-08-23', 'date')) AND (d_date <= (CAST('2000-08-23', 'date') + INTERVAL 30 DAY)))
AND (ws_web_page_sk = wp_web_page_sk)
GROUP BY wp_web_page_sk
),
wr AS
(
SELECT
wp_web_page_sk,
sum(wr_return_amt) AS returns,
sum(wr_net_loss) AS profit_loss
FROM web_returns, date_dim, web_page
WHERE (wr_returned_date_sk = d_date_sk)
AND ((d_date >= CAST('2000-08-23', 'date')) AND (d_date <= (CAST('2000-08-23', 'date') + INTERVAL 30 DAY)))
AND (wr_web_page_sk = wp_web_page_sk)
GROUP BY wp_web_page_sk
)
SELECT
channel,
id,
sum(sales) AS sales,
sum(returns) AS returns,
sum(profit) AS profit
FROM
(
SELECT
'store channel' AS channel,
ss.s_store_sk AS id,
sales,
coalesce(returns, 0) AS returns,
profit - coalesce(profit_loss, 0) AS profit
FROM ss
LEFT JOIN sr ON ss.s_store_sk = sr.s_store_sk
UNION ALL
SELECT
'catalog channel' AS channel,
cs_call_center_sk AS id,
sales,
returns,
profit - profit_loss AS profit
FROM cs, cr
UNION ALL
SELECT
'web channel' AS channel,
ws.wp_web_page_sk AS id,
sales,
coalesce(returns, 0) AS returns,
profit - coalesce(profit_loss, 0) AS profit
FROM ws
LEFT JOIN wr ON ws.wp_web_page_sk = wr.wp_web_page_sk
) AS x
GROUP BY
channel,
id
WITH ROLLUP
ORDER BY
channel,
id
LIMIT 100;
Q78
WITH
ws AS
(
SELECT
d_year AS ws_sold_year,
ws_item_sk,
ws_bill_customer_sk AS ws_customer_sk,
sum(ws_quantity) AS ws_qty,
sum(ws_wholesale_cost) AS ws_wc,
sum(ws_sales_price) AS ws_sp
FROM web_sales
LEFT JOIN web_returns ON (wr_order_number = ws_order_number) AND (ws_item_sk = wr_item_sk)
INNER JOIN date_dim ON ws_sold_date_sk = d_date_sk
WHERE wr_order_number IS NULL
GROUP BY
d_year,
ws_item_sk,
ws_bill_customer_sk
),
cs AS
(
SELECT
d_year AS cs_sold_year,
cs_item_sk,
cs_bill_customer_sk AS cs_customer_sk,
sum(cs_quantity) AS cs_qty,
sum(cs_wholesale_cost) AS cs_wc,
sum(cs_sales_price) AS cs_sp
FROM catalog_sales
LEFT JOIN catalog_returns ON (cr_order_number = cs_order_number) AND (cs_item_sk = cr_item_sk)
INNER JOIN date_dim ON cs_sold_date_sk = d_date_sk
WHERE cr_order_number IS NULL
GROUP BY
d_year,
cs_item_sk,
cs_bill_customer_sk
),
ss AS
(
SELECT
d_year AS ss_sold_year,
ss_item_sk,
ss_customer_sk,
sum(ss_quantity) AS ss_qty,
sum(ss_wholesale_cost) AS ss_wc,
sum(ss_sales_price) AS ss_sp
FROM store_sales
LEFT JOIN store_returns ON (sr_ticket_number = ss_ticket_number) AND (ss_item_sk = sr_item_sk)
INNER JOIN date_dim ON ss_sold_date_sk = d_date_sk
WHERE sr_ticket_number IS NULL
GROUP BY
d_year,
ss_item_sk,
ss_customer_sk
)
SELECT
ss_sold_year,
ss_item_sk,
ss_customer_sk,
round(ss_qty / (coalesce(ws_qty, 0) + coalesce(cs_qty, 0)), 2) AS ratio,
ss_qty AS store_qty,
ss_wc AS store_wholesale_cost,
ss_sp AS store_sales_price,
coalesce(ws_qty, 0) + coalesce(cs_qty, 0) AS other_chan_qty,
coalesce(ws_wc, 0) + coalesce(cs_wc, 0) AS other_chan_wholesale_cost,
coalesce(ws_sp, 0) + coalesce(cs_sp, 0) AS other_chan_sales_price
FROM ss
LEFT JOIN ws ON (ws_sold_year = ss_sold_year) AND (ws_item_sk = ss_item_sk) AND (ws_customer_sk = ss_customer_sk)
LEFT JOIN cs ON (cs_sold_year = ss_sold_year) AND (cs_item_sk = ss_item_sk) AND (cs_customer_sk = ss_customer_sk)
WHERE ((coalesce(ws_qty, 0) > 0) OR (coalesce(cs_qty, 0) > 0)) AND (ss_sold_year = 2000)
ORDER BY
ss_sold_year,
ss_item_sk,
ss_customer_sk,
ss_qty DESC,
ss_wc DESC,
ss_sp DESC,
other_chan_qty,
other_chan_wholesale_cost,
other_chan_sales_price,
ratio
LIMIT 100;
Q79
SELECT
c_last_name,
c_first_name,
substr(s_city, 1, 30),
ss_ticket_number,
amt,
profit
FROM
(
SELECT
ss_ticket_number,
ss_customer_sk,
store.s_city,
sum(ss_coupon_amt) AS amt,
sum(ss_net_profit) AS profit
FROM store_sales, date_dim, store, household_demographics
WHERE (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
AND (store_sales.ss_store_sk = store.s_store_sk)
AND (store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)
AND ((household_demographics.hd_dep_count = 6) OR (household_demographics.hd_vehicle_count > 2))
AND (date_dim.d_dow = 1)
AND (date_dim.d_year IN (1999, 1999 + 1, 1999 + 2))
AND ((store.s_number_employees >= 200) AND (store.s_number_employees <= 295))
GROUP BY
ss_ticket_number,
ss_customer_sk,
ss_addr_sk,
store.s_city
) AS ms, customer
WHERE (ss_customer_sk = c_customer_sk)
ORDER BY
c_last_name,
c_first_name,
substr(s_city, 1, 30),
profit
LIMIT 100;
Q80
WITH
ssr AS
(
SELECT
s_store_id AS store_id,
sum(ss_ext_sales_price) AS sales,
sum(coalesce(sr_return_amt, 0)) AS returns,
sum(ss_net_profit - coalesce(sr_net_loss, 0)) AS profit
FROM store_sales
LEFT JOIN store_returns ON (ss_item_sk = sr_item_sk) AND (ss_ticket_number = sr_ticket_number), date_dim, store, item, promotion
WHERE (ss_sold_date_sk = d_date_sk)
AND ((d_date >= CAST('2000-08-23', 'date')) AND (d_date <= (CAST('2000-08-23', 'date') + INTERVAL 30 DAY)))
AND (ss_store_sk = s_store_sk)
AND (ss_item_sk = i_item_sk)
AND (i_current_price > 50)
AND (ss_promo_sk = p_promo_sk)
AND (p_channel_tv = 'N')
GROUP BY s_store_id
),
csr AS
(
SELECT
cp_catalog_page_id AS catalog_page_id,
sum(cs_ext_sales_price) AS sales,
sum(coalesce(cr_return_amount, 0)) AS returns,
sum(cs_net_profit - coalesce(cr_net_loss, 0)) AS profit
FROM catalog_sales
LEFT JOIN catalog_returns ON (cs_item_sk = cr_item_sk) AND (cs_order_number = cr_order_number), date_dim, catalog_page, item, promotion
WHERE (cs_sold_date_sk = d_date_sk)
AND ((d_date >= CAST('2000-08-23', 'date')) AND (d_date <= (CAST('2000-08-23', 'date') + INTERVAL 30 DAY)))
AND (cs_catalog_page_sk = cp_catalog_page_sk)
AND (cs_item_sk = i_item_sk)
AND (i_current_price > 50)
AND (cs_promo_sk = p_promo_sk)
AND (p_channel_tv = 'N')
GROUP BY cp_catalog_page_id
),
wsr AS
(
SELECT
web_site_id,
sum(ws_ext_sales_price) AS sales,
sum(coalesce(wr_return_amt, 0)) AS returns,
sum(ws_net_profit - coalesce(wr_net_loss, 0)) AS profit
FROM web_sales
LEFT JOIN web_returns ON (ws_item_sk = wr_item_sk) AND (ws_order_number = wr_order_number), date_dim, web_site, item, promotion
WHERE (ws_sold_date_sk = d_date_sk)
AND ((d_date >= CAST('2000-08-23', 'date')) AND (d_date <= (CAST('2000-08-23', 'date') + INTERVAL 30 DAY)))
AND (ws_web_site_sk = web_site_sk)
AND (ws_item_sk = i_item_sk)
AND (i_current_price > 50)
AND (ws_promo_sk = p_promo_sk)
AND (p_channel_tv = 'N')
GROUP BY web_site_id
)
SELECT
channel,
id,
sum(sales) AS sales,
sum(returns) AS returns,
sum(profit) AS profit
FROM
(
SELECT
'store channel' AS channel,
concat('store', store_id) AS id,
sales,
returns,
profit
FROM ssr
UNION ALL
SELECT
'catalog channel' AS channel,
concat('catalog_page', catalog_page_id) AS id,
sales,
returns,
profit
FROM csr
UNION ALL
SELECT
'web channel' AS channel,
concat('web_site', web_site_id) AS id,
sales,
returns,
profit
FROM wsr
) AS x
GROUP BY
channel,
id
WITH ROLLUP
ORDER BY
channel,
id
LIMIT 100;
The query does not work out-of-the-box due to https://github.com/ClickHouse/ClickHouse/issues/95299. This alternative formulation with a minor fix works:
WITH
ssr AS
(
SELECT
s_store_id AS store_id,
sum(ss_ext_sales_price) AS sales,
sum(coalesce(sr_return_amt, 0)) AS returns,
sum(ss_net_profit - coalesce(sr_net_loss, 0)) AS profit
FROM store_sales
LEFT JOIN store_returns ON (ss_item_sk = sr_item_sk) AND (ss_ticket_number = sr_ticket_number), date_dim, store, item, promotion
WHERE (ss_sold_date_sk = d_date_sk)
AND ((d_date >= CAST('2000-08-23', 'date')) AND (d_date <= (CAST('2000-08-23', 'date') + INTERVAL 30 DAY)))
AND (ss_store_sk = s_store_sk)
AND (ss_item_sk = i_item_sk)
AND (i_current_price > 50)
AND (ss_promo_sk = p_promo_sk)
AND (p_channel_tv = 'N')
GROUP BY s_store_id
),
csr AS
(
SELECT
cp_catalog_page_id AS catalog_page_id,
sum(cs_ext_sales_price) AS sales,
sum(coalesce(cr_return_amount, 0)) AS returns,
sum(cs_net_profit - coalesce(cr_net_loss, 0)) AS profit
FROM catalog_sales
LEFT JOIN catalog_returns ON (cs_item_sk = cr_item_sk) AND (cs_order_number = cr_order_number), date_dim, catalog_page, item, promotion
WHERE (cs_sold_date_sk = d_date_sk)
AND ((d_date >= CAST('2000-08-23', 'date')) AND (d_date <= (CAST('2000-08-23', 'date') + INTERVAL 30 DAY)))
AND (cs_catalog_page_sk = cp_catalog_page_sk)
AND (cs_item_sk = i_item_sk)
AND (i_current_price > 50)
AND (cs_promo_sk = p_promo_sk)
AND (p_channel_tv = 'N')
GROUP BY cp_catalog_page_id
),
wsr AS
(
SELECT
web_site_id,
sum(ws_ext_sales_price) AS sales,
sum(coalesce(wr_return_amt, 0)) AS returns,
sum(ws_net_profit - coalesce(wr_net_loss, 0)) AS profit
FROM web_sales
LEFT JOIN web_returns ON (ws_item_sk = wr_item_sk) AND (ws_order_number = wr_order_number), date_dim, web_site, item, promotion
WHERE (ws_sold_date_sk = d_date_sk)
AND ((d_date >= CAST('2000-08-23', 'date')) AND (d_date <= (CAST('2000-08-23', 'date') + INTERVAL 30 DAY)))
AND (ws_web_site_sk = web_site_sk)
AND (ws_item_sk = i_item_sk)
AND (i_current_price > 50)
AND (ws_promo_sk = p_promo_sk)
AND (p_channel_tv = 'N')
GROUP BY web_site_id
)
SELECT
channel,
CAST(id AS String) AS id,
sum(sales) AS sales,
sum(returns) AS returns,
sum(profit) AS profit
FROM
(
SELECT
'store channel' AS channel,
concat('store', store_id) AS id,
sales,
returns,
profit
FROM ssr
UNION ALL
SELECT
'catalog channel' AS channel,
concat('catalog_page', catalog_page_id) AS id,
sales,
returns,
profit
FROM csr
UNION ALL
SELECT
'web channel' AS channel,
concat('web_site', web_site_id) AS id,
sales,
returns,
profit
FROM wsr
) AS x
GROUP BY
channel,
id
WITH ROLLUP
ORDER BY
channel,
id
LIMIT 100;
Q81
WITH
customer_total_return AS
(
SELECT
cr_returning_customer_sk AS ctr_customer_sk,
ca_state AS ctr_state,
sum(cr_return_amt_inc_tax) AS ctr_total_return
FROM catalog_returns, date_dim, customer_address
WHERE (cr_returned_date_sk = d_date_sk)
AND (d_year = 2000)
AND (cr_returning_addr_sk = ca_address_sk)
GROUP BY
cr_returning_customer_sk,
ca_state
)
SELECT
c_customer_id,
c_salutation,
c_first_name,
c_last_name,
ca_street_number,
ca_street_name,
ca_street_type,
ca_suite_number,
ca_city,
ca_county,
ca_state,
ca_zip,
ca_country,
ca_gmt_offset,
ca_location_type,
ctr_total_return
FROM customer_total_return AS ctr1, customer_address, customer
WHERE (ctr1.ctr_total_return > (
SELECT avg(ctr_total_return) * 1.2
FROM customer_total_return AS ctr2
WHERE (ctr1.ctr_state = ctr2.ctr_state)
))
AND (ca_address_sk = c_current_addr_sk)
AND (ca_state = 'GA')
AND (ctr1.ctr_customer_sk = c_customer_sk)
ORDER BY
c_customer_id,
c_salutation,
c_first_name,
c_last_name,
ca_street_number,
ca_street_name,
ca_street_type,
ca_suite_number,
ca_city,
ca_county,
ca_state,
ca_zip,
ca_country,
ca_gmt_offset,
ca_location_type,
ctr_total_return
LIMIT 100;
Q82
SELECT
i_item_id,
i_item_desc,
i_current_price
FROM item, inventory, date_dim, store_sales
WHERE (i_current_price BETWEEN 62 AND 62 + 30)
AND (inv_item_sk = i_item_sk)
AND (d_date_sk = inv_date_sk)
AND (d_date BETWEEN CAST('2000-05-25', 'date') AND (CAST('2000-05-25', 'date') + INTERVAL 60 DAY))
AND (i_manufact_id IN (129, 270, 821, 423))
AND (inv_quantity_on_hand BETWEEN 100 AND 500)
AND (ss_item_sk = i_item_sk)
GROUP BY
i_item_id,
i_item_desc,
i_current_price
ORDER BY i_item_id
LIMIT 100;
Q83
WITH
sr_items AS
(
SELECT
i_item_id AS item_id,
sum(sr_return_quantity) AS sr_item_qty
FROM store_returns, item, date_dim
WHERE (sr_item_sk = i_item_sk)
AND (d_date IN (
SELECT d_date
FROM date_dim
WHERE (d_week_seq IN (
SELECT d_week_seq
FROM date_dim
WHERE (d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))
))
))
AND (sr_returned_date_sk = d_date_sk)
GROUP BY i_item_id
),
cr_items AS
(
SELECT
i_item_id AS item_id,
sum(cr_return_quantity) AS cr_item_qty
FROM catalog_returns, item, date_dim
WHERE (cr_item_sk = i_item_sk)
AND (d_date IN (
SELECT d_date
FROM date_dim
WHERE (d_week_seq IN (
SELECT d_week_seq
FROM date_dim
WHERE (d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))
))
))
AND (cr_returned_date_sk = d_date_sk)
GROUP BY i_item_id
),
wr_items AS
(
SELECT
i_item_id AS item_id,
sum(wr_return_quantity) AS wr_item_qty
FROM web_returns, item, date_dim
WHERE (wr_item_sk = i_item_sk)
AND (d_date IN (
SELECT d_date
FROM date_dim
WHERE (d_week_seq IN (
SELECT d_week_seq
FROM date_dim
WHERE (d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))
))
))
AND (wr_returned_date_sk = d_date_sk)
GROUP BY i_item_id
)
SELECT
sr_items.item_id,
sr_item_qty,
((sr_item_qty / ((sr_item_qty + cr_item_qty) + wr_item_qty)) / 3.) * 100 AS sr_dev,
cr_item_qty,
((cr_item_qty / ((sr_item_qty + cr_item_qty) + wr_item_qty)) / 3.) * 100 AS cr_dev,
wr_item_qty,
((wr_item_qty / ((sr_item_qty + cr_item_qty) + wr_item_qty)) / 3.) * 100 AS wr_dev,
((sr_item_qty + cr_item_qty) + wr_item_qty) / 3. AS average
FROM sr_items, cr_items, wr_items
WHERE (sr_items.item_id = cr_items.item_id)
AND (sr_items.item_id = wr_items.item_id)
ORDER BY
sr_items.item_id,
sr_item_qty
LIMIT 100;
Q84
SELECT
c_customer_id AS customer_id,
concat(coalesce(c_last_name, ''), ', ', coalesce(c_first_name, '')) AS customername
FROM customer, customer_address, customer_demographics, household_demographics, income_band, store_returns
WHERE (ca_city = 'Edgewood')
AND (c_current_addr_sk = ca_address_sk)
AND (ib_lower_bound >= 38128)
AND (ib_upper_bound <= (38128 + 50000))
AND (ib_income_band_sk = hd_income_band_sk)
AND (cd_demo_sk = c_current_cdemo_sk)
AND (hd_demo_sk = c_current_hdemo_sk)
AND (sr_cdemo_sk = cd_demo_sk)
ORDER BY c_customer_id
LIMIT 100;
Q85
SELECT
substr(r_reason_desc, 1, 20),
avg(ws_quantity),
avg(wr_refunded_cash),
avg(wr_fee)
FROM web_sales, web_returns, web_page, customer_demographics AS cd1, customer_demographics AS cd2, customer_address, date_dim, reason
WHERE (ws_web_page_sk = wp_web_page_sk)
AND (ws_item_sk = wr_item_sk)
AND (ws_order_number = wr_order_number)
AND (ws_sold_date_sk = d_date_sk)
AND (d_year = 2000)
AND (cd1.cd_demo_sk = wr_refunded_cdemo_sk)
AND (cd2.cd_demo_sk = wr_returning_cdemo_sk)
AND (ca_address_sk = wr_refunded_addr_sk)
AND (r_reason_sk = wr_reason_sk)
AND (
(
(cd1.cd_marital_status = 'M')
AND (cd1.cd_marital_status = cd2.cd_marital_status)
AND (cd1.cd_education_status = 'Advanced Degree')
AND (cd1.cd_education_status = cd2.cd_education_status)
AND ((ws_sales_price >= 100.0) AND (ws_sales_price <= 150.0))
)
OR (
(cd1.cd_marital_status = 'S')
AND (cd1.cd_marital_status = cd2.cd_marital_status)
AND (cd1.cd_education_status = 'College')
AND (cd1.cd_education_status = cd2.cd_education_status)
AND ((ws_sales_price >= 50.0) AND (ws_sales_price <= 100.0))
)
OR (
(cd1.cd_marital_status = 'W')
AND (cd1.cd_marital_status = cd2.cd_marital_status)
AND (cd1.cd_education_status = '2 yr Degree')
AND (cd1.cd_education_status = cd2.cd_education_status)
AND ((ws_sales_price >= 150.0) AND (ws_sales_price <= 200.0))
)
)
AND (
(
(ca_country = 'United States')
AND (ca_state IN ('IN', 'OH', 'NJ'))
AND ((ws_net_profit >= 100) AND (ws_net_profit <= 200))
)
OR (
(ca_country = 'United States')
AND (ca_state IN ('WI', 'CT', 'KY'))
AND ((ws_net_profit >= 150) AND (ws_net_profit <= 300))
)
OR (
(ca_country = 'United States')
AND (ca_state IN ('LA', 'IA', 'AR'))
AND ((ws_net_profit >= 50) AND (ws_net_profit <= 250))
)
)
GROUP BY r_reason_desc
ORDER BY
substr(r_reason_desc, 1, 20),
avg(ws_quantity),
avg(wr_refunded_cash),
avg(wr_fee)
LIMIT 100;
Q86
SELECT
sum(ws_net_paid) AS total_sum,
i_category,
i_class,
grouping(i_category) + grouping(i_class) AS lochierarchy,
rank() OVER (PARTITION BY grouping(i_category) + grouping(i_class), multiIf(grouping(i_class) = 0, i_category, NULL) ORDER BY sum(ws_net_paid) DESC) AS rank_within_parent
FROM web_sales, date_dim AS d1, item
WHERE ((d1.d_month_seq >= 1200) AND (d1.d_month_seq <= (1200 + 11)))
AND (d1.d_date_sk = ws_sold_date_sk)
AND (i_item_sk = ws_item_sk)
GROUP BY
i_category,
i_class
WITH ROLLUP
ORDER BY
lochierarchy DESC,
multiIf(lochierarchy = 0, i_category, NULL),
rank_within_parent
LIMIT 100;
Q87
SELECT count(*)
FROM
(
SELECT DISTINCT
c_last_name,
c_first_name,
d_date
FROM store_sales, date_dim, customer
WHERE (store_sales.ss_sold_date_sk = date_dim.d_date_sk)
AND (store_sales.ss_customer_sk = customer.c_customer_sk)
AND ((d_month_seq >= 1200) AND (d_month_seq <= (1200 + 11)))
EXCEPT
SELECT DISTINCT
c_last_name,
c_first_name,
d_date
FROM catalog_sales, date_dim, customer
WHERE (catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
AND (catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)
AND ((d_month_seq >= 1200) AND (d_month_seq <= (1200 + 11)))
EXCEPT
SELECT DISTINCT
c_last_name,
c_first_name,
d_date
FROM web_sales, date_dim, customer
WHERE (web_sales.ws_sold_date_sk = date_dim.d_date_sk)
AND (web_sales.ws_bill_customer_sk = customer.c_customer_sk)
AND ((d_month_seq >= 1200) AND (d_month_seq <= (1200 + 11)))
) AS cool_cust;
Q88
SELECT *
FROM
(
SELECT count(*) AS h8_30_to_9
FROM store_sales, household_demographics, time_dim, store
WHERE (ss_sold_time_sk = time_dim.t_time_sk)
AND (ss_hdemo_sk = household_demographics.hd_demo_sk)
AND (ss_store_sk = s_store_sk)
AND (time_dim.t_hour = 8)
AND (time_dim.t_minute >= 30)
AND (
((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= (4 + 2)))
OR ((household_demographics.hd_dep_count = 2) AND (household_demographics.hd_vehicle_count <= (2 + 2)))
OR ((household_demographics.hd_dep_count = 0) AND (household_demographics.hd_vehicle_count <= (0 + 2)))
)
AND (store.s_store_name = 'ese')
) AS s1,
(
SELECT count(*) AS h9_to_9_30
FROM store_sales, household_demographics, time_dim, store
WHERE (ss_sold_time_sk = time_dim.t_time_sk)
AND (ss_hdemo_sk = household_demographics.hd_demo_sk)
AND (ss_store_sk = s_store_sk)
AND (time_dim.t_hour = 9)
AND (time_dim.t_minute < 30)
AND (
((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= (4 + 2)))
OR ((household_demographics.hd_dep_count = 2) AND (household_demographics.hd_vehicle_count <= (2 + 2)))
OR ((household_demographics.hd_dep_count = 0) AND (household_demographics.hd_vehicle_count <= (0 + 2)))
)
AND (store.s_store_name = 'ese')
) AS s2,
(
SELECT count(*) AS h9_30_to_10
FROM store_sales, household_demographics, time_dim, store
WHERE (ss_sold_time_sk = time_dim.t_time_sk)
AND (ss_hdemo_sk = household_demographics.hd_demo_sk)
AND (ss_store_sk = s_store_sk)
AND (time_dim.t_hour = 9)
AND (time_dim.t_minute >= 30)
AND (
((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= (4 + 2)))
OR ((household_demographics.hd_dep_count = 2) AND (household_demographics.hd_vehicle_count <= (2 + 2)))
OR ((household_demographics.hd_dep_count = 0) AND (household_demographics.hd_vehicle_count <= (0 + 2)))
)
AND (store.s_store_name = 'ese')
) AS s3,
(
SELECT count(*) AS h10_to_10_30
FROM store_sales, household_demographics, time_dim, store
WHERE (ss_sold_time_sk = time_dim.t_time_sk)
AND (ss_hdemo_sk = household_demographics.hd_demo_sk)
AND (ss_store_sk = s_store_sk)
AND (time_dim.t_hour = 10)
AND (time_dim.t_minute < 30)
AND (
((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= (4 + 2)))
OR ((household_demographics.hd_dep_count = 2) AND (household_demographics.hd_vehicle_count <= (2 + 2)))
OR ((household_demographics.hd_dep_count = 0) AND (household_demographics.hd_vehicle_count <= (0 + 2)))
)
AND (store.s_store_name = 'ese')
) AS s4,
(
SELECT count(*) AS h10_30_to_11
FROM store_sales, household_demographics, time_dim, store
WHERE (ss_sold_time_sk = time_dim.t_time_sk)
AND (ss_hdemo_sk = household_demographics.hd_demo_sk)
AND (ss_store_sk = s_store_sk)
AND (time_dim.t_hour = 10)
AND (time_dim.t_minute >= 30)
AND (
((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= (4 + 2)))
OR ((household_demographics.hd_dep_count = 2) AND (household_demographics.hd_vehicle_count <= (2 + 2)))
OR ((household_demographics.hd_dep_count = 0) AND (household_demographics.hd_vehicle_count <= (0 + 2)))
)
AND (store.s_store_name = 'ese')
) AS s5,
(
SELECT count(*) AS h11_to_11_30
FROM store_sales, household_demographics, time_dim, store
WHERE (ss_sold_time_sk = time_dim.t_time_sk)
AND (ss_hdemo_sk = household_demographics.hd_demo_sk)
AND (ss_store_sk = s_store_sk)
AND (time_dim.t_hour = 11)
AND (time_dim.t_minute < 30)
AND (
((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= (4 + 2)))
OR ((household_demographics.hd_dep_count = 2) AND (household_demographics.hd_vehicle_count <= (2 + 2)))
OR ((household_demographics.hd_dep_count = 0) AND (household_demographics.hd_vehicle_count <= (0 + 2)))
)
AND (store.s_store_name = 'ese')
) AS s6,
(
SELECT count(*) AS h11_30_to_12
FROM store_sales, household_demographics, time_dim, store
WHERE (ss_sold_time_sk = time_dim.t_time_sk)
AND (ss_hdemo_sk = household_demographics.hd_demo_sk)
AND (ss_store_sk = s_store_sk)
AND (time_dim.t_hour = 11)
AND (time_dim.t_minute >= 30)
AND (
((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= (4 + 2)))
OR ((household_demographics.hd_dep_count = 2) AND (household_demographics.hd_vehicle_count <= (2 + 2)))
OR ((household_demographics.hd_dep_count = 0) AND (household_demographics.hd_vehicle_count <= (0 + 2)))
)
AND (store.s_store_name = 'ese')
) AS s7,
(
SELECT count(*) AS h12_to_12_30
FROM store_sales, household_demographics, time_dim, store
WHERE (ss_sold_time_sk = time_dim.t_time_sk)
AND (ss_hdemo_sk = household_demographics.hd_demo_sk)
AND (ss_store_sk = s_store_sk)
AND (time_dim.t_hour = 12)
AND (time_dim.t_minute < 30)
AND (
((household_demographics.hd_dep_count = 4) AND (household_demographics.hd_vehicle_count <= (4 + 2)))
OR ((household_demographics.hd_dep_count = 2) AND (household_demographics.hd_vehicle_count <= (2 + 2)))
OR ((household_demographics.hd_dep_count = 0) AND (household_demographics.hd_vehicle_count <= (0 + 2)))
)
AND (store.s_store_name = 'ese')
) AS s8;
Q89
SELECT *
FROM
(
SELECT
i_category,
i_class,
i_brand,
s_store_name,
s_company_name,
d_moy,
sum(ss_sales_price) AS sum_sales,
avg(sum(ss_sales_price)) OVER (PARTITION BY i_category, i_brand, s_store_name, s_company_name) AS avg_monthly_sales
FROM item, store_sales, date_dim, store
WHERE (ss_item_sk = i_item_sk)
AND (ss_sold_date_sk = d_date_sk)
AND (ss_store_sk = s_store_sk)
AND (d_year IN (1999))
AND (
((i_category IN ('Women', 'Jewelry', 'Men')) AND (i_class IN ('dresses', 'birdal', 'shirts')))
OR ((i_category IN ('Sports', 'Electronics', 'Books')) AND (i_class IN ('football', 'stereo', 'computers')))
)
GROUP BY
i_category,
i_class,
i_brand,
s_store_name,
s_company_name,
d_moy
) AS tmp1
WHERE (multiIf(avg_monthly_sales != 0, abs(sum_sales - avg_monthly_sales) / avg_monthly_sales, NULL) > 0.1)
ORDER BY
sum_sales - avg_monthly_sales,
s_store_name
LIMIT 100;
Q90
SELECT CAST(amc AS DECIMAL(15, 4)) / CAST(pmc AS DECIMAL(15, 4)) AS am_pm_ratio
FROM
(
SELECT count(*) AS amc
FROM web_sales, household_demographics, time_dim, web_page
WHERE (ws_sold_time_sk = time_dim.t_time_sk)
AND (ws_ship_hdemo_sk = household_demographics.hd_demo_sk)
AND (ws_web_page_sk = web_page.wp_web_page_sk)
AND (time_dim.t_hour BETWEEN 8 AND 8 + 1)
AND (household_demographics.hd_dep_count = 6)
AND (web_page.wp_char_count BETWEEN 5000 AND 5200)
) AS at,
(
SELECT count(*) AS pmc
FROM web_sales, household_demographics, time_dim, web_page
WHERE (ws_sold_time_sk = time_dim.t_time_sk)
AND (ws_ship_hdemo_sk = household_demographics.hd_demo_sk)
AND (ws_web_page_sk = web_page.wp_web_page_sk)
AND (time_dim.t_hour BETWEEN 19 AND 19 + 1)
AND (household_demographics.hd_dep_count = 6)
AND (web_page.wp_char_count BETWEEN 5000 AND 5200)
) AS pt
ORDER BY am_pm_ratio
LIMIT 100;
Q91
SELECT
cc_call_center_id AS Call_Center,
cc_name AS Call_Center_Name,
cc_manager AS Manager,
sum(cr_net_loss) AS Returns_Loss
FROM call_center, catalog_returns, date_dim, customer, customer_address, customer_demographics, household_demographics
WHERE (cr_call_center_sk = cc_call_center_sk)
AND (cr_returned_date_sk = d_date_sk)
AND (cr_returning_customer_sk = c_customer_sk)
AND (cd_demo_sk = c_current_cdemo_sk)
AND (hd_demo_sk = c_current_hdemo_sk)
AND (ca_address_sk = c_current_addr_sk)
AND (d_year = 1998)
AND (d_moy = 11)
AND (
((cd_marital_status = 'M') AND (cd_education_status = 'Unknown'))
OR ((cd_marital_status = 'W') AND (cd_education_status = 'Advanced Degree'))
)
AND (hd_buy_potential LIKE 'Unknown%')
AND (ca_gmt_offset = -7)
GROUP BY
cc_call_center_id,
cc_name,
cc_manager,
cd_marital_status,
cd_education_status
ORDER BY sum(cr_net_loss) DESC;
Q92
SELECT sum(ws_ext_discount_amt) AS `Excess Discount Amount`
FROM web_sales, item, date_dim
WHERE (i_manufact_id = 350)
AND (i_item_sk = ws_item_sk)
AND ((d_date >= '2000-01-27') AND (d_date <= (CAST('2000-01-27', 'date') + INTERVAL 90 DAY)))
AND (d_date_sk = ws_sold_date_sk)
AND (ws_ext_discount_amt > (
SELECT 1.3 * avg(ws_ext_discount_amt)
FROM web_sales, date_dim
WHERE (ws_item_sk = i_item_sk)
AND ((d_date >= '2000-01-27') AND (d_date <= (CAST('2000-01-27', 'date') + INTERVAL 90 DAY)))
AND (d_date_sk = ws_sold_date_sk)
))
ORDER BY sum(ws_ext_discount_amt)
LIMIT 100;
Q93
SELECT
ss_customer_sk,
sum(act_sales) AS sumsales
FROM
(
SELECT
ss_item_sk,
ss_ticket_number,
ss_customer_sk,
multiIf(sr_return_quantity IS NOT NULL, (ss_quantity - sr_return_quantity) * ss_sales_price, ss_quantity * ss_sales_price) AS act_sales
FROM store_sales
LEFT JOIN store_returns ON (sr_item_sk = ss_item_sk) AND (sr_ticket_number = ss_ticket_number), reason
WHERE (sr_reason_sk = r_reason_sk)
AND (r_reason_desc = 'reason 28')
) AS t
GROUP BY ss_customer_sk
ORDER BY
sumsales,
ss_customer_sk
LIMIT 100;
Q94
SELECT
count(DISTINCT ws_order_number) AS "order count",
sum(ws_ext_ship_cost) AS "total shipping cost",
sum(ws_net_profit) AS "total net profit"
FROM web_sales AS ws1, date_dim, customer_address, web_site
WHERE (d_date BETWEEN '1999-2-01' AND (CAST('1999-2-01', 'date') + INTERVAL 60 DAY))
AND (ws1.ws_ship_date_sk = d_date_sk)
AND (ws1.ws_ship_addr_sk = ca_address_sk)
AND (ca_state = 'IL')
AND (ws1.ws_web_site_sk = web_site_sk)
AND (web_company_name = 'pri')
AND EXISTS (
SELECT *
FROM web_sales AS ws2
WHERE (ws1.ws_order_number = ws2.ws_order_number)
AND (ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
)
AND NOT EXISTS (
SELECT *
FROM web_returns AS wr1
WHERE (ws1.ws_order_number = wr1.wr_order_number)
)
ORDER BY count(DISTINCT ws_order_number)
LIMIT 100;
Q95
WITH
ws_wh AS
(
SELECT
ws1.ws_order_number,
ws1.ws_warehouse_sk AS wh1,
ws2.ws_warehouse_sk AS wh2
FROM web_sales AS ws1, web_sales AS ws2
WHERE (ws1.ws_order_number = ws2.ws_order_number)
AND (ws1.ws_warehouse_sk != ws2.ws_warehouse_sk)
)
SELECT
countDistinct(ws_order_number) AS `order count`,
sum(ws_ext_ship_cost) AS `total shipping cost`,
sum(ws_net_profit) AS `total net profit`
FROM web_sales AS ws1, date_dim, customer_address, web_site
WHERE ((d_date >= '1999-2-01') AND (d_date <= (CAST('1999-2-01', 'date') + INTERVAL 60 DAY)))
AND (ws1.ws_ship_date_sk = d_date_sk)
AND (ws1.ws_ship_addr_sk = ca_address_sk)
AND (ca_state = 'IL')
AND (ws1.ws_web_site_sk = web_site_sk)
AND (web_company_name = 'pri')
AND (ws1.ws_order_number IN (
SELECT ws_order_number
FROM ws_wh
))
AND (ws1.ws_order_number IN (
SELECT wr_order_number
FROM web_returns, ws_wh
WHERE (wr_order_number = ws_wh.ws_order_number)
))
ORDER BY countDistinct(ws_order_number)
LIMIT 100;
Q96
SELECT count(*)
FROM store_sales, household_demographics, time_dim, store
WHERE (ss_sold_time_sk = time_dim.t_time_sk)
AND (ss_hdemo_sk = household_demographics.hd_demo_sk)
AND (ss_store_sk = s_store_sk)
AND (time_dim.t_hour = 20)
AND (time_dim.t_minute >= 30)
AND (household_demographics.hd_dep_count = 7)
AND (store.s_store_name = 'ese')
ORDER BY count(*)
LIMIT 100;
Q97
WITH
ssci AS
(
SELECT
ss_customer_sk AS customer_sk,
ss_item_sk AS item_sk
FROM store_sales, date_dim
WHERE (ss_sold_date_sk = d_date_sk)
AND (d_month_seq BETWEEN 1200 AND 1200 + 11)
GROUP BY
ss_customer_sk,
ss_item_sk
),
csci AS
(
SELECT
cs_bill_customer_sk AS customer_sk,
cs_item_sk AS item_sk
FROM catalog_sales, date_dim
WHERE (cs_sold_date_sk = d_date_sk)
AND (d_month_seq BETWEEN 1200 AND 1200 + 11)
GROUP BY
cs_bill_customer_sk,
cs_item_sk
)
SELECT
sum(CASE WHEN (ssci.customer_sk IS NOT NULL) AND (csci.customer_sk IS NULL) THEN 1 ELSE 0 END) AS store_only,
sum(CASE WHEN (ssci.customer_sk IS NULL) AND (csci.customer_sk IS NOT NULL) THEN 1 ELSE 0 END) AS catalog_only,
sum(CASE WHEN (ssci.customer_sk IS NOT NULL) AND (csci.customer_sk IS NOT NULL) THEN 1 ELSE 0 END) AS store_and_catalog
FROM ssci
FULL OUTER JOIN csci ON (ssci.customer_sk = csci.customer_sk) AND (ssci.item_sk = csci.item_sk)
LIMIT 100;
Q98
SELECT
i_item_id,
i_item_desc,
i_category,
i_class,
i_current_price,
sum(ss_ext_sales_price) AS itemrevenue,
sum(ss_ext_sales_price) * 100 / sum(sum(ss_ext_sales_price)) OVER (PARTITION BY i_class) AS revenueratio
FROM store_sales, item, date_dim
WHERE (ss_item_sk = i_item_sk)
AND (i_category IN ('Sports', 'Books', 'Home'))
AND (ss_sold_date_sk = d_date_sk)
AND (d_date BETWEEN CAST('1999-02-22', 'date') AND (CAST('1999-02-22', 'date') + INTERVAL 30 DAY))
GROUP BY
i_item_id,
i_item_desc,
i_category,
i_class,
i_current_price
ORDER BY
i_category,
i_class,
i_item_id,
i_item_desc,
revenueratio;
Q99
SELECT
substr(w_warehouse_name, 1, 20),
sm_type,
cc_name,
sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk <= 30) THEN 1 ELSE 0 END) AS "30 days",
sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 30) AND (cs_ship_date_sk - cs_sold_date_sk <= 60) THEN 1 ELSE 0 END) AS "31-60 days",
sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 60) AND (cs_ship_date_sk - cs_sold_date_sk <= 90) THEN 1 ELSE 0 END) AS "61-90 days",
sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 90) AND (cs_ship_date_sk - cs_sold_date_sk <= 120) THEN 1 ELSE 0 END) AS "91-120 days",
sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 120) THEN 1 ELSE 0 END) AS ">120 days"
FROM catalog_sales, warehouse, ship_mode, call_center, date_dim
WHERE (d_month_seq BETWEEN 1200 AND 1200 + 11)
AND (cs_ship_date_sk = d_date_sk)
AND (cs_warehouse_sk = w_warehouse_sk)
AND (cs_ship_mode_sk = sm_ship_mode_sk)
AND (cs_call_center_sk = cc_call_center_sk)
GROUP BY
substr(w_warehouse_name, 1, 20),
sm_type,
cc_name
ORDER BY
substr(w_warehouse_name, 1, 20),
sm_type,
cc_name
LIMIT 100;