Search results for "{{ search.query }}"

No results found for "{{search.query}}". 
View All Results

Heap SQL: Common Queries

Below you will find all of the queries mentioned in our Redshift Guide. Many of these queries can be copy and pasted in to your BI Tool or run directly on your cluster. However, some queries require you to update property name, event names, and date ranges to fit your needs. If you have any questions about executing these queries or have a use case that is not covered please reach out to support@heapanalytics.com.

Basic Queries

Event Counts

SELECT device_type
          ,plan
          ,COUNT(session_id)
  FROM main_production.sessions AS sessions
  INNER JOIN main_production.users AS users ON sessions.user_id = users.user_id
  GROUP BY 1, 2

Average Property Value

  SELECT  [Property_to_Group_By], AVG([Numeric_Property])
  FROM main_production.[event_name] 
  GROUP BY 1
  ORDER BY 1

Aggregate Metrics

Top 10 Events

SELECT 
  all_events.event_table_name AS "Event",
        COUNT(*) AS "Event Count"
FROM main_production.all_events AS all_events
 WHERE time >= DATEADD(day,-30, getdate())
GROUP BY 1
ORDER BY 2 DESC
LIMIT 25

Average Time on Site

SELECT ROUND(AVG(duration)::FLOAT, 4) "Average Session time in Minutes"
  FROM (
          SELECT session_id || '-' || user_id AS "unique"
                  ,MIN(time) AS "start"
                  ,MAX(time) AS "last"
                ,((DATEDIFF('milliseconds', MIN(time), MAX(time))::FLOAT / 1000  ) / 60) AS "duration"
          FROM main_production.all_events
          -- modify your date range here 
          WHERE time >= DATEADD('month', - 2, getDate())
          GROUP BY 1
          )

Average Events per Session

  SELECT Round(Count(DISTINCT event_id) :: DECIMAL / Count(DISTINCT session_id), 2 
       ) AS 
       "average" 
  FROM   main_production.all_events 
  -- modify your date range here 
  WHERE  TIME >= Dateadd('day', -30, Getdate()) 

Average Events per Session Over Time

  SELECT TO_CHAR(main_production.all_events.time, 'YYYY-MM') as month, ROUND(COUNT(DISTINCT event_id)::DECIMAL / COUNT(DISTINCT session_id), 2) AS "average"
  FROM main_production.all_events
  -- modify your date range here
  WHERE time >= DATEADD('month', -6, getdate())
  GROUP BY 1

Average Events per Session by User Property

SELECT user_property, ROUND(COUNT(DISTINCT event_id)::DECIMAL / COUNT(DISTINCT session_id), 2) AS "average"
  FROM main_production.all_events JOIN main_production.users 
    ON main_production.all_events.user_id = main_production.users.user_id
  -- modify your date range here
  WHERE time >= DATEADD('day', -30, getdate())

Average Sessions per User

  SELECT ROUND(COUNT(DISTINCT session_id)::DECIMAL / COUNT(DISTINCT user_id), 2)
  FROM main_production.sessions
  -- modify the date range here
  WHERE time >= DATEADD('day', - 30, GETDATE())

Average Property Value

  SELECT [property_to_group_by], 
         Avg([numeric_property]) 
  FROM   main_production.[event_name] 
  GROUP  BY 1 
  ORDER  BY 1 

Funnels

The Basic Funnel

WITH e1
  AS (
        SELECT DISTINCT user_id
                ,1 AS step_1
                ,MIN(time) AS step_1_time
        -- change to table name of event 1 in your funnel
        FROM main_production.step_1 AS e1
        WHERE time BETWEEN '2016-04-13'
                        AND '2016-04-21'
        GROUP BY 1
        )
        ,e2
  AS (
        SELECT e1.user_id
                ,1 AS step_2
                ,MIN(time) AS step_2_time
        -- change to table name of event 2 in your funnel
        FROM main_production.step_2 AS e2
        INNER JOIN e1 ON e1.user_id = e2.user_id
        WHERE time BETWEEN step_1_time
                        AND '2016-04-21'
        GROUP BY 1
        )
        ,e3
  AS (
        SELECT e3.user_id
                ,1 AS step_3
                ,MIN(time) AS step_3_time
        -- change to table name of event 3 in your funnel
        FROM main_production.step_3 AS e3
        INNER JOIN e2 ON e2.user_id = e3.user_id
        WHERE TIME BETWEEN step_2_time
                        AND '2016-04-21'
        GROUP BY 1
        )
        
  SELECT SUM(step_1) AS "Step 1"
        ,SUM(step_2) AS "Step 2"
        ,SUM(step_3) AS "Step 3"
        ,ROUND(SUM(step_2)/SUM(step_1)::DECIMAL, 2) as step_1to2_conversion
        ,ROUND(SUM(step_3)/SUM(step_2)::DECIMAL,2) as step_2to3_conversion
        ,ROUND(SUM(step_3)/SUM(step_1)::DECIMAL, 2) as overall_conversion
  FROM (
        SELECT e1.user_id
                ,step_1
                ,step_1_time
                ,step_2
                ,step_2_time
                ,step_3
                ,step_3_time
        FROM e1
        LEFT JOIN e2 ON e1.user_id = e2.user_id
        LEFT JOIN e3 ON e2.user_id = e3.user_id
        )

Funnels Grouped by an Event Level Property

WITH e1
  AS (
        SELECT DISTINCT e1.user_id
                ,1 AS step_1
                ,min(TIME) AS step_1_time
                ,utm_source
        FROM main_production.define_event AS e1
        INNER JOIN (
                SELECT user_id
                        ,min(TIME) AS mintime
                FROM main_production.define_event
                WHERE DATE (TIME) BETWEEN '2016-04-13'
                                AND '2016-04-20'
                GROUP BY 1
                ) AS min_value ON e1.user_id = min_value.user_id
                AND e1.TIME = min_value.mintime
        WHERE DATE (TIME) BETWEEN '2016-04-13'
                        AND '2016-04-20'
        GROUP BY 1
                ,4
        )
  ,e2 AS (
        SELECT e1.user_id
                ,1 AS step_2
                ,MIN(time) AS step_2_time
        -- change to table name of event 2 in your funnel
        FROM main_production.step_2 AS e2
        INNER JOIN e1 ON e1.user_id = e2.user_id
        WHERE time BETWEEN step_1_time
                        AND '2016-04-21'
        GROUP BY 1
        )
        ,e3
  AS (
        SELECT e3.user_id
                ,1 AS step_3
                ,MIN(time) AS step_3_time
        -- change to table name of event 3 in your funnel
        FROM main_production.step_3 AS e3
        INNER JOIN e2 ON e2.user_id = e3.user_id
        WHERE TIME BETWEEN step_2_time
                        AND '2016-04-21'
        GROUP BY 1
        )

  SELECT utm_source
        ,sum(step_1) AS "Step 1"
        ,sum(step_2) AS "Step 2"
        ,sum(step_3) AS "Step 3"
  FROM (
        SELECT e1.user_id
                ,step_1
                ,step_1_time
                ,step_2
                ,step_2_time
                ,step_3
                ,step_3_time
                ,utm_source
        FROM e1
        LEFT JOIN e2 ON e1.user_id = e2.user_id
        LEFT JOIN e3 ON e2.user_id = e3.user_id
        )
  GROUP BY 1

Restricting Funnels to One Session

 WITH e1 AS 
  ( 
                SELECT DISTINCT user_id, 
                                session_id , 
                                Min(time) AS step_1_time 
                                -- change to table name of event 1 in your funnel 
                FROM            main_production.step_1 AS e1 
                WHERE           time BETWEEN '2016-04-13' AND             '2016-04-21' 
                GROUP BY        1, 
                                2 ), e2 AS 
  ( 
           SELECT     e1.user_id, 
                      e1.session_id , 
                      Min(time) AS step_2_time 
                      -- change to table name of event 2 in your funnel 
           FROM       main_production.step_2 AS e2 
           INNER JOIN e1 
           ON         e1.user_id = e2.user_id 
           AND        e1.session_id = e2.session_id 
           WHERE      time BETWEEN step_1_time AND        '2016-04-21' 
           GROUP BY   1, 
                      2 ), e3 AS 
  ( 
           SELECT     e3.user_id, 
                      e3.session_id , 
                      Min(time) AS step_3_time 
                      -- change to table name of event 3 in your funnel 
           FROM       main_production.step_3 AS e3 
           INNER JOIN e2 
           ON         e2.user_id = e3.user_id 
           AND        e2.session_id = e3.session_id 
           WHERE      time BETWEEN step_2_time AND        '2016-04-21' 
           GROUP BY   1, 
                      2 ) 
  SELECT count(DISTINCT step_1 as step_one ,count(DISTINCT step_2) AS step_two ,count(DISTINCT step_3) AS step_three ,round(count(DISTINCT step_3)/count(DISTINCT step_1)::decimal, 2) AS conversion_rate
       -- calculate additional percentages here 
       FROM ( 
                 SELECT    e1.user_id AS step_1 , 
                           e1.session_id , 
                           step_1_time , 
                           e2.user_id AS step_2 , 
                           step_2_time , 
                           e3.user_id AS step_3 , 
                           step_3_time 
                 FROM      e1 
                 LEFT JOIN e2 
                 ON        e1.user_id = e2.user_id 
                 AND       e1.session_id = e2.session_id 
                 LEFT JOIN e3 
                 ON        e2.user_id = e3.user_id 
                 AND       e2.session_id = e3.session_id 
                 ORDER BY  1 ) 1

Conversion Rate Over Time

WITH e1 AS(
  SELECT DISTINCT user_id
        ,1 AS step_1
        ,MIN(time) AS step_1_time
        ,TO_CHAR(e1.time, 'YYYY-MM') as "month"
  FROM main_production.step_1 AS e1
  WHERE TIME BETWEEN '2015-04-01' --start time
                AND '2016-04-01'  --end time
  GROUP BY 1,4
  ),

  e2 as (
  SELECT e1.user_id, 1 AS step_2, MIN(e2.time) AS step_2_time, e1.step_1_time
    FROM main_production.[event_2] AS e2
    JOIN e1 ON e1.user_id=e2.user_id AND e1.step_1_time >= DATEADD('day', -30, e2.time)
    WHERE time BETWEEN step_1_time AND '2016-04-01'
    GROUP BY 1, 4
  ),
  e3 as (
  SELECT e3.user_id, 1 AS step_3, MIN(time) AS step_3_time, e2.step_1_time
    FROM main_production.reports_view_any_report AS e3
    JOIN e2 ON e2.user_id=e3.user_id AND e2.step_1_time >= DATEADD('day', -30, e3.time)
    WHERE time BETWEEN step_2_time AND '2016-04-01'
    GROUP BY 1, 4
  )

  SELECT MONTH, ROUND(SUM(step_3)/SUM(step_1)::DECIMAL, 2) AS "Conversion Rate"
  FROM(
    SELECT e1.user_id, step_1, e1.step_1_time, step_3, step_3_time, e1.month
      FROM e1 LEFT JOIN e2 ON e1.user_id = e2.user_id AND e1.step_1_time=e2.step_1_time 
      LEFT JOIN e3 ON e2.user_id= e3.user_id AND e2.step_1_time=e3.step_1_time
  )
  GROUP BY 1
  ORDER BY 1

Time to Conversion/ Average Time to Event

SELECT avg(duration) AS "Average hours until Purchase"
  FROM (
        SELECT user_id
                ,datediff('milliseconds', start_time, end_time)::DECIMAL / 1000 / 60 / 60 / AS "duration"
        FROM (
                SELECT main_production.users.user_id
                        ,min(joindate) AS start_time
                        ,min(main_production.purchase.time) AS end_time
                FROM main_production.sign_up
                JOIN main_production.users ON main_production.purchase.user_id = main_production.users.user_id
                GROUP BY 1
                )
        )

Path Analysis

WITH event_count
  AS (
        SELECT event_table_name
                ,count(*) AS cardinality
        FROM main_production.all_events
        WHERE TIME > dateadd('day', - 30, getDate())
        GROUP BY 1
        )
        ,all_events
  AS (
        SELECT DISTINCT event_id
                ,main_production.all_events.user_id AS user_id
                ,main_production.all_events.event_table_name AS event_name
                ,main_production.all_events.TIME AS occurred_at
                ,cardinality AS number
        FROM main_production.all_events
        LEFT JOIN event_count ON main_production.all_events.event_table_name =event_count.event_table_name
        ORDER BY 1
                ,2
        )
        ,events
  AS (
        SELECT all_events.user_id
                ,all_events.event_id
                ,event_name
                ,occurred_at
                ,cardinality
        FROM all_events
        INNER JOIN (
                SELECT event_id
                        ,user_id
                        ,min(number) AS cardinality
                FROM all_events
                GROUP BY 1
                        ,2
                ) AS event ON all_events.number = event.cardinality
                AND all_events.event_id = event.event_id
                AND all_events.user_id = event.user_id
        ORDER BY 1
                ,2
                ,4
        )
  SELECT e1
        ,e2
        ,e3
        ,e4
        ,e5
        ,COUNT(*) AS occurrances
  FROM (
        --Pivot out first five events in each session
        SELECT user_id
                ,session
                ,MAX(CASE 
                                WHEN event_number = 1
                                        THEN event_name
                                ELSE NULL
                                END) AS e1
                ,MAX(CASE 
                                WHEN event_number = 2
                                        THEN event_name
                                ELSE NULL
                                END) AS e2
                ,MAX(CASE 
                                WHEN event_number = 3
                                        THEN event_name
                                ELSE NULL
                                END) AS e3
                ,MAX(CASE 
                                WHEN event_number = 4
                                        THEN event_name
                                ELSE NULL
                                END) AS e4
                ,MAX(CASE 
                                WHEN event_number = 5
                                        THEN event_name
                                ELSE NULL
                                END) AS e5
        FROM (
                -- Find event number in session
                SELECT z.*
                        ,ROW_NUMBER() OVER (
                                PARTITION BY user_id
                                ,session ORDER BY occurred_at
                                ) AS event_number
                FROM (
                        -- Sum breaks to find sessions
                        SELECT y.*
                                ,SUM(BREAK) OVER (
                                        ORDER BY user_id
                                                ,occurred_at ROWS UNBOUNDED PRECEDING
                                        ) AS session
                        FROM (
                                -- Add flag if last event was more than 10 minutes ago
                                SELECT x.*
                                        ,CASE 
                                                WHEN last_event IS NULL
                                                        OR occurred_at >= last_event + INTERVAL '10 MINUTE'
                                                        THEN 1
                                                ELSE 0
                                                END AS BREAK
                                FROM (
                                        -- Find last event
                                        SELECT *
                                                ,LAG(occurred_at, 1) OVER (
                                                        PARTITION BY user_id ORDER BY occurred_at
                                                        ) AS last_event
                                        FROM events
                                        ) x
                                ) y
                        ) z
                ) a
        WHERE event_number <= 5
        GROUP BY 1
                ,2
        ) final
  GROUP BY 1
        ,2
        ,3
        ,4
        ,5
  ORDER BY 6 DESC LIMIT 20

Attribution

First Touch Properties

SELECT all_sessions.user_id
        ,first_session.first_time AS first_seen
        ,referrer
        ,utm_source
        ,utm_campaign
        ,utm_medium
        ,utm_content
  FROM main_production.sessions AS all_sessions
  INNER JOIN (
        SELECT user_id
                ,MIN(time) AS first_time
        FROM main_production.sessions
        GROUP BY user_id
        ) AS first_session ON all_sessions.user_id = first_session.user_id
        AND all_sessions.time = first_session.first_time
  INNER JOIN main_production.users AS users ON all_sessions.user_id = users.user_id
--[optional] insert where clause here
  ORDER BY 3 DESC

First Touch Property by User Count

SELECT utm_source, COUNT(distinct user_id)
  FROM main_production.sessions AS all_sessions
  INNER JOIN (
        SELECT user_id
                ,MIN(time) AS first_time
        FROM main_production.sessions
        GROUP BY user_id
        ) AS first_session ON all_sessions.user_id = first_session.user_id
        AND all_sessions.time = first_session.first_time
  INNER JOIN main_production.users AS users ON all_sessions.user_id = users.user_id
  GROUP BY 1

Multi Touch Attribution

SELECT initial.utm_campaign AS "INITIAL"
        ,CASE 
                WHEN event_2.utm_campaign IS NULL
                        THEN 'None'
                ELSE event_2.utm_campaign
                END AS "LAST TOUCH"
        ,count(DISTINCT initial.user_id) AS "Number of Users"
  FROM (
        SELECT all_sessions.user_id
                ,CASE 
                        WHEN all_sessions.utm_campaign IS NULL
                                THEN 'None'
                        ELSE all_sessions.utm_campaign
                        END
        FROM main_production.sessions AS all_sessions
        INNER JOIN (
                SELECT user_id
                        ,MIN(time) AS first_time
                FROM main_production.sessions
                GROUP BY user_id
                ) AS first_session ON all_sessions.user_id = first_session.user_id
                AND all_sessions.time = first_session.first_time
        ) AS initial
  JOIN main_production.conversion_event AS event_2 
    ON initial.user_id = event_2.user_id
  GROUP BY 1
        ,2
  ORDER BY 3 DESC

Retention

Month to Month Retention

--get a cohort based on join date
  WITH new_user_activity
  AS (
        SELECT activity.user_id
                ,activity.TIME AS DATE
        FROM main_production.sessions AS activity
        INNER JOIN users ON users.user_id = activity.user_id
                AND users.joindate = activity.TIME
        WHERE date_part('year', joindate) > 2014
        )
        --number of users in cohort
        ,cohort_active_user_count
  AS (
        SELECT to_char(DATE, 'YYYY-MM') AS DATE
                ,count(DISTINCT user_id) AS count
        FROM new_user_activity
        GROUP BY 1
        )
  --change format to change granularity
  SELECT joindate
        ,period
        ,new_users
        ,retained_users
        ,retention
  FROM (
        SELECT to_char(new_user_activity.DATE, 'YYYY-MM') AS "joindate"
                ,'Month ' ||
                --change month to change granularity
                datediff('month', new_user_activity.DATE, future_activity.TIME) AS period
                ,max(cohort_size.count) AS new_users
                ,-- all equal in group
                count(DISTINCT future_activity.user_id) AS retained_users
                ,round(count(DISTINCT future_activity.user_id) / max(cohort_size.count)::DECIMAL, 2) AS retention
        FROM new_user_activity
        LEFT JOIN main_production.sessions AS future_activity ON new_user_activity.user_id = future_activity.user_id
                AND new_user_activity.DATE < future_activity.TIME
                AND date_add('month', 9, new_user_activity.DATE) >= future_activity.TIME
        LEFT JOIN cohort_active_user_count AS cohort_size ON to_char(new_user_activity.DATE, 'YYYY-MM') = cohort_size.DATE
        GROUP BY 1
                ,2
        ) t
  WHERE period IS NOT NULL
  ORDER BY 1
        ,2

Joining Heap with Revenue Data

ARPU per UTM Source

WITH first_touch 
     AS (SELECT all_sessions.user_id, 
                identity, 
                first_session.first_time AS first_seen, 
                utm_source 
         --add any user level properties or session level properties here 
         FROM   main_production.sessions AS all_sessions 
                inner join (SELECT user_id, 
                                   Min(TIME) AS first_time 
                            FROM   main_production.sessions 
                            GROUP  BY user_id) AS first_session 
                        ON all_sessions.user_id = first_session.user_id 
                           AND all_sessions.TIME = first_session.first_time 
                inner join main_production.users AS users 
                        ON all_sessions.user_id = users.user_id) 
  SELECT CASE 
         WHEN utm_source IS NULL THEN 'none' 
         ELSE utm_source 
       END, 
       Round(SUM(order_total) :: DECIMAL, 2) AS total_revenue, 
       Round(SUM(order_total) :: DECIMAL / Count(DISTINCT customer_id) :: 
             DECIMAL, 2) 
                                             AS avg_per_customer 
  FROM   heap.TRANSACTION 
       join first_touch 
         ON customer_id = identity 
  GROUP  BY 1 

Monthly ARPU per UTM Source

WITH first_touch 
     AS (SELECT all_sessions.user_id, 
                identity, 
                first_session.first_time AS first_seen, 
                utm_source 
         --add any user level properties or session level properties here 
         FROM   main_production.sessions AS all_sessions 
                inner join (SELECT user_id, 
                                   Min(TIME) AS first_time 
                            FROM   main_production.sessions 
                            GROUP  BY user_id) AS first_session 
                        ON all_sessions.user_id = first_session.user_id 
                           AND all_sessions.TIME = first_session.first_time 
                inner join main_production.users AS users 
                        ON all_sessions.user_id = users.user_id) 
  SELECT utm_source, 
       CASE 
         WHEN Length(Substring(order_date, 0, Position('/' IN order_date))) = 2 
       THEN 
         '20' 
         || 
       Right(order_date, 2) 
         || 
       '-' 
         || 
       Substring(order_date, 0, Position('/' IN order_date)) 
         ELSE '20' 
              || Right(order_date, 2) 
              ||'-0' 
              || Substring(order_date, 0, Position('/' IN order_date)) 
       END                                   AS order_month, 
       Count(DISTINCT customer_id), 
       Round(SUM(order_total) :: DECIMAL, 2) AS total_revenue, 
       Round(SUM(order_total) :: DECIMAL / Count(DISTINCT customer_id) :: 
             DECIMAL, 2) 
                                             AS avg_per_customer 
  FROM   heap.TRANSACTION 
       join first_touch 
         ON customer_id = identity 
  WHERE  utm_source IS NOT NULL 
  GROUP  BY 1, 
            2 
  ORDER  BY 2

ARPU by Behavior

WITH user_info 
     AS (SELECT identity, 
                users.user_id, 
                SUM(order_total) AS total_revenue, 
                Count(event_id)  AS event_count 
         FROM   main_production.users AS users 
                join heap.TRANSACTION 
                  ON identity = customer_id 
                left join main_production.dashboard_galleries_click_add_new_files AS event 
                       ON users.user_id = event.user_id 
         GROUP  BY 1, 
                   2) 
  SELECT CASE 
         WHEN event_count = 0 THEN 'has not uploaded files' 
         ELSE 'has uploaded files' 
       END, 
       Round(SUM(total_revenue) :: DECIMAL / Count(DISTINCT user_id), 2) AS 
       avg_revenue_per_user 
  FROM   user_info 
  GROUP  BY 1 

Churned Customer Revenue

WITH first_touch 
     AS (SELECT all_sessions.user_id, 
                identity, 
                first_session.first_time AS first_seen, 
                utm_source 
         --add any user level properties or session level properties here 
         FROM   main_production.sessions AS all_sessions 
                inner join (SELECT user_id, 
                                   Min(TIME) AS first_time 
                            FROM   main_production.sessions 
                            GROUP  BY user_id) AS first_session 
                        ON all_sessions.user_id = first_session.user_id 
                           AND all_sessions.TIME = first_session.first_time 
                inner join main_production.users AS users 
                        ON all_sessions.user_id = users.user_id) 
  SELECT utm_source, 
       Round(SUM(order_total) :: DECIMAL, 2) 
       AS churned_customer_total_value, 
       Round(Count(DISTINCT order_id) :: DECIMAL / Count(DISTINCT user_id), 2) 
       AS 
       avg_orders_per_user, 
       Round(SUM(order_total) :: DECIMAL / Count(DISTINCT first_touch.identity), 
       2) AS 
       avg_ltv 
  FROM   first_touch 
       join heap.TRANSACTION 
         ON customer_id = first_touch.identity 
       join main_production.account_modification 
         ON first_touch.identity = main_production.account_modification.identity 
  WHERE  account_change = 'cancel' 
       AND utm_source IS NOT NULL 
  GROUP  BY 1 

Heap SQL: Common Queries