0
Fork 0
mirror of https://github.com/TryGhost/Ghost.git synced 2025-03-11 02:12:21 -05:00

Refactored Tinybird pipes to not use analytics_hits (#21765)

- This changes the Tinybird Materialized Views to circumvent a currently
  existing Tinybird bug that prevents iterating the code in its current state.
- The idea is that this will allow us to be more flexible in making changes, as it works
  around some restrictions where Tinybird won't let us change the MV because other
  parts of the pipe depend on it.
- The approach is to remove the dependency on `analytics_hits.pipe` in the
  materialized views.
- This does create code duplication, but we can clean that up later using includes, 
  or refactor the pipe again later if Tinybird fixes the issues

---------

Co-authored-by: Hannah Wolfe <github.erisds@gmail.com>
This commit is contained in:
Paco González López 2024-12-03 16:54:22 +00:00 committed by GitHub
parent 6bb82af4e7
commit 779b1ef86a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 258 additions and 108 deletions

View file

@ -1,67 +0,0 @@
DESCRIPTION >
Parsed `page_hit` events, implementing `browser` and `device` detection logic.
TOKEN "dashboard" READ
TOKEN "stats page" READ
NODE parsed_hits
DESCRIPTION >
    Reads raw `page_hit` rows from `analytics_events` and unpacks the JSON `payload` into one string column per field.
    A NULL `session_id` is replaced with '0' and the user agent is lower-cased.

SQL >
    SELECT
        timestamp,
        action,
        version,
        coalesce(session_id, '0') AS session_id,
        JSONExtractString(payload, 'locale') AS locale,
        JSONExtractString(payload, 'location') AS location,
        JSONExtractString(payload, 'referrer') AS referrer,
        JSONExtractString(payload, 'pathname') AS pathname,
        JSONExtractString(payload, 'href') AS href,
        JSONExtractString(payload, 'site_uuid') AS site_uuid,
        JSONExtractString(payload, 'member_uuid') AS member_uuid,
        JSONExtractString(payload, 'member_status') AS member_status,
        JSONExtractString(payload, 'post_uuid') AS post_uuid,
        lower(JSONExtractString(payload, 'user-agent')) AS user_agent
    FROM analytics_events
    WHERE action = 'page_hit'
NODE endpoint
DESCRIPTION >
    Shapes `parsed_hits` into the published hit rows: passes the identifying columns through, reduces the referrer
    to its domain without `www` as `source`, and classifies `device` and `browser` by regex-matching the
    lower-cased user agent. The first matching WHEN branch wins, so branch order is significant.

SQL >
    SELECT
        site_uuid,
        timestamp,
        action,
        version,
        session_id,
        member_uuid,
        member_status,
        post_uuid,
        location,
        domainWithoutWWW(referrer) AS source,
        pathname,
        href,
        CASE
            WHEN match(user_agent, 'wget|ahrefsbot|curl|urllib|bitdiscovery|\+https://|googlebot') THEN 'bot'
            WHEN match(user_agent, 'android') THEN 'mobile-android'
            WHEN match(user_agent, 'ipad|iphone|ipod') THEN 'mobile-ios'
            ELSE 'desktop'
        END AS device,
        CASE
            WHEN match(user_agent, 'firefox') THEN 'firefox'
            WHEN match(user_agent, 'chrome|crios') THEN 'chrome'
            WHEN match(user_agent, 'opera') THEN 'opera'
            WHEN match(user_agent, 'msie|trident') THEN 'ie'
            WHEN match(user_agent, 'iphone|ipad|safari') THEN 'safari'
            ELSE 'Unknown'
        END AS browser
    FROM parsed_hits

View file

@ -1,3 +1,65 @@
NODE parsed_hits
DESCRIPTION >
    Reads raw `page_hit` rows from `analytics_events` and unpacks the JSON `payload` into one string column per field.
    A NULL `session_id` is replaced with '0' and the user agent is lower-cased.

SQL >
    SELECT
        timestamp,
        action,
        version,
        coalesce(session_id, '0') AS session_id,
        JSONExtractString(payload, 'locale') AS locale,
        JSONExtractString(payload, 'location') AS location,
        JSONExtractString(payload, 'referrer') AS referrer,
        JSONExtractString(payload, 'pathname') AS pathname,
        JSONExtractString(payload, 'href') AS href,
        JSONExtractString(payload, 'site_uuid') AS site_uuid,
        JSONExtractString(payload, 'member_uuid') AS member_uuid,
        JSONExtractString(payload, 'member_status') AS member_status,
        JSONExtractString(payload, 'post_uuid') AS post_uuid,
        lower(JSONExtractString(payload, 'user-agent')) AS user_agent
    FROM analytics_events
    WHERE action = 'page_hit'
NODE analytics_hits_data
DESCRIPTION >
    Shapes `parsed_hits` for aggregation: passes the identifying columns through, reduces the referrer
    to its domain without `www` as `source`, and classifies `device` and `browser` by regex-matching the
    lower-cased user agent. The first matching WHEN branch wins, so branch order is significant.

SQL >
    SELECT
        site_uuid,
        timestamp,
        action,
        version,
        session_id,
        member_uuid,
        member_status,
        post_uuid,
        location,
        domainWithoutWWW(referrer) AS source,
        pathname,
        href,
        CASE
            WHEN match(user_agent, 'wget|ahrefsbot|curl|urllib|bitdiscovery|\+https://|googlebot') THEN 'bot'
            WHEN match(user_agent, 'android') THEN 'mobile-android'
            WHEN match(user_agent, 'ipad|iphone|ipod') THEN 'mobile-ios'
            ELSE 'desktop'
        END AS device,
        CASE
            WHEN match(user_agent, 'firefox') THEN 'firefox'
            WHEN match(user_agent, 'chrome|crios') THEN 'chrome'
            WHEN match(user_agent, 'opera') THEN 'opera'
            WHEN match(user_agent, 'msie|trident') THEN 'ie'
            WHEN match(user_agent, 'iphone|ipad|safari') THEN 'safari'
            ELSE 'Unknown'
        END AS browser
    FROM parsed_hits
NODE analytics_pages_1
DESCRIPTION >
Aggregate by pathname and calculate session and views
@ -18,7 +80,7 @@ SQL >
) AS member_status,
uniqState(session_id) AS visits,
countState() AS pageviews
FROM analytics_hits
FROM analytics_hits_data
GROUP BY date, device, browser, location, source, pathname, post_uuid,site_uuid
TYPE MATERIALIZED

View file

@ -1,3 +1,65 @@
NODE parsed_hits
DESCRIPTION >
    Reads raw `page_hit` rows from `analytics_events` and unpacks the JSON `payload` into one string column per field.
    A NULL `session_id` is replaced with '0' and the user agent is lower-cased.

SQL >
    SELECT
        timestamp,
        action,
        version,
        coalesce(session_id, '0') AS session_id,
        JSONExtractString(payload, 'locale') AS locale,
        JSONExtractString(payload, 'location') AS location,
        JSONExtractString(payload, 'referrer') AS referrer,
        JSONExtractString(payload, 'pathname') AS pathname,
        JSONExtractString(payload, 'href') AS href,
        JSONExtractString(payload, 'site_uuid') AS site_uuid,
        JSONExtractString(payload, 'member_uuid') AS member_uuid,
        JSONExtractString(payload, 'member_status') AS member_status,
        JSONExtractString(payload, 'post_uuid') AS post_uuid,
        lower(JSONExtractString(payload, 'user-agent')) AS user_agent
    FROM analytics_events
    WHERE action = 'page_hit'
NODE analytics_hits_data
DESCRIPTION >
    Shapes `parsed_hits` for aggregation: passes the identifying columns through, reduces the referrer
    to its domain without `www` as `source`, and classifies `device` and `browser` by regex-matching the
    lower-cased user agent. The first matching WHEN branch wins, so branch order is significant.

SQL >
    SELECT
        site_uuid,
        timestamp,
        action,
        version,
        session_id,
        member_uuid,
        member_status,
        post_uuid,
        location,
        domainWithoutWWW(referrer) AS source,
        pathname,
        href,
        CASE
            WHEN match(user_agent, 'wget|ahrefsbot|curl|urllib|bitdiscovery|\+https://|googlebot') THEN 'bot'
            WHEN match(user_agent, 'android') THEN 'mobile-android'
            WHEN match(user_agent, 'ipad|iphone|ipod') THEN 'mobile-ios'
            ELSE 'desktop'
        END AS device,
        CASE
            WHEN match(user_agent, 'firefox') THEN 'firefox'
            WHEN match(user_agent, 'chrome|crios') THEN 'chrome'
            WHEN match(user_agent, 'opera') THEN 'opera'
            WHEN match(user_agent, 'msie|trident') THEN 'ie'
            WHEN match(user_agent, 'iphone|ipad|safari') THEN 'safari'
            ELSE 'Unknown'
        END AS browser
    FROM parsed_hits
NODE analytics_sessions_1
DESCRIPTION >
Aggregate by session_id and calculate session metrics
@ -20,7 +82,7 @@ SQL >
minSimpleState(timestamp) AS first_view,
maxSimpleState(timestamp) AS latest_view,
countState() AS pageviews
FROM analytics_hits
FROM analytics_hits_data
GROUP BY date, session_id, site_uuid
TYPE MATERIALIZED

View file

@ -1,14 +1,76 @@
NODE parsed_hits
DESCRIPTION >
    Reads raw `page_hit` rows from `analytics_events` and unpacks the JSON `payload` into one string column per field.
    A NULL `session_id` is replaced with '0' and the user agent is lower-cased.

SQL >
    SELECT
        timestamp,
        action,
        version,
        coalesce(session_id, '0') AS session_id,
        JSONExtractString(payload, 'locale') AS locale,
        JSONExtractString(payload, 'location') AS location,
        JSONExtractString(payload, 'referrer') AS referrer,
        JSONExtractString(payload, 'pathname') AS pathname,
        JSONExtractString(payload, 'href') AS href,
        JSONExtractString(payload, 'site_uuid') AS site_uuid,
        JSONExtractString(payload, 'member_uuid') AS member_uuid,
        JSONExtractString(payload, 'member_status') AS member_status,
        JSONExtractString(payload, 'post_uuid') AS post_uuid,
        lower(JSONExtractString(payload, 'user-agent')) AS user_agent
    FROM analytics_events
    WHERE action = 'page_hit'
NODE analytics_hits_data
DESCRIPTION >
    Shapes `parsed_hits` for aggregation: passes the identifying columns through, reduces the referrer
    to its domain without `www` as `source`, and classifies `device` and `browser` by regex-matching the
    lower-cased user agent. The first matching WHEN branch wins, so branch order is significant.

SQL >
    SELECT
        site_uuid,
        timestamp,
        action,
        version,
        session_id,
        member_uuid,
        member_status,
        post_uuid,
        location,
        domainWithoutWWW(referrer) AS source,
        pathname,
        href,
        CASE
            WHEN match(user_agent, 'wget|ahrefsbot|curl|urllib|bitdiscovery|\+https://|googlebot') THEN 'bot'
            WHEN match(user_agent, 'android') THEN 'mobile-android'
            WHEN match(user_agent, 'ipad|iphone|ipod') THEN 'mobile-ios'
            ELSE 'desktop'
        END AS device,
        CASE
            WHEN match(user_agent, 'firefox') THEN 'firefox'
            WHEN match(user_agent, 'chrome|crios') THEN 'chrome'
            WHEN match(user_agent, 'opera') THEN 'opera'
            WHEN match(user_agent, 'msie|trident') THEN 'ie'
            WHEN match(user_agent, 'iphone|ipad|safari') THEN 'safari'
            ELSE 'Unknown'
        END AS browser
    FROM parsed_hits
NODE analytics_sources_1
DESCRIPTION >
Aggregate by referral and calculate session and views
SQL >
WITH (SELECT domainWithoutWWW(href) FROM analytics_hits LIMIT 1) AS current_domain,
WITH (SELECT domainWithoutWWW(href) FROM analytics_hits_data LIMIT 1) AS current_domain,
sessions AS (
SELECT
session_id, argMin(source, timestamp) AS source,
maxIf(member_status, member_status IN ('paid', 'free', 'undefined')) AS member_status
FROM analytics_hits
FROM analytics_hits_data
GROUP BY session_id
)
SELECT
@ -22,7 +84,7 @@ SQL >
b.member_status AS member_status,
uniqState(a.session_id) AS visits,
countState() AS pageviews
FROM analytics_hits as a
FROM analytics_hits_data as a
INNER JOIN sessions AS b ON a.session_id = b.session_id
GROUP BY a.site_uuid, toDate(a.timestamp), a.device, a.browser, a.location, b.member_status, b.source, a.pathname
HAVING b.source != current_domain

View file

@ -6,6 +6,70 @@ DESCRIPTION >
TOKEN "dashboard" READ
TOKEN "stats page" READ
VERSION 0
NODE parsed_hits
DESCRIPTION >
    Reads raw `page_hit` rows from `analytics_events` and unpacks the JSON `payload` into one string column per field.
    A NULL `session_id` is replaced with '0' and the user agent is lower-cased.

SQL >
    SELECT
        timestamp,
        action,
        version,
        coalesce(session_id, '0') AS session_id,
        JSONExtractString(payload, 'locale') AS locale,
        JSONExtractString(payload, 'location') AS location,
        JSONExtractString(payload, 'referrer') AS referrer,
        JSONExtractString(payload, 'pathname') AS pathname,
        JSONExtractString(payload, 'href') AS href,
        JSONExtractString(payload, 'site_uuid') AS site_uuid,
        JSONExtractString(payload, 'member_uuid') AS member_uuid,
        JSONExtractString(payload, 'member_status') AS member_status,
        JSONExtractString(payload, 'post_uuid') AS post_uuid,
        lower(JSONExtractString(payload, 'user-agent')) AS user_agent
    FROM analytics_events
    WHERE action = 'page_hit'
NODE analytics_hits_data
DESCRIPTION >
    Shapes `parsed_hits` for aggregation: passes the identifying columns through, reduces the referrer
    to its domain without `www` as `source`, and classifies `device` and `browser` by regex-matching the
    lower-cased user agent. The first matching WHEN branch wins, so branch order is significant.

SQL >
    SELECT
        site_uuid,
        timestamp,
        action,
        version,
        session_id,
        member_uuid,
        member_status,
        post_uuid,
        location,
        domainWithoutWWW(referrer) AS source,
        pathname,
        href,
        CASE
            WHEN match(user_agent, 'wget|ahrefsbot|curl|urllib|bitdiscovery|\+https://|googlebot') THEN 'bot'
            WHEN match(user_agent, 'android') THEN 'mobile-android'
            WHEN match(user_agent, 'ipad|iphone|ipod') THEN 'mobile-ios'
            ELSE 'desktop'
        END AS device,
        CASE
            WHEN match(user_agent, 'firefox') THEN 'firefox'
            WHEN match(user_agent, 'chrome|crios') THEN 'chrome'
            WHEN match(user_agent, 'opera') THEN 'opera'
            WHEN match(user_agent, 'msie|trident') THEN 'ie'
            WHEN match(user_agent, 'iphone|ipad|safari') THEN 'safari'
            ELSE 'Unknown'
        END AS browser
    FROM parsed_hits
NODE timeseries
DESCRIPTION >
Generate a timeseries for the last 30 minutes, so we can fill empty data points
@ -22,7 +86,7 @@ DESCRIPTION >
SQL >
%
select toStartOfMinute(timestamp) as t, uniq(session_id) as visits
from analytics_hits
from analytics_hits_data
where
site_uuid = {{String(site_uuid, 'mock_site_uuid', description="Tenant ID", required=True)}}
{% if defined(member_status) %}

View file

@ -1 +0,0 @@
tb pipe data analytics_hits --format CSV

View file

@ -1,32 +0,0 @@
"site_uuid","timestamp","action","version","session_id","member_uuid","member_status","post_uuid","location","source","pathname","href","device","browser"
"mock_site_uuid","2100-01-01 00:06:15","page_hit","1","e5c37e25-ed9e-4940-a2be-bc49149d991a","undefined","undefined","6b8635fb-292f-4422-9fe4-d76cfab2ba31","GB","petty-queen.com","/blog/hello-world/","https://my-ghost-site.com/blog/hello-world/","bot","Unknown"
"mock_site_uuid","2100-01-01 01:21:17","page_hit","1","1267b782-e5a1-4334-8cf6-771d72bbc28e","d4678fdf-824c-4d5f-a5fe-c713d409faac","free","undefined","ES","","/","https://my-ghost-site.com/","desktop","chrome"
"mock_site_uuid","2100-01-01 01:39:48","page_hit","1","1267b782-e5a1-4334-8cf6-771d72bbc28e","d4678fdf-824c-4d5f-a5fe-c713d409faac","free","undefined","ES","my-ghost-site.com","/","https://my-ghost-site.com/","desktop","chrome"
"mock_site_uuid","2100-01-01 02:21:13","page_hit","1","2a31286e-53b4-41da-a7fd-89d966072af5","df8343d2-e89d-45b7-ba12-988734efcc56","free","06b1b0c9-fb53-4a15-a060-3db3fde7b1fc","GB","bing.com","/about/","https://my-ghost-site.com/about/","desktop","ie"
"mock_site_uuid","2100-01-01 02:31:43","page_hit","1","2a31286e-53b4-41da-a7fd-89d966072af5","df8343d2-e89d-45b7-ba12-988734efcc56","free","undefined","GB","my-ghost-site.com","/","https://my-ghost-site.com/","desktop","ie"
"mock_site_uuid","2100-01-02 00:59:45","page_hit","1","f253b9b7-0a1a-4168-8fcf-b20a1668ce4d","65bacac2-8122-4ed0-a11f-ac52aa82beb0","paid","06b1b0c9-fb53-4a15-a060-3db3fde7b1fc","GB","google.com","/about/","https://my-ghost-site.com/about/","desktop","firefox"
"mock_site_uuid","2100-01-02 01:12:56","page_hit","1","f253b9b7-0a1a-4168-8fcf-b20a1668ce4d","65bacac2-8122-4ed0-a11f-ac52aa82beb0","paid","undefined","GB","my-ghost-site.com","/","https://my-ghost-site.com/","desktop","firefox"
"mock_site_uuid","2100-01-02 01:16:52","page_hit","1","f253b9b7-0a1a-4168-8fcf-b20a1668ce4d","65bacac2-8122-4ed0-a11f-ac52aa82beb0","paid","undefined","GB","my-ghost-site.com","/","https://my-ghost-site.com/","desktop","firefox"
"mock_site_uuid","2100-01-03 00:01:24","page_hit","1","9c15f99e-c8b1-4145-a073-e7f8649d2fa4","4c14393f-d792-403e-bbdc-aa5af3abbdd9","free","undefined","US","duckduckgo.com","/","https://my-ghost-site.com/","desktop","firefox"
"mock_site_uuid","2100-01-03 01:28:09","page_hit","1","9c15f99e-c8b1-4145-a073-e7f8649d2fa4","4c14393f-d792-403e-bbdc-aa5af3abbdd9","free","6b8635fb-292f-4422-9fe4-d76cfab2ba31","US","my-ghost-site.com","/blog/hello-world/","https://my-ghost-site.com/blog/hello-world/","desktop","firefox"
"mock_site_uuid","2100-01-03 01:41:44","page_hit","1","8a2461a8-91cd-4f01-b066-3de6dc946995","f4c738bc-7327-440c-8007-6a0b306c05e3","free","06b1b0c9-fb53-4a15-a060-3db3fde7b1fc","DE","bing.com","/about/","https://my-ghost-site.com/about/","desktop","chrome"
"mock_site_uuid","2100-01-03 01:53:31","page_hit","1","8a2461a8-91cd-4f01-b066-3de6dc946995","f4c738bc-7327-440c-8007-6a0b306c05e3","free","6b8635fb-292f-4422-9fe4-d76cfab2ba31","DE","my-ghost-site.com","/blog/hello-world/","https://my-ghost-site.com/blog/hello-world/","desktop","chrome"
"mock_site_uuid","2100-01-03 02:00:19","page_hit","1","8a2461a8-91cd-4f01-b066-3de6dc946995","f4c738bc-7327-440c-8007-6a0b306c05e3","free","06b1b0c9-fb53-4a15-a060-3db3fde7b1fc","DE","my-ghost-site.com","/about/","https://my-ghost-site.com/about/","desktop","chrome"
"mock_site_uuid","2100-01-03 02:51:20","page_hit","1","50785df1-3232-4ff7-8495-d93e06d63f5c","3675e750-09bf-44c9-bc3f-b9aebac37c5d","paid","undefined","FR","search.yahoo.com","/","https://my-ghost-site.com/","desktop","firefox"
"mock_site_uuid","2100-01-03 03:52:39","page_hit","1","50785df1-3232-4ff7-8495-d93e06d63f5c","3675e750-09bf-44c9-bc3f-b9aebac37c5d","paid","undefined","FR","my-ghost-site.com","/","https://my-ghost-site.com/","desktop","firefox"
"mock_site_uuid","2100-01-04 00:25:39","page_hit","1","59478d87-ce95-40fd-a081-65d1e497bcfc","97c79891-2ae9-4eb2-ada8-89d2a998747d","paid","6b8635fb-292f-4422-9fe4-d76cfab2ba31","GB","","/blog/hello-world/","https://my-ghost-site.com/blog/hello-world/","desktop","chrome"
"mock_site_uuid","2100-01-04 01:10:48","page_hit","1","a6b6c4e6-19e3-47a9-afc6-d9870592652e","undefined","undefined","6b8635fb-292f-4422-9fe4-d76cfab2ba31","GB","","/blog/hello-world/","https://my-ghost-site.com/blog/hello-world/","desktop","chrome"
"mock_site_uuid","2100-01-04 01:16:10","page_hit","1","a6b6c4e6-19e3-47a9-afc6-d9870592652e","undefined","undefined","06b1b0c9-fb53-4a15-a060-3db3fde7b1fc","GB","my-ghost-site.com","/about/","https://my-ghost-site.com/about/","desktop","chrome"
"mock_site_uuid","2100-01-04 01:20:15","page_hit","1","a6b6c4e6-19e3-47a9-afc6-d9870592652e","undefined","undefined","06b1b0c9-fb53-4a15-a060-3db3fde7b1fc","GB","my-ghost-site.com","/about/","https://my-ghost-site.com/about/","desktop","chrome"
"mock_site_uuid","2100-01-04 01:35:41","page_hit","1","e22a7f6f-28da-4715-a199-6f0338b593d4","5369031a-a5cd-4176-83d8-d6ffcb3bcfb8","free","6b8635fb-292f-4422-9fe4-d76cfab2ba31","GB","","/blog/hello-world/","https://my-ghost-site.com/blog/hello-world/","desktop","chrome"
"mock_site_uuid","2100-01-04 01:36:33","page_hit","1","e22a7f6f-28da-4715-a199-6f0338b593d4","5369031a-a5cd-4176-83d8-d6ffcb3bcfb8","free","undefined","GB","my-ghost-site.com","/","https://my-ghost-site.com/","desktop","chrome"
"mock_site_uuid","2100-01-04 01:54:50","page_hit","1","e22a7f6f-28da-4715-a199-6f0338b593d4","5369031a-a5cd-4176-83d8-d6ffcb3bcfb8","free","06b1b0c9-fb53-4a15-a060-3db3fde7b1fc","GB","my-ghost-site.com","/about/","https://my-ghost-site.com/about/","desktop","chrome"
"mock_site_uuid","2100-01-05 00:29:59","page_hit","1","490475f1-1fb7-4672-9edd-daa1b411b5f9","undefined","undefined","6b8635fb-292f-4422-9fe4-d76cfab2ba31","GB","baidu.com","/blog/hello-world/","https://my-ghost-site.com/blog/hello-world/","desktop","chrome"
"mock_site_uuid","2100-01-05 00:37:42","page_hit","1","490475f1-1fb7-4672-9edd-daa1b411b5f9","undefined","undefined","undefined","GB","my-ghost-site.com","/","https://my-ghost-site.com/","desktop","chrome"
"mock_site_uuid","2100-01-05 00:38:12","page_hit","1","490475f1-1fb7-4672-9edd-daa1b411b5f9","undefined","undefined","6b8635fb-292f-4422-9fe4-d76cfab2ba31","GB","my-ghost-site.com","/blog/hello-world/","https://my-ghost-site.com/blog/hello-world/","desktop","chrome"
"mock_site_uuid","2100-01-05 01:51:00","page_hit","1","d8e4622f-95cc-4fba-b31b-f38ff72e0975","75a190eb-62da-46d2-972d-a9763c954f42","paid","06b1b0c9-fb53-4a15-a060-3db3fde7b1fc","ES","","/about/","https://my-ghost-site.com/about/","desktop","ie"
"mock_site_uuid","2100-01-05 01:53:03","page_hit","1","d8e4622f-95cc-4fba-b31b-f38ff72e0975","75a190eb-62da-46d2-972d-a9763c954f42","paid","6b8635fb-292f-4422-9fe4-d76cfab2ba31","ES","my-ghost-site.com","/blog/hello-world/","https://my-ghost-site.com/blog/hello-world/","desktop","ie"
"mock_site_uuid","2100-01-06 00:51:26","page_hit","1","8d975128-2027-40c6-834a-972cc0293d21","b7e0fca6-27ce-46c0-af57-c591f20dcd51","free","06b1b0c9-fb53-4a15-a060-3db3fde7b1fc","FR","","/about/","https://my-ghost-site.com/about/","desktop","safari"
"mock_site_uuid","2100-01-06 01:28:38","page_hit","1","61a2896b-7cf8-4853-86a6-a0e4f87c1e21","undefined","undefined","6b8635fb-292f-4422-9fe4-d76cfab2ba31","GB","search.yahoo.com","/blog/hello-world/","https://my-ghost-site.com/blog/hello-world/","desktop","chrome"
"mock_site_uuid","2100-01-07 01:44:10","page_hit","1","7f1e88e1-da8e-46df-bc69-d04fb29d603d","undefined","undefined","06b1b0c9-fb53-4a15-a060-3db3fde7b1fc","US","wilted-tick.com","/about/","https://my-ghost-site.com/about/","desktop","firefox"
"mock_site_uuid","2100-01-07 02:23:19","page_hit","1","98159299-8111-4dc8-9156-bb339fe9508c","undefined","undefined","06b1b0c9-fb53-4a15-a060-3db3fde7b1dd","US","my-ghost-site.com","/blog/hello-world/","https://my-ghost-site.com/blog/hello-world/","desktop","firefox"