0
Fork 0
mirror of https://github.com/TryGhost/Ghost.git synced 2025-04-01 02:41:39 -05:00

Updated web analytics assets & tests (#22409)

no ref

- rebuilt the Tinybird datafiles
- fixed tests
- changed structure to use includes for the hits pipe

We needed passing tests in order to get the Deploy function to actually
work, and we needed it pointing at our staging instance. Later, we'll also
need to hook it up to push to production, using a mechanism still to be decided.

---------

Co-authored-by: Chris Raible <chris@ghost.org>
This commit is contained in:
Steve Larson 2025-03-06 14:52:29 -06:00 committed by GitHub
parent 1f6be425fb
commit fb7b444f68
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
34 changed files with 127 additions and 60 deletions

View file

@ -908,15 +908,15 @@ jobs:
- name: Check all the datafiles syntax
run: tb check
- name: Check auth
run: tb --host=${{ secrets.TB_HOST }} --token=${{ secrets.TB_ADMIN_TOKEN }} auth info
run: tb --host=${{ secrets.TB_HOST }} --token=${{ secrets.TB_ADMIN_TOKEN_STAGING }} auth info
- name: Try to delete previous Branch
run: |
output=$(tb --host ${{ secrets.TB_HOST }} --token ${{ secrets.TB_ADMIN_TOKEN }} branch ls)
output=$(tb --host ${{ secrets.TB_HOST }} --token ${{ secrets.TB_ADMIN_TOKEN_STAGING }} branch ls)
# Check if the branch name exists in the output
if echo "$output" | grep -q "\b$TB_BRANCH_NAME\b"; then
tb \
--host ${{ secrets.TB_HOST }} \
--token ${{ secrets.TB_ADMIN_TOKEN }} \
--token ${{ secrets.TB_ADMIN_TOKEN_STAGING }} \
branch rm $TB_BRANCH_NAME \
--yes
else
@ -926,7 +926,7 @@ jobs:
run: |
tb \
--host ${{ secrets.TB_HOST }} \
--token ${{ secrets.TB_ADMIN_TOKEN }} \
--token ${{ secrets.TB_ADMIN_TOKEN_STAGING }} \
branch create $TB_BRANCH_NAME \
${_ENV_FLAGS}
- name: Deploy changes to the test Branch
@ -967,12 +967,12 @@ jobs:
- name: Try to delete previous Branch
if: always()
run: |
output=$(tb --host ${{ secrets.TB_HOST }} --token ${{ secrets.TB_ADMIN_TOKEN }} branch ls)
output=$(tb --host ${{ secrets.TB_HOST }} --token ${{ secrets.TB_ADMIN_TOKEN_STAGING }} branch ls)
# Check if the branch name exists in the output
if echo "$output" | grep -q "\b$TB_BRANCH_NAME\b"; then
tb \
--host ${{ secrets.TB_HOST }} \
--token ${{ secrets.TB_ADMIN_TOKEN }} \
--token ${{ secrets.TB_ADMIN_TOKEN_STAGING }} \
branch rm $TB_BRANCH_NAME \
--yes
else

View file

@ -1,5 +1,7 @@
VERSION 0
TAGS "v0"
SCHEMA >
`site_uuid` String,
`post_uuid` String,

View file

@ -1,5 +1,7 @@
VERSION 0
TAGS "v0"
SCHEMA >
`site_uuid` String,
`date` Date,

View file

@ -1,5 +1,7 @@
VERSION 0
TAGS "v0"
SCHEMA >
`site_uuid` String,
`date` Date,

View file

@ -1,9 +1,6 @@
TOKEN "tracker" APPEND
TOKEN "analytics_events_json_m" APPEND
TOKEN "analytics_events_json_s" APPEND
TAGS "v0"
SCHEMA >
`timestamp` DateTime `json:$.timestamp`,

View file

@ -1,27 +1,10 @@
VERSION 0
NODE parsed_hits
SQL >
SELECT
timestamp,
action,
version,
coalesce(session_id, '0') as session_id,
toString(payload.locale) as locale,
toString(payload.location) as location,
toString(payload.referrer) as referrer,
toString(payload.pathname) as pathname,
toString(payload.href) as href,
site_uuid,
toString(payload.member_uuid) as member_uuid,
toString(payload.member_status) as member_status,
toString(payload.post_uuid) as post_uuid,
toString(payload.post_type) as post_type,
lower(toString(getSubcolumn(payload,'user-agent'))) as user_agent
FROM analytics_events
where action = 'page_hit'
TAGS "v0"
NODE _hits
INCLUDE "_parsed_hits.incl"
NODE hits
SQL >
SELECT
@ -71,4 +54,3 @@ SQL >
END as browser
FROM parsed_hits

View file

@ -0,0 +1,25 @@
VERSION 0
TAGS "v0"

NODE parsed_hits
DESCRIPTION >
    Selects the page_hit events from analytics_events and exposes the payload subcolumns as String columns. A NULL session_id is replaced with '0' and the user-agent value is lower-cased.

SQL >
    SELECT
        timestamp,
        action,
        version,
        coalesce(session_id, '0') AS session_id,
        toString(payload.locale) AS locale,
        toString(payload.location) AS location,
        toString(payload.referrer) AS referrer,
        toString(payload.pathname) AS pathname,
        toString(payload.href) AS href,
        site_uuid,
        toString(payload.member_uuid) AS member_uuid,
        toString(payload.member_status) AS member_status,
        toString(payload.post_uuid) AS post_uuid,
        toString(payload.post_type) AS post_type,
        lower(toString(getSubcolumn(payload, 'user-agent'))) AS user_agent
    FROM analytics_events
    WHERE action = 'page_hit'

View file

@ -1,4 +1,9 @@
VERSION 0
TAGS "v0"
INCLUDE "_parsed_hits.incl"
NODE timeseries
SQL >
@ -65,17 +70,37 @@ SQL >
toStartOfHour(timestamp) as date,
session_id,
member_status,
device,
browser,
case
when match(user_agent, 'wget|ahrefsbot|curl|urllib|bitdiscovery|\+https://|googlebot')
then 'bot'
when match(user_agent, 'android')
then 'mobile-android'
when match(user_agent, 'ipad|iphone|ipod')
then 'mobile-ios'
else 'desktop'
END as device,
case
when match(user_agent, 'firefox')
then 'firefox'
when match(user_agent, 'chrome|crios')
then 'chrome'
when match(user_agent, 'opera')
then 'opera'
when match(user_agent, 'msie|trident')
then 'ie'
when match(user_agent, 'iphone|ipad|safari')
then 'safari'
else 'Unknown'
END as browser,
location,
source,
domainWithoutWWW(referrer) as source,
pathname,
uniq(session_id) as visits,
count() as pageviews,
case when min(timestamp) = max(timestamp) then 1 else 0 end as is_bounce,
max(timestamp) as latest_view_aux,
min(timestamp) as first_view_aux
from _hits
from parsed_hits
where toDate(timestamp) = {{ Date(date_from) }}
group by toStartOfHour(timestamp), session_id, site_uuid, member_status, device, browser, location, source, pathname
{% else %}
@ -135,6 +160,4 @@ SQL >
select a.date, b.visits, b.pageviews, b.bounce_rate, b.avg_session_sec
from timeseries a
left join data b using date
left join data b using date

View file

@ -1,4 +1,7 @@
VERSION 0
TAGS "v0"
NODE _top_browsers_0
SQL >

View file

@ -1,4 +1,7 @@
VERSION 0
TAGS "v0"
NODE _top_devices_0
SQL >

View file

@ -1,4 +1,7 @@
VERSION 0
TAGS "v0"
NODE _top_locations_0
SQL >

View file

@ -1,4 +1,7 @@
VERSION 0
TAGS "v0"
DESCRIPTION >
Top Operating Systems ordered by most visits.
Accepts `date_from` and `date_to` date filter. Defaults to last 7 days.

View file

@ -1,4 +1,7 @@
VERSION 0
TAGS "v0"
NODE _top_pages_0
SQL >

View file

@ -1,4 +1,7 @@
VERSION 0
TAGS "v0"
TOKEN "_top_sources_endpoint_read_1325" READ
NODE _top_sources_0

View file

@ -1,4 +1,9 @@
VERSION 0
TAGS "v0"
INCLUDE "_hits.incl"
NODE _pages_0
SQL >
@ -16,7 +21,7 @@ SQL >
maxIfState(member_status, member_status IN ('paid', 'free', 'undefined')) AS member_status,
uniqState(session_id) AS visits,
countState() AS pageviews
FROM _hits
FROM hits
GROUP BY
site_uuid,
date,

View file

@ -1,4 +1,9 @@
VERSION 0
TAGS "v0"
INCLUDE "_hits.incl"
NODE _sessions_0
SQL >
@ -19,7 +24,7 @@ SQL >
maxSimpleState(timestamp) AS latest_view,
countState() AS pageviews
FROM _hits
FROM hits
GROUP BY
site_uuid,
date,

View file

@ -1,11 +1,16 @@
VERSION 0
TAGS "v0"
INCLUDE "_hits.incl"
NODE _sources_0
SQL >
WITH
(
SELECT domainWithoutWWW(href)
FROM _hits
FROM hits
LIMIT 1
) AS current_domain,
sessions AS
@ -14,7 +19,7 @@ SQL >
session_id,
argMin(source, timestamp) AS source,
maxIfState(member_status, member_status IN ('paid', 'free', 'undefined')) AS member_status
FROM _hits
FROM hits
GROUP BY session_id
)
SELECT
@ -33,7 +38,7 @@ SQL >
uniqState(a.session_id) AS visits,
countState() AS pageviews
FROM _hits AS a
FROM hits AS a
INNER JOIN sessions AS b ON a.session_id = b.session_id
GROUP BY
a.site_uuid,

View file

@ -1 +1 @@
tinybird-cli>=4,<5
tinybird-cli>=5.17.0

View file

@ -19,6 +19,7 @@ done
# Allow version to be passed in or default to 0
export TB_VERSION=${TB_VERSION:-0}
echo "Using TB_VERSION: $TB_VERSION"
# Attempt to create the branch and check for errors
if ! tb branch create "$BRANCH_NAME"; then

View file

@ -1 +1 @@
tb pipe data kpis__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV
tb pipe data api_kpis__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV

View file

@ -1 +1 @@
tb pipe data top_browsers__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV
tb pipe data api_top_browsers__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV

View file

@ -1 +1 @@
tb pipe data top_devices__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV
tb pipe data api_top_devices__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV

View file

@ -1 +1 @@
tb pipe data top_locations__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV
tb pipe data api_top_locations__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV

View file

@ -1 +1 @@
tb pipe data top_pages__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV
tb pipe data api_top_pages__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV

View file

@ -1 +1 @@
tb pipe data top_sources__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV
tb pipe data api_top_sources__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV

View file

@ -1 +1 @@
tb pipe data kpis__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-01 --site_uuid mock_site_uuid --format CSV
tb pipe data api_kpis__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-01 --site_uuid mock_site_uuid --format CSV

View file

@ -1 +1 @@
tb pipe data kpis__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV --browser chrome
tb pipe data api_kpis__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV --browser chrome

View file

@ -1 +1 @@
tb pipe data top_browsers__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV --browser chrome
tb pipe data api_top_browsers__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV --browser chrome

View file

@ -1 +1 @@
tb pipe data top_pages__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV --browser chrome
tb pipe data api_top_pages__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV --browser chrome

View file

@ -1 +1 @@
tb pipe data top_sources__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV --browser chrome
tb pipe data api_top_sources__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV --browser chrome

View file

@ -1 +1 @@
tb pipe data kpis__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV --source bing.com
tb pipe data api_kpis__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV --source bing.com

View file

@ -1 +1 @@
tb pipe data top_browsers__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV --source bing.com
tb pipe data api_top_browsers__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV --source bing.com

View file

@ -1 +1 @@
tb pipe data top_pages__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV --source bing.com
tb pipe data api_top_pages__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV --source bing.com

View file

@ -1 +1 @@
tb pipe data top_sources__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV --source bing.com
tb pipe data api_top_sources__v${TB_VERSION:-0} --date_from 2100-01-01 --date_to 2100-01-07 --site_uuid mock_site_uuid --format CSV --source bing.com