Queries
Query with dynamic table range:
SELECT username, action, action_source,
sum((case when (EXTRACT(HOUR from timestamp AT TIME ZONE 'CET')>=0 and EXTRACT(HOUR from timestamp AT TIME ZONE 'CET')<6) then 1 else 0 end)) as night_events,
sum((case when (EXTRACT(HOUR from timestamp AT TIME ZONE 'CET')>=6 and EXTRACT(HOUR from timestamp AT TIME ZONE 'CET')<12) then 1 else 0 end)) as morning_events,
sum((case when (EXTRACT(HOUR from timestamp AT TIME ZONE 'CET')>=12 and EXTRACT(HOUR from timestamp AT TIME ZONE 'CET')<17) then 1 else 0 end)) as afternoon_events,
sum((case when (EXTRACT(HOUR from timestamp AT TIME ZONE 'CET')>=17) then 1 else 0 end)) as evening_events
FROM `YoloDataset.partitioned_table_*`
where
_TABLE_SUFFIX BETWEEN '2016-09-01' and '2017-05-05' and username = 'marcin'
group by username, action, action_source
Arrays
Unnest array and group by it's values
Scenario - you have column with characters and you want to find out what is the distribution of characters in text.
with data as (
SELECT
split(lower(text_column), '') as splitted, entry_type
FROM `table`
)
select entry_type, character, count(character) as occurrences from data cross join unnest(data.splitted) as character
group by entry_type, character
order by entry_type, occurrences desc
Statistics
Percentiles / quantiles query
select
percentiles[offset(10)] as p10,
percentiles[offset(25)] as p25,
percentiles[offset(50)] as p50,
percentiles[offset(75)] as p75,
percentiles[offset(90)] as p90,
from (
select approx_quantiles(space_used, 100) percentiles
from `TABLE`
);
UDF
Custom user defined functions
JavaScript UDF in BigQuery
Example
CREATE TEMP FUNCTION extractClass(probs STRING, isClass BOOL)
RETURNS STRING
LANGUAGE js AS
"""
var o = JSON.parse(probs);
var cls = Object.keys(o).map(function(key) {
return [key, o[key]];
}).sort(function(first, second) {
return second[1] - first[1];
});
return cls[0][(isClass ? 0 : 1)];
""";
JavaScript tokenize text
CREATE TEMP FUNCTION tokenize(path STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS
"""
var r = new RegExp(/[\\s,_\\/\\.\\-]+/);
var tokens = path.toLowerCase().split(r).filter(Boolean);
return tokens;
""";
with data as (
SELECT cls, tokenize(some_text) as tokens FROM `table`
)
select cls, token
from data cross join unnest(data.tokens) as token
limit 100
No matches...