Queries

Query with dynamic table range:

SELECT username, action, action_source,
sum((case when (EXTRACT(HOUR from timestamp AT TIME ZONE 'CET')>=0 and EXTRACT(HOUR from timestamp AT TIME ZONE 'CET')<6) then 1 else 0 end)) as night_events,
sum((case when (EXTRACT(HOUR from timestamp AT TIME ZONE 'CET')>=6 and EXTRACT(HOUR from timestamp AT TIME ZONE 'CET')<12) then 1 else 0 end)) as morning_events,
sum((case when (EXTRACT(HOUR from timestamp AT TIME ZONE 'CET')>=12 and EXTRACT(HOUR from timestamp AT TIME ZONE 'CET')<17) then 1 else 0 end)) as afternoon_events,
sum((case when (EXTRACT(HOUR from timestamp AT TIME ZONE 'CET')>=17) then 1 else 0 end)) as evening_events
FROM `YoloDataset.partitioned_table_*`
where
_TABLE_SUFFIX BETWEEN '2016-09-01' and '2017-05-05' and username = 'marcin'
group by username, action, action_source

Arrays

Unnest array and group by it's values

Scenario - you have column with characters and you want to find out what is the distribution of characters in text.

with data as (
SELECT 
split(lower(text_column), '') as splitted, entry_type
FROM `table` 
)
select entry_type, character, count(character) as occurrences from data cross join unnest(data.splitted) as character
group by entry_type, character
order by entry_type, occurrences desc

Statistics

Percentiles / quantiles query

select
  percentiles[offset(10)] as p10,
  percentiles[offset(25)] as p25,
  percentiles[offset(50)] as p50,
  percentiles[offset(75)] as p75,
  percentiles[offset(90)] as p90,
from (
  select approx_quantiles(space_used, 100) percentiles
  from `TABLE`
);

UDF

Custom user defined functions

JavaScript UDF in BigQuery

Example

CREATE TEMP FUNCTION extractClass(probs STRING, isClass BOOL)
RETURNS STRING
LANGUAGE js AS
"""
  var o = JSON.parse(probs);
  var cls = Object.keys(o).map(function(key) {
    return [key, o[key]];
  }).sort(function(first, second) {
    return second[1] - first[1];
  });
  return cls[0][(isClass ? 0 : 1)];
""";

JavaScript tokenize text


CREATE TEMP FUNCTION tokenize(path STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS
"""
var r = new RegExp(/[\\s,_\\/\\.\\-]+/);
var tokens = path.toLowerCase().split(r).filter(Boolean);
return tokens;
""";

with data as (
SELECT cls, tokenize(some_text) as tokens FROM `table`
)

select cls, token
from data cross join unnest(data.tokens) as token
limit 100
No matches...