Learn BigQuery
In [ ]:
%%bash
pip install --upgrade 'google-cloud-bigquery[bqstorage,pandas]'
In [ ]:
%load_ext google.cloud.bigquery
In [ ]:
%%bigquery df_example
SELECT * FROM (
SELECT
[414016,
5767168,
89019964] AS phone_numbers
UNION ALL
SELECT
[8708227,
37637318] AS phone_numbers
UNION ALL
SELECT
[9349460,
84826376,
9215080,
26331650] AS phone_numbers)
In [ ]:
df_example.to_dict()
Out[ ]:
In [ ]:
phone_number_strings_list = []
for phone_numbers in df_example['phone_numbers']:
phone_number_strings = []
for phone_number in phone_numbers:
phone_number_string = f'{phone_number:08d}'
phone_number_strings.append(f'(+81) {phone_number_string[:4]}-{phone_number_string[-4:]}')
phone_number_strings_list.append(phone_number_strings)
df_example['phone_number_strings'] = phone_number_strings_list
del df_example['phone_numbers']
In [ ]:
df_example.to_dict()
Out[ ]:
In [ ]:
!sudo -HE pip install pandas_gbq
In [ ]:
df_example.to_gbq(project_id = 'sandbox-sheng', destination_table = 'bigquery_learning.customers', if_exists = 'replace')
In [ ]:
%%bigquery
SELECT
*
FROM
`sandbox-sheng.bigquery_learning.customers`
Out[ ]:
IPython Magics for BigQuery¶
In [ ]:
from datetime import datetime, timezone, timedelta
# from 30 days ago to 29 days ago
start_date = datetime.now(timezone(timedelta(hours=+9))) - timedelta(days=30)
end_date = datetime.now(timezone(timedelta(hours=+9))) - timedelta(days=29)
params = {
'start_date': start_date.strftime('%Y-%m-%d %H:%M:%S Asia/Tokyo'),
'end_date': end_date.strftime('%Y-%m-%d %H:%M:%S Asia/Tokyo')
}
In [ ]:
print(params['start_date'])
print(params['end_date'])
In [ ]:
%%time
%%bigquery df --param $params
SELECT
*
FROM
`sandbox-sheng.fluentd.fluentd_test`
WHERE
time >= @start_date
AND time < @end_date
ORDER BY
time ASC
In [ ]:
df
Read BigQuery Reference¶
In [ ]:
%%bigquery
SELECT
ARRAY(
SELECT
CONCAT('(+81) ', SUBSTR(FORMAT('%08d', phone_number), 0, 4), '-', SUBSTR(FORMAT('%08d', phone_number), -4))
FROM
UNNEST(phone_numbers) AS phone_number )[OFFSET(0)] AS phone_number
FROM (
SELECT
[414016,
5767168,
89019964] AS phone_numbers
UNION ALL
SELECT
[8708227,
37637318] AS phone_numbers
UNION ALL
SELECT
[9349460,
84826376,
9215080,
26331650] AS phone_numbers)
Out[ ]:
In [ ]:
%%bigquery
WITH fruits AS
(SELECT "apple" AS fruit
UNION ALL SELECT "pear" AS fruit
UNION ALL SELECT "banana" AS fruit)
SELECT ARRAY_AGG(fruit ORDER BY fruit) AS fruit_basket
FROM fruits
Out[ ]:
In [ ]:
%%bigquery
SELECT
name,
ARRAY_AGG( DISTINCT phone_number
ORDER BY
phone_number ASC )[OFFSET(0)] AS phone_number
FROM (
SELECT
'Andrew' AS name,
414016 AS phone_number
UNION ALL
SELECT
'Andrew' AS name,
5767168 AS phone_number
UNION ALL
SELECT
'Andrew' AS name,
89019964 AS phone_number
UNION ALL
SELECT
'Andrew' AS name,
8708227 AS phone_number
UNION ALL
SELECT
'Andrew' AS name,
37637318 AS phone_number
UNION ALL
SELECT
'Kelly' AS name,
9349460 AS phone_number
UNION ALL
SELECT
'Kelly' AS name,
84826376 AS phone_number
UNION ALL
SELECT
'Kelly' AS name,
9215080 AS phone_number
UNION ALL
SELECT
'Kelly' AS name,
26331650 AS phone_number)
GROUP BY
name
Out[ ]: