In [ ]:
%%bash
pip install --upgrade 'google-cloud-bigquery[bqstorage,pandas]'
Collecting google-cloud-bigquery[bqstorage,pandas]
  Downloading google_cloud_bigquery-2.30.1-py2.py3-none-any.whl (203 kB)
Collecting proto-plus>=1.10.0
  Downloading proto_plus-1.19.8-py3-none-any.whl (45 kB)
Requirement already satisfied: packaging>=14.3 in /opt/conda/lib/python3.9/site-packages (from google-cloud-bigquery[bqstorage,pandas]) (21.2)
Requirement already satisfied: protobuf>=3.12.0 in /opt/conda/lib/python3.9/site-packages (from google-cloud-bigquery[bqstorage,pandas]) (3.18.1)
Collecting google-api-core[grpc]<3.0.0dev,>=1.29.0
  Downloading google_api_core-2.2.2-py2.py3-none-any.whl (95 kB)
Requirement already satisfied: python-dateutil<3.0dev,>=2.7.2 in /opt/conda/lib/python3.9/site-packages (from google-cloud-bigquery[bqstorage,pandas]) (2.8.2)
Collecting grpcio<2.0dev,>=1.38.1
  Downloading grpcio-1.42.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.0 MB)
Collecting google-resumable-media<3.0dev,>=0.6.0
  Downloading google_resumable_media-2.1.0-py2.py3-none-any.whl (75 kB)
Requirement already satisfied: requests<3.0.0dev,>=2.18.0 in /opt/conda/lib/python3.9/site-packages (from google-cloud-bigquery[bqstorage,pandas]) (2.26.0)
Collecting google-cloud-core<3.0.0dev,>=1.4.1
  Downloading google_cloud_core-2.2.1-py2.py3-none-any.whl (29 kB)
Requirement already satisfied: pyarrow<7.0dev,>=3.0.0 in /opt/conda/lib/python3.9/site-packages (from google-cloud-bigquery[bqstorage,pandas]) (6.0.0)
Collecting google-cloud-bigquery-storage<3.0.0dev,>=2.0.0
  Downloading google_cloud_bigquery_storage-2.10.1-py2.py3-none-any.whl (171 kB)
Requirement already satisfied: pandas>=0.24.2 in /opt/conda/lib/python3.9/site-packages (from google-cloud-bigquery[bqstorage,pandas]) (1.3.4)
Requirement already satisfied: setuptools>=40.3.0 in /opt/conda/lib/python3.9/site-packages (from google-api-core[grpc]<3.0.0dev,>=1.29.0->google-cloud-bigquery[bqstorage,pandas]) (59.1.1)
Collecting googleapis-common-protos<2.0dev,>=1.52.0
  Downloading googleapis_common_protos-1.53.0-py2.py3-none-any.whl (198 kB)
Collecting google-auth<3.0dev,>=1.25.0
  Downloading google_auth-2.3.3-py2.py3-none-any.whl (155 kB)
Collecting grpcio-status<2.0dev,>=1.33.2
  Downloading grpcio_status-1.42.0-py3-none-any.whl (10.0 kB)
Collecting libcst>=0.2.5
  Downloading libcst-0.3.23-py3-none-any.whl (517 kB)
Collecting google-crc32c<2.0dev,>=1.0
  Downloading google_crc32c-1.3.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (36 kB)
Requirement already satisfied: six>=1.5.2 in /opt/conda/lib/python3.9/site-packages (from grpcio<2.0dev,>=1.38.1->google-cloud-bigquery[bqstorage,pandas]) (1.16.0)
Requirement already satisfied: pyparsing<3,>=2.0.2 in /opt/conda/lib/python3.9/site-packages (from packaging>=14.3->google-cloud-bigquery[bqstorage,pandas]) (2.4.7)
Requirement already satisfied: pytz>=2017.3 in /opt/conda/lib/python3.9/site-packages (from pandas>=0.24.2->google-cloud-bigquery[bqstorage,pandas]) (2021.3)
Requirement already satisfied: numpy>=1.17.3 in /opt/conda/lib/python3.9/site-packages (from pandas>=0.24.2->google-cloud-bigquery[bqstorage,pandas]) (1.20.3)
Collecting protobuf>=3.12.0
  Downloading protobuf-3.19.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.9/site-packages (from requests<3.0.0dev,>=2.18.0->google-cloud-bigquery[bqstorage,pandas]) (2021.10.8)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.9/site-packages (from requests<3.0.0dev,>=2.18.0->google-cloud-bigquery[bqstorage,pandas]) (1.26.7)
Requirement already satisfied: charset-normalizer~=2.0.0 in /opt/conda/lib/python3.9/site-packages (from requests<3.0.0dev,>=2.18.0->google-cloud-bigquery[bqstorage,pandas]) (2.0.0)
Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.9/site-packages (from requests<3.0.0dev,>=2.18.0->google-cloud-bigquery[bqstorage,pandas]) (3.1)
Collecting rsa<5,>=3.1.4
  Downloading rsa-4.8-py3-none-any.whl (39 kB)
Collecting cachetools<5.0,>=2.0.0
  Downloading cachetools-4.2.4-py3-none-any.whl (10 kB)
Collecting pyasn1-modules>=0.2.1
  Downloading pyasn1_modules-0.2.8-py2.py3-none-any.whl (155 kB)
Collecting typing-inspect>=0.4.0
  Downloading typing_inspect-0.7.1-py3-none-any.whl (8.4 kB)
Requirement already satisfied: typing-extensions>=3.7.4.2 in /opt/conda/lib/python3.9/site-packages (from libcst>=0.2.5->google-cloud-bigquery-storage<3.0.0dev,>=2.0.0->google-cloud-bigquery[bqstorage,pandas]) (3.10.0.2)
Requirement already satisfied: pyyaml>=5.2 in /opt/conda/lib/python3.9/site-packages (from libcst>=0.2.5->google-cloud-bigquery-storage<3.0.0dev,>=2.0.0->google-cloud-bigquery[bqstorage,pandas]) (6.0)
Collecting pyasn1<0.5.0,>=0.4.6
  Downloading pyasn1-0.4.8-py2.py3-none-any.whl (77 kB)
Collecting mypy-extensions>=0.3.0
  Downloading mypy_extensions-0.4.3-py2.py3-none-any.whl (4.5 kB)
Installing collected packages: pyasn1, rsa, pyasn1-modules, protobuf, cachetools, mypy-extensions, grpcio, googleapis-common-protos, google-auth, typing-inspect, grpcio-status, google-crc32c, google-api-core, proto-plus, libcst, google-resumable-media, google-cloud-core, google-cloud-bigquery-storage, google-cloud-bigquery
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.18.1
    Uninstalling protobuf-3.18.1:
      Successfully uninstalled protobuf-3.18.1
Successfully installed cachetools-4.2.4 google-api-core-2.2.2 google-auth-2.3.3 google-cloud-bigquery-2.30.1 google-cloud-bigquery-storage-2.10.1 google-cloud-core-2.2.1 google-crc32c-1.3.0 google-resumable-media-2.1.0 googleapis-common-protos-1.53.0 grpcio-1.42.0 grpcio-status-1.42.0 libcst-0.3.23 mypy-extensions-0.4.3 proto-plus-1.19.8 protobuf-3.19.1 pyasn1-0.4.8 pyasn1-modules-0.2.8 rsa-4.8 typing-inspect-0.7.1
In [ ]:
%load_ext google.cloud.bigquery
In [ ]:
%%bigquery df_example
SELECT * FROM (
  SELECT
    [414016,
    5767168,
    89019964] AS phone_numbers
  UNION ALL
  SELECT
    [8708227,
    37637318] AS phone_numbers
  UNION ALL
  SELECT
    [9349460,
    84826376,
    9215080,
    26331650] AS phone_numbers)
In [ ]:
df_example.to_dict()
Out[ ]:
{'phone_numbers': {0: [414016, 5767168, 89019964],
  1: [8708227, 37637318],
  2: [9349460, 84826376, 9215080, 26331650]}}
In [ ]:
phone_number_strings_list = []
for phone_numbers in df_example['phone_numbers']:
  phone_number_strings = []
  for phone_number in phone_numbers:
    phone_number_string = f'{phone_number:08d}'
    phone_number_strings.append(f'(+81) {phone_number_string[:4]}-{phone_number_string[-4:]}')
  phone_number_strings_list.append(phone_number_strings)
df_example['phone_number_strings'] = phone_number_strings_list
del df_example['phone_numbers']
In [ ]:
df_example.to_dict()
Out[ ]:
{'phone_number_strings': {0: ['(+81) 0041-4016',
   '(+81) 0576-7168',
   '(+81) 8901-9964'],
  1: ['(+81) 0870-8227', '(+81) 3763-7318'],
  2: ['(+81) 0934-9460',
   '(+81) 8482-6376',
   '(+81) 0921-5080',
   '(+81) 2633-1650']}}
In [ ]:
!sudo -HE pip install pandas_gbq
Collecting pandas_gbq
  Downloading https://files.pythonhosted.org/packages/c0/cb/d82930a55728359eb40e42e487d83315b09cb2ba316f0e889f5d4b886614/pandas_gbq-0.14.1-py3-none-any.whl
Requirement already satisfied: google-auth in /opt/tljh/user/lib/python3.6/site-packages (from pandas_gbq) (1.6.3)
Requirement already satisfied: pandas>=0.20.1 in /opt/tljh/user/lib/python3.6/site-packages (from pandas_gbq) (0.25.1)
Collecting google-auth-oauthlib (from pandas_gbq)
  Downloading https://files.pythonhosted.org/packages/b1/0e/0636cc1448a7abc444fb1b3a63655e294e0d2d49092dc3de05241be6d43c/google_auth_oauthlib-0.4.6-py2.py3-none-any.whl
Collecting pydata-google-auth (from pandas_gbq)
  Downloading https://files.pythonhosted.org/packages/d8/61/e4e0bae8906f3d2f460bc46c1ccd4a94caf7eaf65aa92421c48d7c56ef70/pydata_google_auth-1.2.0-py2.py3-none-any.whl
Requirement already satisfied: google-cloud-bigquery[bqstorage,pandas]<3.0.0dev,>=1.11.1 in /opt/tljh/user/lib/python3.6/site-packages (from pandas_gbq) (1.21.0)
Requirement already satisfied: setuptools in /opt/tljh/user/lib/python3.6/site-packages (from pandas_gbq) (41.4.0)
Requirement already satisfied: rsa>=3.1.4 in /opt/tljh/user/lib/python3.6/site-packages (from google-auth->pandas_gbq) (4.0)
Requirement already satisfied: cachetools>=2.0.0 in /opt/tljh/user/lib/python3.6/site-packages (from google-auth->pandas_gbq) (3.1.1)
Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/tljh/user/lib/python3.6/site-packages (from google-auth->pandas_gbq) (0.2.6)
Requirement already satisfied: six>=1.9.0 in /opt/tljh/user/lib/python3.6/site-packages (from google-auth->pandas_gbq) (1.12.0)
Requirement already satisfied: python-dateutil>=2.6.1 in /opt/tljh/user/lib/python3.6/site-packages (from pandas>=0.20.1->pandas_gbq) (2.8.0)
Requirement already satisfied: pytz>=2017.2 in /opt/tljh/user/lib/python3.6/site-packages (from pandas>=0.20.1->pandas_gbq) (2019.2)
Requirement already satisfied: numpy>=1.13.3 in /opt/tljh/user/lib/python3.6/site-packages (from pandas>=0.20.1->pandas_gbq) (1.17.2)
Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/tljh/user/lib/python3.6/site-packages (from google-auth-oauthlib->pandas_gbq) (1.2.0)
Requirement already satisfied: google-cloud-core<2.0dev,>=1.0.3 in /opt/tljh/user/lib/python3.6/site-packages (from google-cloud-bigquery[bqstorage,pandas]<3.0.0dev,>=1.11.1->pandas_gbq) (1.0.3)
Requirement already satisfied: protobuf>=3.6.0 in /opt/tljh/user/lib/python3.6/site-packages (from google-cloud-bigquery[bqstorage,pandas]<3.0.0dev,>=1.11.1->pandas_gbq) (3.13.0)
Requirement already satisfied: google-resumable-media!=0.4.0,<0.5.0dev,>=0.3.1 in /opt/tljh/user/lib/python3.6/site-packages (from google-cloud-bigquery[bqstorage,pandas]<3.0.0dev,>=1.11.1->pandas_gbq) (0.4.1)
Collecting google-cloud-bigquery-storage<2.0.0dev,>=0.6.0; extra == "bqstorage" (from google-cloud-bigquery[bqstorage,pandas]<3.0.0dev,>=1.11.1->pandas_gbq)
  Downloading https://files.pythonhosted.org/packages/42/9a/003822d79a535472c089ca39fb384b74b8a3624f4d5a1715c4c52059418d/google_cloud_bigquery_storage-1.1.0-py2.py3-none-any.whl (135kB)
     |████████████████████████████████| 143kB 37.3MB/s eta 0:00:01
Collecting pyarrow!=0.14.0,>=0.13.0; extra == "bqstorage" (from google-cloud-bigquery[bqstorage,pandas]<3.0.0dev,>=1.11.1->pandas_gbq)
  Downloading https://files.pythonhosted.org/packages/5d/61/4160ed11e0c149182baafc3a5bed1fb04395ba40705556d03c9244fb57d4/pyarrow-6.0.0-cp36-cp36m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (24.3MB)
     |████████████████████████████████| 24.4MB 43.3MB/s eta 0:00:01
Requirement already satisfied: pyasn1>=0.1.3 in /opt/tljh/user/lib/python3.6/site-packages (from rsa>=3.1.4->google-auth->pandas_gbq) (0.4.7)
Requirement already satisfied: requests>=2.0.0 in /opt/tljh/user/lib/python3.6/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib->pandas_gbq) (2.25.1)
Requirement already satisfied: oauthlib>=3.0.0 in /opt/tljh/user/lib/python3.6/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib->pandas_gbq) (3.1.0)
Requirement already satisfied: google-api-core<2.0.0dev,>=1.14.0 in /opt/tljh/user/lib/python3.6/site-packages (from google-cloud-core<2.0dev,>=1.0.3->google-cloud-bigquery[bqstorage,pandas]<3.0.0dev,>=1.11.1->pandas_gbq) (1.14.2)
Requirement already satisfied: idna<3,>=2.5 in /opt/tljh/user/lib/python3.6/site-packages (from requests>=2.0.0->requests-oauthlib>=0.7.0->google-auth-oauthlib->pandas_gbq) (2.8)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/tljh/user/lib/python3.6/site-packages (from requests>=2.0.0->requests-oauthlib>=0.7.0->google-auth-oauthlib->pandas_gbq) (1.24.3)
Requirement already satisfied: chardet<5,>=3.0.2 in /opt/tljh/user/lib/python3.6/site-packages (from requests>=2.0.0->requests-oauthlib>=0.7.0->google-auth-oauthlib->pandas_gbq) (3.0.4)
Requirement already satisfied: certifi>=2017.4.17 in /opt/tljh/user/lib/python3.6/site-packages (from requests>=2.0.0->requests-oauthlib>=0.7.0->google-auth-oauthlib->pandas_gbq) (2020.4.5.2)
Requirement already satisfied: googleapis-common-protos<2.0dev,>=1.6.0 in /opt/tljh/user/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=1.14.0->google-cloud-core<2.0dev,>=1.0.3->google-cloud-bigquery[bqstorage,pandas]<3.0.0dev,>=1.11.1->pandas_gbq) (1.6.0)
Installing collected packages: google-auth-oauthlib, pydata-google-auth, pandas-gbq, google-cloud-bigquery-storage, pyarrow
Successfully installed google-auth-oauthlib-0.4.6 google-cloud-bigquery-storage-1.1.0 pandas-gbq-0.14.1 pyarrow-6.0.0 pydata-google-auth-1.2.0
In [ ]:
df_example.to_gbq(project_id = 'sandbox-sheng', destination_table = 'bigquery_learning.customers', if_exists = 'replace')
/home/jupyter-sheng_wei/.local/lib/python3.6/site-packages/google/auth/_default.py:66: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK. We recommend that most server applications use service accounts instead. If your application continues to use end user credentials from Cloud SDK, you might receive a "quota exceeded" or "API not enabled" error. For more information about service accounts, see https://cloud.google.com/docs/authentication/
  warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)
1it [00:04,  4.10s/it]
In [ ]:
%%bigquery
SELECT
  *
FROM
  `sandbox-sheng.bigquery_learning.customers`
Out[ ]:
phone_number_strings
0 ['(+81) 0041-4016', '(+81) 0576-7168', '(+81) ...
1 ['(+81) 0870-8227', '(+81) 3763-7318']
2 ['(+81) 0934-9460', '(+81) 8482-6376', '(+81) ...
In [ ]:
from datetime import datetime, timezone, timedelta
# from 30 days ago to 29 days ago
start_date = datetime.now(timezone(timedelta(hours=+9))) - timedelta(days=30)
end_date = datetime.now(timezone(timedelta(hours=+9))) - timedelta(days=29)
params = {
    'start_date': start_date.strftime('%Y-%m-%d %H:%M:%S Asia/Tokyo'),
    'end_date': end_date.strftime('%Y-%m-%d %H:%M:%S Asia/Tokyo')
}
In [ ]:
print(params['start_date'])
print(params['end_date'])
2021-10-14 09:21:28 Asia/Tokyo
2021-10-15 09:21:28 Asia/Tokyo
In [ ]:
%%time
%%bigquery df --param $params
SELECT
  *
FROM
  `sandbox-sheng.fluentd.fluentd_test`
WHERE
  time >= @start_date
  AND time < @end_date
ORDER BY
  time ASC
In [ ]:
df
In [ ]:
%%bigquery
SELECT
  ARRAY(
  SELECT
    CONCAT('(+81) ', SUBSTR(FORMAT('%08d', phone_number), 0, 4), '-', SUBSTR(FORMAT('%08d', phone_number), -4))
  FROM
    UNNEST(phone_numbers) AS phone_number )[OFFSET(0)] AS phone_number
FROM (
  SELECT
    [414016,
    5767168,
    89019964] AS phone_numbers
  UNION ALL
  SELECT
    [8708227,
    37637318] AS phone_numbers
  UNION ALL
  SELECT
    [9349460,
    84826376,
    9215080,
    26331650] AS phone_numbers)
Out[ ]:
phone_number
0 (+81) 0041-4016
1 (+81) 0870-8227
2 (+81) 0934-9460
In [ ]:
%%bigquery
WITH fruits AS
  (SELECT "apple" AS fruit
   UNION ALL SELECT "pear" AS fruit
   UNION ALL SELECT "banana" AS fruit)
SELECT ARRAY_AGG(fruit ORDER BY fruit) AS fruit_basket
FROM fruits
Out[ ]:
fruit_basket
0 [apple, banana, pear]
In [ ]:
%%bigquery
SELECT
  name,
  ARRAY_AGG( DISTINCT phone_number
  ORDER BY
    phone_number ASC )[OFFSET(0)] AS phone_number
FROM (
  SELECT
    'Andrew' AS name,
    414016 AS phone_number
  UNION ALL
  SELECT
    'Andrew' AS name,
    5767168 AS phone_number
  UNION ALL
  SELECT
    'Andrew' AS name,
    89019964 AS phone_number
  UNION ALL
  SELECT
    'Andrew' AS name,
    8708227 AS phone_number
  UNION ALL
  SELECT
    'Andrew' AS name,
    37637318 AS phone_number
  UNION ALL
  SELECT
    'Kelly' AS name,
    9349460 AS phone_number
  UNION ALL
  SELECT
    'Kelly' AS name,
    84826376 AS phone_number
  UNION ALL
  SELECT
    'Kelly' AS name,
    9215080 AS phone_number
  UNION ALL
  SELECT
    'Kelly' AS name,
    26331650 AS phone_number)
GROUP BY
  name
Out[ ]:
name phone_number
0 Andrew 414016
1 Kelly 9215080

Comments

2019-10-19