Add test for BQ operations using location (#9206)
turbaszek committed Jun 10, 2020
1 parent a26afbf commit fb1c8b8
Showing 3 changed files with 137 additions and 115 deletions.
234 changes: 124 additions & 110 deletions airflow/providers/google/cloud/example_dags/example_bigquery_queries.py
@@ -33,6 +33,8 @@

PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "example-project")
DATASET_NAME = os.environ.get("GCP_BIGQUERY_DATASET_NAME", "test_dataset")
LOCATION = "southamerica-east1"

TABLE_1 = "table1"
TABLE_2 = "table2"

@@ -54,113 +56,125 @@

default_args = {"start_date": days_ago(1)}

with models.DAG(
"example_bigquery_queries",
default_args=default_args,
schedule_interval=None, # Override to match your needs
tags=["example"],
) as dag:
create_dataset = BigQueryCreateEmptyDatasetOperator(
task_id="create-dataset", dataset_id=DATASET_NAME
)

create_table_1 = BigQueryCreateEmptyTableOperator(
task_id="create_table_1",
dataset_id=DATASET_NAME,
table_id=TABLE_1,
schema_fields=SCHEMA,
)

create_table_2 = BigQueryCreateEmptyTableOperator(
task_id="create_table_2",
dataset_id=DATASET_NAME,
table_id=TABLE_2,
schema_fields=SCHEMA,
)

create_dataset >> [create_table_1, create_table_2]

delete_dataset = BigQueryDeleteDatasetOperator(
task_id="delete_dataset", dataset_id=DATASET_NAME, delete_contents=True
)

# [START howto_operator_bigquery_insert_job]
insert_query_job = BigQueryInsertJobOperator(
task_id="insert_query_job",
configuration={
"query": {
"query": INSERT_ROWS_QUERY,
"useLegacySql": False,
}
},
)
# [END howto_operator_bigquery_insert_job]

execute_insert_query = BigQueryExecuteQueryOperator(
task_id="execute_insert_query", sql=INSERT_ROWS_QUERY, use_legacy_sql=False
)

bigquery_execute_multi_query = BigQueryExecuteQueryOperator(
task_id="execute_multi_query",
sql=[
f"SELECT * FROM {DATASET_NAME}.{TABLE_2}",
f"SELECT COUNT(*) FROM {DATASET_NAME}.{TABLE_2}",
],
use_legacy_sql=False,
)

execute_query_save = BigQueryExecuteQueryOperator(
task_id="execute_query_save",
sql=f"SELECT * FROM {DATASET_NAME}.{TABLE_1}",
use_legacy_sql=False,
destination_dataset_table=f"{DATASET_NAME}.{TABLE_2}",
)

# [START howto_operator_bigquery_get_data]
get_data = BigQueryGetDataOperator(
task_id="get_data",
dataset_id=DATASET_NAME,
table_id=TABLE_1,
max_results=10,
selected_fields="value,name",
)
# [END howto_operator_bigquery_get_data]

get_data_result = BashOperator(
task_id="get_data_result",
bash_command="echo \"{{ task_instance.xcom_pull('get_data') }}\"",
)

# [START howto_operator_bigquery_check]
check_count = BigQueryCheckOperator(
task_id="check_count",
sql=f"SELECT COUNT(*) FROM {DATASET_NAME}.{TABLE_1}",
use_legacy_sql=False,
)
# [END howto_operator_bigquery_check]

# [START howto_operator_bigquery_value_check]
check_value = BigQueryValueCheckOperator(
task_id="check_value",
sql=f"SELECT COUNT(*) FROM {DATASET_NAME}.{TABLE_1}",
pass_value=4,
use_legacy_sql=False,
)
# [END howto_operator_bigquery_value_check]

# [START howto_operator_bigquery_interval_check]
check_interval = BigQueryIntervalCheckOperator(
task_id="check_interval",
table=f"{DATASET_NAME}.{TABLE_1}",
days_back=1,
metrics_thresholds={"COUNT(*)": 1.5},
use_legacy_sql=False,
)
# [END howto_operator_bigquery_interval_check]

[create_table_1, create_table_2] >> insert_query_job

insert_query_job >> execute_insert_query
execute_insert_query >> get_data >> get_data_result >> delete_dataset
execute_insert_query >> execute_query_save >> bigquery_execute_multi_query >> delete_dataset
execute_insert_query >> [check_count, check_value, check_interval] >> delete_dataset
for location in [None, LOCATION]:
dag_id = "example_bigquery_queries_location" if location else "example_bigquery_queries"

with models.DAG(
dag_id,
default_args=default_args,
schedule_interval=None, # Override to match your needs
tags=["example"],
) as dag_with_locations:
create_dataset = BigQueryCreateEmptyDatasetOperator(
task_id="create-dataset", dataset_id=DATASET_NAME, location=location,
)

create_table_1 = BigQueryCreateEmptyTableOperator(
task_id="create_table_1",
dataset_id=DATASET_NAME,
table_id=TABLE_1,
schema_fields=SCHEMA,
location=location,
)

create_table_2 = BigQueryCreateEmptyTableOperator(
task_id="create_table_2",
dataset_id=DATASET_NAME,
table_id=TABLE_2,
schema_fields=SCHEMA,
location=location,
)

create_dataset >> [create_table_1, create_table_2]

delete_dataset = BigQueryDeleteDatasetOperator(
task_id="delete_dataset", dataset_id=DATASET_NAME, delete_contents=True
)

# [START howto_operator_bigquery_insert_job]
insert_query_job = BigQueryInsertJobOperator(
task_id="insert_query_job",
configuration={
"query": {
"query": INSERT_ROWS_QUERY,
"useLegacySql": False,
}
},
location=location,
)
# [END howto_operator_bigquery_insert_job]

execute_insert_query = BigQueryExecuteQueryOperator(
task_id="execute_insert_query", sql=INSERT_ROWS_QUERY, use_legacy_sql=False, location=location
)

bigquery_execute_multi_query = BigQueryExecuteQueryOperator(
task_id="execute_multi_query",
sql=[
f"SELECT * FROM {DATASET_NAME}.{TABLE_2}",
f"SELECT COUNT(*) FROM {DATASET_NAME}.{TABLE_2}",
],
use_legacy_sql=False,
location=location,
)

execute_query_save = BigQueryExecuteQueryOperator(
task_id="execute_query_save",
sql=f"SELECT * FROM {DATASET_NAME}.{TABLE_1}",
use_legacy_sql=False,
destination_dataset_table=f"{DATASET_NAME}.{TABLE_2}",
location=location,
)

# [START howto_operator_bigquery_get_data]
get_data = BigQueryGetDataOperator(
task_id="get_data",
dataset_id=DATASET_NAME,
table_id=TABLE_1,
max_results=10,
selected_fields="value,name",
location=location,
)
# [END howto_operator_bigquery_get_data]

get_data_result = BashOperator(
task_id="get_data_result",
bash_command="echo \"{{ task_instance.xcom_pull('get_data') }}\"",
)

# [START howto_operator_bigquery_check]
check_count = BigQueryCheckOperator(
task_id="check_count",
sql=f"SELECT COUNT(*) FROM {DATASET_NAME}.{TABLE_1}",
use_legacy_sql=False,
location=location
)
# [END howto_operator_bigquery_check]

# [START howto_operator_bigquery_value_check]
check_value = BigQueryValueCheckOperator(
task_id="check_value",
sql=f"SELECT COUNT(*) FROM {DATASET_NAME}.{TABLE_1}",
pass_value=4,
use_legacy_sql=False,
location=location,
)
# [END howto_operator_bigquery_value_check]

# [START howto_operator_bigquery_interval_check]
check_interval = BigQueryIntervalCheckOperator(
task_id="check_interval",
table=f"{DATASET_NAME}.{TABLE_1}",
days_back=1,
metrics_thresholds={"COUNT(*)": 1.5},
use_legacy_sql=False,
location=location,
)
# [END howto_operator_bigquery_interval_check]

[create_table_1, create_table_2] >> insert_query_job

insert_query_job >> execute_insert_query
execute_insert_query >> get_data >> get_data_result >> delete_dataset
execute_insert_query >> execute_query_save >> bigquery_execute_multi_query >> delete_dataset
execute_insert_query >> [check_count, check_value, check_interval] >> delete_dataset
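The new example builds two DAGs from a single file by looping over [None, LOCATION]. Below is a minimal, self-contained sketch of that loop-per-location pattern — not the committed example: the task is a placeholder, the DAG ids are made up, and binding each DAG to a module-level name via globals() is the usual trick for making every generated DAG discoverable, stated here as an assumption rather than as what the commit does.

# Minimal sketch of generating one DAG per location (placeholder task,
# hypothetical DAG ids); the real example passes location=location to each
# BigQuery operator instead.
from airflow import models
from airflow.operators.bash import BashOperator  # on Airflow 1.10.x: airflow.operators.bash_operator
from airflow.utils.dates import days_ago

LOCATION = "southamerica-east1"

for location in [None, LOCATION]:
    dag_id = "sketch_bq_queries_location" if location else "sketch_bq_queries"

    with models.DAG(
        dag_id,
        default_args={"start_date": days_ago(1)},
        schedule_interval=None,  # override to match your needs
        tags=["example"],
    ) as dag:
        # Stand-in for the BigQuery operators; each real task simply
        # receives location=location (None falls back to the BigQuery default).
        BashOperator(task_id="noop", bash_command=f"echo location={location}")

    # Keep every generated DAG reachable at module level so the scheduler
    # picks up both of them, not only the one from the last iteration.
    globals()[dag_id] = dag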
10 changes: 5 additions & 5 deletions docs/howto/operator/gcp/bigquery.rst
@@ -198,7 +198,7 @@ that row.

.. exampleinclude:: ../../../../airflow/providers/google/cloud/example_dags/example_bigquery_queries.py
:language: python
:dedent: 4
:dedent: 8
:start-after: [START howto_operator_bigquery_get_data]
:end-before: [END howto_operator_bigquery_get_data]

@@ -260,7 +260,7 @@ proper query job configuration.

.. exampleinclude:: ../../../../airflow/providers/google/cloud/example_dags/example_bigquery_queries.py
:language: python
:dedent: 4
:dedent: 8
:start-after: [START howto_operator_bigquery_insert_job]
:end-before: [END howto_operator_bigquery_insert_job]

@@ -290,7 +290,7 @@ return ``False`` the check is failed and errors out.

.. exampleinclude:: ../../../../airflow/providers/google/cloud/example_dags/example_bigquery_queries.py
:language: python
:dedent: 4
:dedent: 8
:start-after: [START howto_operator_bigquery_check]
:end-before: [END howto_operator_bigquery_check]

@@ -308,7 +308,7 @@ or numeric value. If numeric, you can also specify ``tolerance``.

.. exampleinclude:: ../../../../airflow/providers/google/cloud/example_dags/example_bigquery_queries.py
:language: python
:dedent: 4
:dedent: 8
:start-after: [START howto_operator_bigquery_value_check]
:end-before: [END howto_operator_bigquery_value_check]

@@ -323,7 +323,7 @@ tolerance of the ones from ``days_back`` before you can use

.. exampleinclude:: ../../../../airflow/providers/google/cloud/example_dags/example_bigquery_queries.py
:language: python
:dedent: 4
:dedent: 8
:start-after: [START howto_operator_bigquery_interval_check]
:end-before: [END howto_operator_bigquery_interval_check]

@@ -41,10 +41,18 @@ def setUp(self):
def test_run_example_dag_operations(self):
self.run_dag('example_bigquery_operations', CLOUD_DAG_FOLDER)

@provide_gcp_context(GCP_BIGQUERY_KEY)
def test_run_example_dag_operations_location(self):
self.run_dag('example_bigquery_operations_location', CLOUD_DAG_FOLDER)

@provide_gcp_context(GCP_BIGQUERY_KEY)
def test_run_example_dag_queries(self):
self.run_dag('example_bigquery_queries', CLOUD_DAG_FOLDER)

@provide_gcp_context(GCP_BIGQUERY_KEY)
def test_run_example_dag_queries_location(self):
self.run_dag('example_bigquery_queries_location', CLOUD_DAG_FOLDER)

@provide_gcp_context(GCP_BIGQUERY_KEY)
def tearDown(self):
self.delete_gcs_bucket(DATA_EXPORT_BUCKET_NAME)
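The two new test methods exercise the location-aware DAGs alongside the existing ones. A hedged sketch of how they might be run locally follows — the module path and any extra harness options are assumptions, not part of this diff, and the system tests also need real GCP credentials supplied through the GCP_BIGQUERY_KEY context.

import pytest

# Select only the *_location system tests; the file path is assumed, and
# Airflow's test harness may require additional system-test setup
# (environment variables, credential files) documented in the project.
pytest.main([
    "tests/providers/google/cloud/operators/test_bigquery_system.py",
    "-k", "location",
])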