Skip to content

Commit

Permalink
Add timeout and retry to the BigQueryInsertJobOperator (#22395)
Browse files Browse the repository at this point in the history
  • Loading branch information
Łukasz Wyszomirski authored Mar 21, 2022
1 parent 82d2fa7 commit 14e6b65
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 3 deletions.
7 changes: 6 additions & 1 deletion airflow/providers/google/cloud/hooks/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -1503,6 +1503,8 @@ def insert_job(
project_id: Optional[str] = None,
location: Optional[str] = None,
nowait: bool = False,
retry: Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> BigQueryJob:
"""
Executes a BigQuery job. Waits for the job to complete and returns job id.
Expand All @@ -1520,6 +1522,9 @@ def insert_job(
:param project_id: Google Cloud Project where the job is running
:param location: location the job is running
:param nowait: specify whether to insert job without waiting for the result
:param retry: How to retry the RPC.
:param timeout: The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
"""
location = location or self.location
job_id = job_id or self._custom_job_id(configuration)
Expand Down Expand Up @@ -1552,7 +1557,7 @@ def insert_job(
job._begin()
else:
# Start the job and wait for it to complete and get the result.
job.result()
job.result(timeout=timeout, retry=retry)
return job

def run_with_configuration(self, configuration: dict) -> str:
Expand Down
12 changes: 11 additions & 1 deletion airflow/providers/google/cloud/operators/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@

import attr
from google.api_core.exceptions import Conflict
from google.api_core.retry import Retry
from google.cloud.bigquery import DEFAULT_RETRY

from airflow.exceptions import AirflowException
from airflow.models import BaseOperator, BaseOperatorLink
Expand Down Expand Up @@ -2052,6 +2054,8 @@ class BigQueryInsertJobOperator(BaseOperator):
Service Account Token Creator IAM role to the directly preceding identity, with first
account from the list granting this role to the originating account (templated).
:param cancel_on_kill: Flag which indicates whether cancel the hook's job or not, when on_kill is called
:param result_retry: How to retry the `result` call that retrieves rows
:param result_timeout: The number of seconds to wait for `result` method before using `result_retry`
"""

template_fields: Sequence[str] = (
Expand All @@ -2075,6 +2079,8 @@ def __init__(
delegate_to: Optional[str] = None,
impersonation_chain: Optional[Union[str, Sequence[str]]] = None,
cancel_on_kill: bool = True,
result_retry: Retry = DEFAULT_RETRY,
result_timeout: Optional[float] = None,
**kwargs,
) -> None:
super().__init__(**kwargs)
Expand All @@ -2088,6 +2094,8 @@ def __init__(
self.reattach_states: Set[str] = reattach_states or set()
self.impersonation_chain = impersonation_chain
self.cancel_on_kill = cancel_on_kill
self.result_retry = result_retry
self.result_timeout = result_timeout
self.hook: Optional[BigQueryHook] = None

def prepare_template(self) -> None:
Expand All @@ -2107,6 +2115,8 @@ def _submit_job(
project_id=self.project_id,
location=self.location,
job_id=job_id,
timeout=self.result_timeout,
retry=self.result_retry,
)

@staticmethod
Expand Down Expand Up @@ -2151,7 +2161,7 @@ def execute(self, context: Any):
)
if job.state in self.reattach_states:
# We are reattaching to a job
job.result()
job.result(timeout=self.result_timeout, retry=self.result_retry)
self._handle_job_error(job)
else:
# Same job configuration so we need force_rerun
Expand Down
10 changes: 9 additions & 1 deletion tests/providers/google/cloud/operators/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from unittest.mock import MagicMock

import pytest
from google.cloud.bigquery import DEFAULT_RETRY
from google.cloud.exceptions import Conflict

from airflow.exceptions import AirflowException
Expand Down Expand Up @@ -840,6 +841,8 @@ def test_execute_success(self, mock_hook, mock_md5):
location=TEST_DATASET_LOCATION,
job_id=real_job_id,
project_id=TEST_GCP_PROJECT_ID,
retry=DEFAULT_RETRY,
timeout=None,
)

assert result == real_job_id
Expand Down Expand Up @@ -947,7 +950,10 @@ def test_execute_reattach(self, mock_hook, mock_md5):
project_id=TEST_GCP_PROJECT_ID,
)

job.result.assert_called_once_with()
job.result.assert_called_once_with(
retry=DEFAULT_RETRY,
timeout=None,
)

assert result == real_job_id

Expand Down Expand Up @@ -988,6 +994,8 @@ def test_execute_force_rerun(self, mock_hook, mock_uuid, mock_md5):
location=TEST_DATASET_LOCATION,
job_id=real_job_id,
project_id=TEST_GCP_PROJECT_ID,
retry=DEFAULT_RETRY,
timeout=None,
)

assert result == real_job_id
Expand Down

0 comments on commit 14e6b65

Please sign in to comment.
  翻译: