Skip to content

Commit

Permalink
perf: decrease the threshold in which we use the BQ Storage Read API (#…
Browse files Browse the repository at this point in the history
…1925)

* perf: decrease the threshold in which we use the BQ Storage Read API

* fix unit test

* update comment
  • Loading branch information
tswast authored May 21, 2024
1 parent 0dac714 commit eaa1a52
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 2 deletions.
12 changes: 11 additions & 1 deletion google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,17 @@

# How many of the total rows need to be downloaded already for us to skip
# calling the BQ Storage API?
ALMOST_COMPLETELY_CACHED_RATIO = 0.333
#
# In microbenchmarks on 2024-05-21, I (tswast@) measure that at about 2 MB of
# remaining results, it's faster to use the BQ Storage Read API to download
# the results than use jobs.getQueryResults. Since we don't have a good way to
# know the remaining bytes, we estimate by remaining number of rows.
#
# Except when rows themselves are larger, I observe that a single page of
# results will be around 10 MB. Therefore, the proportion of rows already
# downloaded should be 10 (first page) / 12 (all results) or more for it to be
# worth it to make a call to jobs.getQueryResults.
ALMOST_COMPLETELY_CACHED_RATIO = 0.833333


def _reference_getter(table):
Expand Down
10 changes: 9 additions & 1 deletion tests/unit/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2307,9 +2307,17 @@ def test__is_almost_completely_cached_returns_true_with_some_rows_remaining(self
rows = [
{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
{"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
{"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]},
{"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]},
{"f": [{"v": "Pebbles Phlyntstone"}, {"v": "4"}]},
{"f": [{"v": "Bamm-Bamm Rhubble"}, {"v": "5"}]},
{"f": [{"v": "Joseph Rockhead"}, {"v": "32"}]},
{"f": [{"v": "Perry Masonry"}, {"v": "33"}]},
]
first_page = {"pageToken": "next-page", "rows": rows}
iterator = self._make_one(first_page_response=first_page, total_rows=6)
iterator = self._make_one(
first_page_response=first_page, total_rows=len(rows) + 1
)
self.assertTrue(iterator._is_almost_completely_cached())

def test__is_almost_completely_cached_returns_true_with_no_rows_remaining(self):
Expand Down

0 comments on commit eaa1a52

Please sign in to comment.
  翻译: