By default PIP will install all packages in .local folder (#14125)
In order to optimize the Docker image, we use the ~/.local
folder copied from the build image (this gives huge optimizations
in Docker image size). So far we instructed users to add the
--user flag manually when installing any packages when they
extend the images; however, this has proven problematic, as
users rarely read the whole documentation and simply try what
they already know.

This PR attempts to fix that. The `PIP_USER` variable is set to `true`
in the final image, which means that installations will target the
~/.local folder by default. This can be disabled by unsetting the
variable or by setting it to `false`.

Also, since pylint 2.7.0 has been released, this PR fixes a few
pylint violations so that we can update to the latest constraints.
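
The pylint fixes follow two recurring patterns, sketched below with
illustrative code (not part of the diff): adding an explicit
`return None` so every code path returns consistently (pylint's
inconsistent-return-statements check), and silencing the
consider-using-generator warning, new in pylint 2.7.0, where a list
comprehension inside `tuple()` is intentional.

    import logging

    def load_plugin(loader):
        # inconsistent-return-statements: the try branch returns a value,
        # so the except branch must also return explicitly.
        try:
            loader.load()
            return True
        except Exception as e:  # pylint: disable=broad-except
            logging.error(str(e))
            return None

    def parse_allow_list(raw):
        # consider-using-generator flags a list comprehension passed to
        # tuple(); the warning is silenced here rather than rewritten.
        # pylint: disable=consider-using-generator
        return tuple([item.strip().lower() for item in raw.split(',')])
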
potiuk authored Feb 21, 2021
1 parent beed530 commit ca35bd7
Showing 11 changed files with 32 additions and 13 deletions.
3 changes: 3 additions & 0 deletions Dockerfile
@@ -523,6 +523,9 @@ LABEL org.apache.airflow.distro="debian" \
org.opencontainers.image.title="Production Airflow Image" \
org.opencontainers.image.description="Installed Apache Airflow"

# By default PIP will install everything in ~/.local
ARG PIP_USER="true"
ENV PIP_USER=${PIP_USER}

ENTRYPOINT ["/usr/bin/dumb-init", "--", "/entrypoint"]
CMD ["--help"]
1 change: 1 addition & 0 deletions airflow/kubernetes/refresh_config.py
@@ -68,6 +68,7 @@ def _load_from_exec_plugin(self):
return True
except Exception as e: # pylint: disable=W0703
logging.error(str(e))
return None

def refresh_api_key(self, client_configuration):
"""Refresh API key if expired"""
2 changes: 1 addition & 1 deletion airflow/models/dagbag.py
@@ -330,7 +330,7 @@ def _load_modules_from_zip(self, filepath, safe_mode):
if not might_contain_dag(zip_info.filename, safe_mode, current_zip_file):
# todo: create ignore list
# Don't want to spam user with skip messages
if not self.has_logged or True:
if not self.has_logged:
self.has_logged = True
self.log.info(
"File %s:%s assumed to contain no DAGs. Skipping.", filepath, zip_info.filename
6 changes: 4 additions & 2 deletions airflow/providers/amazon/aws/log/cloudwatch_task_handler.py
@@ -56,13 +56,15 @@ def hook(self):
from airflow.providers.amazon.aws.hooks.logs import AwsLogsHook

return AwsLogsHook(aws_conn_id=remote_conn_id, region_name=self.region_name)
except Exception: # pylint: disable=broad-except
except Exception as e: # pylint: disable=broad-except
self.log.error(
'Could not create an AwsLogsHook with connection id "%s". '
'Please make sure that airflow[aws] is installed and '
'the Cloudwatch logs connection exists.',
'the Cloudwatch logs connection exists. Exception: "%s"',
remote_conn_id,
e,
)
return None

def _render_filename(self, ti, try_number):
# Replace unsupported log group name characters
6 changes: 4 additions & 2 deletions airflow/providers/amazon/aws/log/s3_task_handler.py
@@ -47,13 +47,15 @@ def hook(self):
from airflow.providers.amazon.aws.hooks.s3 import S3Hook

return S3Hook(remote_conn_id)
except Exception: # pylint: disable=broad-except
except Exception as e: # pylint: disable=broad-except
self.log.exception(
'Could not create an S3Hook with connection id "%s". '
'Please make sure that airflow[aws] is installed and '
'the S3 connection exists.',
'the S3 connection exists. Exception : "%s"',
remote_conn_id,
e,
)
return None

def set_context(self, ti):
super().set_context(ti)
1 change: 1 addition & 0 deletions airflow/providers/google/cloud/operators/functions.py
@@ -397,6 +397,7 @@ def execute(self, context):
status = e.resp.status
if status == 404:
self.log.info('The function does not exist in this project')
return None
else:
self.log.error('An error occurred. Exiting.')
raise e
16 changes: 10 additions & 6 deletions airflow/providers/microsoft/azure/log/wasb_task_handler.py
@@ -59,13 +59,15 @@ def hook(self):
from airflow.providers.microsoft.azure.hooks.wasb import WasbHook

return WasbHook(remote_conn_id)
except AzureHttpError:
except AzureHttpError as e:
self.log.error(
'Could not create an WasbHook with connection id "%s". '
'Please make sure that airflow[azure] is installed and '
'the Wasb connection exists.',
'the Wasb connection exists. Exception "%s"',
remote_conn_id,
e,
)
return None

def set_context(self, ti) -> None:
super().set_context(ti)
@@ -136,8 +138,9 @@ def wasb_log_exists(self, remote_log_location: str) -> bool:
"""
try:
return self.hook.check_for_blob(self.wasb_container, remote_log_location)
except Exception: # pylint: disable=broad-except
pass
# pylint: disable=broad-except
except Exception as e:
self.log.debug('Exception when trying to check remote location: "%s"', e)
return False

def wasb_read(self, remote_log_location: str, return_error: bool = False):
@@ -153,12 +156,13 @@ def wasb_read(self, remote_log_location: str, return_error: bool = False):
"""
try:
return self.hook.read_file(self.wasb_container, remote_log_location)
except AzureHttpError:
except AzureHttpError as e:
msg = f'Could not read logs from {remote_log_location}'
self.log.exception(msg)
self.log.exception("Message: '%s', exception '%s'", msg, e)
# return error if needed
if return_error:
return msg
return ''

def wasb_write(self, log: str, remote_log_location: str, append: bool = True) -> None:
"""
1 change: 1 addition & 0 deletions airflow/serialization/serialized_objects.py
@@ -290,6 +290,7 @@ def _deserialize(cls, encoded_var: Any) -> Any:  # pylint: disable=too-many-return-statements
elif type_ == DAT.SET:
return {cls._deserialize(v) for v in var}
elif type_ == DAT.TUPLE:
# pylint: disable=consider-using-generator
return tuple([cls._deserialize(v) for v in var])
else:
raise TypeError(f'Invalid type {type_!s} in deserialization.')
1 change: 1 addition & 0 deletions airflow/stats.py
@@ -243,6 +243,7 @@ class AllowListValidator:

def __init__(self, allow_list=None):
if allow_list:
# pylint: disable=consider-using-generator
self.allow_list = tuple([item.strip().lower() for item in allow_list.split(',')])
else:
self.allow_list = None
5 changes: 3 additions & 2 deletions airflow/www/views.py
@@ -3648,8 +3648,9 @@ def action_clear(self, task_instances, session=None):
flash(f"{len(task_instances)} task instances have been cleared")
self.update_redirect()
return redirect(self.get_redirect())
except Exception: # noqa pylint: disable=broad-except
flash('Failed to clear task instances', 'error')
except Exception as e: # noqa pylint: disable=broad-except
flash(f'Failed to clear task instances: "{e}"', 'error')
return None

@provide_session
def set_task_instance_state(self, tis, target_state, session=None):
3 changes: 3 additions & 0 deletions docs/apache-airflow/production-deployment.rst
@@ -197,6 +197,9 @@ You should be aware, about a few things:
FROM apache/airflow:2.0.1
RUN pip install --no-cache-dir --user my-awesome-pip-dependency-to-add
* As of the 2.0.1 image, the ``--user`` flag is turned on by default by setting the ``PIP_USER`` environment
  variable to ``true``. This can be disabled by unsetting the variable or by setting it to ``false``, as
  sketched below.
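
  For example, a minimal sketch of opting out when extending the image (installing outside
  ``~/.local`` may require running as root; the package name is a placeholder):

      FROM apache/airflow:2.0.1
      # Disable the new default so pip no longer targets ~/.local.
      ENV PIP_USER=false
      RUN pip install --no-cache-dir my-awesome-pip-dependency-to-add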


* If your apt, or PyPI dependencies require some of the build-essentials, then your best choice is
to follow the "Customize the image" route. However it requires to checkout sources of Apache Airflow,
