Skip to content
"""Build Environment used for isolation during sdist building
"""
import logging
import os
import pathlib
import site
import sys
import textwrap
from collections import OrderedDict
from types import TracebackType
from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Tuple, Type, Union
from pip._vendor.certifi import where
from pip._vendor.packaging.requirements import Requirement
from pip._vendor.packaging.version import Version
from pip import __file__ as pip_location
from pip._internal.cli.spinners import open_spinner
from pip._internal.locations import get_platlib, get_purelib, get_scheme
from pip._internal.metadata import get_default_environment, get_environment
from pip._internal.utils.subprocess import call_subprocess
from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
if TYPE_CHECKING:
from pip._internal.index.package_finder import PackageFinder
# Module-level logger; its effective level decides whether the pip subprocess
# spawned by BuildEnvironment._install_requirements is run with "-v".
logger = logging.getLogger(__name__)
def _dedup(a: str, b: str) -> Union[Tuple[str], Tuple[str, str]]:
return (a, b) if a != b else (a,)
class _Prefix:
    """One install prefix of the build environment.

    Holds the prefix path plus the scripts directory and library directories
    that ``get_scheme`` reports for an installation into that prefix.
    """

    def __init__(self, path: str) -> None:
        """Record ``path`` and derive its scripts and library directories.

        :param path: Root directory of the prefix.
        """
        self.path = path
        # Set to True by BuildEnvironment.install_requirements once
        # requirements have been requested for this prefix.
        self.setup = False
        install_scheme = get_scheme("", prefix=path)
        self.bin_dir = install_scheme.scripts
        # purelib and platlib may be the same directory; keep it only once.
        self.lib_dirs = _dedup(install_scheme.purelib, install_scheme.platlib)
def get_runnable_pip() -> str:
    """Get a file to pass to a Python executable, to run the currently-running pip.

    This is used to run a pip subprocess, for installing requirements into the build
    environment.

    :return: Path of the ``__pip-runner__.py`` script next to the pip package,
        or the resolved parent of ``pip.__file__`` itself when that parent is
        not a directory.
    """
    pip_dir = pathlib.Path(pip_location).resolve().parent
    if pip_dir.is_dir():
        return os.fsdecode(pip_dir / "__pip-runner__.py")
    # Not a directory: this would happen if someone is using pip from inside a
    # zip file. In that case, we can use that directly.
    return str(pip_dir)
def _get_system_sitepackages() -> Set[str]:
"""Get system site packages
Usually from site.getsitepackages,
but fallback on `get_purelib()/get_platlib()` if unavailable
(e.g. in a virtualenv created by virtualenv<20)
Returns normalized set of strings.
"""
if hasattr(site, "getsitepackages"):
system_sites = site.getsitepackages()
else:
# virtualenv < 20 overwrites site.py without getsitepackages
# fallback on get_purelib/get_platlib.
# this is known to miss things, but shouldn't in the cases
# where getsitepackages() has been removed (inside a virtualenv)
system_sites = [get_purelib(), get_platlib()]
return {os.path.normcase(path) for path in system_sites}
class BuildEnvironment:
    """Creates and manages an isolated environment to install build deps

    Two install prefixes, "normal" and "overlay", are created inside a
    temporary directory, together with a "site" directory holding a generated
    ``sitecustomize.py``. Used as a context manager, the instance rewrites
    ``PATH``, ``PYTHONNOUSERSITE`` and ``PYTHONPATH`` to point at that
    environment and restores the previous values on exit.
    """

    def __init__(self) -> None:
        """Create the prefixes and write the ``sitecustomize.py`` file."""
        temp_dir = TempDirectory(kind=tempdir_kinds.BUILD_ENV, globally_managed=True)

        self._prefixes = OrderedDict(
            (name, _Prefix(os.path.join(temp_dir.path, name)))
            for name in ("normal", "overlay")
        )

        self._bin_dirs: List[str] = []
        self._lib_dirs: List[str] = []
        # Walk the prefixes in reverse so that "overlay" directories are
        # listed before the "normal" ones.
        for prefix in reversed(list(self._prefixes.values())):
            self._bin_dirs.append(prefix.bin_dir)
            self._lib_dirs.extend(prefix.lib_dirs)

        # Customize site to:
        # - ensure .pth files are honored
        # - prevent access to system site packages
        system_sites = _get_system_sitepackages()

        self._site_dir = os.path.join(temp_dir.path, "site")
        if not os.path.exists(self._site_dir):
            os.mkdir(self._site_dir)
        with open(
            os.path.join(self._site_dir, "sitecustomize.py"), "w", encoding="utf-8"
        ) as fp:
            # The template is Python source: the relative indentation of its
            # loop bodies must survive textwrap.dedent(), which only strips
            # the indentation common to all lines.
            fp.write(
                textwrap.dedent(
                    """
                    import os, site, sys
                    # First, drop system-sites related paths.
                    original_sys_path = sys.path[:]
                    known_paths = set()
                    for path in {system_sites!r}:
                        site.addsitedir(path, known_paths=known_paths)
                    system_paths = set(
                        os.path.normcase(path)
                        for path in sys.path[len(original_sys_path):]
                    )
                    original_sys_path = [
                        path for path in original_sys_path
                        if os.path.normcase(path) not in system_paths
                    ]
                    sys.path = original_sys_path
                    # Second, add lib directories.
                    # ensuring .pth file are processed.
                    for path in {lib_dirs!r}:
                        assert not path in sys.path
                        site.addsitedir(path)
                    """
                ).format(system_sites=system_sites, lib_dirs=self._lib_dirs)
            )

    def __enter__(self) -> None:
        """Point the process environment at the build environment.

        The current values of ``PATH``, ``PYTHONNOUSERSITE`` and
        ``PYTHONPATH`` are remembered (``None`` when unset) so that
        ``__exit__`` can restore them.
        """
        self._save_env = {
            name: os.environ.get(name, None)
            for name in ("PATH", "PYTHONNOUSERSITE", "PYTHONPATH")
        }

        # The environment's script directories go in front of the old PATH.
        path = self._bin_dirs[:]
        old_path = self._save_env["PATH"]
        if old_path:
            path.extend(old_path.split(os.pathsep))

        # Only the directory holding the generated sitecustomize.py is put on
        # PYTHONPATH; any previous value is replaced, not extended.
        pythonpath = [self._site_dir]

        os.environ.update(
            {
                "PATH": os.pathsep.join(path),
                "PYTHONNOUSERSITE": "1",
                "PYTHONPATH": os.pathsep.join(pythonpath),
            }
        )

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> None:
        """Restore the environment variables saved by ``__enter__``.

        Variables that were unset before entering are removed again.
        Exceptions are not suppressed.
        """
        for varname, old_value in self._save_env.items():
            if old_value is None:
                os.environ.pop(varname, None)
            else:
                os.environ[varname] = old_value

    def check_requirements(
        self, reqs: Iterable[str]
    ) -> Tuple[Set[Tuple[str, str]], Set[str]]:
        """Return 2 sets:
        - conflicting requirements: set of (installed, wanted) reqs tuples
        - missing requirements: set of reqs

        :param reqs: Requirement strings to check.
        """
        missing = set()
        conflicting = set()
        if reqs:
            # Instances whose __init__ never set ``_lib_dirs`` (see
            # NoOpBuildEnvironment) are checked against the default
            # environment instead.
            env = (
                get_environment(self._lib_dirs)
                if hasattr(self, "_lib_dirs")
                else get_default_environment()
            )
            for req_str in reqs:
                req = Requirement(req_str)
                # We're explicitly evaluating with an empty extra value, since build
                # environments are not provided any mechanism to select specific extras.
                if req.marker is not None and not req.marker.evaluate({"extra": ""}):
                    continue
                dist = env.get_distribution(req.name)
                if not dist:
                    missing.add(req_str)
                    continue
                # "==" for versions parsed as ``Version``, "===" for anything else.
                if isinstance(dist.version, Version):
                    installed_req_str = f"{req.name}=={dist.version}"
                else:
                    installed_req_str = f"{req.name}==={dist.version}"
                if not req.specifier.contains(dist.version, prereleases=True):
                    conflicting.add((installed_req_str, req_str))
                # FIXME: Consider direct URL?
        return conflicting, missing

    def install_requirements(
        self,
        finder: "PackageFinder",
        requirements: Iterable[str],
        prefix_as_string: str,
        *,
        kind: str,
    ) -> None:
        """Install ``requirements`` into one of the environment's prefixes.

        :param finder: Source of the index/format options forwarded to the
            pip subprocess.
        :param requirements: Requirement strings to install.
        :param prefix_as_string: Key of the target prefix in
            ``self._prefixes`` ("normal" or "overlay").
        :param kind: Text used in the spinner message and command description.
        """
        prefix = self._prefixes[prefix_as_string]
        # Each prefix may only be set up once, even if nothing gets installed.
        assert not prefix.setup
        prefix.setup = True
        if not requirements:
            return
        self._install_requirements(
            get_runnable_pip(),
            finder,
            requirements,
            prefix,
            kind=kind,
        )

    @staticmethod
    def _install_requirements(
        pip_runnable: str,
        finder: "PackageFinder",
        requirements: Iterable[str],
        prefix: _Prefix,
        *,
        kind: str,
    ) -> None:
        """Run a pip subprocess that installs ``requirements`` into ``prefix``.

        The command line is built from the options carried by ``finder``:
        format control, index URLs, find-links, trusted hosts, pre-release
        and prefer-binary settings.

        :param pip_runnable: File passed to ``sys.executable`` to run pip.
        :param finder: Object whose settings are translated to pip options.
        :param requirements: Requirement strings, appended after ``--``.
        :param prefix: Prefix whose ``path`` is passed to ``--prefix``.
        :param kind: Text used in the spinner message and command description.
        """
        args: List[str] = [
            sys.executable,
            pip_runnable,
            "install",
            "--ignore-installed",
            "--no-user",
            "--prefix",
            prefix.path,
            "--no-warn-script-location",
        ]
        if logger.getEffectiveLevel() <= logging.DEBUG:
            args.append("-v")
        for format_control in ("no_binary", "only_binary"):
            formats = getattr(finder.format_control, format_control)
            # An empty selection is passed explicitly as ":none:".
            args.extend(
                (
                    "--" + format_control.replace("_", "-"),
                    ",".join(sorted(formats or {":none:"})),
                )
            )

        index_urls = finder.index_urls
        if index_urls:
            # First URL is the primary index, the rest are extra indexes.
            args.extend(["-i", index_urls[0]])
            for extra_index in index_urls[1:]:
                args.extend(["--extra-index-url", extra_index])
        else:
            args.append("--no-index")
        for link in finder.find_links:
            args.extend(["--find-links", link])

        for host in finder.trusted_hosts:
            args.extend(["--trusted-host", host])
        if finder.allow_all_prereleases:
            args.append("--pre")
        if finder.prefer_binary:
            args.append("--prefer-binary")
        # "--" ends option parsing so requirements are never read as options.
        args.append("--")
        args.extend(requirements)
        extra_environ = {"_PIP_STANDALONE_CERT": where()}
        with open_spinner(f"Installing {kind}") as spinner:
            call_subprocess(
                args,
                command_desc=f"pip subprocess to install {kind}",
                spinner=spinner,
                extra_environ=extra_environ,
            )
class NoOpBuildEnvironment(BuildEnvironment):
    """A no-op drop-in replacement for BuildEnvironment

    Nothing is created on construction and entering/exiting the context
    leaves the process environment untouched. Installing requirements is
    not supported.
    """

    def __init__(self) -> None:
        """Do nothing; in particular the parent initializer is not run."""
        return None

    def __enter__(self) -> None:
        """Enter the context without changing anything."""
        return None

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> None:
        """Leave the context without changing anything."""
        return None

    def cleanup(self) -> None:
        """Do nothing; there is nothing to clean up."""
        return None

    def install_requirements(
        self,
        finder: "PackageFinder",
        requirements: Iterable[str],
        prefix_as_string: str,
        *,
        kind: str,
    ) -> None:
        """Always raise ``NotImplementedError``."""
        raise NotImplementedError()
"""Cache Management
"""
import hashlib
import json
import logging
import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Set
from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version
from pip._vendor.packaging.utils import canonicalize_name
from pip._internal.exceptions import InvalidWheelFilename
from pip._internal.models.direct_url import DirectUrl
from pip._internal.models.format_control import FormatControl
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
from pip._internal.utils.urls import path_to_url
# Module-level logger for cache diagnostics.
logger = logging.getLogger(__name__)
# File name of the download-origin record: written into a cache entry
# directory by WheelCache.record_download_origin and looked up next to a
# cached file by CacheEntry.
ORIGIN_JSON_NAME = "origin.json"
def _hash_dict(d: Dict[str, str]) -> str:
"""Return a stable sha224 of a dictionary."""
s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
return hashlib.sha224(s.encode("ascii")).hexdigest()
class Cache:
    """An abstract class - provides cache directories for data from links

    :param cache_dir: The root of the cache.
    :param format_control: An object of FormatControl class to limit
        binaries being read from the cache.
    :param allowed_formats: which formats of files the cache should store.
        ('binary' and 'source' are the only allowed values)
    """

    def __init__(
        self, cache_dir: str, format_control: FormatControl, allowed_formats: Set[str]
    ) -> None:
        super().__init__()
        # An empty cache_dir is accepted and stored as None; anything else
        # must be an absolute path.
        assert not cache_dir or os.path.isabs(cache_dir)
        self.cache_dir = cache_dir or None
        self.format_control = format_control
        self.allowed_formats = allowed_formats

        known_formats = {"source", "binary"}
        # Adding the allowed formats to the known ones must not grow the set.
        assert self.allowed_formats.union(known_formats) == known_formats

    def _get_cache_path_parts(self, link: Link) -> List[str]:
        """Get parts of path that must be os.path.joined with cache_dir"""

        # We want to generate an url to use as our cache key, we don't want to
        # just re-use the URL because it might have other items in the fragment
        # and we don't care about those.
        key_parts = {"url": link.url_without_fragment}
        if not (link.hash_name is None or link.hash is None):
            key_parts[link.hash_name] = link.hash
        if link.subdirectory_fragment:
            key_parts["subdirectory"] = link.subdirectory_fragment

        # Include interpreter name, major and minor version in cache key
        # to cope with ill-behaved sdists that build a different wheel
        # depending on the python version their setup.py is being run on,
        # and don't encode the difference in compatibility tags.
        # https://github.com/pypa/pip/issues/7296
        key_parts["interpreter_name"] = interpreter_name()
        key_parts["interpreter_version"] = interpreter_version()

        # Encode our key url with sha224, we'll use this because it has similar
        # security properties to sha256, but with a shorter total output (and
        # thus less secure). However the differences don't make a lot of
        # difference for our use case here.
        digest = _hash_dict(key_parts)

        # We want to nest the directories some to prevent having a ton of top
        # level directories where we might run out of sub directories on some
        # FS.
        return [digest[:2], digest[2:4], digest[4:6], digest[6:]]

    def _get_candidates(self, link: Link, canonical_package_name: str) -> List[Any]:
        """List ``(entry name, directory)`` pairs cached for ``link``.

        An empty list is returned when caching is not possible (no cache
        directory, package name or link), when none of the formats allowed
        for the package is handled by this cache, or when the cache directory
        for the link does not exist.
        """
        if not (self.cache_dir and canonical_package_name and link):
            return []

        permitted = self.format_control.get_allowed_formats(canonical_package_name)
        if not self.allowed_formats.intersection(permitted):
            return []

        path = self.get_path_for_link(link)
        if not os.path.isdir(path):
            return []
        return [(entry, path) for entry in os.listdir(path)]

    def get_path_for_link(self, link: Link) -> str:
        """Return a directory to store cached items in for link."""
        raise NotImplementedError()

    def get(
        self,
        link: Link,
        package_name: Optional[str],
        supported_tags: List[Tag],
    ) -> Link:
        """Returns a link to a cached item if it exists, otherwise returns the
        passed link.
        """
        raise NotImplementedError()
class SimpleWheelCache(Cache):
    """A cache of wheels for future installs."""

    def __init__(self, cache_dir: str, format_control: FormatControl) -> None:
        """Create a cache rooted at ``cache_dir`` that stores binaries only."""
        super().__init__(cache_dir, format_control, {"binary"})

    def get_path_for_link(self, link: Link) -> str:
        """Return a directory to store cached wheels for link

        Because there are M wheels for any one sdist, we provide a directory
        to cache them in, and then consult that directory when looking up
        cache hits.

        We only insert things into the cache if they have plausible version
        numbers, so that we don't contaminate the cache with things that were
        not unique. E.g. ./package might have dozens of installs done for it
        and build a version of 0.0...and if we built and cached a wheel, we'd
        end up using the same wheel even if the source has been edited.

        :param link: The link of the sdist for which this will cache wheels.
        """
        nested_dirs = self._get_cache_path_parts(link)
        assert self.cache_dir
        # Store wheels within the root cache_dir
        return os.path.join(self.cache_dir, "wheels", *nested_dirs)

    def get(
        self,
        link: Link,
        package_name: Optional[str],
        supported_tags: List[Tag],
    ) -> Link:
        """Return a link to the best cached wheel, or ``link`` itself.

        Cached files are skipped when their name is not a valid wheel
        filename, when they belong to another distribution, or when they do
        not support any of ``supported_tags``. Among the rest, the smallest
        ``(support index, wheel name, directory)`` tuple wins.
        """
        if not package_name:
            return link

        canonical_package_name = canonicalize_name(package_name)
        ranked = []
        for wheel_name, wheel_dir in self._get_candidates(link, canonical_package_name):
            try:
                wheel = Wheel(wheel_name)
            except InvalidWheelFilename:
                continue
            if canonicalize_name(wheel.name) != canonical_package_name:
                logger.debug(
                    "Ignoring cached wheel %s for %s as it "
                    "does not match the expected distribution name %s.",
                    wheel_name,
                    link,
                    package_name,
                )
                continue
            # Wheels built for a different python/arch/etc are not ranked.
            if wheel.supported(supported_tags):
                rank = wheel.support_index_min(supported_tags)
                ranked.append((rank, wheel_name, wheel_dir))

        if not ranked:
            return link

        best = min(ranked)
        best_name, best_dir = best[1], best[2]
        return Link(path_to_url(os.path.join(best_dir, best_name)))
class EphemWheelCache(SimpleWheelCache):
    """A SimpleWheelCache that creates its own temporary cache directory"""

    def __init__(self, format_control: FormatControl) -> None:
        """Create the temporary directory and use its path as the cache root."""
        ephemeral_dir = TempDirectory(
            kind=tempdir_kinds.EPHEM_WHEEL_CACHE,
            globally_managed=True,
        )
        # Keep a reference to the directory object on the instance.
        self._temp_dir = ephemeral_dir
        super().__init__(ephemeral_dir.path, format_control)
class CacheEntry:
    """A cached file together with where it was found and where it came from.

    ``origin`` is loaded from the ``origin.json`` file located in the same
    directory as the file ``link`` points at; it stays ``None`` when no such
    file exists.
    """

    def __init__(
        self,
        link: Link,
        persistent: bool,
    ):
        """Store the link and look up its download origin.

        :param link: Link to the cached file.
        :param persistent: Flag passed by the caller; WheelCache sets it to
            True for hits in its simple wheel cache and False for hits in its
            ephemeral cache.
        """
        self.link = link
        self.persistent = persistent
        self.origin: Optional[DirectUrl] = None
        origin_direct_url_path = Path(self.link.file_path).parent / ORIGIN_JSON_NAME
        if origin_direct_url_path.exists():
            # origin.json is written as UTF-8 (see
            # WheelCache.record_download_origin), so decode it as UTF-8
            # instead of relying on the locale's default encoding.
            self.origin = DirectUrl.from_json(
                origin_direct_url_path.read_text(encoding="utf-8")
            )
class WheelCache(Cache):
    """Wraps EphemWheelCache and SimpleWheelCache into a single Cache

    This Cache allows for graceful degradation, using the ephem wheel cache
    when a certain link is not found in the simple wheel cache first.
    """

    def __init__(
        self, cache_dir: str, format_control: Optional[FormatControl] = None
    ) -> None:
        """Create the persistent and the ephemeral wheel caches.

        :param cache_dir: Root of the persistent cache.
        :param format_control: Format restrictions shared by both caches; a
            default ``FormatControl()`` is used when omitted.
        """
        if format_control is None:
            format_control = FormatControl()
        super().__init__(cache_dir, format_control, {"binary"})
        self._wheel_cache = SimpleWheelCache(cache_dir, format_control)
        self._ephem_cache = EphemWheelCache(format_control)

    def get_path_for_link(self, link: Link) -> str:
        """Return the persistent cache directory for ``link``."""
        return self._wheel_cache.get_path_for_link(link)

    def get_ephem_path_for_link(self, link: Link) -> str:
        """Return the ephemeral cache directory for ``link``."""
        return self._ephem_cache.get_path_for_link(link)

    def get(
        self,
        link: Link,
        package_name: Optional[str],
        supported_tags: List[Tag],
    ) -> Link:
        """Return a link to a cached item if one exists, else ``link``."""
        cache_entry = self.get_cache_entry(link, package_name, supported_tags)
        if cache_entry is None:
            return link
        return cache_entry.link

    def get_cache_entry(
        self,
        link: Link,
        package_name: Optional[str],
        supported_tags: List[Tag],
    ) -> Optional[CacheEntry]:
        """Returns a CacheEntry with a link to a cached item if it exists or
        None. The cache entry indicates if the item was found in the persistent
        or ephemeral cache.
        """
        # The underlying caches hand back the very same ``link`` object on a
        # miss, so an identity check is enough to detect a hit.
        retval = self._wheel_cache.get(
            link=link,
            package_name=package_name,
            supported_tags=supported_tags,
        )
        if retval is not link:
            return CacheEntry(retval, persistent=True)

        retval = self._ephem_cache.get(
            link=link,
            package_name=package_name,
            supported_tags=supported_tags,
        )
        if retval is not link:
            return CacheEntry(retval, persistent=False)

        return None

    @staticmethod
    def record_download_origin(cache_dir: str, download_info: DirectUrl) -> None:
        """Write ``download_info`` to ``origin.json`` inside ``cache_dir``.

        If an origin file already exists and records a different URL, a
        warning is logged before the file is overwritten.

        :param cache_dir: Directory of the cache entry.
        :param download_info: Download origin to record.
        """
        origin_path = Path(cache_dir) / ORIGIN_JSON_NAME
        if origin_path.is_file():
            # Read with the same encoding the file is written with below,
            # rather than the locale's default encoding.
            origin = DirectUrl.from_json(origin_path.read_text(encoding="utf-8"))
            # TODO: use DirectUrl.equivalent when https://github.com/pypa/pip/pull/10564
            # is merged.
            if origin.url != download_info.url:
                logger.warning(
                    "Origin URL %s in cache entry %s does not match download URL %s. "
                    "This is likely a pip bug or a cache corruption issue.",
                    origin.url,
                    cache_dir,
                    download_info.url,
                )
        origin_path.write_text(download_info.to_json(), encoding="utf-8")
"""Subpackage containing all of pip's command line interface related code
"""
# This file intentionally does not import submodules