"""
All HTTP requests that are performed by FastF1 go through its caching and
rate limiting system.
Caching is enabled by default in FastF1 and most of the time, you do not need
to worry about caching at all. It will simply happen automatically in the
background and speed up your programs. Disabling the cache is highly
discouraged and will generally slow down your programs.
Rate limits are applied at all times. Requests that can be served from the
cache do not count towards any rate limits. Having the cache enabled can
therefore virtually increase the rate limits.
When rate limits are exceeded, FastF1 will either...
- throttle the rate of requests, if small delays are sufficient to stay within
the limit (soft rate limit)
- raise a :class:`fastf1.RateLimitExceededError` (hard rate limit)
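
A minimal sketch of handling a hard rate limit; ``get_session`` and
``Session.load`` are regular FastF1 calls, the bare retry placeholder is
just an illustration::

    import fastf1

    try:
        session = fastf1.get_session(2021, 5, 'Q')
        session.load()
    except fastf1.RateLimitExceededError:
        # a hard limit was hit; back off and retry later instead of
        # sending further requests immediately
        ...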
"""
import collections
import datetime
import functools
import math
import os
import pickle
import re
import sys
import time
import warnings
from typing import (
Literal,
Optional
)
import requests
from requests_cache import CacheMixin
from fastf1.logger import get_logger
_logger = get_logger(__name__)
# A NOTE TO EVERYBODY WHO READS THIS CODE
# ##############################################
# Rate limits are defined for technical reasons.
# They are not created to simply annoy you even if they may feel annoying.
#
# Some of the APIs that FastF1 accesses are provided by individuals, free
# of charge and in their spare time. Because of that, they may have very
# limited server capacity. We should accept that and be grateful that they
# even exist in the first place.
# Other APIs may be provided by larger companies. But consequently they
# also need to cope with more traffic. We should accept their API limits as
# well.
#
# IN SHORT:
# Please do not edit API limits! If you run into API limits, it is more often
# than not the case that your code can be optimized to prevent this.
# Please optimize your code!
#
# Violating the API limits may get you or even the whole FastF1 project blocked
# from accessing a specific API. This has happened before and just causes
# unnecessary hassle for many people.
class _MinIntervalLimitDelay:
"""Ensure that there is at least a minimum delay between each request.
Sleeps for the remaining amount of time if the last request was more recent
than allowed by the minimum interval rule.
"""
def __init__(self, interval: float):
self._interval: float = interval
self._t_last: float = 0.0
    def limit(self):
        t_now = time.time()
        # if the previous request is more recent than the minimum interval
        # allows, sleep for the remaining time
        if (delta := (t_now - self._t_last)) < self._interval:
            time.sleep(self._interval - delta)
            t_now += self._interval - delta
        self._t_last = t_now
class _CallsPerIntervalLimitRaise:
"""Ensures that there is a maximum number of requests within a fixed
interval of time.
    If the maximum number of allowed requests within this interval is
    exceeded, a :class:`RateLimitExceededError` exception is raised.
"""
def __init__(self, calls: int, interval: float, info: str):
self._interval: float = interval
self._timestamps = collections.deque(maxlen=calls)
self._info = info
    def limit(self):
        # the deque holds the timestamps of the most recent ``calls``
        # requests; appending to a full deque drops the oldest entry
        self._timestamps.append(time.time())
        if len(self._timestamps) == self._timestamps.maxlen:
            # if the oldest tracked request still falls within the
            # interval, the allowed number of calls has been exceeded
            if self._timestamps[0] > (time.time() - self._interval):
                raise RateLimitExceededError(self._info)
class _SessionWithRateLimiting(requests.Session):
"""Apply rate limiters to requests that match a URL pattern.
"""
_RATE_LIMITS = {
# limits on ergast.com
re.compile(r"^https?://(\w+\.)?ergast\.com.*"): [
_MinIntervalLimitDelay(0.25),
# soft limit 4 calls/sec
_CallsPerIntervalLimitRaise(200, 60*60, "ergast.com: 200 calls/h")
# hard limit 200 calls/h
],
# general limits on all other APIs
re.compile(r"^https?://.+\..+"): [
_MinIntervalLimitDelay(0.25),
# soft limit 4 calls/sec
_CallsPerIntervalLimitRaise(500, 60 * 60, "any API: 500 calls/h")
# hard limit 200 calls/h
],
}
def send(self, request, **kwargs):
        # apply rate limiting before deferring to ``requests.Session.send``
for pattern, limiters in self._RATE_LIMITS.items():
# match url pattern
if pattern.match(request.url):
for lim in limiters:
# apply all defined limiters
lim.limit()
return super().send(request, **kwargs)
class _CachedSessionWithRateLimiting(CacheMixin, _SessionWithRateLimiting):
"""Equivalent of ``requests_cache.CachedSession```but using
:class:`_SessionWithRateLimiting` as base instead of ``requests.Session``.
"""
pass
class _MetaCache(type):
def __repr__(self):
# implements __repr__ for the Cache class itself
if self._CACHE_DIR:
path = self._CACHE_DIR
size = self._convert_size(self._get_size(path))
return f"FastF1 cache ({size}) {path}"
return "FastF1 cache - not configured"
class Cache(metaclass=_MetaCache):
"""Pickle and requests based API cache.
    FastF1 enables caching by default. While this can be disabled, it
    should almost always be left enabled to speed up the runtime of your
    scripts and to prevent exceeding the rate limits of API servers.
The default cache directory is defined, in order of precedence, in one
of the following ways:
#. A call to :func:`enable_cache`
#. The value of the environment variable ``FASTF1_CACHE``
#. An OS dependent default cache directory
See below for more information on default cache directories.
The following class-level functions are used to set up, enable and
(temporarily) disable caching.
.. autosummary::
fastf1.Cache.enable_cache
fastf1.Cache.clear_cache
fastf1.Cache.get_cache_info
fastf1.Cache.disabled
fastf1.Cache.set_disabled
fastf1.Cache.set_enabled
fastf1.Cache.offline_mode
The parsed API data will be saved as a pickled object.
Raw GET and POST requests are cached in a sqlite db using the
'requests-cache' module.
Requests that can be served from the cache do not count towards any
API rate limits.
The cache has two "stages":
- Stage 1: Caching of raw GET requests. This works for all requests.
Cache control is employed to refresh the cached data periodically.
- Stage 2: Caching of the parsed data. This saves a lot of time when
running your scripts, as parsing of the data is computationally
expensive. Stage 2 caching is only used for some api functions.
    You can explicitly configure the cache right at the beginning of your
    script:

    >>> import fastf1
    >>> # change the cache directory to an existing empty directory
    >>> fastf1.Cache.enable_cache('path/to/cache')  # doctest: +SKIP
    >>> session = fastf1.get_session(2021, 5, 'Q')
    >>> # ...
An alternative way to set the cache directory is to configure an
environment variable `FASTF1_CACHE`. However, this value will be
ignored if `Cache.enable_cache()` is called.
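
    Because the environment variable is only read when the default cache is
    first enabled, it can also be set from Python before any data is loaded
    (a sketch; any other way of setting the variable works as well)::

        import os
        os.environ['FASTF1_CACHE'] = 'path/to/cache'

        import fastf1
        # the first data request will now use 'path/to/cache'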
If no explicit location is provided, Fast-F1 will use a default location
depending on operating system.
    - Windows: `%LOCALAPPDATA%\\Temp\\fastf1`
- macOS: `~/Library/Caches/fastf1`
- Linux: `~/.cache/fastf1` if `~/.cache` exists otherwise `~/.fastf1`
    Cached data can be deleted at any time to reclaim disk space. However,
    this also means that you will have to download the same data again if
    you need it, which will lead to reduced performance.
"""
_CACHE_DIR = None
# version of the api parser code (unrelated to release version number)
_API_CORE_VERSION = 14
_IGNORE_VERSION = False
_FORCE_RENEW = False
_requests_session_cached: Optional[_CachedSessionWithRateLimiting] = None
_requests_session: requests.Session = _SessionWithRateLimiting()
_default_cache_enabled = False # flag to ensure that warning about disabled cache is logged once only # noqa: E501
_tmp_disabled = False
_ci_mode = False
_request_counter = 0 # count uncached requests for debugging purposes
@classmethod
def enable_cache(
cls, cache_dir: str, ignore_version: bool = False,
force_renew: bool = False,
use_requests_cache: bool = True):
"""Enables the API cache.
Args:
cache_dir: Path to the directory which should be used to store
cached data. Path needs to exist.
ignore_version: Ignore if cached data was created with a different
version of the API parser (not recommended: this can cause
crashes or unrecognized errors as incompatible data may be
loaded)
force_renew: Ignore existing cached data. Download data and update
the cache instead.
use_requests_cache: Do caching of the raw GET and POST requests.
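
        Example (``force_renew=True`` discards all existing cached data
        and rebuilds the cache; a sketch)::

            import fastf1
            fastf1.Cache.enable_cache('path/to/cache', force_renew=True)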
"""
# Allow users to use paths such as %LOCALAPPDATA%
cache_dir = os.path.expandvars(cache_dir)
# Allow users to use paths such as ~user or ~/
cache_dir = os.path.expanduser(cache_dir)
if not os.path.exists(cache_dir):
raise NotADirectoryError("Cache directory does not exist! Please "
"check for typos or create it first.")
cls._CACHE_DIR = cache_dir
cls._IGNORE_VERSION = ignore_version
cls._FORCE_RENEW = force_renew
if use_requests_cache:
cls._requests_session_cached = _CachedSessionWithRateLimiting(
cache_name=os.path.join(cache_dir, 'fastf1_http_cache'),
backend='sqlite',
allowable_methods=('GET', 'POST'),
expire_after=datetime.timedelta(hours=12),
cache_control=True,
stale_if_error=True,
filter_fn=cls._custom_cache_filter
)
if force_renew:
cls._requests_session_cached.cache.clear()
@classmethod
def requests_get(cls, url: str, **kwargs):
"""Wraps `requests.Session().get()` with caching if enabled.
All GET requests that require caching should be performed through this
wrapper. Caching will be done if the module-wide cache has been
enabled. Else, `requests.Session().get()` will be called without any
caching.
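
        Example (the Ergast URL is only an illustration; any request works
        and rate limits still apply)::

            resp = Cache.requests_get('https://ergast.com/api/f1/2021.json')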
"""
cls._enable_default_cache()
if (cls._requests_session_cached is None) or cls._tmp_disabled:
cls._request_counter += 1
return cls._requests_session.get(url, **kwargs)
if cls._ci_mode:
# try to return a cached response first
resp = cls._cached_request(
'GET', url, only_if_cached=True, **kwargs)
# 504 indicates that no cached response was found
if resp.status_code != 504:
return resp
cls._request_counter += 1
return cls._cached_request('GET', url, **kwargs)
@classmethod
def requests_post(cls, url: str, **kwargs):
"""Wraps `requests.Session().post()` with caching if enabled.
All POST requests that require caching should be performed through this
        wrapper. Caching will be done if the module-wide cache has been
        enabled. Else, `requests.Session().post()` will be called without any
        caching.
"""
cls._enable_default_cache()
if (cls._requests_session_cached is None) or cls._tmp_disabled:
cls._request_counter += 1
return cls._requests_session.post(url, **kwargs)
if cls._ci_mode:
# try to return a cached response first
resp = cls._cached_request(
'POST', url, only_if_cached=True, **kwargs)
# 504 indicates that no cached response was found
if resp.status_code != 504:
return resp
cls._request_counter += 1
return cls._cached_request('POST', url, **kwargs)
@classmethod
def _cached_request(cls,
method: Literal['GET', 'POST'],
url: str,
**kwargs):
if method == 'GET':
func = cls._requests_session_cached.get
elif method == 'POST':
func = cls._requests_session_cached.post
else:
raise ValueError("Invalid method. Must be 'GET' or 'POST'.")
# catch TypeError raised by outdated requests-cache version if the
# cache was created with a newer version
# github.com/requests-cache/requests-cache/issues/973
try:
response = func(url, **kwargs)
except TypeError:
warnings.warn("You are using an outdated version of "
"requests-cache. Consider upgrading.", UserWarning)
cls._requests_session_cached.cache.delete(urls=[url])
response = func(url, **kwargs)
return response
@classmethod
def delete_response(cls, url):
"""Deletes a single cached response from the cache, if caching is
enabled. If caching is not enabled, this call is ignored."""
if cls._requests_session_cached is not None:
cls._requests_session_cached.cache.delete(urls=[url])
@staticmethod
def _custom_cache_filter(response: requests.Response):
# this function provides custom filtering to decide which responses
# get cached
# workaround for Ergast returning error with status code 200
if 'Unable to select database' in response.text:
return False
return True
@classmethod
def clear_cache(cls, cache_dir=None, deep=False):
"""Clear all cached data.
Deletes all files in the cache directory. By default, it will clear
the default cache directory. However, if a cache directory is
provided as an argument this will be cleared instead. Optionally,
the requests cache can be cleared too.
Can be called without enabling the cache first.
Deleting specific events or sessions is not supported but can be done
manually (stage 2 cache). The cached data is structured by year, event
and session. The structure is more or less self-explanatory. To delete
specific events or sessions delete the corresponding folder within the
cache directory. Deleting specific requests from the requests cache
(stage 1) is not possible. To delete the requests cache only, delete
the sqlite file in the root of the cache directory.
Args:
cache_dir (str): Path to the directory which is used to store
cached data.
deep (bool): Clear the requests cache (stage 1) too.
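
        Example (``deep=True`` additionally deletes the requests cache;
        a sketch)::

            import fastf1
            fastf1.Cache.clear_cache(deep=True)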
"""
if cache_dir is None:
if cls._CACHE_DIR is None:
cache_dir = cls._get_default_cache_path()
else:
cache_dir = cls._CACHE_DIR
# We need to expand the directory to support ~/
cache_dir = os.path.expandvars(cache_dir)
cache_dir = os.path.expanduser(cache_dir)
if not os.path.exists(cache_dir):
raise NotADirectoryError("Cache directory does not exist!")
for dirpath, dirnames, filenames in os.walk(cache_dir):
for filename in filenames:
if filename.endswith('.ff1pkl'):
os.remove(os.path.join(dirpath, filename))
if deep:
cache_db_path = os.path.join(cache_dir, 'fastf1_http_cache.sqlite')
if os.path.exists(cache_db_path):
os.remove(cache_db_path)
@classmethod
def api_request_wrapper(cls, func):
"""Wrapper function for adding stage 2 caching to api functions.
Args:
func: function to be wrapped
Returns:
The wrapped function
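
        Example (``fetch_example_data`` is a hypothetical API function,
        shown for illustration only)::

            @Cache.api_request_wrapper
            def fetch_example_data(api_path):
                ...  # download and parse data for ``api_path``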
"""
@functools.wraps(func)
def _cached_api_request(api_path, **func_kwargs):
if cls._CACHE_DIR and not cls._tmp_disabled:
# caching is enabled
func_name = str(func.__name__)
cache_file_path = cls._get_cache_file_path(api_path, func_name)
if os.path.isfile(cache_file_path):
if cls._ci_mode:
# skip pickle cache in ci mode so that API parser code
# is always executed. Only http cache is active
return func(api_path, **func_kwargs)
# file exists already, try to load it
try:
cached = pickle.load(open(cache_file_path, 'rb'))
except: # noqa: E722 (bare except)
# don't like the bare exception clause but who knows
# which dependency will raise which internal exception
# after it was updated
cached = None
if (cached is not None) and cls._data_ok_for_use(cached):
# cached data is ok for use, return it
_logger.info(f"Using cached data for {func_name}")
return cached['data']
else:
# cached data needs to be downloaded again and updated
_logger.info(f"Updating cache for {func_name}...")
data = func(api_path, **func_kwargs)
if data is not None:
cls._write_cache(data, cache_file_path)
_logger.info("Cache updated!")
return data
_logger.critical(
"A cache update is required but the data failed "
"to download. Cannot continue!\nYou may force to "
"ignore a cache version mismatch by using the "
"`ignore_version=True` keyword when enabling the "
"cache (not recommended)."
)
exit()
else: # cached data does not yet exist for this api request
_logger.info(f"No cached data found for {func_name}. "
f"Loading data...")
data = func(api_path, **func_kwargs)
if data is not None:
cls._write_cache(data, cache_file_path)
_logger.info("Data has been written to cache!")
return data
_logger.critical("Failed to load data!")
exit()
else: # cache was not enabled
if not cls._tmp_disabled:
cls._enable_default_cache()
return func(api_path, **func_kwargs)
return _cached_api_request
@classmethod
def _get_cache_file_path(cls, api_path, name):
# extend the cache dir path using the api path and a file name
        # the leading '/static/' is dropped from the api path
cache_dir_path = os.path.join(cls._CACHE_DIR, api_path[8:])
if not os.path.exists(cache_dir_path):
# create subfolders if they don't yet exist
os.makedirs(cache_dir_path)
file_name = name + '.ff1pkl'
cache_file_path = os.path.join(cache_dir_path, file_name)
return cache_file_path
@classmethod
def _data_ok_for_use(cls, cached):
# check if cached data is ok or needs to be downloaded again
if cls._FORCE_RENEW:
return False
elif cls._IGNORE_VERSION:
return True
elif cached['version'] == cls._API_CORE_VERSION:
return True
return False
@classmethod
def _write_cache(cls, data, cache_file_path, **kwargs):
new_cached = dict(
**{'version': cls._API_CORE_VERSION, 'data': data},
**kwargs
)
with open(cache_file_path, 'wb') as cache_file_obj:
pickle.dump(new_cached, cache_file_obj)
@classmethod
def _get_default_cache_path(cls):
if sys.platform == "linux":
            # if ~/.cache exists, use it; otherwise fall back to ~/.fastf1
tmp = os.path.expanduser("~/.cache")
if os.path.exists(tmp):
return r"~/.cache/fastf1"
else:
return r"~/.fastf1"
elif sys.platform == "darwin":
return r"~/Library/Caches/fastf1"
elif sys.platform == "win32":
return r"%LOCALAPPDATA%\Temp\fastf1"
else:
return None
@classmethod
def _enable_default_cache(cls):
if not cls._CACHE_DIR and not cls._default_cache_enabled:
cache_dir = None
if "FASTF1_CACHE" in os.environ:
cache_dir = os.environ.get("FASTF1_CACHE")
else:
cache_dir = cls._get_default_cache_path()
if cache_dir is not None:
# Ensure the default cache folder exists
cache_dir = os.path.expandvars(cache_dir)
cache_dir = os.path.expanduser(cache_dir)
if not os.path.exists(cache_dir):
try:
os.mkdir(cache_dir, mode=0o0700)
except Exception as err:
_logger.error(f"Failed to create cache directory "
f"{cache_dir}. Error {err}")
raise
# Enable cache with default
cls.enable_cache(cache_dir)
_logger.warning(
f"DEFAULT CACHE ENABLED! "
f"({cls._convert_size(cls._get_size(cache_dir))}) "
f"{cache_dir}"
)
else:
# warn only once and only if cache is not enabled
_logger.warning(
"\n\nNO CACHE! Api caching has not been enabled! \n\t"
"It is highly recommended to enable this feature for much "
"faster data loading!\n\t"
"Use `fastf1.Cache.enable_cache('path/to/cache/')`\n")
cls._default_cache_enabled = True
@classmethod
def disabled(cls):
"""Returns a context manager object that creates a context within
which the cache is temporarily disabled.
        Example::

            with Cache.disabled():
                # no caching takes place here
                ...
.. note::
The context manager is not multithreading-safe
"""
return _NoCacheContext()
@classmethod
def set_disabled(cls):
"""Disable the cache while keeping the configuration intact.
This disables stage 1 and stage 2 caching!
You can enable the cache at any time using :func:`set_enabled`
.. note:: You may prefer to use :func:`disabled` to get a context
manager object and disable the cache only within a specific
context.
.. note::
This function is not multithreading-safe
"""
cls._tmp_disabled = True
@classmethod
def set_enabled(cls):
"""Enable the cache after it has been disabled with
:func:`set_disabled`.
.. warning::
            To enable the cache it needs to be configured properly. You need
            to call :func:`enable_cache` once to enable the cache initially.
:func:`set_enabled` and :func:`set_disabled` only serve to
(temporarily) disable the cache for specific parts of code that
should be run without caching.
.. note::
This function is not multithreading-safe
"""
cls._tmp_disabled = False
@classmethod
def offline_mode(cls, enabled: bool):
"""Enable or disable offline mode.
In this mode, no actual requests will be sent and only cached data is
returned. This can be useful for freezing the state of the cache or
working with an unstable internet connection.
Args:
enabled: sets the state of offline mode to 'enabled' (``True``)
or 'disabled' (``False``)
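
        Example::

            import fastf1
            fastf1.Cache.offline_mode(True)   # serve cached responses only
            # ... work with data that is already cached ...
            fastf1.Cache.offline_mode(False)  # resume normal operation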
"""
if cls._requests_session_cached is None:
cls._enable_default_cache()
cls._requests_session_cached.settings.only_if_cached = enabled
@classmethod
def ci_mode(cls, enabled: bool):
"""Enable or disable CI mode.
In this mode, cached requests will be reused even if they are expired.
Only uncached data will actually be requested and is then cached. This
means, as long as CI mode is enabled, every request is only ever made
once and reused indefinitely.
        This serves two purposes. First, it reduces the number of requests
        that are sent when a large number of tests is run in parallel,
        potentially in multiple environments simultaneously. Second, it
        makes test runs more predictable because data usually does not
        change between runs.
Additionally, the pickle cache (stage 2) is disabled completely, so
no parsed data is cached. This means that the API parser code is
always executed and not skipped due to caching.
"""
cls._ci_mode = enabled
@classmethod
def get_cache_info(cls) -> tuple[Optional[str], Optional[int]]:
"""Returns information about the cache directory and its size.
If the cache is not configured, None will be returned for both the
cache path and the cache size.
Returns:
A tuple of ``(path, size)`` if the cache is configured, else
``(None, None)``. The cache size is given in bytes.
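
        Example::

            path, size = fastf1.Cache.get_cache_info()
            if path is not None:
                print(f"cache at {path} uses {size} bytes")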
"""
path = cls._CACHE_DIR
if path is not None:
size = cls._get_size(path)
else:
size = None
return path, size
@classmethod
def _convert_size(cls, size_bytes): # https://stackoverflow.com/questions/5194057/better-way-to-convert-file-sizes-in-python # noqa: E501
if size_bytes == 0:
return "0B"
size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
i = int(math.floor(math.log(size_bytes, 1024)))
p = math.pow(1024, i)
s = round(size_bytes / p, 2)
return f"{s} {size_name[i]}"
@classmethod
def _get_size(cls, start_path='.'): # https://stackoverflow.com/questions/1392413/calculating-a-directorys-size-using-python # noqa: E501
total_size = 0
for dirpath, dirnames, filenames in os.walk(start_path):
for f in filenames:
fp = os.path.join(dirpath, f)
# skip if it is symbolic link
if not os.path.islink(fp):
total_size += os.path.getsize(fp)
return total_size
class _NoCacheContext:
def __enter__(self):
Cache.set_disabled()
def __exit__(self, exc_type, exc_val, exc_tb):
Cache.set_enabled()
# TODO: document
class RateLimitExceededError(Exception):
"""Raised if a hard rate limit is exceeded."""
pass