Source code for fastf1.req

"""
All HTTP requests that are performed by FastF1 go through its caching and
rate limiting system.

Caching is enabled by default in FastF1 and most of the time, you do not need
to worry about caching at all. It will simply happen automatically in the
background and speed up your programs. Disabling the cache is highly
discouraged and will generally slow down your programs.

Rate limits are applied at all times. Requests that can be served from the
cache do not count towards any rate limits. Having the cache enabled can
therefore effectively increase the rate limits.

When rate limits are exceeded, FastF1 will either...

- throttle the rate of requests, if small delays are sufficient to stay within
  the limit (soft rate limit)

- raise a :class:`fastf1.RateLimitExceededError` (hard rate limit), as in the
  example below
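
For example, a hard rate limit can be handled explicitly in user code. This
is a minimal sketch (the chosen session is an arbitrary example)::

    import fastf1

    try:
        session = fastf1.get_session(2021, 5, 'Q')
        session.load()
    except fastf1.RateLimitExceededError as err:
        print(f"Rate limit exceeded: {err}")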

"""

import collections
import datetime
import functools
import math
import os
import pickle
import re
import sys
import time
import warnings
from typing import (
    Literal,
    Optional
)

import requests
from requests_cache import CacheMixin

from fastf1.logger import get_logger


_logger = get_logger(__name__)


# A NOTE TO EVERYBODY WHO READS THIS CODE
# ##############################################
# Rate limits are defined for technical reasons.
# They are not created to simply annoy you even if they may feel annoying.
#
# Some of the APIs that FastF1 accesses are provided by individuals, free
# of charge and in their spare time. Because of that, they may have very
# limited server capacity. We should accept that and be grateful that they
# even exist in the first place.
# Other APIs may be provided by larger companies. But consequently they
# also need to cope with more traffic. We should accept their API limits as
# well.
#
# IN SHORT:
# Please do not edit API limits! If you run into API limits, it is more often
# than not the case that your code can be optimized to prevent this.
# Please optimize your code!
#
# Violating the API limits may get you or even the whole FastF1 project blocked
# from accessing a specific API. This has happened before and just causes
# unnecessary hassle for many people.


class _MinIntervalLimitDelay:
    """Ensure that there is at least a minimum delay between each request.

    Sleeps for the remaining amount of time if the last request was more recent
    than allowed by the minimum interval rule.
    """
    def __init__(self, interval: float):
        self._interval: float = interval
        self._t_last: float = 0.0

    def limit(self):
        t_now = time.time()
        if (delta := (t_now - self._t_last)) < self._interval:
            time.sleep(self._interval - delta)
            t_now += self._interval - delta
        self._t_last = t_now
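
# Illustration (not part of the original module): with an interval of 0.25s,
# two back-to-back calls to `limit()` are spaced at least 0.25s apart, i.e.
# at most ~4 calls per second pass through:
#
#     limiter = _MinIntervalLimitDelay(0.25)
#     limiter.limit()  # first call ever, returns immediately
#     limiter.limit()  # sleeps for the remaining ~0.25s before returning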


class _CallsPerIntervalLimitRaise:
    """Ensures that there is a maximum number of requests within a fixed
    interval of time.

    If the maximum number of allowed requests within this interval is exceeded,
    a :class:`RateLimitExceededError` exception is raised.
    """
    def __init__(self, calls: int, interval: float, info: str):
        self._interval: float = interval
        self._timestamps = collections.deque(maxlen=calls)
        self._info = info

    def limit(self):
        self._timestamps.append(time.time())
        if len(self._timestamps) == self._timestamps.maxlen:
            if self._timestamps[0] > (time.time() - self._interval):
                raise RateLimitExceededError(self._info)
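
# Illustration (not part of the original module): the deque holds the
# timestamps of the most recent `calls` requests. Once it is full and its
# oldest entry still lies within the interval, `calls` requests occurred
# within `interval` seconds and the hard limit fires:
#
#     limiter = _CallsPerIntervalLimitRaise(3, 60.0, "demo: 3 calls/min")
#     limiter.limit()  # ok
#     limiter.limit()  # ok
#     limiter.limit()  # raises RateLimitExceededError (3 calls in <60s)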


class _SessionWithRateLimiting(requests.Session):
    """Apply rate limiters to requests that match a URL pattern.
    """
    _RATE_LIMITS = {
        # limits on ergast.com
        re.compile(r"^https?://(\w+\.)?ergast\.com.*"): [
            _MinIntervalLimitDelay(0.25),  # soft limit: 4 calls/sec
            # hard limit: 200 calls/h
            _CallsPerIntervalLimitRaise(200, 60 * 60, "ergast.com: 200 calls/h")
        ],
        # general limits on all other APIs
        re.compile(r"^https?://.+\..+"): [
            _MinIntervalLimitDelay(0.25),  # soft limit: 4 calls/sec
            # hard limit: 500 calls/h
            _CallsPerIntervalLimitRaise(500, 60 * 60, "any API: 500 calls/h")
        ],
    }

    def send(self, request, **kwargs):
        # patches rate limiting into `requests.Session.send`
        for pattern, limiters in self._RATE_LIMITS.items():
            # match url pattern
            if pattern.match(request.url):
                for lim in limiters:
                    # apply all defined limiters
                    lim.limit()

        return super().send(request, **kwargs)
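
# Illustration (not part of the original module): a request to ergast.com
# matches both patterns above and is therefore throttled by both limiter
# groups; any other http(s) URL only hits the generic catch-all entry:
#
#     s = _SessionWithRateLimiting()
#     s.get("https://ergast.com/api/f1/2021.json")  # ergast + generic limits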


class _CachedSessionWithRateLimiting(CacheMixin, _SessionWithRateLimiting):
    """Equivalent of ``requests_cache.CachedSession```but using
    :class:`_SessionWithRateLimiting` as base instead of ``requests.Session``.
    """
    pass


class _MetaCache(type):
    def __repr__(self):
        # implements __repr__ for the Cache class itself
        if self._CACHE_DIR:
            path = self._CACHE_DIR
            size = self._convert_size(self._get_size(path))
            return f"FastF1 cache ({size}) {path}"

        return "FastF1 cache - not configured"


class Cache(metaclass=_MetaCache):
    """Pickle and requests based API cache.

    Fast-F1 will enable caching by default. While this can be disabled, it
    should almost always be left enabled to speed up the runtime of your
    scripts and to prevent exceeding the rate limit of api servers.

    The default cache directory is defined, in order of precedence, in one
    of the following ways:

    #. A call to :func:`enable_cache`
    #. The value of the environment variable ``FASTF1_CACHE``
    #. An OS dependent default cache directory

    See below for more information on default cache directories.

    The following class-level functions are used to set up, enable and
    (temporarily) disable caching.

    .. autosummary::
        fastf1.Cache.enable_cache
        fastf1.Cache.clear_cache
        fastf1.Cache.get_cache_info
        fastf1.Cache.disabled
        fastf1.Cache.set_disabled
        fastf1.Cache.set_enabled
        fastf1.Cache.offline_mode

    The parsed API data will be saved as a pickled object.
    Raw GET and POST requests are cached in a sqlite db using the
    'requests-cache' module.

    Requests that can be served from the cache do not count towards any
    API rate limits.

    The cache has two "stages":

    - Stage 1: Caching of raw GET requests. This works for all requests.
      Cache control is employed to refresh the cached data periodically.
    - Stage 2: Caching of the parsed data. This saves a lot of time when
      running your scripts, as parsing of the data is computationally
      expensive. Stage 2 caching is only used for some api functions.

    You can explicitly configure the cache right at the beginning of your
    script:

        >>> import fastf1
        >>> # change the cache directory to an existing empty directory
        >>> # on your machine
        >>> fastf1.Cache.enable_cache('path/to/cache')  # doctest: +SKIP
        >>> session = fastf1.get_session(2021, 5, 'Q')
        >>> # ...

    An alternative way to set the cache directory is to configure an
    environment variable `FASTF1_CACHE`. However, this value will be
    ignored if `Cache.enable_cache()` is called.

    If no explicit location is provided, Fast-F1 will use a default location
    depending on operating system.

    - Windows: `%LOCALAPPDATA%\\\\Temp\\\\fastf1`
    - macOS: `~/Library/Caches/fastf1`
    - Linux: `~/.cache/fastf1` if `~/.cache` exists otherwise `~/.fastf1`

    Cached data can be deleted at any time to reclaim disk space. However,
    this also means you will have to redownload the same data again if you
    need it, which will lead to reduced performance.
    """
    _CACHE_DIR = None
    # version of the api parser code (unrelated to release version number)
    _API_CORE_VERSION = 14
    _IGNORE_VERSION = False
    _FORCE_RENEW = False

    _requests_session_cached: Optional[_CachedSessionWithRateLimiting] = None
    _requests_session: requests.Session = _SessionWithRateLimiting()
    _default_cache_enabled = False  # flag to ensure that warning about disabled cache is logged once only  # noqa: E501
    _tmp_disabled = False
    _ci_mode = False
    _request_counter = 0  # count uncached requests for debugging purposes

    @classmethod
    def enable_cache(
            cls, cache_dir: str,
            ignore_version: bool = False,
            force_renew: bool = False,
            use_requests_cache: bool = True):
        """Enables the API cache.

        Args:
            cache_dir: Path to the directory which should be used to store
                cached data. Path needs to exist.
            ignore_version: Ignore if cached data was created with a
                different version of the API parser (not recommended: this
                can cause crashes or unrecognized errors as incompatible data
                may be loaded)
            force_renew: Ignore existing cached data. Download data and
                update the cache instead.
            use_requests_cache: Do caching of the raw GET and POST requests.
        """
        # Allow users to use paths such as %LOCALAPPDATA%
        cache_dir = os.path.expandvars(cache_dir)
        # Allow users to use paths such as ~user or ~/
        cache_dir = os.path.expanduser(cache_dir)

        if not os.path.exists(cache_dir):
            raise NotADirectoryError("Cache directory does not exist! Please "
                                     "check for typos or create it first.")

        cls._CACHE_DIR = cache_dir
        cls._IGNORE_VERSION = ignore_version
        cls._FORCE_RENEW = force_renew

        if use_requests_cache:
            cls._requests_session_cached = _CachedSessionWithRateLimiting(
                cache_name=os.path.join(cache_dir, 'fastf1_http_cache'),
                backend='sqlite',
                allowable_methods=('GET', 'POST'),
                expire_after=datetime.timedelta(hours=12),
                cache_control=True,
                stale_if_error=True,
                filter_fn=cls._custom_cache_filter
            )
            if force_renew:
                cls._requests_session_cached.cache.clear()

    @classmethod
    def requests_get(cls, url: str, **kwargs):
        """Wraps `requests.Session().get()` with caching if enabled.

        All GET requests that require caching should be performed through
        this wrapper. Caching will be done if the module-wide cache has been
        enabled. Else, `requests.Session().get()` will be called without any
        caching.
        """
        cls._enable_default_cache()
        if (cls._requests_session_cached is None) or cls._tmp_disabled:
            cls._request_counter += 1
            return cls._requests_session.get(url, **kwargs)

        if cls._ci_mode:
            # try to return a cached response first
            resp = cls._cached_request(
                'GET', url, only_if_cached=True, **kwargs)
            # 504 indicates that no cached response was found
            if resp.status_code != 504:
                return resp

        cls._request_counter += 1
        return cls._cached_request('GET', url, **kwargs)

    @classmethod
    def requests_post(cls, url: str, **kwargs):
        """Wraps `requests.Session().post()` with caching if enabled.

        All POST requests that require caching should be performed through
        this wrapper. Caching will be done if the module-wide cache has been
        enabled. Else, `requests.Session().post()` will be called without any
        caching.
        """
        cls._enable_default_cache()
        if (cls._requests_session_cached is None) or cls._tmp_disabled:
            cls._request_counter += 1
            return cls._requests_session.post(url, **kwargs)

        if cls._ci_mode:
            # try to return a cached response first
            resp = cls._cached_request(
                'POST', url, only_if_cached=True, **kwargs)
            # 504 indicates that no cached response was found
            if resp.status_code != 504:
                return resp

        cls._request_counter += 1
        return cls._cached_request('POST', url, **kwargs)

    @classmethod
    def _cached_request(cls, method: Literal['GET', 'POST'], url: str,
                        **kwargs):
        if method == 'GET':
            func = cls._requests_session_cached.get
        elif method == 'POST':
            func = cls._requests_session_cached.post
        else:
            raise ValueError("Invalid method. Must be 'GET' or 'POST'.")

        # catch TypeError raised by an outdated requests-cache version if
        # the cache was created with a newer version
        # github.com/requests-cache/requests-cache/issues/973
        try:
            response = func(url, **kwargs)
        except TypeError:
            warnings.warn("You are using an outdated version of "
                          "requests-cache. Consider upgrading.", UserWarning)
            cls._requests_session_cached.cache.delete(urls=[url])
            response = func(url, **kwargs)

        return response

    @classmethod
    def delete_response(cls, url):
        """Deletes a single cached response from the cache, if caching is
        enabled. If caching is not enabled, this call is ignored."""
        if cls._requests_session_cached is not None:
            cls._requests_session_cached.cache.delete(urls=[url])

    @staticmethod
    def _custom_cache_filter(response: requests.Response):
        # this function provides custom filtering to decide which responses
        # get cached

        # workaround for Ergast returning an error with status code 200
        if 'Unable to select database' in response.text:
            return False

        return True
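
    # Illustration (not part of the original module): FastF1 performs its
    # internal API calls through these wrappers, roughly like this:
    #
    #     Cache.enable_cache('path/to/cache')  # directory must already exist
    #     resp = Cache.requests_get('https://ergast.com/api/f1/2021.json')
    #     # an identical repeated request is now served from the sqlite cache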

    @classmethod
    def clear_cache(cls, cache_dir=None, deep=False):
        """Clear all cached data.

        Deletes all files in the cache directory. By default, it will clear
        the default cache directory. However, if a cache directory is
        provided as an argument this will be cleared instead.

        Optionally, the requests cache can be cleared too.

        Can be called without enabling the cache first.

        Deleting specific events or sessions is not supported but can be done
        manually (stage 2 cache). The cached data is structured by year,
        event and session. The structure is more or less self-explanatory.
        To delete specific events or sessions delete the corresponding folder
        within the cache directory. Deleting specific requests from the
        requests cache (stage 1) is not possible. To delete the requests
        cache only, delete the sqlite file in the root of the cache
        directory.

        Args:
            cache_dir (str): Path to the directory which is used to store
                cached data.
            deep (bool): Clear the requests cache (stage 1) too.
        """
        if cache_dir is None:
            if cls._CACHE_DIR is None:
                cache_dir = cls._get_default_cache_path()
            else:
                cache_dir = cls._CACHE_DIR

        # We need to expand the directory to support ~/
        cache_dir = os.path.expandvars(cache_dir)
        cache_dir = os.path.expanduser(cache_dir)
        if not os.path.exists(cache_dir):
            raise NotADirectoryError("Cache directory does not exist!")

        for dirpath, dirnames, filenames in os.walk(cache_dir):
            for filename in filenames:
                if filename.endswith('.ff1pkl'):
                    os.remove(os.path.join(dirpath, filename))

        if deep:
            cache_db_path = os.path.join(cache_dir,
                                         'fastf1_http_cache.sqlite')
            if os.path.exists(cache_db_path):
                os.remove(cache_db_path)
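
    # Illustration (not part of the original module):
    #
    #     Cache.clear_cache('path/to/cache')             # stage 2 only
    #     Cache.clear_cache('path/to/cache', deep=True)  # stages 1 and 2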

    @classmethod
    def api_request_wrapper(cls, func):
        """Wrapper function for adding stage 2 caching to api functions.

        Args:
            func: function to be wrapped

        Returns:
            The wrapped function
        """
        @functools.wraps(func)
        def _cached_api_request(api_path, **func_kwargs):
            if cls._CACHE_DIR and not cls._tmp_disabled:
                # caching is enabled
                func_name = str(func.__name__)
                cache_file_path = cls._get_cache_file_path(api_path,
                                                           func_name)

                if os.path.isfile(cache_file_path):
                    if cls._ci_mode:
                        # skip pickle cache in ci mode so that API parser
                        # code is always executed. Only http cache is active
                        return func(api_path, **func_kwargs)

                    # file exists already, try to load it
                    try:
                        cached = pickle.load(open(cache_file_path, 'rb'))
                    except:  # noqa: E722 (bare except)
                        # don't like the bare exception clause but who knows
                        # which dependency will raise which internal
                        # exception after it was updated
                        cached = None

                    if (cached is not None) and cls._data_ok_for_use(cached):
                        # cached data is ok for use, return it
                        _logger.info(f"Using cached data for {func_name}")
                        return cached['data']

                    else:
                        # cached data needs to be downloaded again and updated
                        _logger.info(f"Updating cache for {func_name}...")
                        data = func(api_path, **func_kwargs)

                        if data is not None:
                            cls._write_cache(data, cache_file_path)
                            _logger.info("Cache updated!")
                            return data

                        _logger.critical(
                            "A cache update is required but the data failed "
                            "to download. Cannot continue!\nYou may force to "
                            "ignore a cache version mismatch by using the "
                            "`ignore_version=True` keyword when enabling the "
                            "cache (not recommended)."
                        )
                        exit()

                else:
                    # cached data does not yet exist for this api request
                    _logger.info(f"No cached data found for {func_name}. "
                                 f"Loading data...")
                    data = func(api_path, **func_kwargs)
                    if data is not None:
                        cls._write_cache(data, cache_file_path)
                        _logger.info("Data has been written to cache!")
                        return data

                    _logger.critical("Failed to load data!")
                    exit()

            else:
                # cache was not enabled
                if not cls._tmp_disabled:
                    cls._enable_default_cache()
                return func(api_path, **func_kwargs)

        return _cached_api_request

    @classmethod
    def _get_cache_file_path(cls, api_path, name):
        # extend the cache dir path using the api path and a file name
        # leading '/static/' is dropped from the api path
        cache_dir_path = os.path.join(cls._CACHE_DIR, api_path[8:])
        if not os.path.exists(cache_dir_path):
            # create subfolders if they don't yet exist
            os.makedirs(cache_dir_path)

        file_name = name + '.ff1pkl'
        cache_file_path = os.path.join(cache_dir_path, file_name)
        return cache_file_path

    @classmethod
    def _data_ok_for_use(cls, cached):
        # check if cached data is ok or needs to be downloaded again
        if cls._FORCE_RENEW:
            return False
        elif cls._IGNORE_VERSION:
            return True
        elif cached['version'] == cls._API_CORE_VERSION:
            return True
        return False

    @classmethod
    def _write_cache(cls, data, cache_file_path, **kwargs):
        new_cached = dict(
            **{'version': cls._API_CORE_VERSION, 'data': data},
            **kwargs
        )
        with open(cache_file_path, 'wb') as cache_file_obj:
            pickle.dump(new_cached, cache_file_obj)

    @classmethod
    def _get_default_cache_path(cls):
        if sys.platform == "linux":
            # If ~/.cache exists, use it. Otherwise, fall back to ~/.fastf1
            tmp = os.path.expanduser("~/.cache")
            if os.path.exists(tmp):
                return r"~/.cache/fastf1"
            else:
                return r"~/.fastf1"
        elif sys.platform == "darwin":
            return r"~/Library/Caches/fastf1"
        elif sys.platform == "win32":
            return r"%LOCALAPPDATA%\Temp\fastf1"
        else:
            return None

    @classmethod
    def _enable_default_cache(cls):
        if not cls._CACHE_DIR and not cls._default_cache_enabled:
            cache_dir = None
            if "FASTF1_CACHE" in os.environ:
                cache_dir = os.environ.get("FASTF1_CACHE")
            else:
                cache_dir = cls._get_default_cache_path()

            if cache_dir is not None:
                # Ensure the default cache folder exists
                cache_dir = os.path.expandvars(cache_dir)
                cache_dir = os.path.expanduser(cache_dir)
                if not os.path.exists(cache_dir):
                    try:
                        os.mkdir(cache_dir, mode=0o0700)
                    except Exception as err:
                        _logger.error(f"Failed to create cache directory "
                                      f"{cache_dir}. Error {err}")
                        raise

                # Enable cache with default
                cls.enable_cache(cache_dir)
                _logger.warning(
                    f"DEFAULT CACHE ENABLED! "
                    f"({cls._convert_size(cls._get_size(cache_dir))}) "
                    f"{cache_dir}"
                )
            else:
                # warn only once and only if cache is not enabled
                _logger.warning(
                    "\n\nNO CACHE! Api caching has not been enabled! \n\t"
                    "It is highly recommended to enable this feature for "
                    "much faster data loading!\n\t"
                    "Use `fastf1.Cache.enable_cache('path/to/cache/')`\n")

            cls._default_cache_enabled = True
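
    # Illustration (not part of the original module): the default cache
    # location can also be set through the environment, e.g. in a shell:
    #
    #     FASTF1_CACHE=/path/to/cache python my_script.py
    #
    # An explicit call to `Cache.enable_cache()` takes precedence over this.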

    @classmethod
    def disabled(cls):
        """Returns a context manager object that creates a context within
        which the cache is temporarily disabled.

        Example::

            with Cache.disabled():
                # no caching takes place here
                ...

        .. note:: The context manager is not multithreading-safe
        """
        return _NoCacheContext()

    @classmethod
    def set_disabled(cls):
        """Disable the cache while keeping the configuration intact.

        This disables stage 1 and stage 2 caching!

        You can enable the cache at any time using :func:`set_enabled`.

        .. note:: You may prefer to use :func:`disabled` to get a context
            manager object and disable the cache only within a specific
            context.

        .. note:: This function is not multithreading-safe
        """
        cls._tmp_disabled = True

    @classmethod
    def set_enabled(cls):
        """Enable the cache after it has been disabled with
        :func:`set_disabled`.

        .. warning::
            To enable the cache it needs to be configured properly. You need
            to call :func:`enable_cache` once to enable the cache initially.
            :func:`set_enabled` and :func:`set_disabled` only serve to
            (temporarily) disable the cache for specific parts of code that
            should be run without caching.

        .. note:: This function is not multithreading-safe
        """
        cls._tmp_disabled = False

    @classmethod
    def offline_mode(cls, enabled: bool):
        """Enable or disable offline mode.

        In this mode, no actual requests will be sent and only cached data is
        returned. This can be useful for freezing the state of the cache or
        working with an unstable internet connection.

        Args:
            enabled: sets the state of offline mode to 'enabled' (``True``)
                or 'disabled' (``False``)
        """
        if cls._requests_session_cached is None:
            cls._enable_default_cache()
        cls._requests_session_cached.settings.only_if_cached = enabled
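
    # Illustration (not part of the original module):
    #
    #     fastf1.Cache.offline_mode(True)   # only cached data is returned
    #     ...                               # no requests are actually sent
    #     fastf1.Cache.offline_mode(False)  # back to normal operation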

    @classmethod
    def ci_mode(cls, enabled: bool):
        """Enable or disable CI mode.

        In this mode, cached requests will be reused even if they are
        expired. Only uncached data will actually be requested and is then
        cached. This means, as long as CI mode is enabled, every request is
        only ever made once and reused indefinitely.

        This serves two purposes. First, reduce the number of requests that
        are sent when a large number of tests is run in parallel, potentially
        in multiple environments simultaneously. Second, make test runs more
        predictable because data usually does not change between runs.

        Additionally, the pickle cache (stage 2) is disabled completely, so
        no parsed data is cached. This means that the API parser code is
        always executed and not skipped due to caching.
        """
        cls._ci_mode = enabled

    @classmethod
    def get_cache_info(cls) -> tuple[Optional[str], Optional[int]]:
        """Returns information about the cache directory and its size.

        If the cache is not configured, None will be returned for both the
        cache path and the cache size.

        Returns:
            A tuple of ``(path, size)`` if the cache is configured, else
            ``(None, None)``. The cache size is given in bytes.
        """
        path = cls._CACHE_DIR
        if path is not None:
            size = cls._get_size(path)
        else:
            size = None

        return path, size
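
    # Illustration (not part of the original module):
    #
    #     path, size = fastf1.Cache.get_cache_info()
    #     if path is not None:
    #         print(f"cache at {path} uses {size} bytes")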

    @classmethod
    def _convert_size(cls, size_bytes):  # https://stackoverflow.com/questions/5194057/better-way-to-convert-file-sizes-in-python  # noqa: E501
        if size_bytes == 0:
            return "0B"

        size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")

        i = int(math.floor(math.log(size_bytes, 1024)))
        p = math.pow(1024, i)
        s = round(size_bytes / p, 2)

        return f"{s} {size_name[i]}"

    @classmethod
    def _get_size(cls, start_path='.'):  # https://stackoverflow.com/questions/1392413/calculating-a-directorys-size-using-python  # noqa: E501
        total_size = 0
        for dirpath, dirnames, filenames in os.walk(start_path):
            for f in filenames:
                fp = os.path.join(dirpath, f)
                # skip if it is symbolic link
                if not os.path.islink(fp):
                    total_size += os.path.getsize(fp)
        return total_size
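
    # Illustration (not part of the original module): for 1536 bytes,
    # floor(log(1536, 1024)) == 1, so the size is rendered as
    # 1536 / 1024**1 == 1.5 -> "1.5 KB"; a value of 512 stays in the
    # "B" bucket and is rendered as "512.0 B".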


class _NoCacheContext:
    def __enter__(self):
        Cache.set_disabled()

    def __exit__(self, exc_type, exc_val, exc_tb):
        Cache.set_enabled()


# TODO: document
class RateLimitExceededError(Exception):
    """Raised if a hard rate limit is exceeded."""
    pass