Skip to content

hostess

aws

aws.ec2

EBS_VOLUME_TYPES = ('gp2', 'gp3', 'io1', 'io2', 'st1', 'sc1') module-attribute

all current-generation EBS volume types

InstanceDescription = dict[str, Union[dict, str]] module-attribute

concise version of an EC2 API Instance data structure (see https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_Instance.html)

InstanceIdentifier = str module-attribute

stringified IP (e.g. '111.11.11.1') or full instance id (e.g. 'i-0243d3f8g0a85cb18'), used as an instance identifier by some functions in this module.

InstanceState = Literal['running', 'stopped', 'terminated', 'stopping', 'pending', 'shutting-down'] module-attribute

valid EC2 instance state names

Cluster

Class offering an interface to multiple EC2 instances at once.

Source code in hostess/aws/ec2.py
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
class Cluster:
    """Class offering an interface to multiple EC2 instances at once."""

    def __init__(self, instances: Collection[Instance]):
        """
        Args:
            instances: Instance objects to incorporate into this Cluster.
        """
        self.instances = tuple(instances)
        self.fleet_request = None

    def _async_method_call(
        self, method_name: str, *args: Any, **kwargs: Any
    ) -> list[Any]:
        """
        Internal wrapper function: make multithreaded calls to a specified
        method of all this Cluster's Instances with shared arguments.

        Args:
             method_name: named method of Instance to call on all our Instances
             *args: args to pass to these method calls
             **kwargs: kwargs to pass to these method calls

        Returns:
            list containing results of method call from each Instance, 
                including raised Exceptions for failed calls
        """
        exc = ThreadPoolExecutor(len(self))
        futures = []
        for instance in self.instances:
            futures.append(
                exc.submit(getattr(instance, method_name), *args, **kwargs)
            )
        while not all(f.done() for f in futures):
            time.sleep(0.01)
        return [
            f.exception() if f.exception() is not None else f.result()
            for f in futures
        ]

    @staticmethod
    def _format_map_arguments(argseq, kwargseq):
        """internal method for preprocessing mapped call arguments"""
        if (argseq is None) and (kwargseq is None):
            raise TypeError("Must pass at least one of argseq or kwargseq.")
        if not any(
                map(lambda x: isinstance(x, (cycle, NoneType)), (argseq, kwargseq))
        ):
            if len(argseq) != len(kwargseq):
                raise ValueError(
                    "sequences of args and kwargs must have matching lengths."
                )
        argseq = cycle([()]) if argseq is None else argseq
        kwargseq = cycle([{}]) if kwargseq is None else kwargseq
        return argseq, kwargseq

    def _async_method_map(
        self,
        method: str,
        argseq: Optional[Union[Sequence[Sequence], cycle]] = None,
        kwargseq: Optional[Union[Sequence[Mapping[str, Any]], cycle]] = None,
        max_concurrent: int = 1,
        task_delay: float | None = None,
        poll: float = 0.03,
    ) -> ServerPool:
        """
        Internal wrapper function: queue calls to a specified method of this
        Cluster's Instances on a `ServerPool`, with arbitrary number and
        homogeneity of arguments.

        Args:
            method: name of method of Instance to call on Instances
            argseq: optional args to pass to these method calls -- one
                sequence of args per task, or a `cycle`. at least one of
                `argseq` and `kwargseq` must be defined.
            kwargseq: optional kwargs to pass to these method calls -- one
                `dict` or other `Mapping` of kwargs per task, or a `cycle`.
            max_concurrent: forwarded to `ServerPool`. forced to 1 when both
                `argseq` and `kwargseq` are unbounded.
            task_delay: forwarded to `ServerPool`.
            poll: forwarded to `ServerPool` (presumably a polling interval
                in seconds -- confirm against `ServerPool`).

        Returns:
            the `ServerPool` to which the calls were applied. This method
                does not close or gather the pool.
        """
        argseq, kwargseq = self._format_map_arguments(argseq, kwargseq)
        pool = ServerPool(self.instances, max_concurrent, poll, task_delay)
        if isinstance(argseq, cycle) and isinstance(kwargseq, cycle):
            # neither sequence bounds the task count, so bound it by our
            # Instances instead: one task apiece. this is an attempt to
            # prevent accidentally mapping an infinite number of tasks.
            pool.max_concurrent = 1
            tasks = (
                bundle[1:]
                for bundle in zip(self.instances, argseq, kwargseq)
            )
        else:
            tasks = zip(argseq, kwargseq)
        for call_args, call_kwargs in tasks:
            pool.apply(method, call_args, call_kwargs)
        return pool

    def _async_transfer_map(
        self,
        method: str,
        argseq: Optional[Union[Sequence[Sequence], cycle]] = None,
        kwargseq: Optional[Union[Sequence[Mapping[str, Any]], cycle]] = None,
    ) -> list[Any]:
        """"
        Internal wrapper function: make multithreaded calls to a specified
        file I/O method of all this Cluster's Instances.

        Args:
            method: name of file I/O method of Instance
            argseq: optional args to pass to calls -- one sequence of args,
                one sequence of args per Instance, or a `cycle`. either
                `args` or `kwargs` must be defined.
            kwargseq: optional kwargs to pass to these method calls -- a single
                `dict` or other `Mapping`, one `Mapping` of kwargs per
                Instance, or a `cycle`.

        Returns:
            list containing result of method call from each Instance,
                including raised Exceptions for failed calls
        """
        # TODO: there may be some formatting redundancy here
        argseq, kwargseq = self._format_map_arguments(argseq, kwargseq)
        exc, futures = ThreadPoolExecutor(len(self)), []
        for args, kwargs, instance in zip(argseq, kwargseq, self.instances):
            futures.append(
                exc.submit(getattr(instance, method), *args, **kwargs)
            )
        while not all(f.done() for f in futures):
            time.sleep(0.01)
        return [
            f.exception() if f.exception() is not None else f.result()
            for f in futures
        ]

    @staticmethod
    def _dispatch_cycle_arguments(argseq, kwargseq):
        if isinstance(argseq, str):
            argseq = cycle(((argseq,),))
        elif (argseq is not None) and (len(argseq) > 0):
            if (
                isinstance(argseq[0], str)
                or not isinstance(argseq[0], Sequence)
            ):
                argseq = cycle((argseq,))
            elif not isinstance(argseq, Sequence):
                raise TypeError("Malformed argseq.")
        if isinstance(kwargseq, Mapping):
            kwargseq = cycle([kwargseq])
        return argseq, kwargseq

    @staticmethod
    def _check_exceptions(results, _permissive, _warn):
        """internal function for selective Exception-raising on async calls."""
        if (_warn is False) and (_permissive is True):
            return
        for exception in filter(lambda x: isinstance(x, Exception), results):
            if _permissive is False:
                raise exception
            if _warn is True:
                warnings.warn(f"{type(exception)}: {exception}")

    def commandmap(
        self,
        argseq: Union[str, Sequence[Any]],
        kwargseq: Optional[
            Union[Mapping[str, Any], Sequence[Mapping[str, Any]]]
        ] = None,
        wait: bool = True,
        max_concurrent: int = 1,
        task_delay: float | None = None
    ) -> Union[list[Viewer], ServerPool]:
        """
        Map a shell command or commands across this `Cluster's` `Instances`
        by asynchronously calling `Instance.command()`, with args and kwargs
        that may vary from call to call. The signature is deliberately
        flexible in order to support many dispatch/map behaviors.

        Notes:
            * By default this method blocks until all tasks have completed,
                unlike `Cluster.command()`. Pass `wait=False` to get back a
                `ServerPool` immediately; you can poll or join it later for
                output.
            * If neither `argseq` nor `kwargseq` specifies a finite number
                of tasks (e.g., `argseq` is a `str` and `kwargseq` is
                `None`), the command is executed once on each instance,
                much as if you had passed the same arguments to
                `Cluster.command()`.
            * If both `argseq` and `kwargseq` specify a finite number of
                tasks (e.g., `argseq` is a `list` of `tuples` and `kwargseq`
                is a `list` of `dicts`), they must have equal length.
            * Task order is always preserved in output. However, if there
                are more than `len(self) * max_concurrent` tasks (e.g.,
                `argseq` is a `list` of 30 `tuples`, `max_concurrent` is 1,
                and this `Cluster` has 4 `Instances`), tasks past the first
                `len(self) * max_concurrent` are not guaranteed to execute
                on any particular instance -- the underlying `ServerPool`
                dispatches pending tasks as instances complete older ones.
                First come, first served.
            * The `_viewer=False` meta-option is ignored. This method always
                returns either `Viewers` or a `ServerPool` that creates
                `Viewers`.
            * The `_disown=True` meta-option is ignored.

        Args:
            argseq: Positional argument(s). May be:

                1. A sequence of sequences of args, like:
                    `[("ls", "/home"), ...]`; each of its elements will be
                    `*`-splatted into a single `Instance.command()` call.
                2. A single sequence of args, like `("ls", "/home")`. This
                    will be `*`-splatted into every `Instance.command()`
                    call.
                3. A single string, like `"ls"`; this string will be passed
                    directly to every `Instance.command()` call.
            kwargseq: Optional keyword argument(s). May be:

                1. A sequence of mappings of kwargs, like:
                    `[{'-a': True, '-l': False}, ...]`; each of its elements
                    will be `**`-splatted into a single `command()` call.
                2. A single mapping of kwargs, like:
                    `{'-a': True, '-l': False}`; these kwargs will be
                    `**`-splatted into every `command()` call.
                3. `None`: no kwargs for anyone.
            wait: if `False`, return a `ServerPool` object that
                asynchronously polls the running processes. Otherwise, block
                until all processes complete and return a list of `Viewers`.
            max_concurrent: maximum number of commands to simultaneously run
                on each instance.
            task_delay: optional minimum interval, in seconds, between which
                subsequent tasks may be assigned to any one instance.

        Returns:
            If `wait` is `True`, a list of `Viewers` produced from
                `Instance.command()` executions. If `wait` is
                `False`, a `ServerPool` object that can be used to interact
                with and retrieve the results of the mapped commands.
        """
        dispatched = self._dispatch_cycle_arguments(argseq, kwargseq)
        pool = self._async_method_map(
            "command", *dispatched, max_concurrent, task_delay
        )
        # no further tasks will be added to this pool
        pool.close()
        return pool if wait is False else pool.gather()

    def pythonmap(
        self,
        argseq: Union[str, Sequence[Any]],
        kwargseq: Optional[
            Union[Mapping[str, Any], Sequence[Mapping[str, Any]]]
        ] = None,
        wait: bool = True,
        max_concurrent: int = 1,
        task_delay: float | None = None
    ) -> Union[list[Union[Viewer, Exception]], ServerPool]:
        """
        Map Python calls across this `Cluster's` `Instances` by
        asynchronously calling `Instance.call_python()`, with args and
        kwargs that may vary from call to call. The calling conventions are
        the same flexible ones used by `Cluster.commandmap()`; see that
        method's documentation for details.

        Args:
            argseq: Positional argument(s) to `Instance.call_python()`.
            kwargseq: Optional keyword argument(s) to
                `Instance.call_python()`.
            wait: if `False`, return a `ServerPool` object that
                asynchronously polls the running processes. Otherwise, block
                until all processes complete and return a list of `Viewers`.
            max_concurrent: maximum number of calls to simultaneously
                perform on each instance.
            task_delay: optional minimum interval, in seconds, between which
                subsequent calls may be assigned to any one instance.

        Returns:
            If `wait` is `True`, a list of `Viewers` produced from
                `Instance.call_python()` calls.  If `wait` is `False`, a
                `ServerPool` object that can be used to interact with and
                retrieve the results of the mapped calls.
        """
        dispatched = self._dispatch_cycle_arguments(argseq, kwargseq)
        pool = self._async_method_map(
            "call_python", *dispatched, max_concurrent, task_delay
        )
        # no further tasks will be added to this pool
        pool.close()
        return pool if wait is False else pool.gather()

    def command(
        self,
        command: str,
        *args: Union[str, int, float],
        _permissive: bool = False,
        _warn: bool = True,
        **kwargs: bool
    ) -> list[Union[Processlike, Exception]]:
        """
        Call a shell command on all this Cluster's Instances. See
        `Instance.command()` for further documentation.

        Args:
            command: command name/string
            _permissive: if False, raise first Exception encountered, if any.
            _warn: if True and `permissive` is True, raise a UserWarning for
                each encountered Exception.
            *args: args to pass to `Instance.command()`
            **kwargs: kwargs to pass to `Instance.command()`.

        Returns:
            list containing result of `command()` from each Instance, 
                including raised Exceptions for failed calls if `permissive`
                is True
        """
        results = self._async_method_call("command", command, *args, **kwargs)
        self._check_exceptions(results, _permissive, _warn)
        return results

    def con(
        self,
        command: str,
        *args: Union[str, int, float],
        _permissive: bool = False,
        _warn: bool = True,
        **kwargs: bool
    ) -> list[Optional[Viewer]]:
        """
        Run a command 'console-style' on all this cluster's instances. See
        `Instance.con()` for further documentation. Note that this doesn't
        perform any kind of managed separation of outputs from different
        instances, so it can get pretty visually messy for commands that write
        to stdout/stderr multiple times.

        Args:
            command: command name/string
            _permissive: if False, raise first Exception encountered, if any.
            _warn: if True and `permissive` is True, raise a UserWarning for
                each encountered Exception.
            *args: args to pass to `Instance.con()`
            **kwargs: kwargs to pass to `Instance.con()`.

        Returns:
            list containing results of `con()` from each Instance, including
                raised Exceptions for failed calls if `permissive` is True
        """
        results = self._async_method_call("con", command, *args, **kwargs)
        self._check_exceptions(results, _permissive, _warn)
        return results

    def commands(
        self,
        commands: Sequence[str],
        op: Literal["and", "xor", "then"] = "then",
        _con: bool = False,
        _permissive: bool = False,
        _warn: bool = True,
        **kwargs: bool,
    ) -> list[Union[Processlike, Exception]]:
        """
        Call a sequence of shell commands on all this Cluster's Instances. See
        `Instance.commands()` for further documentation.

        Args:
            commands: command names/strings
            op: logical operator to connect commands.
            _con: run 'console-style', pretty-printing rather than
                returning output (will look message with lots of Instances)
            _permissive: if False, raise first Exception encountered, if any.
            _warn: if True and `permissive` is True, raise a UserWarning for
                each encountered Exception.
            **kwargs: kwargs to pass to `Instance._ssh()`.
                Only meta-options are recommended.

        Returns:
            list containing result of `commands()` from each Instance, 
                including raised Exceptions for failed calls if `permissive`
                is True
        """
        results = self._async_method_call(
            "commands", commands, op, _con, **kwargs
        )
        self._check_exceptions(results, _permissive, _warn)
        return results

    def call_python(
        self,
        module: str,
        func: Optional[str] = None,
        payload: Any = None,
        _permissive: bool = False,
        _warn: bool = True,
        **kwargs: Union[
            bool,
            str,
            CallerCompressionType,
            CallerSerializationType,
            CallerUnpackingOperator,
        ],
    ) -> list[Processlike]:
        """
        Call a Python function on all this Cluster's Instances. See
        `Instance.call_python()` for further documentation.

        Args:
            module: name of, or path to, the target module
            func: name of the function to call.
            payload: object from which to constrct func's call arguments.
            _permissive: if False, raise first Exception encountered, if any.
            _warn: if True and `permissive` is True, raise a 
                UserWarning for each encountered Exception.
            **kwargs: kwargs to pass to `Instance.call_python()`

        Returns:
            list containing results of `call_python()` from each Instance,
                including raised Exceptions for failed calls if `permissive`
                is True
        """
        results = self._async_method_call(
            "call_python", module, func, payload, **kwargs
        )
        self._check_exceptions(results, _permissive, _warn)
        return results

    def connect(self, maxtries: int = 10, delay: float = 1):
        """
        establish SSH connections to all instances, prepping new connections
        when none currently exist, but not replacing existing ones.

        Args:
            maxtries: maximum times to re-attempt failed connections
            delay: how many seconds to wait after failed attempts
        """
        return self._async_method_call(
            "_prep_connection", lazy=False, maxtries=maxtries, delay=delay
        )

    def update(self) -> list:
        """update basic information for instances."""
        return self._async_method_call("update")

    def start(self, *args, **kwargs) -> list:
        """
        Start all Instances. See `Instance.start()` for further documentation,
        including valid arguments.

        Returns:
            list containing results of `start()` from each Instance.
        """
        return self._async_method_call("start", *args, **kwargs)

    def stop(self, *args, **kwargs) -> list:
        """
        Stop all Instances. See `Instance.stop()` for further documentation,
        including valid arguments.

        Returns:
            list containing results of `stop()` from each Instance.
        """
        return self._async_method_call("stop", *args, **kwargs)

    def terminate(self, *args, **kwargs) -> list:
        """
        Terminate all Instances. See `Instance.terminate()` for further
        documentation  / valid arg and kwargs.

        Returns:
            list containing results of `terminate()` from each Instance.
        """
        return self._async_method_call("terminate", *args, **kwargs)

    def put(
        self,
        source: Union[str, Path, IO, bytes],
        target: Union[str, Path],
        *args: Any,
        literal_str: bool = False,
        _permissive: bool = False,
        _warn: bool = True,
        **kwargs: Any,
    ) -> list[Union[dict, Exception]]:
        """
        write local files or in-memory data to target files on instances.

        Args:
            source: filelike object, path to local file, string, or bytestring
                (shared between all instances), or a sequence of such objects,
                one per instance. note that if this is a single filelike
                object, it will be read into memory and closed before sending
                its contents to the instances.
            target: write path (shared between all instances), or a sequence
                of write paths, one per instance.
            _permissive: if False, raise first Exception encountered, if any.
            _warn: if True and `permissive` is True, raise a UserWarning for
                each encountered Exception.
            args: additional arguments to pass to underlying put method
            literal_str: if True and `source` is a `str`, write `source`
                into `target` as text rather than interpreting `source` as a
                path to a local file.
            **kwargs: kwargs to pass to underlying get method

        Returns:
            list of dicts giving transfer metadata: local, remote, host, port,
                including Exceptions for falled puts if _permissive is True.
        """
        if isinstance(target, (str, Path)):
            target = cycle((target,))
        elif len(target) != len(self.instances):
            raise ValueError(
                "a sequence of targets must have the same length as instances"
            )
        # upstream implementation makes it impossible to reuse buffers
        if isinstance(source, IO):
            contents = cycle((source.read(),))
            source.close()
            source = contents
        elif isinstance(source, (str, Path, bytes)):
            source = cycle((source,))
        elif len(source) != len(self.instances):
            raise ValueError(
                "a sequence of sources must have the same length as instances"
            )
        kwargs['literal_str'] = literal_str
        results = self._async_transfer_map(
            "put",
            # we need self.instances to bound length; s & t can both be cycles.
            [(s, t, *args) for s, t, _ in zip(source, target, self.instances)],
            cycle((kwargs,))
        )
        self._check_exceptions(results, _permissive, _warn)
        return results

    def get(
        self,
        source: Union[Sequence[Union[str, Path]], str, Path],
        target: Union[str, Path, IO, Sequence[Union[str, Path, IO]]],
        _permissive: bool = False,
        _warn: bool = True,
        **kwargs: Any,
    ) -> list[Union[dict, Exception]]:
        """
        copy files from instances to local.

        Args:
            source: path to file (shared between all instances), or a sequence
                of paths to files on instances, one per instance
            target: path to local file, or a filelike object (such as
                io.BytesIO), or a sequence of such things, one per instance.
                if `target` is a path to a local file, one separate file, with
                incrementing suffixes, will be written per instance.
            _permissive: if False, raise first Exception encountered, if any.
            _warn: if True and `permissive` is True, raise a UserWarning for
                each encountered Exception.
            **kwargs: kwargs to pass to underlying get method

        Returns:
            list of dicts giving transfer metadata: local, remote, host, port,
                including Exceptions for falled gets if _permissive is True.
        """
        if isinstance(target, (str, Path)):
            t = Path(target)
            target = [
                f"{t.absolute().parent}/{t.stem}_{i}{t.suffix}"
                for i in range(len(self))
            ]
        elif len(target) != len(self.instances):
            raise ValueError(
                "a sequence of targets must have the same length as instances"
            )
        if isinstance(source, (str, Path)):
            source = cycle((source,))
        elif len(source) != len(self.instances):
            raise ValueError(
                "a sequence of sources must have the same length as instances"
            )
        results = self._async_transfer_map(
            "get", [(s, t) for s, t in zip(source, target)], cycle((kwargs,))
        )
        self._check_exceptions(results, _permissive, _warn)
        return results

    # TODO: a bit messy
    def read(
        self,
        source: Union[Sequence[Union[str, Path]], str, Path],
        mode: Union[Literal["r", "rb"], Sequence[Literal["r", "rb"]]] = "r",
        encoding: str = "utf-8",
        as_buffer: bool = False,
        concatenate: bool = False,
        separator: Optional[Union[str, bytes]] = None,
        _permissive: bool = True,
        _warn: bool = True,
        **kwargs: Any,
    ) -> Union[
        io.BytesIO,
        io.StringIO,
        bytes,
        str,
        Sequence[Union[io.BytesIO, io.StringIO, bytes, str]],
    ]:
        """
        read files from instances directly into memory.

        Args:
            source: path to file (same path on all instances), or a sequence
                of paths to files on instances, one per instance
            mode: "r" for text, "rb" for binary, or a sequence of those, one
                per instance. must be a single value if `concatenate` is True.
            encoding: encoding for text files. also used to convert
                `separator` between str and bytes when its type does not
                match `mode`.
            as_buffer: if True, return BytesIO/StringIO instead of bytes/str
            concatenate: if True, concatenate contents of all files into a
                single object (preserving order), rather than returning them
                as a list
            separator: if `concatenate` is True, separate results from
                different files with this string/bytestring. ignored if
                `concatenate` is False. if None, just stick them together.
            _permissive: if False, raise the first Exception encountered, if
                any. When True, and concatenating, simply concatenate only
                successful reads (meaning Exceptions will be discarded).
            _warn: if True and `_permissive` is True, raise a UserWarning for
                each encountered Exception.
            **kwargs: kwargs to pass to underlying read method

        Returns:
            if `concatenate` is False: a list of contents, one element per
                instance, including raised Exceptions for failed calls if
                `_permissive` is True. if `concatenate` is True: a single
                object containing concatenated contents, with failed reads
                omitted if `_permissive` is True. (This will be a 0-length
                string/bytestring/buffer if all reads failed.)  type of
                elements/concatenated object depends on `mode` and
                `as_buffer`.

        Raises:
            TypeError: if `concatenate` is True and `mode` is not a single
                string.
            ValueError: if `source` or `mode` is a sequence whose length
                differs from the number of instances.
        """
        if concatenate is True and not isinstance(mode, str):
            raise TypeError("specify a single mode if concatenate is True.")
        if isinstance(source, (str, Path)):
            source = cycle((source,))
        elif len(source) != len(self.instances):
            raise ValueError(
                "a sequence of sources must have the same length as instances"
            )
        # Remember the scalar mode (if there is one) before building the
        # per-instance iterable: the concatenation branch below needs to
        # compare against it, and a `cycle` never compares equal to "rb".
        single_mode = mode if isinstance(mode, str) else None
        if single_mode is not None:
            modes = cycle((single_mode,))
        elif len(mode) != len(self.instances):
            raise ValueError(
                "a sequence of modes must have the same length of instances"
            )
        else:
            modes = mode
        # the trailing True asks each instance for a buffer; buffers are
        # unwrapped below unless the caller wants them
        results = self._async_transfer_map(
            "read",
            [
                (s, m, encoding, True)
                for s, m, _ in zip(source, modes, self.instances)
            ],
            cycle((kwargs,)),
        )
        self._check_exceptions(results, _permissive, _warn)
        if concatenate is False:
            if as_buffer is True:
                return results
            return [
                r if isinstance(r, Exception) else r.read() for r in results
            ]
        # concatenating: failed reads are dropped rather than returned
        output = [r.read() for r in results if not isinstance(r, Exception)]
        binary = single_mode == "rb"
        if separator is None:
            separator = b"" if binary else ""
        elif binary and isinstance(separator, str):
            separator = separator.encode(encoding)
        elif single_mode == "r" and isinstance(separator, bytes):
            separator = separator.decode(encoding)
        output = separator.join(output)
        if as_buffer is False:
            return output
        if binary:
            return io.BytesIO(output)
        return io.StringIO(output)

    def read_csv(
        self,
        source: Union[Sequence[Union[str, Path]], str, Path],
        encoding: str = "utf-8",
        add_identifiers: bool = True,
        reset_index: bool = True,
        _permissive: bool = False,
        _warn: bool = False,
        **csv_kwargs: Any,
    ) -> pd.DataFrame:
        """
        read CSV-like files from all instances and concatenate them row-wise
            (stacking the rows of each file) into a pandas DataFrame,
            optionally adding identifier columns.

        Args:
            source: path to CSV-like file (same path on all instances), or
                a sequence of such paths, one per instance
            encoding: text encoding for CSV-like files
            add_identifiers: if True, add "name" and "id" columns to the
                returned DataFrame indicating which instances produced which
                rows.
            reset_index: if True, reset index and drop original indices.
            _permissive: if False, raise first Exception encountered during
                read, if any. When True, simply ignore read Exceptions,
                meaning the `DataFrame` will contain no output from instances
                on which calls failed and all Exceptions will be discarded.
                (This will result in an empty `DataFrame` if all reads failed.)
                This option does *not* suppress Exceptions encountered during
                `DataFrame` construction or concatenation.
            _warn: if True and `_permissive` is True, raise a UserWarning for
                each encountered read Exception.
            **csv_kwargs: kwargs to pass to pd.read_csv

        Returns:
            A `DataFrame` containing concatenated contents of all files,
                optionally including identifying information for source
                instances.
        """
        buffers = self.read(
            source,
            encoding=encoding,
            _permissive=_permissive,
            _warn=_warn,
            as_buffer=True,
        )
        frames = []
        for buffer, instance in zip(buffers, self.instances):
            if isinstance(buffer, Exception):
                # failed read (only reachable when _permissive is True)
                continue
            df = pd.read_csv(buffer, **csv_kwargs)
            if add_identifiers is True:
                df[['name', 'id']] = instance.name, instance.instance_id
            frames.append(df)
        if not frames:
            # pd.concat() raises ValueError on an empty list; honor the
            # documented "empty DataFrame if all reads failed" contract.
            return pd.DataFrame()
        concatenated = pd.concat(frames)
        if reset_index is False:
            return concatenated
        return concatenated.reset_index(drop=True)

    def install_conda(
        self,
        installer_url: str = CONDA_DEFAULTS['installer_url'],
        prefix: str = CONDA_DEFAULTS['prefix'],
        _permissive: bool = False,
        _warn: bool = False,
        **kwargs: bool
    ) -> list[Union[Processlike, Exception]]:
        """
        Run `Instance.install_conda()` on every instance of this Cluster.

        Args:
            installer_url: url of the Conda install script. Defaults to
                `CONDA_DEFAULTS['installer_url']`.
            prefix: path for the Conda installation. Defaults to
                `CONDA_DEFAULTS['prefix']`.
            _permissive: if False, raise first Exception encountered, if any.
            _warn: if True and `_permissive` is True, raise a UserWarning for
                each encountered Exception.
            kwargs: additional kwargs forwarded unchanged to each
                `Instance.install_conda()` call.

        Returns:
            One entry per instance: whatever `Instance.install_conda()`
                returned, or the Exception it raised (Exceptions are only
                returned rather than raised when `_permissive` is True).
        """
        outcomes = self._async_method_call(
            "install_conda",
            installer_url,
            prefix,
            **kwargs,
        )
        # raises and/or warns according to the two flags; no-op otherwise
        self._check_exceptions(outcomes, _permissive, _warn)
        return outcomes

    @classmethod
    def from_descriptions(
        cls,
        descriptions: Collection[InstanceDescription],
        **kwargs: Union[
            str,
            Path,
            botocore.client.BaseClient,
            boto3.resources.base.ServiceResource,
            boto3.Session,
            bool,
        ],
    ) -> "Cluster":
        """
        Build a Cluster out of InstanceDescriptions (such as those produced
        by `ls_instances()`).

        Args:
            descriptions: one InstanceDescription per Instance to create.
            **kwargs: kwargs passed, unchanged, to every Instance constructor
                call.

        Returns:
            a Cluster holding one Instance per description, in the order the
                descriptions were given.
        """
        return cls([Instance(desc, **kwargs) for desc in descriptions])

    @classmethod
    def launch(
        cls,
        count: int,
        template: Optional[str] = None,
        options: Optional[dict] = None,
        tags: Optional[dict] = None,
        client: Optional[botocore.client.BaseClient] = None,
        session: Optional[boto3.Session] = None,
        wait: bool = True,
        connect: bool = False,
        maxtries: int = 40,
        increment_names: bool = True,
        verbose: bool = True,
        **instance_kwargs: Union[
            str,
            botocore.client.BaseClient,
            boto3.resources.base.ServiceResource,
            boto3.Session,
            Path,
            bool,
        ],
    ) -> "Cluster":
        """
        Launch a fleet of Instances and collect them into a Cluster. See
        hostess documentation Notebooks for examples of how to construct
        options, tags, etc.

        Args:
            count: number of instances to launch
            template: name of preexisting EC2 launch template to construct
                instances from. if not specified, will use a 'scratch'
                template.
            options: optional dict of specifications for Instances. if not
                specified, will use default values for everything. Currently,
                if a template is specified, it will override all values in
                options. This will change in the future. The passed dict is
                copied, not modified.
            tags: optional tags to apply to the launched instances and their
                associated EBS volumes (if any).
            client: optional preexisting EC2 client.
            session: optional preexisting boto session.
            wait: if True, block after launch until all instances are running.
                overrides connect=True if False.
            connect: if True, block after launch until all instances are
                connectable.
            maxtries: how many times to try to connect to the instances if
                connect=True (waiting 1s between each attempt)
            increment_names: if True and count > 1, suffix incrementing
                integers to the names of each instance in the Cluster,
                and, if they had Name tags already, add a "ClusterName" tag
                indicating the base name
            verbose: if True, print launch progress to stdout
            **instance_kwargs: kwargs to pass to the Instance constructor.
                `type_` and `name`, if given, are moved into `options` as
                `instance_type` and `instance_name` instead.

        Returns:
            a Cluster created from the newly-launched fleet.

        Raises:
            ValueError: if `count` < 1, or if the fleet request reports no
                launched instances.
            TimeoutError: if the instances launched but could not be
                described afterwards.
        """
        print_ = print if verbose is True else lambda *_, **__: None
        if count < 1:
            raise ValueError("count must be >= 1.")
        client = init_client("ec2", client, session)
        # copy, so that a dict owned by the caller is never mutated
        options = {} if options is None else dict(options)
        options["verbose"] = verbose
        # TODO: add a few more conveniences, clean up
        if instance_kwargs.get("type_") is not None:
            options["instance_type"] = instance_kwargs.pop("type_")
        if instance_kwargs.get("name") is not None:
            options["instance_name"] = instance_kwargs.pop("name")
        if template is None:
            using_scratch_template = True
            template = create_launch_template(**options)["LaunchTemplateName"]
            if options.get("image_id") is None:
                # we're always using a stock Canonical image in this case, so
                # note that we're forcing uname to 'ubuntu':
                print_(
                    "Using stock Canonical image, so setting uname to "
                    "'ubuntu'."
                )
                instance_kwargs["uname"] = "ubuntu"
        else:
            using_scratch_template = False
        if tags is not None:
            tagrecs = [{"Key": k, "Value": v} for k, v in tags.items()]
            tag_kwarg = {
                "TagSpecifications": [
                    {"ResourceType": "instance", "Tags": tagrecs},
                    {"ResourceType": "volume", "Tags": tagrecs},
                ]
            }
        else:
            tag_kwarg = {}
        try:
            fleet = client.create_fleet(
                LaunchTemplateConfigs=[
                    {
                        "LaunchTemplateSpecification": {
                            "LaunchTemplateName": template,
                            "Version": "$Default",
                        }
                    }
                ],
                TargetCapacitySpecification={
                    "TotalTargetCapacity": count,
                    "OnDemandTargetCapacity": count,
                    "DefaultTargetCapacityType": "on-demand",
                },
                Type="instant",
                **tag_kwarg,
            )
        finally:
            # the scratch template is single-use: remove it whether or not
            # the fleet request succeeded
            if using_scratch_template is True:
                client.delete_launch_template(LaunchTemplateName=template)
        # note that we do not want to raise these all the time, because the
        # API frequently dumps a lot of harmless info in here.
        launch_errors = fleet.get("Errors", [])
        try:
            instance_ids = fleet["Instances"][0]["InstanceIds"]
        except (KeyError, IndexError):
            instance_ids = []
        n_instances = len(instance_ids)
        if n_instances == 0:
            raise ValueError(
                f"No instances appear to have launched. "
                f"Client returned error(s):\n\n{launch_errors}"
            )
        if n_instances != count:
            warnings.warn(
                f"fewer instances appear to have launched than "
                f"requested ({n_instances} vs. {count}). Check the 'Errors' "
                f"key of this Cluster's 'fleet_request' attribute."
            )

        # The reported ids are presumably not always visible to
        # DescribeInstances right away, so retry a few times on "does not
        # exist" errors. Any other client error ends the retries at once.
        instances, last_error = None, None
        for _ in range(5):
            try:
                instances = _instances_from_ids(
                    instance_ids, client=client, **instance_kwargs
                )
                break
            except botocore.exceptions.ClientError as ce:
                last_error = ce
                if "does not exist" not in str(ce):
                    break
                time.sleep(0.2)
        if instances is None:
            raise TimeoutError(
                "launched instances successfully, but unable to run "
                "DescribeInstances. Perhaps permissions are wrong. "
                "Reported instance ids:\n"
                + "\n".join(instance_ids)
            ) from last_error
        # use cls (as from_descriptions does) so subclasses get instances of
        # their own type
        cluster = cls(instances)
        cluster.fleet_request = fleet
        noun = "fleet" if count > 1 else "instance"
        # TODO: async?
        if (increment_names is True) and (count > 1):
            if (basename := instances[0].tags.get("Name")) is not None:
                client.create_tags(
                    Resources=[i.instance_id for i in instances],
                    Tags=[{"Key": "ClusterName", "Value": basename}],
                )
            for i, instance in enumerate(instances):
                instance.rename(instance.tags.get("Name", "") + str(i))
        if wait is False:
            print_(f"launched {noun}; wait=False passed, not checking status")
            return cluster
        if connect is True:
            print_(f"launched {noun}; waiting until connectable")
        else:
            print_(f"launched {noun}; waiting until running")
        # TODO: also async?
        for instance in cluster.instances:
            if connect is False:
                instance.wait_until_running()
                print_(f"{instance} is running")
            else:
                instance.wait_on_connection(maxtries)
                print_(f"connected to {instance}")
        return cluster

    def price_per_hour(self):
        """
        Total the hourly prices reported by this Cluster's Instances.

        Returns:
            dict with keys "running" and "stopped", each the sum of the
                corresponding entry of every Instance's `price_per_hour()`.
        """
        per_instance = [member.price_per_hour() for member in self.instances]
        return {
            state: sum(quote[state] for quote in per_instance)
            for state in ("running", "stopped")
        }

    def rebase_ssh_ingress_ip(
        self,
        ip: Optional[str] = None,
        force: bool = False,
        revoke: bool = True,
    ) -> list[None]:
        """
        Modify all security groups associated with all of this Cluster's
        instances to permit SSH access from an IP.

        IMPORTANT: by default, this method revokes all other inbound access
        permissions, because it is good security practice to not slowly
        whitelist the entire world. Pass `revoke=False` if there are
        permissions you need to retain.

        Args:
            ip: permit SSH access from this IP. if None, use the caller's
                external IP.
            force: if True, will force modification even of default security
                groups.
            revoke: if True, will revoke all other inbound permissions.

        Returns:
            list of None (since the Instance method doesn't return anything)
        """
        shared_kwargs = {"ip": ip, "force": force, "revoke": revoke}
        return self._async_method_call(
            "rebase_ssh_ingress_ip", **shared_kwargs
        )

    def __getitem__(self, item):
        """
        Look up Instance(s) by position or by identifier.

        Args:
            item: an int or slice, used to index `self.instances` directly;
                or any other value, compared against each Instance's `name`,
                then `ip`, then `instance_id`.

        Returns:
            For an int/slice, the result of indexing `self.instances`.
                Otherwise, for the first attribute with any match: the single
                matching Instance, or a list of Instances if several match.

        Raises:
            KeyError: if `item` matches no Instance on any attribute.
        """
        if isinstance(item, (int, slice)):
            return self.instances[item]
        for attr in ("name", "ip", "instance_id"):
            matches = [i for i in self.instances if getattr(i, attr) == item]
            if len(matches) == 1:
                return matches[0]
            if len(matches) > 0:
                return matches
        # include the key so the error says what was not found
        raise KeyError(item)

    def __repr__(self):
        """Return the reprs of all Instances, one per line."""
        return "\n".join(repr(member) for member in self.instances)

    def __str__(self):
        """Return the string forms of all Instances, one per line."""
        return "\n".join(str(member) for member in self.instances)

    def __len__(self):
        """Return the number of Instances held by this Cluster."""
        members = self.instances
        return len(members)
__init__(instances)

Parameters:

Name Type Description Default
instances Collection[Instance]

Instance objects to incorporate into this Cluster.

required
Source code in hostess/aws/ec2.py
1295
1296
1297
1298
1299
1300
1301
def __init__(self, instances: Collection[Instance]):
    """
    Create a Cluster from already-constructed Instance objects.

    Args:
        instances: Instance objects to incorporate into this Cluster; they
            are stored, in iteration order, as a tuple.
    """
    members = tuple(instances)
    self.instances = members
    # no fleet request is associated with a directly-constructed Cluster
    self.fleet_request = None
_async_method_call(method_name, *args, **kwargs)

Internal wrapper function: make multithreaded calls to a specified method of all this Cluster's Instances with shared arguments.

Parameters:

Name Type Description Default
method_name str

named method of Instance to call on all our Instances

required
*args Any

args to pass to these method calls

()
**kwargs Any

kwargs to pass to these method calls

{}

Returns:

Type Description
list[Any]

list containing results of method call from each Instance, including raised Exceptions for failed calls

Source code in hostess/aws/ec2.py
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
def _async_method_call(
    self, method_name: str, *args: Any, **kwargs: Any
) -> list[Any]:
    """
    Internal wrapper function: make multithreaded calls to a specified
    method of all this Cluster's Instances with shared arguments.

    Args:
         method_name: named method of Instance to call on all our Instances
         *args: args to pass to these method calls
         **kwargs: kwargs to pass to these method calls

    Returns:
        list containing results of method call from each Instance,
            including raised Exceptions for failed calls. Empty if this
            Cluster has no Instances.
    """
    if len(self) == 0:
        # ThreadPoolExecutor rejects max_workers=0; nothing to call anyway
        return []
    # one worker per Instance, so every call starts immediately. Leaving the
    # `with` block waits for all calls and releases the worker threads.
    with ThreadPoolExecutor(len(self)) as exc:
        futures = [
            exc.submit(getattr(instance, method_name), *args, **kwargs)
            for instance in self.instances
        ]
    results = []
    for future in futures:
        error = future.exception()
        results.append(future.result() if error is None else error)
    return results
_async_method_map(method, argseq=None, kwargseq=None, max_concurrent=1, task_delay=None, poll=0.03)

Internal wrapper function: make multithreaded calls to a specified method of this Cluster's Instances with arbitrary number and homogeneity of arguments.

Parameters:

Name Type Description Default
method str

name of method of Instance to call on Instances

required
argseq Optional[Union[Sequence[Sequence], cycle]]

optional args to pass to these method calls -- one sequence of args per Instance. either args or kwargs must be defined.

None
kwargseq Optional[Union[Sequence[Mapping[str, Any]], cycle]]

optional kwargs to pass to these method calls -- one dict or other Mapping of kwargs per Instance.

None

Returns:

Type Description
ServerPool

list containing result of method call from each Instance, including raised Exceptions for failed calls

Source code in hostess/aws/ec2.py
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
def _async_method_map(
    self,
    method: str,
    argseq: Optional[Union[Sequence[Sequence], cycle]] = None,
    kwargseq: Optional[Union[Sequence[Mapping[str, Any]], cycle]] = None,
    max_concurrent: int = 1,
    task_delay: float | None = None,
    poll: float = 0.03,
) -> ServerPool:
    """
    Internal wrapper function: queue calls to a named method of this
    Cluster's Instances on a `ServerPool`, with per-task arguments.

    Args:
        method: name of method of Instance to call on Instances
        argseq: optional positional args for the calls -- one sequence of
            args per task, or a `cycle`. At least one of `argseq` and
            `kwargseq` must be given.
        kwargseq: optional kwargs for the calls -- one `Mapping` of kwargs
            per task, or a `cycle`.
        max_concurrent: passed to the `ServerPool` constructor; forced to 1
            when both `argseq` and `kwargseq` are endless.
        task_delay: passed to the `ServerPool` constructor.
        poll: passed to the `ServerPool` constructor.

    Returns:
        the `ServerPool` to which the tasks were applied.
    """
    argseq, kwargseq = self._format_map_arguments(argseq, kwargseq)
    pool = ServerPool(self.instances, max_concurrent, poll, task_delay)
    if isinstance(argseq, cycle) and isinstance(kwargseq, cycle):
        # both sequences are endless: bound the work at exactly one task
        # per Instance rather than mapping an infinite number of tasks
        pool.max_concurrent = 1
        tasks = (
            (task_args, task_kwargs)
            for _, task_args, task_kwargs in zip(
                self.instances, argseq, kwargseq
            )
        )
    else:
        # at least one sequence is finite, so zip stops by itself
        tasks = zip(argseq, kwargseq)
    for task_args, task_kwargs in tasks:
        pool.apply(method, task_args, task_kwargs)
    return pool
_async_transfer_map(method, argseq=None, kwargseq=None)

" Internal wrapper function: make multithreaded calls to a specified file I/O method of all this Cluster's Instances.

Parameters:

Name Type Description Default
method str

name of file I/O method of Instance

required
argseq Optional[Union[Sequence[Sequence], cycle]]

optional args to pass to calls -- one sequence of args, one sequence of args per Instance, or a cycle. either args or kwargs must be defined.

None
kwargseq Optional[Union[Sequence[Mapping[str, Any]], cycle]]

optional kwargs to pass to these method calls -- a single dict or other Mapping, one Mapping of kwargs per Instance, or a cycle.

None

Returns:

Type Description
list[Any]

list containing result of method call from each Instance, including raised Exceptions for failed calls

Source code in hostess/aws/ec2.py
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
def _async_transfer_map(
    self,
    method: str,
    argseq: Optional[Union[Sequence[Sequence], cycle]] = None,
    kwargseq: Optional[Union[Sequence[Mapping[str, Any]], cycle]] = None,
) -> list[Any]:
    """
    Internal wrapper function: make multithreaded calls to a specified
    file I/O method of all this Cluster's Instances.

    Args:
        method: name of file I/O method of Instance
        argseq: optional args to pass to calls -- one sequence of args per
            Instance, or a `cycle`. either `argseq` or `kwargseq` must be
            defined.
        kwargseq: optional kwargs to pass to these method calls -- one
            `Mapping` of kwargs per Instance, or a `cycle`.

    Returns:
        list containing result of method call from each Instance,
            including raised Exceptions for failed calls. Empty if this
            Cluster has no Instances.
    """
    # TODO: there may be some formatting redundancy here
    argseq, kwargseq = self._format_map_arguments(argseq, kwargseq)
    if len(self) == 0:
        # ThreadPoolExecutor rejects max_workers=0; nothing to call anyway
        return []
    # one worker per Instance. Leaving the `with` block waits for every
    # call to finish and releases the worker threads.
    with ThreadPoolExecutor(len(self)) as exc:
        futures = [
            exc.submit(getattr(instance, method), *args, **kwargs)
            for args, kwargs, instance in zip(
                argseq, kwargseq, self.instances
            )
        ]
    results = []
    for future in futures:
        error = future.exception()
        results.append(future.result() if error is None else error)
    return results
_check_exceptions(results, _permissive, _warn) staticmethod

internal function for selective Exception-raising on async calls.

Source code in hostess/aws/ec2.py
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
@staticmethod
def _check_exceptions(results, _permissive, _warn):
    """
    Internal function: selectively raise or warn about Exceptions found in
    the results of async calls.

    Raises the first Exception in `results` if `_permissive` is False;
    otherwise issues a warning per Exception if `_warn` is True; otherwise
    does nothing.
    """
    silent = (_permissive is True) and (_warn is False)
    if silent:
        return
    for outcome in results:
        if not isinstance(outcome, Exception):
            continue
        if _permissive is False:
            raise outcome
        if _warn is True:
            warnings.warn(f"{type(outcome)}: {outcome}")
_format_map_arguments(argseq, kwargseq) staticmethod

internal method for preprocessing mapped call arguments

Source code in hostess/aws/ec2.py
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
@staticmethod
def _format_map_arguments(argseq, kwargseq):
    """internal method for preprocessing mapped call arguments"""
    if (argseq is None) and (kwargseq is None):
        raise TypeError("Must pass at least one of argseq or kwargseq.")
    if not any(
            map(lambda x: isinstance(x, (cycle, NoneType)), (argseq, kwargseq))
    ):
        if len(argseq) != len(kwargseq):
            raise ValueError(
                "sequences of args and kwargs must have matching lengths."
            )
    argseq = cycle([()]) if argseq is None else argseq
    kwargseq = cycle([{}]) if kwargseq is None else kwargseq
    return argseq, kwargseq
call_python(module, func=None, payload=None, _permissive=False, _warn=True, **kwargs)

Call a Python function on all this Cluster's Instances. See Instance.call_python() for further documentation.

Parameters:

Name Type Description Default
module str

name of, or path to, the target module

required
func Optional[str]

name of the function to call.

None
payload Any

object from which to construct func's call arguments.

None
_permissive bool

if False, raise first Exception encountered, if any.

False
_warn bool

if True and permissive is True, raise a UserWarning for each encountered Exception.

True
**kwargs Union[bool, str, CallerCompressionType, CallerSerializationType, CallerUnpackingOperator]

kwargs to pass to Instance.call_python()

{}

Returns:

Type Description
list[Processlike]

list containing results of call_python() from each Instance, including raised Exceptions for failed calls if permissive is True

Source code in hostess/aws/ec2.py
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
def call_python(
    self,
    module: str,
    func: Optional[str] = None,
    payload: Any = None,
    _permissive: bool = False,
    _warn: bool = True,
    **kwargs: Union[
        bool,
        str,
        CallerCompressionType,
        CallerSerializationType,
        CallerUnpackingOperator,
    ],
) -> list[Processlike]:
    """
    Run `Instance.call_python()` on every Instance of this Cluster with the
    same arguments. See `Instance.call_python()` for further documentation.

    Args:
        module: name of, or path to, the target module
        func: name of the function to call.
        payload: object from which to construct func's call arguments.
        _permissive: if False, raise first Exception encountered, if any.
        _warn: if True and `_permissive` is True, raise a UserWarning for
            each encountered Exception.
        **kwargs: kwargs to pass to `Instance.call_python()`

    Returns:
        one entry per Instance: the result of its `call_python()`, or the
            Exception it raised (Exceptions are only returned rather than
            raised when `_permissive` is True).
    """
    positional = (module, func, payload)
    outcomes = self._async_method_call("call_python", *positional, **kwargs)
    # raises and/or warns according to the two flags; no-op otherwise
    self._check_exceptions(outcomes, _permissive, _warn)
    return outcomes
command(command, *args, _permissive=False, _warn=True, **kwargs)

Call a shell command on all this Cluster's Instances. See Instance.command() for further documentation.

Parameters:

Name Type Description Default
command str

command name/string

required
_permissive bool

if False, raise first Exception encountered, if any.

False
_warn bool

if True and permissive is True, raise a UserWarning for each encountered Exception.

True
*args Union[str, int, float]

args to pass to Instance.command()

()
**kwargs bool

kwargs to pass to Instance.command().

{}

Returns:

Type Description
list[Union[Processlike, Exception]]

list containing result of command() from each Instance, including raised Exceptions for failed calls if permissive is True

Source code in hostess/aws/ec2.py
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
def command(
    self,
    command: str,
    *args: Union[str, int, float],
    _permissive: bool = False,
    _warn: bool = True,
    **kwargs: bool
) -> list[Union[Processlike, Exception]]:
    """
    Run one shell command on every Instance in this Cluster by dispatching
    `Instance.command()` through `_async_method_call()`. Refer to
    `Instance.command()` for details on the accepted arguments.

    Args:
        command: name of, or full string for, the command to run
        *args: positional arguments forwarded to `Instance.command()`
        _permissive: when False, raise the first Exception found among
            the results, if there is one.
        _warn: when True and `_permissive` is also True, raise a
            UserWarning for every Exception found among the results.
        **kwargs: keyword arguments forwarded to `Instance.command()`.

    Returns:
        list with one `command()` result per Instance; when `_permissive`
            is True, failed calls appear as the Exceptions they raised.
    """
    # the command string always leads the forwarded positional arguments
    forwarded = (command, *args)
    outcomes = self._async_method_call("command", *forwarded, **kwargs)
    # `_permissive` / `_warn` are consumed here, never sent to Instances
    self._check_exceptions(outcomes, _permissive, _warn)
    return outcomes
commandmap(argseq, kwargseq=None, wait=True, max_concurrent=1, task_delay=None)

Map a shell command or commands across this Cluster's Instances, asynchronously calling Instance.command() with optionally-variable args and kwargs. This method enables a wide variety of dispatch/map behaviors, and as such, has a very flexible signature.

Notes
  • Unlike Cluster.command(), this method blocks by default until all tasks have completed. If you do not wish it to block, pass wait=False, which will cause it to return a ServerPool you can later poll or join for output.
  • If neither argseq nor kwargseq specifies a finite number of tasks (e.g., argseq is a str and kwargseq is None), this method will execute the command once on each instance, much as if you had passed the same arguments to Cluster.command().
  • If both argseq and kwargseq specify a finite number of tasks (e.g., argseq is a list of tuples and kwargseq is a list of dicts), they must have equal length.
  • Task order is always preserved in output, but if the number of tasks is greater than len(self) * max_concurrent (e.g., argseq is a list of 30 tuples, max_concurrent is 1, and this Cluster has 4 Instances), there is no guarantee that tasks past the first len(self) * max_concurrent tasks will execute on any particular instance -- the underlying ServerPool will dispatch pending tasks as instances complete older ones. First come, first served.
  • This method ignores the _viewer=False meta-option. It always returns either Viewers or a ServerPool that creates Viewers.
  • This method ignores the _disown=True meta-option.

Parameters:

Name Type Description Default
argseq Union[str, Sequence[Any]]

Positional argument(s). May be:

  1. A sequence of sequences of args, like: [("ls", "/home"), ...]; each of its elements will be *-splatted into a single Instance.command() call.
  2. A single sequence of args, like ("ls", "/home"). This will be *-splatted into every Instance.command() call.
  3. A single string, like "ls"; this string will be passed directly to every Instance.command() call.
required
kwargseq Optional[Union[Mapping[str, Any], Sequence[Mapping[str, Any]]]]

Optional keyword argument(s). May be:

  1. A sequence of mappings of kwargs, like: [{'-a': True, '-l': False}, ...]; each of its elements will be **-splatted into a single command() call.
  2. A single mapping of kwargs, like: {'-a': True, '-l': False}; these kwargs will be **-splatted into every command() call.
  3. None: no kwargs for anyone.
None
wait bool

if False, return a ServerPool object that asynchronously polls the running processes. Otherwise, block until all processes complete and return a list of Viewers.

True
max_concurrent int

maximum number of commands to simultaneously run on each instance.

1
task_delay float | None

optional minimum interval, in seconds, between which subsequent tasks may be assigned to any one instance.

None

Returns:

Type Description
Union[list[Viewer], ServerPool]

If wait is True, a list of Viewers produced from Instance.command() executions. If wait is False, a ServerPool object that can be used to interact with and retrieve the results of the mapped commands.

Source code in hostess/aws/ec2.py
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
def commandmap(
    self,
    argseq: Union[str, Sequence[Any]],
    kwargseq: Optional[
        Union[Mapping[str, Any], Sequence[Mapping[str, Any]]]
    ] = None,
    wait: bool = True,
    max_concurrent: int = 1,
    task_delay: float | None = None
) -> Union[list[Viewer], ServerPool]:
    """
    Map a shell command or commands across this `Cluster's` `Instances`,
    asynchronously calling `Instance.command()` with optionally-variable
    args and kwargs. This method enables a wide variety of dispatch/map
    behaviors, and as such, has a very flexible signature.

    Notes:
        * Unlike `Cluster.command()`, this method blocks by default until
            all tasks have completed. If you do not wish it to block, pass
            `wait=False`, which will cause it to return a `ServerPool` you
            can later poll or join for output.
        * If neither `argseq` and `kwargseq` specify a finite number of
            tasks (e.g., `argseq` is a `str` and `kwargseq` is `None`),
            this method will execute the command once on each instance,
            much as if you had passed the same arguments to
            `Cluster.command()`.
        * If both `argseq` and `kwargseq` specify a finite number of tasks
            (e.g., `argseq` is a `list` of `tuples` and `kwargseq` is a
            `list` of `dicts`), they must have equal length.
        * Task order is always preserved in output, but if the number of
            tasks is greater than `len(self) * max_concurrent` (e.g.,
            `argseq` is a `list` of 30 `tuples`, `max_concurrent` is 1,
            and this `Cluster` has 4 `Instances`), there is no guarantee
            that tasks past the first `len(self) * max_concurrent` tasks
            will execute on any particular instance -- the underlying
            `ServerPool` will dispatch pending tasks as instances complete
            older ones. First come, first serves.
        * This method ignores the `_viewer=False` meta-option. It always
            returns either `Viewers` or a `ServerPool` that creates
            `Viewers`.
        * This method ignores the `_disown=True` meta-option.

    Args:
        argseq: Positional argument(s). May be:

            1. A sequence of sequences of args, like:
                `[("ls", "/home"), ...]`; each of its elements will be
                `*`-splatted into a single `Instance.command()` call.
            2. A single sequence of args, like `("ls", "/home")`. This will
                be `*`-splatted into every `Instance.command()` call.
            3. A single string, like `"ls"`; this string will be passed
                directly to every `Instance.command()` call.
        kwargseq: Optional keyword argument(s). May be:

            1. A sequence of mappings of kwargs, like:
                `[{'-a': True, '-l': False}, ...]`; each of its elements
                will be `**`-splatted into a single `command()` call.
            2. A single mapping of kwargs, like:
                `{'-a': True, '-l': False}`; these kwargs will be
                `**`-splatted into every `command()` call.
            3. `None`: no kwargs for anyone.
        wait: if `False`, return a `ServerPool` object that asynchronously
            polls the running processes. Otherwise, block until all
            processes complete and return a list of `Viewers`.
        max_concurrent: maximum number of commands to simultaneously run
            on each instance.
        task_delay: optional minimum interval, in seconds, between which
            subsequent tasks may be assigned to any one instance.

    Returns:
        If `wait` is `True`, a list of `Viewers` produced from
            `Instance.command()` executions. If `wait` is
            `False`, a `ServerPool` object that can be used to interact
            with and retrieve the results of the mapped commands.
    """
    argseq, kwargseq = self._dispatch_cycle_arguments(argseq, kwargseq)
    pool = self._async_method_map(
        "command", argseq, kwargseq, max_concurrent, task_delay
    )
    pool.close()
    if wait is False:
        return pool
    return pool.gather()
commands(commands, op='then', _con=False, _permissive=False, _warn=True, **kwargs)

Call a sequence of shell commands on all this Cluster's Instances. See Instance.commands() for further documentation.

Parameters:

Name Type Description Default
commands Sequence[str]

command names/strings

required
op Literal['and', 'xor', 'then']

logical operator to connect commands.

'then'
_con bool

run 'console-style', pretty-printing rather than returning output (will look messy with lots of Instances)

False
_permissive bool

if False, raise first Exception encountered, if any.

False
_warn bool

if True and _permissive is True, raise a UserWarning for each encountered Exception.

True
**kwargs bool

kwargs to pass to Instance._ssh(). Only meta-options are recommended.

{}

Returns:

Type Description
list[Union[Processlike, Exception]]

list containing result of commands() from each Instance, including raised Exceptions for failed calls if permissive is True

Source code in hostess/aws/ec2.py
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
def commands(
    self,
    commands: Sequence[str],
    op: Literal["and", "xor", "then"] = "then",
    _con: bool = False,
    _permissive: bool = False,
    _warn: bool = True,
    **kwargs: bool,
) -> list[Union[Processlike, Exception]]:
    """
    Run a series of shell commands on every Instance in this Cluster by
    dispatching `Instance.commands()` through `_async_method_call()`.
    Refer to `Instance.commands()` for further details.

    Args:
        commands: the command names/strings to run
        op: logical operator used to join the commands.
        _con: run 'console-style', i.e. pretty-print output instead of
            returning it (this gets messy with many Instances)
        _permissive: when False, raise the first Exception found among
            the results, if there is one.
        _warn: when True and `_permissive` is also True, raise a
            UserWarning for every Exception found among the results.
        **kwargs: keyword arguments for `Instance._ssh()`. Only
            meta-options are recommended.

    Returns:
        list with one `commands()` result per Instance; when
            `_permissive` is True, failed calls appear as the Exceptions
            they raised.
    """
    # forwarded positionally, in the order Instance.commands() is called
    forwarded = (commands, op, _con)
    outcomes = self._async_method_call("commands", *forwarded, **kwargs)
    self._check_exceptions(outcomes, _permissive, _warn)
    return outcomes
con(command, *args, _permissive=False, _warn=True, **kwargs)

Run a command 'console-style' on all this cluster's instances. See Instance.con() for further documentation. Note that this doesn't perform any kind of managed separation of outputs from different instances, so it can get pretty visually messy for commands that write to stdout/stderr multiple times.

Parameters:

Name Type Description Default
command str

command name/string

required
_permissive bool

if False, raise first Exception encountered, if any.

False
_warn bool

if True and _permissive is True, raise a UserWarning for each encountered Exception.

True
*args Union[str, int, float]

args to pass to Instance.con()

()
**kwargs bool

kwargs to pass to Instance.con().

{}

Returns:

Type Description
list[Optional[Viewer]]

list containing results of con() from each Instance, including raised Exceptions for failed calls if permissive is True

Source code in hostess/aws/ec2.py
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
def con(
    self,
    command: str,
    *args: Union[str, int, float],
    _permissive: bool = False,
    _warn: bool = True,
    **kwargs: bool
) -> list[Optional[Viewer]]:
    """
    Execute a command 'console-style' on each of this cluster's instances
    by dispatching `Instance.con()` through `_async_method_call()`; refer
    to `Instance.con()` for further details. Output from different
    instances is not separated in any managed way, so commands that write
    to stdout/stderr several times can produce visually messy results.

    Args:
        command: name of, or full string for, the command to run
        *args: positional arguments forwarded to `Instance.con()`
        _permissive: when False, raise the first Exception found among
            the results, if there is one.
        _warn: when True and `_permissive` is also True, raise a
            UserWarning for every Exception found among the results.
        **kwargs: keyword arguments forwarded to `Instance.con()`.

    Returns:
        list with one `con()` result per Instance; when `_permissive` is
            True, failed calls appear as the Exceptions they raised.
    """
    # the command string always leads the forwarded positional arguments
    forwarded = (command, *args)
    outcomes = self._async_method_call("con", *forwarded, **kwargs)
    self._check_exceptions(outcomes, _permissive, _warn)
    return outcomes
connect(maxtries=10, delay=1)

establish SSH connections to all instances, prepping new connections when none currently exist, but not replacing existing ones.

Parameters:

Name Type Description Default
maxtries int

maximum times to re-attempt failed connections

10
delay float

how many seconds to wait after failed attempts

1
Source code in hostess/aws/ec2.py
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
def connect(self, maxtries: int = 10, delay: float = 1):
    """
    Open SSH connections to every instance by dispatching
    `_prep_connection` through `_async_method_call()`. New connections
    are prepared where none exist yet; existing ones are not replaced.

    Args:
        maxtries: maximum number of times to retry a failed connection
        delay: seconds to wait after each failed attempt

    Returns:
        whatever `_async_method_call()` returns for the dispatched
            `_prep_connection` calls.
    """
    # lazy=False is always passed; only the retry settings are tunable
    connection_options = {
        "lazy": False,
        "maxtries": maxtries,
        "delay": delay,
    }
    return self._async_method_call("_prep_connection", **connection_options)
from_descriptions(descriptions, **kwargs) classmethod

Construct a Cluster from InstanceDescriptions, as produced by ls_instances().

Parameters:

Name Type Description Default
descriptions Collection[InstanceDescription]

InstanceDescriptions to initialize Instances from and subsequently collect into a Cluster.

required
**kwargs Union[str, Path, BaseClient, ServiceResource, Session, bool]

kwargs to pass to the Instance constructor

{}

Returns:

Type Description
Cluster

a Cluster including an Instance for each description.

Source code in hostess/aws/ec2.py
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
@classmethod
def from_descriptions(
    cls,
    descriptions: Collection[InstanceDescription],
    **kwargs: Union[
        str,
        Path,
        botocore.client.BaseClient,
        boto3.resources.base.ServiceResource,
        boto3.Session,
        bool,
    ],
) -> "Cluster":
    """
    Build a Cluster out of InstanceDescriptions such as those produced by
    `ls_instances()`.

    Args:
        descriptions: InstanceDescriptions; one Instance is initialized
            from each, and the Instances are then gathered into a Cluster.
        **kwargs: keyword arguments handed to every Instance constructor
            call

    Returns:
        a Cluster holding one Instance per description.
    """
    members = []
    for description in descriptions:
        # every Instance receives the same constructor kwargs
        members.append(Instance(description, **kwargs))
    return cls(members)
get(source, target, _permissive=False, _warn=True, **kwargs)

copy files from instances to local.

Parameters:

Name Type Description Default
source Union[Sequence[Union[str, Path]], str, Path]

path to file (shared between all instances), or a sequence of paths to files on instances, one per instance

required
target Union[str, Path, IO, Sequence[Union[str, Path, IO]]]

path to local file, or a filelike object (such as io.BytesIO), or a sequence of such things, one per instance. if target is a path to a local file, one separate file, with incrementing suffixes, will be written per instance.

required
_permissive bool

if False, raise first Exception encountered, if any.

False
_warn bool

if True and _permissive is True, raise a UserWarning for each encountered Exception.

True
**kwargs Any

kwargs to pass to underlying get method

{}

Returns:

Type Description
list[Union[dict, Exception]]

list of dicts giving transfer metadata: local, remote, host, port, including Exceptions for failed gets if _permissive is True.

Source code in hostess/aws/ec2.py
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
def get(
    self,
    source: Union[Sequence[Union[str, Path]], str, Path],
    target: Union[str, Path, IO, Sequence[Union[str, Path, IO]]],
    _permissive: bool = False,
    _warn: bool = True,
    **kwargs: Any,
) -> list[Union[dict, Exception]]:
    """
    Copy files from the instances to the local machine.

    Args:
        source: either one remote path used for every instance, or a
            sequence of remote paths, one per instance
        target: a local file path, a filelike object (such as
            io.BytesIO), or a sequence of such things, one per instance.
            When `target` is a single local path, one file per instance
            is written, each with an incrementing suffix
            (`<stem>_<i><suffix>`).
        _permissive: when False, raise the first Exception found among
            the results, if there is one.
        _warn: when True and `_permissive` is also True, raise a
            UserWarning for every Exception found among the results.
        **kwargs: keyword arguments handed to the underlying get method

    Returns:
        list of dicts of transfer metadata (local, remote, host, port);
            when `_permissive` is True, failed gets appear as Exceptions.

    Raises:
        ValueError: if a sequence of targets or a sequence of sources
            does not have the same length as the instances.
    """
    # targets are validated / expanded first, then sources
    one_target_path = isinstance(target, (str, Path))
    if not one_target_path and len(target) != len(self.instances):
        raise ValueError(
            "a sequence of targets must have the same length as instances"
        )
    if one_target_path:
        base = Path(target)
        # fan a single path out into one numbered path per instance
        target = [
            f"{base.absolute().parent}/{base.stem}_{index}{base.suffix}"
            for index in range(len(self))
        ]
    one_source_path = isinstance(source, (str, Path))
    if not one_source_path and len(source) != len(self.instances):
        raise ValueError(
            "a sequence of sources must have the same length as instances"
        )
    if one_source_path:
        # endlessly repeat the shared source; zip() stops at the targets
        source = cycle((source,))
    transfer_args = list(zip(source, target))
    outcomes = self._async_transfer_map(
        "get", transfer_args, cycle((kwargs,))
    )
    self._check_exceptions(outcomes, _permissive, _warn)
    return outcomes
install_conda(installer_url=CONDA_DEFAULTS['installer_url'], prefix=CONDA_DEFAULTS['prefix'], _permissive=False, _warn=False, **kwargs)

install a Conda Python distribution on all instances.

Parameters:

Name Type Description Default
installer_url str

url of install script; by default, the latest miniforge3 Linux x86_64 installer.

CONDA_DEFAULTS['installer_url']
prefix str

path for Conda installation. If a Conda installation already exists at this path, it will be updated. Defaults to $HOME/miniconda3.

CONDA_DEFAULTS['prefix']
_permissive bool

if False, raise first Exception encountered, if any.

False
_warn bool

if True and _permissive is True, raise a UserWarning for each encountered Exception.

False
kwargs bool

kwargs to pass to Instance.commands(). Only meta-options are recommended.

{}

Returns:

Type Description
list[Union[Processlike, Exception]]

List containing outputs of Instance.commands() for installer script fetch/execution, including Exceptions for failed calls if _permissive is True.

Source code in hostess/aws/ec2.py
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
def install_conda(
    self,
    installer_url: str = CONDA_DEFAULTS['installer_url'],
    prefix: str = CONDA_DEFAULTS['prefix'],
    _permissive: bool = False,
    _warn: bool = False,
    **kwargs: bool
) -> list[Union[Processlike, Exception]]:
    """
    Install a Conda Python distribution on every instance by dispatching
    `Instance.install_conda()` through `_async_method_call()`.

    Args:
        installer_url: url of the install script. The default comes from
            `CONDA_DEFAULTS['installer_url']` (documented as the latest
            miniforge3 Linux x86_64 installer).
        prefix: installation path for Conda. An existing Conda
            installation at this path will be updated. The default comes
            from `CONDA_DEFAULTS['prefix']`.
        _permissive: when False, raise the first Exception found among
            the results, if there is one.
        _warn: when True and `_permissive` is also True, raise a
            UserWarning for every Exception found among the results.
            Note that this defaults to False here.
        kwargs: keyword arguments for `Instance.commands()`. Only
            meta-options are recommended.

    Returns:
        List of `Instance.commands()` outputs for fetching and running
            the installer script; when `_permissive` is True, failed
            calls appear as Exceptions.
    """
    # url and prefix are forwarded positionally, in this order
    forwarded = (installer_url, prefix)
    outcomes = self._async_method_call("install_conda", *forwarded, **kwargs)
    self._check_exceptions(outcomes, _permissive, _warn)
    return outcomes
launch(count, template=None, options=None, tags=None, client=None, session=None, wait=True, connect=False, maxtries=40, increment_names=True, verbose=True, **instance_kwargs) classmethod

Launch a fleet of Instances and collect them into a Cluster. See hostess documentation Notebooks for examples of how to construct options, tags, etc.

Parameters:

Name Type Description Default
count int

number of instances to launch

required
template Optional[str]

name of preexisting EC2 launch template to construct instances from. if not specified, will use a 'scratch' template.

None
options Optional[dict]

optional dict of specifications for Instances. if not specified, will use default values for everything. Currently, if a template is specified, it will override all values in options. This will change in the future.

None
tags Optional[dict]

optional tags to apply to the launched instances and their associated EBS volumes (if any).

None
client Optional[BaseClient]

optional preexisting EC2 client.

None
session Optional[Session]

optional preexisting boto session.

None
wait bool

if True, block after launch until all instances are running. overrides connect=True if False.

True
connect bool

if True, block after launch until all instances are connectable.

False
maxtries int

how many times to try to connect to the instances if connect=True (waiting 1s between each attempt)

40
increment_names bool

if True and count > 1, suffix incrementing integers to the names of each instance in the Cluster, and, if they had Name tags already, add a "ClusterName" tag indicating the base name

True
verbose bool

if True, print launch progress to stdout

True
**instance_kwargs Union[str, BaseClient, ServiceResource, Session, Path, bool]

kwargs to pass to the Instance constructor.

{}

Returns:

Type Description
Cluster

a Cluster created from the newly-launched fleet.

Source code in hostess/aws/ec2.py
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
@classmethod
def launch(
    cls,
    count: int,
    template: Optional[str] = None,
    options: Optional[dict] = None,
    tags: Optional[dict] = None,
    client: Optional[botocore.client.BaseClient] = None,
    session: Optional[boto3.Session] = None,
    wait: bool = True,
    connect: bool = False,
    maxtries: int = 40,
    increment_names: bool = True,
    verbose: bool = True,
    **instance_kwargs: Union[
        str,
        botocore.client.BaseClient,
        boto3.resources.base.ServiceResource,
        boto3.Session,
        Path,
        bool,
    ],
) -> "Cluster":
    """
    Launch a fleet of Instances and collect them into a Cluster. See
    hostess documentation Notebooks for examples of how to construct
    options, tags, etc.

    Args:
        count: number of instances to launch
        template: name of preexisting EC2 launch template to construct
            instances from. if not specified, a temporary 'scratch'
            template is created from `options` and deleted again once
            the fleet request has been made.
        options: optional dict of specifications for Instances. if not
            specified, will use default values for everything. Currently,
            if a template is specified, it will override all values in
            options. This will change in the future. The passed dict is
            copied and never modified.
        tags: optional tags to apply to the launched instances and their
            associated EBS volumes (if any).
        client: optional preexisting EC2 client.
        session: optional preexisting boto session.
        wait: if True, block after launch until all instances are running.
            overrides connect=True if False.
        connect: if True, block after launch until all instances are
            connectable.
        maxtries: how many times to try to connect to the instances if
            connect=True (waiting 1s between each attempt)
        increment_names: if True and count > 1, suffix incrementing
            integers to the names of each instance in the Cluster,
            and, if they had Name tags already, add a "ClusterName" tag
            indicating the base name
        verbose: if True, print launch progress to stdout
        **instance_kwargs: kwargs to pass to the Instance constructor.
            `type_` and `name`, if given and not None, are moved into
            `options` as `instance_type` / `instance_name` instead.

    Returns:
        a Cluster created from the newly-launched fleet. The raw fleet
            response is stored in its `fleet_request` attribute.

    Raises:
        ValueError: if `count` is less than 1, or if the fleet response
            reports no launched instances.
        TimeoutError: if the instances launched but could not be
            described after several attempts.
    """
    print_ = print if verbose is True else lambda *_, **__: None
    if count < 1:
        raise ValueError("count must be >= 1.")
    client = init_client("ec2", client, session)
    # work on a copy so that a dict owned by the caller is never mutated
    options = {} if options is None else dict(options)
    options["verbose"] = verbose
    # TODO: add a few more conveniences, clean up
    if instance_kwargs.get("type_") is not None:
        options["instance_type"] = instance_kwargs.pop("type_")
    if instance_kwargs.get("name") is not None:
        options["instance_name"] = instance_kwargs.pop("name")
    using_scratch_template = template is None
    if using_scratch_template:
        template = create_launch_template(**options)["LaunchTemplateName"]
        if options.get("image_id") is None:
            # we're always using a stock Canonical image in this case, so
            # note that we're forcing uname to 'ubuntu':
            print_(
                "Using stock Canonical image, so setting uname to "
                "'ubuntu'."
            )
            instance_kwargs["uname"] = "ubuntu"
    if tags is not None:
        tagrecs = [{"Key": k, "Value": v} for k, v in tags.items()]
        tag_kwarg = {
            "TagSpecifications": [
                {"ResourceType": "instance", "Tags": tagrecs},
                {"ResourceType": "volume", "Tags": tagrecs},
            ]
        }
    else:
        tag_kwarg = {}
    try:
        fleet = client.create_fleet(
            LaunchTemplateConfigs=[
                {
                    "LaunchTemplateSpecification": {
                        "LaunchTemplateName": template,
                        "Version": "$Default",
                    }
                }
            ],
            TargetCapacitySpecification={
                "TotalTargetCapacity": count,
                "OnDemandTargetCapacity": count,
                "DefaultTargetCapacityType": "on-demand",
            },
            Type="instant",
            **tag_kwarg,
        )
    finally:
        # the scratch template is single-use; remove it even if the fleet
        # request itself failed
        if using_scratch_template:
            client.delete_launch_template(LaunchTemplateName=template)
    # note that we do not want to raise these all the time, because the
    # API frequently dumps a lot of harmless info in here. we keep the
    # errors themselves (not just their count) so that the message below
    # is actually informative.
    launch_errors = fleet.get("Errors", [])
    try:
        instance_ids = fleet["Instances"][0]["InstanceIds"]
    except (KeyError, IndexError):
        instance_ids = []
    # explicit check rather than `assert`, which is stripped under -O
    if not instance_ids:
        raise ValueError(
            f"No instances appear to have launched. "
            f"Client returned error(s):\n\n{launch_errors}"
        )
    n_instances = len(instance_ids)
    if n_instances != count:
        warnings.warn(
            f"fewer instances appear to have launched than "
            f"requested ({n_instances} vs. {count}). Check the 'Errors' "
            f"key of this Cluster's 'fleet_request' attribute."
        )

    def instance_hook():
        return _instances_from_ids(
            instance_ids,
            client=client,
            **instance_kwargs,
        )

    # freshly-launched instances may not be visible to DescribeInstances
    # right away, so retry a few times while the API reports that they do
    # "not exist". any other client error ends the retries immediately.
    instances = None
    describe_error = None
    for _ in range(5):
        try:
            instances = instance_hook()
            break
        except botocore.exceptions.ClientError as ce:
            describe_error = ce
            if "does not exist" not in str(ce):
                break
            time.sleep(0.2)
    if instances is None:
        raise TimeoutError(
            "launched instances successfully, but unable to run "
            "DescribeInstances. Perhaps permissions are wrong. "
            "Reported instance ids:\n"
            + "\n".join(instance_ids)
        ) from describe_error
    cluster = Cluster(instances)
    cluster.fleet_request = fleet
    noun = "fleet" if count > 1 else "instance"
    # TODO: async?
    if (increment_names is True) and (count > 1):
        if (basename := instances[0].tags.get("Name")) is not None:
            # remember the shared base name before suffixing each Name
            client.create_tags(
                Resources=[i.instance_id for i in instances],
                Tags=[{"Key": "ClusterName", "Value": basename}],
            )
        for i, instance in enumerate(instances):
            instance.rename(instance.tags.get("Name", "") + str(i))
    if wait is False:
        print_(f"launched {noun}; wait=False passed, not checking status")
        return cluster
    if connect is True:
        print_(f"launched {noun}; waiting until connectable")
    else:
        print_(f"launched {noun}; waiting until running")
    # TODO: also async?
    for instance in cluster.instances:
        if connect is False:
            instance.wait_until_running()
            print_(f"{instance} is running")
        else:
            instance.wait_on_connection(maxtries)
            print_(f"connected to {instance}")
    return cluster
put(source, target, *args, literal_str=False, _permissive=False, _warn=True, **kwargs)

write local files or in-memory data to target files on instances.

Parameters:

Name Type Description Default
source Union[str, Path, IO, bytes]

filelike object, path to local file, string, or bytestring (shared between all instances), or a sequence of such objects, one per instance. note that if this is a single filelike object, it will be read into memory and closed before sending its contents to the instances.

required
target Union[str, Path]

write path (shared between all instances), or a sequence of write paths, one per instance.

required
_permissive bool

if False, raise first Exception encountered, if any.

False
_warn bool

if True and _permissive is True, raise a UserWarning for each encountered Exception.

True
args Any

additional arguments to pass to underlying put method

()
literal_str bool

if True and source is a str, write source into target as text rather than interpreting source as a path to a local file.

False
**kwargs Any

kwargs to pass to underlying put method

{}

Returns:

Type Description
list[Union[dict, Exception]]

list of dicts giving transfer metadata: local, remote, host, port, including Exceptions for failed puts if _permissive is True.

Source code in hostess/aws/ec2.py
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
def put(
    self,
    source: Union[str, Path, IO, bytes],
    target: Union[str, Path],
    *args: Any,
    literal_str: bool = False,
    _permissive: bool = False,
    _warn: bool = True,
    **kwargs: Any,
) -> list[Union[dict, Exception]]:
    """
    write local files or in-memory data to target files on instances.

    Args:
        source: filelike object, path to local file, string, or bytestring
            (shared between all instances), or a sequence of such objects,
            one per instance. note that if this is a single filelike
            object, it will be read into memory and closed before sending
            its contents to the instances.
        target: write path (shared between all instances), or a sequence
            of write paths, one per instance.
        _permissive: if False, raise first Exception encountered, if any.
        _warn: if True and `_permissive` is True, raise a UserWarning for
            each encountered Exception.
        args: additional arguments to pass to underlying put method
        literal_str: if True and `source` is a `str`, write `source`
            into `target` as text rather than interpreting `source` as a
            path to a local file.
        **kwargs: kwargs to pass to underlying put method

    Returns:
        list of dicts giving transfer metadata: local, remote, host, port,
            including Exceptions for failed puts if _permissive is True.

    Raises:
        ValueError: if `source` or `target` is a sequence whose length
            differs from the number of instances.
    """
    n_instances = len(self.instances)
    if isinstance(target, (str, Path)):
        target = cycle((target,))
    elif len(target) != n_instances:
        raise ValueError(
            "a sequence of targets must have the same length as instances"
        )
    # upstream implementation makes it impossible to reuse buffers, so a
    # single filelike source is drained once and its contents are shared.
    # `typing.IO` alone does not match concrete file objects (e.g.
    # `io.BytesIO` or the result of `open()`), so also test `io.IOBase`.
    if isinstance(source, (IO, io.IOBase)):
        try:
            contents = source.read()
        finally:
            # close the buffer even if reading it failed
            source.close()
        source = cycle((contents,))
    elif isinstance(source, (str, Path, bytes)):
        source = cycle((source,))
    elif len(source) != n_instances:
        raise ValueError(
            "a sequence of sources must have the same length as instances"
        )
    kwargs['literal_str'] = literal_str
    results = self._async_transfer_map(
        "put",
        # we need self.instances to bound length; s & t can both be cycles.
        [(s, t, *args) for s, t, _ in zip(source, target, self.instances)],
        cycle((kwargs,))
    )
    self._check_exceptions(results, _permissive, _warn)
    return results
pythonmap(argseq, kwargseq=None, wait=True, max_concurrent=1, task_delay=None)

Map Python calls across this Cluster's Instances, asynchronously calling Instance.call_python() with optionally-variable args and kwargs. This method has the same flexible calling conventions as Cluster.commandmap(); refer to that method's documentation for more detail.

Parameters:

Name Type Description Default
argseq Union[str, Sequence[Any]]

Positional argument(s) to Instance.call_python().

required
kwargseq Optional[Union[Mapping[str, Any], Sequence[Mapping[str, Any]]]]

Optional keyword argument(s) to Instance.call_python().

None
wait bool

if False, return a ServerPool object that asynchronously polls the running processes. Otherwise, block until all processes complete and return a list of Viewers.

True
max_concurrent int

maximum number of calls to simultaneously perform on each instance.

1
task_delay float | None

optional minimum interval, in seconds, between which subsequent calls may be assigned to any one instance.

None

Returns:

Type Description
Union[list[Union[Viewer, Exception]], ServerPool]

If wait is True, a list of Viewers produced from Instance.call_python() calls. If wait is False, a ServerPool object that can be used to interact with and retrieve the results of the mapped calls.

Source code in hostess/aws/ec2.py
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
def pythonmap(
    self,
    argseq: Union[str, Sequence[Any]],
    kwargseq: Optional[
        Union[Mapping[str, Any], Sequence[Mapping[str, Any]]]
    ] = None,
    wait: bool = True,
    max_concurrent: int = 1,
    task_delay: float | None = None
) -> Union[list[Union[Viewer, Exception]], ServerPool]:
    """
    Asynchronously map `Instance.call_python()` across this `Cluster's`
    `Instances`, with args and kwargs that may vary per call. The calling
    conventions are the same as those of `Cluster.commandmap()`; see that
    method's documentation for details.

    Args:
        argseq: Positional argument(s) to `Instance.call_python()`.
        kwargseq: Optional keyword argument(s) to `Instance.call_python()`.
        wait: if `False`, hand back the `ServerPool` that asynchronously
            polls the running processes. Otherwise, block until every
            process has completed and return a list of `Viewers`.
        max_concurrent: maximum number of calls to perform simultaneously
            on each instance.
        task_delay: optional minimum interval, in seconds, between
            successive call assignments to any one instance.

    Returns:
        If `wait` is `True`, a list of `Viewers` produced from
            `Instance.call_python()` calls.  If `wait` is `False`, a
            `ServerPool` object that can be used to interact with and
            retrieve the results of the mapped calls.
    """
    dispatched = self._dispatch_cycle_arguments(argseq, kwargseq)
    call_args, call_kwargs = dispatched
    server_pool = self._async_method_map(
        "call_python", call_args, call_kwargs, max_concurrent, task_delay
    )
    # the pool is closed in both modes, before it is returned or gathered
    server_pool.close()
    return server_pool if wait is False else server_pool.gather()
read(source, mode='r', encoding='utf-8', as_buffer=False, concatenate=False, separator=None, _permissive=True, _warn=True, **kwargs)

read files from instances directly into memory.

Parameters:

Name Type Description Default
source Union[Sequence[Union[str, Path]], str, Path]

path to file (same path on all instances), or a sequence of paths to files on instances, one per instance

required
mode Union[Literal['r', 'rb'], Sequence[Literal['r', 'rb']]]

"r" for text, "rb" for binary, or a sequence of those, one per instance. must be a single value if concatenate is True.

'r'
encoding str

encoding for text files. ignored when mode is "rb".

'utf-8'
as_buffer bool

if True, return BytesIO/StringIO instead of bytes/str

False
concatenate bool

if True, concatenate contents of all files into a single object (preserving order), rather than returning them as a list

False
separator Optional[Union[str, bytes]]

if concatenate is True, separate results from different files with this string/bytestring. ignored if concatenate is False. if None, just stick them together.

None
_permissive bool

if False, raise the first Exception encountered, if any. When True, and concatenating, simply concatenate only successful reads (meaning Exceptions will be discarded).

True
_warn bool

if True and permissive is True, raise a UserWarning for each encountered Exception.

True
**kwargs Any

kwargs to pass to underlying get method

{}

Returns:

Type Description
Union[BytesIO, StringIO, bytes, str, Sequence[Union[BytesIO, StringIO, bytes, str]]]

if concatenate is False: a list of contents, one element per instance, including raised Exceptions for failed calls if permissive is True. if concatenate is True: a single object containing concatenated contents, with failed reads omitted if permissive is True. (This will be a 0-length string/bytestring/buffer if all reads failed.) type of elements/concatenated object depends on mode and as_buffer.

Source code in hostess/aws/ec2.py
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
def read(
    self,
    source: Union[Sequence[Union[str, Path]], str, Path],
    mode: Union[Literal["r", "rb"], Sequence[Literal["r", "rb"]]] = "r",
    encoding: str = "utf-8",
    as_buffer: bool = False,
    concatenate: bool = False,
    separator: Optional[Union[str, bytes]] = None,
    _permissive: bool = True,
    _warn: bool = True,
    **kwargs: Any,
) -> Union[
    io.BytesIO,
    io.StringIO,
    bytes,
    str,
    Sequence[Union[io.BytesIO, io.StringIO, bytes, str]],
]:
    """
    read files from instances directly into memory.

    Args:
        source: path to file (same path on all instances), or a sequence
            of paths to files on instances, one per instance
        mode: "r" for text, "rb" for binary, or a sequence of those, one
            per instance. must be a single value if `concatenate` is True.
        encoding: encoding for text files. ignored when mode is "rb".
        as_buffer: if True, return BytesIO/StringIO instead of bytes/str
        concatenate: if True, concatenate contents of all files into a
            single object (preserving order), rather than returning them
            as a list
        separator: if `concatenate` is True, separate results from
            different files with this string/bytestring. ignored if
            `concatenate` is False. if None, just stick them together.
            a `str` separator is encoded (and a `bytes` separator decoded)
            with `encoding` when it does not match `mode`.
        _permissive: if False, raise the first Exception encountered, if
            any. When True, and concatenating, simply concatenate only
            successful reads (meaning Exceptions will be discarded).
        _warn: if True and `_permissive` is True, raise a UserWarning for
            each encountered Exception.
        **kwargs: kwargs forwarded with each mapped `read` call

    Returns:
        if `concatenate` is False: a list of contents, one element per
            instance, including raised Exceptions for failed calls if
            `_permissive` is True. if `concatenate` is True: a single
            object containing concatenated contents, with failed reads
            omitted if `_permissive` is True. (This will be a 0-length
            string/bytestring/buffer if all reads failed.)  type of
            elements/concatenated object depends on `mode` and `as_buffer`.

    Raises:
        TypeError: if `concatenate` is True and `mode` is not a single
            string.
        ValueError: if `source` or `mode` is a sequence whose length
            differs from the number of instances.
    """
    if concatenate is True and not isinstance(mode, str):
        raise TypeError("specify a single mode if concatenate is True.")
    n_instances = len(self.instances)
    if isinstance(source, (str, Path)):
        source = cycle((source,))
    elif len(source) != n_instances:
        raise ValueError(
            "a sequence of sources must have the same length as instances"
        )
    # Remember the scalar mode before wrapping it in a cycle: the cycle
    # object itself can never compare equal to "rb", which previously made
    # binary concatenation with as_buffer=True try to build a StringIO
    # from bytes.
    single_mode = mode if isinstance(mode, str) else None
    if single_mode is not None:
        mode = cycle((single_mode,))
    elif len(mode) != n_instances:
        raise ValueError(
            "a sequence of modes must have the same length of instances"
        )
    results = self._async_transfer_map(
        "read",
        [
            # the trailing True is passed positionally after `encoding`;
            # the code below relies on each successful result having .read()
            (s, m, encoding, True)
            for s, m, _ in zip(source, mode, self.instances)
        ],
        cycle((kwargs,)),
    )
    self._check_exceptions(results, _permissive, _warn)
    if concatenate is False:
        if as_buffer is True:
            return results
        return [
            r if isinstance(r, Exception) else r.read() for r in results
        ]
    # concatenating: `single_mode` is guaranteed to be set by the TypeError
    # guard at the top of the function.
    output = [r.read() for r in results if not isinstance(r, Exception)]
    binary = single_mode == "rb"
    if separator is None:
        separator = b"" if binary else ""
    elif binary and isinstance(separator, str):
        separator = separator.encode(encoding)
    elif single_mode == "r" and isinstance(separator, bytes):
        separator = separator.decode(encoding)
    output = separator.join(output)
    if as_buffer is False:
        return output
    if binary:
        # noinspection PyTypeChecker
        return io.BytesIO(output)
    return io.StringIO(output)
read_csv(source, encoding='utf-8', add_identifiers=True, reset_index=True, _permissive=False, _warn=False, **csv_kwargs)

read CSV-like files from all instances and concatenate them row-wise (stacking the rows from each file) into a single pandas DataFrame, optionally adding identifier columns.

Parameters:

Name Type Description Default
source Union[Sequence[Union[str, Path]], str, Path]

path to CSV-like file (same path on all instances), or a sequence of such paths, one per instance

required
encoding str

text encoding for CSV-like files

'utf-8'
add_identifiers bool

if True, add "name" and "id" columns to the returned DataFrame indicating which instances produced which rows.

True
reset_index bool

if True, reset index and drop original indices.

True
_permissive bool

if False, raise first Exception encountered during read, if any. When True, simply ignore read Exceptions, meaning the DataFrame will contain no output from instances on which calls failed and all Exceptions will be discarded. (This will result in an empty DataFrame if all reads failed.) This option does not suppress Exceptions encountered during DataFrame construction or concatenation.

False
_warn bool

if True and permissive is True, raise a UserWarning for each encountered read Exception.

False
**csv_kwargs Any

kwargs to pass to pd.read_csv

{}

Returns:

Type Description
DataFrame

A DataFrame containing concatenated contents of all files, optionally including identifying information for source instances.

Source code in hostess/aws/ec2.py
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
def read_csv(
    self,
    source: Union[Sequence[Union[str, Path]], str, Path],
    encoding: str = "utf-8",
    add_identifiers: bool = True,
    reset_index: bool = True,
    _permissive: bool = False,
    _warn: bool = False,
    **csv_kwargs: Any,
) -> pd.DataFrame:
    """
    read CSV-like files from all instances and concatenate them row-wise
    (stacking the rows from each file) into a single pandas DataFrame,
    optionally adding identifier columns.

    Args:
        source: path to CSV-like file (same path on all instances), or
            a sequence of such paths, one per instance
        encoding: text encoding for CSV-like files
        add_identifiers: if True, add "name" and "id" columns to the
            returned DataFrame indicating which instances produced which
            rows.
        reset_index: if True, reset index and drop original indices.
        _permissive: if False, raise first Exception encountered during
            read, if any. When True, simply ignore read Exceptions,
            meaning the `DataFrame` will contain no output from instances
            on which calls failed and all Exceptions will be discarded.
            (This will result in an empty `DataFrame` if all reads failed.)
            This option does *not* suppress Exceptions encountered during
            `DataFrame` construction or concatenation.
        _warn: if True and `_permissive` is True, raise a UserWarning for
            each encountered read Exception.
        **csv_kwargs: kwargs to pass to pd.read_csv

    Returns:
        A `DataFrame` containing concatenated contents of all files,
            optionally including identifying information for source
            instances. Empty if no file was read successfully.
    """
    buffers = self.read(
        source,
        encoding=encoding,
        _permissive=_permissive,
        _warn=_warn,
        as_buffer=True
    )
    frames = []
    for buffer, instance in zip(buffers, self.instances):
        if isinstance(buffer, Exception):
            # failed read; only reachable when self.read did not raise
            continue
        df = pd.read_csv(buffer, **csv_kwargs)
        if add_identifiers is True:
            df['name'] = instance.name
            df['id'] = instance.instance_id
        frames.append(df)
    if not frames:
        # pd.concat raises ValueError on an empty list; honor the documented
        # contract of returning an empty DataFrame when every read failed.
        return pd.DataFrame()
    concatenated = pd.concat(frames)
    if reset_index is False:
        return concatenated
    return concatenated.reset_index(drop=True)
rebase_ssh_ingress_ip(ip=None, force=False, revoke=True)

Modify all security groups associated with all of this Cluster's instances to permit SSH access from an IP. IMPORTANT: by default, this method revokes all other inbound access permissions, because it is good security practice to not slowly whitelist the entire world. Pass revoke=False if there are permissions you need to retain.

Parameters:

Name Type Description Default
ip Optional[str]

permit SSH access from this IP. if None, use the caller's external IP.

None
force bool

if True, will force modification even of default security groups.

False
revoke bool

if True, will revoke all other inbound permissions.

True

Returns:

Type Description
list[None]

list of None (since the Instance method doesn't return anything)

Source code in hostess/aws/ec2.py
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
def rebase_ssh_ingress_ip(
    self,
    ip: Optional[str] = None,
    force: bool = False,
    revoke: bool = True,
) -> list[None]:
    """
    Permit SSH access from a single IP in every security group associated
    with any of this Cluster's instances.

    IMPORTANT: by default, this method revokes all other inbound access
    permissions, because it is good security practice to not slowly
    whitelist the entire world. Pass `revoke=False` if there are
    permissions you need to retain.

    Args:
        ip: permit SSH access from this IP. if None, use the caller's
            external IP.
        force: if True, will force modification even of default security
            groups.
        revoke: if True, will revoke all other inbound permissions.

    Returns:
        list of None (since the Instance method doesn't return anything)
    """
    # forwarded unchanged, by keyword, to the per-instance method
    settings = {"ip": ip, "force": force, "revoke": revoke}
    return self._async_method_call("rebase_ssh_ingress_ip", **settings)
start(*args, **kwargs)

Start all Instances. See Instance.start() for further documentation, including valid arguments.

Returns:

Type Description
list

list containing results of start() from each Instance.

Source code in hostess/aws/ec2.py
1729
1730
1731
1732
1733
1734
1735
1736
1737
def start(self, *args, **kwargs) -> list:
    """
    Call `start()` on every Instance, forwarding all positional and
    keyword arguments unchanged. Valid arguments are documented on
    `Instance.start()`.

    Returns:
        list containing results of `start()` from each Instance.
    """
    outcomes = self._async_method_call("start", *args, **kwargs)
    return outcomes
stop(*args, **kwargs)

Stop all Instances. See Instance.stop() for further documentation, including valid arguments.

Returns:

Type Description
list

list containing results of stop() from each Instance.

Source code in hostess/aws/ec2.py
1739
1740
1741
1742
1743
1744
1745
1746
1747
def stop(self, *args, **kwargs) -> list:
    """
    Call `stop()` on every Instance, forwarding all positional and
    keyword arguments unchanged. Valid arguments are documented on
    `Instance.stop()`.

    Returns:
        list containing results of `stop()` from each Instance.
    """
    outcomes = self._async_method_call("stop", *args, **kwargs)
    return outcomes
terminate(*args, **kwargs)

Terminate all Instances. See Instance.terminate() for further documentation / valid arg and kwargs.

Returns:

Type Description
list

list containing results of terminate() from each Instance.

Source code in hostess/aws/ec2.py
1749
1750
1751
1752
1753
1754
1755
1756
1757
def terminate(self, *args, **kwargs) -> list:
    """
    Call `terminate()` on every Instance, forwarding all positional and
    keyword arguments unchanged. Valid args and kwargs are documented on
    `Instance.terminate()`.

    Returns:
        list containing results of `terminate()` from each Instance.
    """
    outcomes = self._async_method_call("terminate", *args, **kwargs)
    return outcomes
update()

update basic information for instances.

Source code in hostess/aws/ec2.py
1725
1726
1727
def update(self) -> list:
    """
    update basic information for instances.

    Returns:
        whatever `_async_method_call("update")` produces (annotated as a
            list).
    """
    refreshed = self._async_method_call("update")
    return refreshed

Instance

Interface to an EC2 instance. Enables remote procedure calls, state control, and monitoring.

Attributes:

Name Type Description
session Session

session for API calls

client BaseClient

client for API calls

resource ServiceResource

resource for API calls

instance_ Instance

underlying boto3 Instance used for some API operations

instance_id str

AWS instance id

instance_type str

AWS instance type (e.g. 't3a.micro')

tags dict

AWS resource tags for instance

launch_time datetime

Time instance was launched

name Optional[str]

Value of 'Name' tag; None if not present

zone str

AWS Availability Zone

uname Optional[str]

Username for SSH operations (if known)

passed_key Optional[Path]

User-specified path to SSH keyfile, if any

state InstanceState

state of instance (e.g. 'running')

_ssh SSH

Underlying SSH object for command execution

address_type Literal['private', 'public']

Whether we are using the instance's public or private IP for SSH connections. (Private IP should generally only be used from within AWS).

ip Optional[str]

IPv4 address of instance -- its private IP if address_type is "private"; public IP otherwise. None if the instance has no IP (e.g., if it's stopped) or its IP cannot be determined.

key Optional[str]

stringified path to actually-existing SSH keyfile, if found

key_errstring Optional[str]

detailed description of what went wrong with our last attempt to find and load an SSH keyfile, if anything

Source code in hostess/aws/ec2.py
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
class Instance:
    """
    Interface to an EC2 instance. Enables remote procedure calls, state
    control, and monitoring.

    Attributes:
        session (boto3.Session): session for API calls
        client (botocore.client.BaseClient): client for API calls
        resource (boto3.resources.base.ServiceResource): resource for API calls
        instance_ (boto3.EC2.Instance): underlying boto3 `Instance` used for
            some API operations
        instance_id (str): AWS instance id
        instance_type (str): AWS instance type (e.g. 't3a.micro')
        tags (dict): AWS resource tags for instance
        launch_time (datetime): Time instance was launched
        name (Optional[str]): Value of 'Name' tag; None if not present
        zone (str): AWS Availability Zone
        uname (Optional[str]): Username for SSH operations (if known)
        passed_key (Optional[Path]): User-specified path to SSH keyfile, if any
        state (InstanceState): state of instance (e.g. 'running')
        _ssh (hostess.ssh.SSH): Underlying `SSH` object for command execution
        address_type (Literal["private", "public"]): Whether we are using the
            instance's public or private IP for SSH connections. (Private IP
            should generally only be used from within AWS).
        ip (Optional[str]): IPv4 address of instance -- its private
            IP if `address_type` is "private"; public IP otherwise. None if the
            instance has no IP (e.g., if it's stopped) or its IP cannot be
            determined.
        key (Optional[str]): stringified path to actually-existing SSH keyfile,
            if found
        key_errstring (Optional[str]): detailed description of what went wrong
            with our last attempt to find and load an SSH keyfile, if anything
    """

    def __init__(
        self,
        description: Union[InstanceIdentifier, InstanceDescription],
        *,
        uname: str = GENERAL_DEFAULTS["uname"],
        key: Optional[Path] = None,
        client: Optional[botocore.client.BaseClient] = None,
        resource: Optional[boto3.resources.base.ServiceResource] = None,
        session: Optional[boto3.Session] = None,
        use_private_ip: bool = False,
        verbose: bool = True
    ):
        """
        Args:
            description: unique identifier for the instance, either its public
                / private IP (whichever is accessible from where you're
                initializing this object), the full instance identifier, or
                an InstanceDescription as returned by ls_instances().
            uname: username for SSH access to the instance.
            key: path to keyfile. You don't usually need to explicitly specify
                it. The constructor can find it automatically given the
                following conditions:

                1. You keep the keyfile in a 'standard' location (like
                ~/.ssh/; see config.config.GENERAL_DEFAULTS['secrets_folders']
                for a list) or a directory you specify in
                config.user_config.GENERAL_DEFAULTS['secrets_folders'],
                2. its filename matches the key name given in the API
                response describing the instance.

                If those aren't both true, you'll need to pass this value to
                connect to the instance via SSH.
            client: boto client. creates default client if not given.
            resource: boto resource. creates default resource if not given.
            session: boto session. creates default session if not given.
            use_private_ip: if True, use the instance's private IP rather
                than its public IP as `self.ip` (and so for SSH).
            verbose: if True, print some changes in status to stdout.

        Raises:
            ValueError: if `description` is not a string and has neither an
                "id" nor an "InstanceId" key.
        """
        self.session = session if session is not None else make_boto_session()
        self.resource = init_resource("ec2", resource, self.session)
        self.client = init_client("ec2", client, self.session)
        self.verbose = verbose

        if isinstance(description, str):
            # if it's got periods in it, assume it's an IPv4 address (public
            # or private) and look up the id of the instance that has it
            if "." in description:
                matches = ls_instances(description, client=self.client)
                instance_id = matches[0]["id"]
            # otherwise assume it's the instance id
            else:
                instance_id = description
        # otherwise assume it's a full description like from
        # ls_instances / ec2.describe_instance
        elif "id" in description:
            instance_id = description["id"]
        elif "InstanceId" in description:
            instance_id = description["InstanceId"]
        else:
            raise ValueError("can't interpret this description.")
        instance_ = self.resource.Instance(instance_id)
        self.instance_id = instance_id
        self.address_type = "private" if use_private_ip is True else "public"
        # always define `ip` on the instance, falling back to None when the
        # boto resource does not expose the requested address attribute
        self.ip = getattr(instance_, f"{self.address_type}_ip_address", None)
        self.instance_type = instance_.instance_type
        self.tags = tag_dict(instance_.tags)
        self.launch_time = instance_.launch_time
        self.state = instance_.state["Name"]
        self.request_cache = []
        self.name = self.tags.get("Name")
        self.zone = instance_.placement["AvailabilityZone"]
        self.uname, self.passed_key, self._ssh = uname, key, None
        self.instance_ = instance_

    def rename(self, name: str):
        """
        Set the instance's 'Name' tag, then refresh the locally cached
        instance state. Volumes and network interfaces are not renamed.

        Args:
            name: new name for instance.
        """
        name_tag = {"Key": "Name", "Value": name}
        self.client.create_tags(Resources=[self.instance_id], Tags=[name_tag])
        # pull the new tag back into self.tags / self.name
        self.update()

    @classmethod
    def find(
        cls,
        identifier: Optional[InstanceIdentifier] = None,
        states: Sequence[str] = ("running", "pending", "stopping", "stopped"),
        raw_filters: Optional[Sequence[Mapping[str, str]]] = None,
        client: Optional[botocore.client.BaseClient] = None,
        session: Optional[boto3.Session] = None,
        long: bool = False,
        tag_regex: bool = True,
        uname: str = GENERAL_DEFAULTS["uname"],
        *,
        key: Optional[Path] = None,
        resource: Optional[boto3.resources.base.ServiceResource] = None,
        use_private_ip: bool = False,
        pick_first: bool = False,
        verbose: bool = True,
        **tag_filters: str
    ) -> "Instance":
        """
        Forwards relevant passed arguments to `ls_instances()` and returns an
        instance of this class constructed from the first match, passing
        relevant arguments to `__init__()`. It also accepts one unique
        argument, `pick_first`; if `True`, it will return the first instance
        found in the case of multiple matches; otherwise (default) raise a
        ValueError.

        Arguments passed to `ls_instances()`:
            `identifier`, `states`, `raw_filters`, `client`, `session`,
            `long`, `tag_regex`, `tag_filters`

        Arguments passed to `__init__()`:
            `uname`, `key`, `client`, `resource`, `session`, `use_private_ip`,
            `verbose`

        See documentation for those functions for full discussion of those
        arguments.

        Returns:
            An object of the class this was called on (so subclasses get
            instances of themselves) wrapping the first matching instance.

        Raises:
            KeyError: If `ls_instances()` finds no matching instances.
            ValueError: If `ls_instances` finds more than one matching
                instance and `pick_first` is not True.
        """
        search_result = ls_instances(identifier, states, raw_filters, client,
                                     session, long, tag_regex, **tag_filters)
        if len(search_result) == 0:
            raise KeyError("No matching instances found")
        if len(search_result) > 1 and pick_first is not True:
            raise ValueError(
                "More than one matching instance found. Pass pick_first=True "
                "to select the first matching instance. Note that instance "
                "order in search results is undefined and may not be "
                "consistent across multiple calls to this function."
            )
        # construct via `cls` rather than naming Instance directly, so that
        # calling find() on a subclass yields that subclass
        return cls(search_result[0], uname=uname, key=key,
                   resource=resource, use_private_ip=use_private_ip,
                   client=client, session=session, verbose=verbose)

    @classmethod
    def launch(
        cls,
        template=None,
        options=None,
        tags=None,
        client=None,
        session=None,
        wait=True,
        connect=False,
        maxtries: int = 40,
        **instance_kwargs: Union[
            str,
            botocore.client.BaseClient,
            boto3.resources.base.ServiceResource,
            boto3.Session,
            Path,
            bool,
        ],
    ) -> "Instance":
        """
        Launch exactly one instance by delegating to `Cluster.launch()` with
        a count of 1 and returning the first element of what it gives back.
        All arguments are forwarded unchanged; see `Cluster.launch()` for
        their meaning.

        Returns:
            an Instance associated with a newly-launched EC2 instance.
        """
        instance_count = 1
        launched = Cluster.launch(
            instance_count,
            template,
            options,
            tags,
            client,
            session,
            wait,
            connect,
            maxtries,
            **instance_kwargs,
        )
        return launched[0]

    def make_ssh_string(self) -> str:
        """
        Build a shell command that a local user could run to open an
        interactive SSH session on this instance, from the `key`, `uname`,
        and `ip` attributes. Nothing is executed and nothing about the local
        system is checked (e.g. whether `ssh` is installed); the string is
        purely a convenience.

        Returns:
            A string that, if run in a local shell, may start an interactive
            SSH session.

        Raises:
            ValueError: if either `self.key` or `self.ip` is None.
        """
        have_key_and_ip = self.key is not None and self.ip is not None
        if not have_key_and_ip:
            raise ValueError(
                "No connection established to instance. Cannot confirm "
                "keyfile or ip. Run self.connect() first."
            )
        return "ssh -i {} {}@{}".format(self.key, self.uname, self.ip)

    def wait_on_connection(self, maxtries: int):
        """
        Block until an SSH connection to the instance is established.

        Calls `self.connect()` in rounds. A `ConnectionError` from one round
        simply starts another, so this method does not give up on its own;
        other exception types propagate to the caller.

        Args:
            maxtries: passed to each `connect()` call as its `maxtries`.
        """
        while True:
            if self.is_connected:
                break
            try:
                self.connect(maxtries=maxtries)
            except ConnectionError:
                # this round failed; go around again
                continue
            break
        if self.verbose is True:
            print("connection established")

    # TODO: pull more of these command / connect behaviors up to SSH.

    def connect(self, maxtries: int = 5, delay: float = 1):
        """
        Open an SSH connection to the instance. A new connection object is
        prepared if there is none yet; an existing one is reused rather than
        replaced.

        Args:
            maxtries: maximum times to re-attempt failed connections
            delay: how many seconds to wait after failed attempts
        """
        retry_options = {"maxtries": maxtries, "delay": delay}
        # lazy=False: actually open the connection now
        self._prep_connection(lazy=False, **retry_options)

    def reconnect(self, maxtries: int = 5, delay: float = 1):
        """
        Discard the current SSH connection, if there is one, and then
        prepare and open a fresh one.
        Note that this will immediately terminate any non-daemonized processes
        previously executed over the existing connection.

        Args:
            maxtries: maximum times to re-attempt failed connections
            delay: how many seconds to wait after failed attempts
        """
        has_existing = self._ssh is not None
        if has_existing:
            self._drop_ssh()
        self._prep_connection(maxtries=maxtries, delay=delay, lazy=False)

    @property
    def is_connected(self):
        """
        True if an `SSH` object exists and its underlying connection reports
        itself as connected; False otherwise.
        """
        ssh = self._ssh
        if ssh is None:
            return False
        return True if ssh.conn.is_connected else False

    def _maybe_find_key(self):
        """
        Resolve the SSH keyfile for this instance, unless `self.key` is
        already set.

        If the user passed a key path, it is used if it exists on disk.
        Otherwise `find_ssh_key()` is asked to locate a keyfile from the
        instance's AWS key name. On success, `self.key` is set to the
        stringified path and `self.key_errstring` is cleared; on failure,
        `self.key_errstring` is set to an explanation and `self.key` is left
        as it was. Never raises for a missing key.
        """
        if self.key is not None:
            return
        if self.passed_key is not None:
            # an explicitly-passed key always takes precedence over search
            if Path(self.passed_key).exists():
                self.key, self.key_errstring = str(self.passed_key), None
            else:
                self.key_errstring = (
                    f"The specified key file ({self.passed_key}) does not exist."
                )
            return
        try:
            found = find_ssh_key(self.instance_.key_name)
        except FileNotFoundError as fe:
            self.key_errstring = (
                f"Couldn't find a keyfile for the instance. The keyfile "
                f"may not be in the expected path, or its filename might "
                f"not match the AWS key name ({self.instance_.key_name}). "
                f"Try explicitly specifying the path by setting the .key "
                f"attribute or passing the key= argument. {fe}"
            )
            return
        self.key, self.key_errstring = str(found), None

    def _prep_connection(
        self, *, lazy: bool = True, maxtries: int = 5, delay: float = 1
    ):
        """
        Prepare an SSH connection to the instance and, unless `lazy`, open
        it. Does nothing if a connection is already open. An existing
        unopened `SSH` object is reused; one is created only if there is
        none.

        Args:
            lazy: don't establish the connection immediately; wait until some
                method needs it. other arguments do nothing if this is True.
            maxtries: maximum times to re-attempt failed connections
            delay: how many seconds to wait after each failed attempt

        Raises:
            NoKeyError: if no usable SSH keyfile could be found.
            ConnectionError: if the connection could not be opened within
                `maxtries` attempts. `_update_ssh_info()` may also raise
                this before any attempt is made.
        """
        if self.is_connected:
            # already open; nothing to prepare
            return
        self._maybe_find_key()
        if self.key_errstring is not None:
            # a missing key can't be fixed by retrying, so fail right away
            raise NoKeyError(self.key_errstring)
        self._update_ssh_info()
        if self._ssh is None:
            self._ssh = SSH.connect(self.ip, self.uname, self.key)
        if lazy is True:
            return
        last_error = None
        for _ in range(maxtries):
            try:
                self._ssh.conn.open()
            except (
                AttributeError,
                SSHException,
                NoValidConnectionsError,
            ) as failure:
                last_error = failure
                time.sleep(delay)
                # refresh state/ip before the next attempt
                self.update()
            else:
                return
        raise ConnectionError(
            f"Unable to establish SSH connection to instance. It may not yet "
            f"be ready to accept SSH connections, or something might be wrong "
            f"with configuration. Reported error: {last_error}"
        )

    def _check_unready(self) -> Union[str, bool]:
        """
        Report whether the instance is obviously not ready for SSH
        connections, judging only by locally cached attributes.

        Returns:
            "state" if the instance is neither running nor pending;
                otherwise a comma-separated list of whichever of
                "ip"/"uname"/"key" are None; otherwise False (meaning
                nothing obvious is missing).
        """
        state_ok = self.state in ("running", "pending")
        if not state_ok:
            return "state"
        missing = []
        for prop in ("ip", "uname", "key"):
            if getattr(self, prop) is None:
                missing.append(prop)
        return ", ".join(missing) if missing else False

    def _update_ssh_info(self):
        """
        Refresh instance info and verify that everything needed for an SSH
        connection is available. If the instance is not running or pending,
        also drop any existing prepped connection.

        Raises:
            ConnectionError: if `_check_unready()` reports a problem. The
                message lists each reason as a numbered, space-separated
                item.
        """
        self.update()
        if (unready := self._check_unready()) is False:
            return
        # _check_unready() reports problems as a comma-separated string
        missing = [part.strip() for part in unready.split(",")]
        problems = []
        if "state" in missing:
            problems.append(
                "It is currently not running. Try starting "
                "the instance with .start()."
            )
            if self._ssh is not None:
                self._drop_ssh()
        # only mention missing IP if instance is running -- we don't expect
        # a stopped instance to have an IP.
        elif "ip" in missing:
            problems.append(
                "Cannot find IP for instance. It "
                "may be in the process of IP assignment; try waiting a "
                "moment. It may also be configured to have no appropriate IP."
            )
        if "uname" in missing:
            # previously a missing username produced no explanation at all
            problems.append(
                "No username is set for SSH access. Set the .uname attribute."
            )
        if "key" in missing:
            problems.append(f"{self.key_errstring}")
        numbered = " ".join(
            f"{number}. {problem}"
            for number, problem in enumerate(problems, start=1)
        )
        raise ConnectionError(
            f"Unable to execute commands on {self.instance_id}. {numbered}"
        )

    def _drop_ssh(self):
        """
        Close the current SSH object, delete the attribute holding it, and
        reset `self._ssh` to None. Intended for use only by other methods of
        Instance; callers must ensure `self._ssh` is not None.
        """
        self._ssh.close()
        # remove our reference before resetting the attribute to None
        delattr(self, "_ssh")
        self._ssh = None

    @connectwrap
    def command(
        self,
        command: str,
        *args: Union[int, str, float],
        _viewer: bool = True,
        _wait: bool = False,
        _quiet: bool = True,
        **kwargs: Union[int, str, float, bool],
    ) -> Processlike:
        """
        Run a command on the instance by calling the underlying `SSH`
        object, forwarding all arguments to it.

        Args:
            command: command name or full text of command
                (see `hostess.subutils.RunCommand.__call__()` for details on
                calling conventions).
            *args: args to pass to `self._ssh.__call__()`.
            _viewer: if `True`, return a `Viewer` object. otherwise return
                unwrapped result from `self._ssh.__call__()`.
            _wait: if `True`, block until command terminates (or connection
                fails). _w is an alias.
            _quiet: if `False`, print stdout and stderr, should the process
                return any before this function terminates. Generally best
                used with _wait=True.
            **kwargs: kwargs to pass to `self._ssh.__call__()`.

        Returns:
            object representing executed process.
        """
        meta_options = {"_viewer": _viewer, "_wait": _wait, "_quiet": _quiet}
        return self._ssh(command, *args, **meta_options, **kwargs)

    @connectwrap
    def con(
        self,
        command: str,
        *args: Union[int, str, float],
        _poll: float = 0.05,
        _timeout: Optional[float] = None,
        _return_viewer: bool = False,
        **kwargs: Union[int, str, float, bool],
    ) -> Optional[Viewer]:
        """
        Run a command 'console-style' by delegating to the underlying `SSH`
        object's `con()` method: pauses for output and pretty-prints it to
        stdout, as if you were watching a terminal emulator.

        Does not return a process abstraction by default (pass
        _return_viewer=True if you want one). Fun in interactive environments.

        Args:
            command: command name or full text of command
                (see `hostess.subutils.RunCommand.__call__()` for details on
                calling conventions).
            *args: positional args forwarded to `self._ssh.con()`.
            _poll: polling rate for process output, in seconds
            _timeout: if not None, raise a TimeoutError if this many seconds
                pass before receiving additional output from process (or
                process exit).
            _return_viewer: if True, return a Viewer for the process once it
                exits. Otherwise, return None.
            **kwargs: keyword args forwarded to `self._ssh.con()`.

        Returns:
            A Viewer if _return_viewer is True; otherwise None.
        """
        console_options = {
            "_poll": _poll,
            "_timeout": _timeout,
            "_return_viewer": _return_viewer,
        }
        return self._ssh.con(command, *args, **console_options, **kwargs)

    # noinspection PyTypeChecker
    @connectwrap
    def commands(
        self,
        commands: Sequence[str],
        op: Literal["and", "xor", "then"] = "then",
        _con: bool = False,
        **kwargs,
    ) -> Optional[Processlike]:
        """
        Remotely run several shell commands joined into one instruction,
        e.g. `this && that && theother && etcetera`. The commands are
        combined with `hs.chain()` and then run via `self.con()` or
        `self.command()`.

        Args:
            commands: commands to chain together.
            op: logical operator to connect commands.
            _con: run 'console-style', pretty-printing rather than
                returning output
            **kwargs: forwarded to `self.con()` or `self.command()`.

        Returns:
            abstraction representing executed process, or None if
                _con is True.
        """
        chained = hs.chain(commands, op)
        runner = self.con if _con is True else self.command
        return runner(chained, **kwargs)

    @connectwrap
    def notebook(
        self, **connect_kwargs: Union[int, str, bool]
    ) -> NotebookConnection:
        """
        Start a Jupyter Notebook on the instance and tunnel to it for local
        access, by handing this instance's `SSH` object to
        `jupyter_connect()`.

        Args:
            connect_kwargs: arguments for notebook execution/connection. see
                `ssh.jupyter_connect()` for complete signature.

        Returns:
            structure containing results of tunneled Notebook execution.
        """
        # make sure an SSH object exists before we hand it over
        self._prep_connection()
        ssh = self._ssh
        return jupyter_connect(ssh, **connect_kwargs)

    @connectwrap
    def install_conda(
        self,
        installer_url: str = CONDA_DEFAULTS['installer_url'],
        prefix: str = CONDA_DEFAULTS['prefix'],
        **kwargs: bool
    ) -> Processlike:
        """
        Install a Conda Python distribution on the instance: fetch the
        installer script with `wget`, then run it with `sh ... -b -u -p
        prefix`. The two steps are chained with "and".

        Args:
            installer_url: url of install script; by default, the latest
                miniforge3 Linux x86_64 installer.
            prefix: path for Conda installation. If a Conda installation
                already exists at this path, it will be updated. Defaults
                to $HOME/miniforge3.
            kwargs: kwargs to pass to `self.commands()`. Only meta-options are
                recommended.

        Returns:
            Output of `self.commands()` for installer script fetch/execution.
        """
        # the script is saved under the last path component of the url
        installer_name = Path(installer_url).name
        fetch = f"wget {installer_url}"
        run_installer = f"sh {installer_name} -b -u -p {prefix}"
        # noinspection PyArgumentList
        return self.commands([fetch, run_installer], "and", **kwargs)

    def start(
        self,
        return_response: bool = False,
        wait: bool = True,
        connect: bool = False,
        maxtries: int = 40,
    ) -> Optional[dict]:
        """
        Start the instance, then refresh locally cached state.

        Args:
            return_response: if True, return API response.
            wait: if True (and `connect` is False), block until the instance
                is running, printing progress if `self.verbose` is True.
            connect: if True, block until the instance is running and then
                call `wait_on_connection()` to establish an SSH connection.
            maxtries: passed to `wait_on_connection()`. Only used if
                `connect` is True.

        Returns:
            API response if `return_response` is True; otherwise None.
        """
        response = self.instance_.start()
        self.update()
        if connect is True:
            self.wait_until_running()
            self.wait_on_connection(maxtries)
        elif (wait is True) and (connect is False):
            announce = self.verbose is True
            if announce:
                print("waiting until instance is running...", end="")
            self.wait_until_running()
            if self.verbose is True:
                print("running.")
        if return_response is True:
            return response
    def stop(self, return_response: bool = False) -> Optional[dict]:
        """
        Stop the instance, then refresh locally cached state. Does not wait
        for the instance to finish stopping.

        Args:
            return_response: if True, return API response.

        Returns:
            API response if `return_response` is True; otherwise None.
        """
        api_response = self.instance_.stop()
        self.update()
        return api_response if return_response is True else None

    def terminate(self, return_response: bool = False) -> Optional[dict]:
        """
        Terminate (aka delete) the instance, then refresh locally cached
        state. The royal road to cloud cost management. Please note that
        this action is permanent and cannot be undone.

        Args:
            return_response: if True, return the API response.

        Returns:
            API response if `return_response` is True; otherwise None.
        """
        api_response = self.instance_.terminate()
        self.update()
        return api_response if return_response is True else None

    @connectwrap
    def put(
        self,
        source: Union[str, Path, IO, bytes],
        target: Union[str, Path],
        *args: Any,
        literal_str: bool = False,
        **kwargs: Any,
    ) -> dict:
        """
        Write a local file or in-memory data to a file on the instance by
        delegating to the underlying `SSH` object's `put()`.

        Args:
            source: filelike object or path to local file. note that filelike
                objects will be closed during put operation.
            target: write path on instance
            args: additional arguments to pass to underlying put method
            literal_str: if True and `source` is a `str`, write `source`
                into `target` as text rather than interpreting `source` as a
                path to a local file
            kwargs: additional kwargs to pass to underlying put command

        Returns:
            dict giving transfer metadata: local, remote, host, and port
        """
        upload = self._ssh.put
        transfer_info = upload(
            source, target, *args, literal_str=literal_str, **kwargs
        )
        return transfer_info

    @connectwrap
    def get(
        self,
        source: Union[str, Path],
        target: Union[str, Path, IO],
        *args: Any,
        **kwargs: Any,
    ) -> dict:
        """
        Copy a file from the instance to local by delegating to the
        underlying `SSH` object's `get()`, then tag the returned metadata
        with this instance's name if it has one.

        Args:
            source: path to file on instance
            target: path to local file, or a filelike object (such as
                io.BytesIO)
            *args: args to pass to underlying get method
            **kwargs: kwargs to pass to underlying get method

        Returns:
            dict giving transfer metadata: local, remote, host, port, and this
                instance's name, if any
        """
        transfer_info = self._ssh.get(source, target, *args, **kwargs)
        instance_name = self.name
        if instance_name is not None:
            transfer_info["name"] = instance_name
        return transfer_info

    @connectwrap
    def read(
        self,
        source: Union[str, Path],
        mode: Literal["r", "rb"] = "r",
        encoding: str = "utf-8",
        as_buffer: bool = False,
    ) -> Union[io.BytesIO, io.StringIO, bytes, str]:
        """
        Read a file from the instance directly into memory by delegating to
        the underlying `SSH` object's `read()`.

        Args:
            source: path to file on instance
            mode: 'r' to read file as text; 'rb' to read file as bytes
            encoding: encoding for text, used only if `mode` is 'r'
            as_buffer: if True, return BytesIO/StringIO; if False, return
                bytes/str

        Returns:
            Contents of remote file, as a buffer or as bytes/str depending
                on `as_buffer`
        """
        remote_read = self._ssh.read
        contents = remote_read(source, mode, encoding, as_buffer)
        return contents

    @connectwrap
    def read_csv(
        self,
        source: Union[str, Path],
        encoding: str = "utf-8",
        **csv_kwargs: Any,
    ) -> pd.DataFrame:
        """
        Read a CSV-like file from the instance into a pandas DataFrame by
        delegating to the underlying `SSH` object's `read_csv()`.

        Args:
            source: path to CSV-like file on instance
            encoding: encoding for text
            csv_kwargs: kwargs to pass to pd.read_csv

        Returns:
            DataFrame created from contents of remote CSV file
        """
        # NOTE(review): csv_kwargs is handed over as a single positional
        # dict, not unpacked with ** -- confirm that SSH.read_csv() expects
        # it in that form.
        remote_read_csv = self._ssh.read_csv
        return remote_read_csv(source, encoding, csv_kwargs)

    @cache
    @connectwrap
    def conda_env(self, env: str = "base") -> str:
        """
        Look up the root directory of a named conda environment on the
        instance, via `find_conda_env()`.

        Args:
            env: name of conda environment

        Returns:
            absolute path to root directory of conda environment.

        Raises:
            FileNotFoundError: if environment cannot be found.
        """
        # NOTE(review): results are memoized by the @cache decorator above,
        # presumably keyed on (self, env) -- confirm that stale results
        # after environment changes on the instance are acceptable.
        env_root = find_conda_env(self._ssh, env)
        return env_root

    @cache
    @connectwrap
    def find_package(self, package: str, env: Optional[str] = None) -> str:
        """
        Find the location of an installed Python package on the instance using
        `pip show`.

        Args:
            package: name of package (e.g. 'requests')
            env: optional name of conda environment. If None, uses whatever
                `pip` is on the remote user's $PATH, if any.

        Returns:
            Absolute path to parent directory of package (e.g.
                "/home/ubuntu/miniforge3/lib/python3.12/site-packages")

        Raises:
            OSError: if `pip show` wrote anything to stderr
            FileNotFoundError: if the output of `pip show` contains no
                "Location:" line
        """
        if env is None:
            pip = "pip"
        else:
            # join explicitly so the path is right whether or not the
            # environment root comes back with a trailing slash
            env_root = str(self.conda_env(env)).rstrip("/")
            pip = f"{env_root}/bin/pip"
        result = self.command(f"{pip} show {package}", _wait=True)
        if len(result.stderr) > 0:
            raise OSError(
                f"pip show did not run successfully: {result.stderr[0]}"
            )
        # check for a failed match directly rather than catching
        # AttributeError, which could hide unrelated errors
        location = re.search(r"Location:\s+(.*?)\n", "".join(result.stdout))
        if location is None:
            raise FileNotFoundError("package not found")
        return location.group(1)

    def compile_env(self):
        """Placeholder; currently does nothing and returns None."""
        return None

    def update(self):
        """
        Reload the underlying boto3 instance and copy its state name, IP
        (public or private, per `self.address_type`), tags, and 'Name' tag
        into this object's attributes.
        """
        boto_instance = self.instance_
        boto_instance.load()
        self.state = boto_instance.state["Name"]
        ip_attribute = f"{self.address_type}_ip_address"
        self.ip = getattr(boto_instance, ip_attribute)
        self.tags = tag_dict(boto_instance.tags)
        self.name = self.tags.get("Name")

    def wait_until(self, state: Literal[InstanceState], timeout: float = 65):
        """
        Block until the instance's state equals `state`, calling `update()`
        (which refreshes `state` and `ip`, among others) on each pass.

        Args:
            state: name of target instance state
            timeout: how long, in seconds, to wait before timing out
        """

        # noinspection PyUnresolvedReferences
        valid_states = InstanceState.__args__
        assert state in valid_states
        # NOTE(review): `check_timeout` presumably raises once `timeout`
        # seconds have elapsed -- confirm against timeout_factory().
        check_timeout, _ = timeout_factory(timeout=timeout)
        while True:
            if self.state == state:
                break
            check_timeout()
            self.update()

    def wait_until_running(self, timeout: float = 65):
        """
        Block until the instance is 'running'. Shorthand for
        `self.wait_until('running', timeout)`.

        Args:
            timeout: how long, in seconds, to wait until timing out
        """
        target_state = "running"
        self.wait_until(target_state, timeout)

    def wait_until_started(self, timeout: float = 65):
        """
        Block until the instance is 'running'. A second shorthand for
        `self.wait_until('running', timeout)`.

        Args:
            timeout: how long, in seconds, to wait until timing out
        """
        target_state = "running"
        self.wait_until(target_state, timeout)

    def wait_until_stopped(self, timeout: float = 65):
        """
        Block until the instance is 'stopped'. Shorthand for
        `self.wait_until('stopped', timeout)`.

        Args:
            timeout: how long, in seconds, to wait before timing out
        """
        target_state = "stopped"
        self.wait_until(target_state, timeout)

    def wait_until_terminated(self, timeout: float = 65):
        """
        Block until the instance is 'terminated'. Shorthand for
        `self.wait_until('terminated', timeout)`.

        Args:
            timeout: how long, in seconds, to wait before timing out
        """
        target_state = "terminated"
        self.wait_until(target_state, timeout)

    def reboot(
        self, wait: bool = True, hard: bool = False, timeout: float = 65
    ):
        """
        Reboot or hard-restart the instance. Note that a hard
        restart will change the instance's ip unless it has been assigned a
        static ip. The Instance object will automatically handle this, but
        other code/processes using it will need to be informed.

        Args:
            wait: if True, block until instance state reaches 'running' again.
                if False, return as soon as the reboot (or, for a hard
                restart, the start after the stop) has been requested.
            hard: if True, perform a 'hard' restart: fully shut the instance
                down and start it up again. if False, perform a 'soft' restart.
                Note that AWS will automatically switch to a 'hard' restart if
                its attempt at a soft restart fails.
            timeout: seconds to wait for state transitions before timing out.
        """
        if hard is True:
            self.stop()
            # we always have to wait for the stop to finish before we can
            # start the instance again, regardless of `wait`
            self.wait_until_stopped(timeout)
            # don't let start() do its own blocking wait: that would ignore
            # both `wait` and `timeout`. the wait below handles it.
            self.start(wait=False)
        else:
            self.instance_.reboot()
        if wait is True:
            self.wait_until_running(timeout)

    def restart(
        self, wait: bool = True, hard: bool = False, timeout: float = 65
    ):
        """
        Alias for Instance.reboot(); all three arguments are forwarded
        unchanged, in the same positional order.
        """
        forwarded = (wait, hard, timeout)
        self.reboot(*forwarded)

    @connectwrap
    def tunnel(
        self, local_port: int, remote_port: int
    ) -> tuple[Callable, dict[str, Union[int, str, Path]]]:
        """
        Open an SSH tunnel from a local port to a port on the instance.

        Args:
            local_port: port number for local end of tunnel.
            remote_port: port number for remote end of tunnel.

        Returns:
            signaler: function to shut down tunnel
            tunnel_metadata: dict of metadata about the tunnel
        """
        ssh = self._ssh
        ssh.tunnel(local_port, remote_port)
        # the tunnel just opened is taken to be the last entry of ssh.tunnels
        newest = ssh.tunnels[-1]
        return newest

    @connectwrap
    def call_python(
        self,
        module: str,
        func: Optional[str] = None,
        payload: Any = None,
        *,
        compression: CallerCompressionType = None,
        serialization: CallerSerializationType = None,
        interpreter: Optional[str] = None,
        env: Optional[str] = None,
        splat: CallerUnpackingOperator = "",
        payload_encoded: bool = False,
        print_result: bool = True,
        filter_kwargs: bool = True,
        **command_kwargs: bool,
    ) -> Viewer:
        """
        Run a Python function on the instance by building a shell command
        with `hostess.caller.generic_python_endpoint()` and executing it via
        `self.command()`. See that function for more verbose documentation
        and technical discussion.

        Args:
            module: name of, or path to, the target module
            func: name of the function to call. must be a member of the
                target module (or explicitly imported by that module).
            payload: object from which to construct func's call arguments.
                must specify appropriate `serialization` if it cannot be
                reconstructed from its string representation.
            compression: compression for payload. 'gzip' or None.
            serialization: how to serialize `payload`. 'json' means
                serialize to JSON; 'pickle' means serialize using pickle;
                None means just use the string representation of `payload`.
            interpreter: path to the Python interpreter to name in the shell
                command. Mutually exclusive with `env`.
            env: optional name of a conda environment. If given, the
                interpreter used is `bin/python` under the directory returned
                by `self.conda_env(env)`. If neither `env` nor `interpreter`
                is given, plain `python` is used (i.e. whichever `python`
                the remote shell resolves, if any).
            splat: operator for splatting the payload into the function
                call. `"*"` means `func(*payload)`, `"**"` means
                `func(**payload)`; no operator (the default) means
                `func(payload)`.
            payload_encoded: set to True if you have already
                compressed/serialized `payload` with the specified methods.
            print_result: if True, the function call will print its result
                to stdout, so it will be available in the `.out` attribute
                of the returned Viewer.
            filter_kwargs: attempt to filter `func`-inappropriate kwargs
                from `payload`? Does nothing if `splat != "**"`.
            **command_kwargs: additional kwargs to pass to `self.command()`.
                Note that `_viewer=False` is invalid; this function always
                returns a `Viewer`. Only `RunCommand` meta-options are
                valid; you can't pass extra command-line-type kwargs. If you
                try, it will break the call.

        Returns:
            `Viewer` wrapping executed Python process.

        Raises:
            TypeError: if both `interpreter` and `env` are passed.
        """
        if not (interpreter is None or env is None):
            raise TypeError(
                "Please pass either the name of a conda environment or the "
                "path to a Python interpreter (one or the other, not both)."
            )
        if env is not None:
            # derive the interpreter path from the environment's root dir
            env_root = self.conda_env(env)
            if not env_root.endswith("/"):
                env_root += "/"
            interpreter = f"{env_root}bin/python"
        elif interpreter is None:
            interpreter = "python"
        endpoint_options = {
            "compression": compression,
            "serialization": serialization,
            "splat": splat,
            "payload_encoded": payload_encoded,
            "return_result": print_result,
            "filter_kwargs": filter_kwargs,
            "interpreter": interpreter,
            "for_bash": True,
        }
        shell_command = generic_python_endpoint(
            module, func, payload, **endpoint_options
        )
        return self.command(shell_command, _viewer=True, **command_kwargs)

    # TODO: It is permitted to have more than one security group apply to
    #  an instance. This method will currently modify _all_ security
    #  groups attached to a particular instance. We only really ever use
    #  one security group per instance... but this should be fixed
    #  somehow.
    def rebase_ssh_ingress_ip(
        self,
        ip: Optional[str] = None,
        force: bool = False,
        revoke: bool = True,
        verbose: bool = True
    ):
        """
        Modify this instance's security group(s) to permit SSH access from
        an IP (by default, the caller's external IP). IMPORTANT: by default,
        this method revokes all other inbound access permissions, because it
        is good security practice to not slowly whitelist the entire world.
        Pass `revoke=False` if there are permissions you need to retain.

        Every security group attached to the instance is modified.

        Args:
            ip: permit SSH access from this IP. if None, use the caller's
                external IP.
            force: if True, will force modification even of default security
                groups.
            revoke: if True, will revoke all other inbound permissions.
            verbose: if True, print actions to stdout

        Raises:
            DefaultSecurityGroupError: if a helper refuses to modify a
                default security group (re-raised with a hint to pass
                `force=True`).
        """
        for group_ref in self.instance_.security_groups:
            # look the group up through this Instance's own boto resource so
            # the request uses the same session the Instance was built with,
            # rather than a freshly-created default resource.
            sg = self.resource.SecurityGroup(group_ref["GroupId"])
            try:
                if revoke is True:
                    revoke_ingress(
                        sg,
                        verbose=verbose,
                        force_modification_of_default_sg=force,
                    )
                authorize_ssh_ingress_from_ip(
                    sg,
                    ip=ip,
                    force_modification_of_default_sg=force,
                    verbose=verbose,
                )
            except DefaultSecurityGroupError as err:
                # same exception type, friendlier message; keep the original
                # error attached as the explicit cause
                raise DefaultSecurityGroupError(
                    "\tRefusing to modify permissions of a default security "
                    "group. Pass force=True to override."
                ) from err

    def price_per_hour(self):
        """
        Look up this instance's hourly price.

        Returns:
            whatever `instance_price_per_hour()` returns for this Instance.
        """
        hourly_price = instance_price_per_hour(self)
        return hourly_price

    def __repr__(self):
        """
        One-line summary: name (if any) and instance id, followed by
        instance type, availability zone, and ip (or "(no ip)").
        """
        location = "(no ip)" if self.ip is None else f"at {self.ip}"
        summary = f"{self.instance_type} in {self.zone} {location}"
        if self.name is not None:
            label = f"{self.name} ({self.instance_id})"
        else:
            label = f"{self.instance_id}"
        return f"{label}: {summary}"

    def __str__(self):
        """Same text as `__repr__()`."""
        text = self.__repr__()
        return text

    # class-level defaults, so that `self.key` / `self.key_errstring` resolve
    # to None on an instance until something assigns them (presumably the
    # key-finding logic -- confirm against `_maybe_find_key`).
    key = None
    key_errstring = None
__init__(description, *, uname=GENERAL_DEFAULTS['uname'], key=None, client=None, resource=None, session=None, use_private_ip=False, verbose=True)

Parameters:

Name Type Description Default
description Union[InstanceIdentifier, InstanceDescription]

unique identifier for the instance, either its public / private IP (whichever is accessible from where you're initializing this object), the full instance identifier, or an InstanceDescription as returned by ls_instances().

required
uname str

username for SSH access to the instance.

GENERAL_DEFAULTS['uname']
key Optional[Path]

path to keyfile. You don't usually need to explicitly specify it. The constructor can find it automatically given the following conditions:

  1. You keep the keyfile in a 'standard' location (like ~/.ssh/; see config.config.GENERAL_DEFAULTS['secrets_folders'] for a list) or a directory you specify in config.user_config.GENERAL_DEFAULTS['secrets_folders'],
  2. its filename matches the key name given in the API response describing the instance.

If those aren't both true, you'll need to pass this value to connect to the instance via SSH.

None
client Optional[BaseClient]

boto client. creates default client if not given.

None
resource Optional[ServiceResource]

boto resource. creates default resource if not given.

None
session Optional[Session]

boto session. creates default session if not given.

None
verbose bool

if True, print some changes in status to stdout.

True
Source code in hostess/aws/ec2.py
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
def __init__(
    self,
    description: Union[InstanceIdentifier, InstanceDescription],
    *,
    uname: str = GENERAL_DEFAULTS["uname"],
    key: Optional[Path] = None,
    client: Optional[botocore.client.BaseClient] = None,
    resource: Optional[boto3.resources.base.ServiceResource] = None,
    session: Optional[boto3.Session] = None,
    use_private_ip: bool = False,
    verbose: bool = True
):
    """
    Args:
        description: unique identifier for the instance, either its public
            / private IP (whichever is accessible from where you're
            initializing this object), the full instance identifier, or
            an InstanceDescription as returned by ls_instances().
        uname: username for SSH access to the instance.
        key: path to keyfile. You don't usually need to explicitly specify
            it. The constructor can find it automatically given the
            following conditions:

            1. You keep the keyfile in a 'standard' location (like
            ~/.ssh/; see config.config.GENERAL_DEFAULTS['secrets_folders']
            for a list) or a directory you specify in
            config.user_config.GENERAL_DEFAULTS['secrets_folders'],
            2. its filename matches the key name given in the API
            response describing the instance.

            If those aren't both true, you'll need to pass this value to
            connect to the instance via SSH.
        client: boto client. creates default client if not given.
        resource: boto resource. creates default resource if not given.
        session: boto session. creates default session if not given.
        use_private_ip: if True, use the instance's private IP address as
            `self.ip`; otherwise use its public IP address.
        verbose: if True, print some changes in status to stdout.

    Raises:
        ValueError: if `description` is a mapping with neither an "id" nor
            an "InstanceId" key.
        IndexError: if `description` looks like an IP address but
            `ls_instances()` returns no match for it.
    """
    self.session = session if session is not None else make_boto_session()
    self.resource = init_resource("ec2", resource, self.session)
    self.client = init_client("ec2", client, self.session)
    self.verbose = verbose

    if isinstance(description, str):
        # if it's got periods in it, assume it's an IPv4 address
        if "." in description:
            matches = ls_instances(description, client=self.client)
            try:
                instance_id = matches[0]["id"]
            except IndexError as err:
                # same exception type as before, but say what went wrong
                raise IndexError(
                    f"no instance found with IP address {description}"
                ) from err
        # otherwise assume it's the instance id
        else:
            instance_id = description
    # otherwise assume it's a full description like from
    # ls_instances / ec2.describe_instance
    elif "id" in description:
        instance_id = description["id"]
    elif "InstanceId" in description:
        instance_id = description["InstanceId"]
    else:
        raise ValueError("can't interpret this description.")
    instance_ = self.resource.Instance(instance_id)
    self.instance_id = instance_id
    self.address_type = "private" if use_private_ip is True else "public"
    # always define self.ip (None when the resource exposes no such
    # attribute) so that later reads of self.ip cannot hit AttributeError
    self.ip = getattr(instance_, f"{self.address_type}_ip_address", None)
    self.instance_type = instance_.instance_type
    self.tags = tag_dict(instance_.tags)
    self.launch_time = instance_.launch_time
    self.state = instance_.state["Name"]
    self.request_cache = []
    self.name = self.tags.get("Name")
    self.zone = instance_.placement["AvailabilityZone"]
    # the SSH connection object is created later, on demand
    self.uname, self.passed_key, self._ssh = uname, key, None
    self.instance_ = instance_
_check_unready()

Is the instance obviously not ready for SSH connections?

Returns:

Type Description
Union[str, bool]

"state" if it is not running or transitioning to running; comma-separated list of "ip"/"uname"/"key" if any are missing. Otherwise False (meaning it looks ready).

Source code in hostess/aws/ec2.py
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
def _check_unready(self) -> Union[str, bool]:
    """
    Report anything that obviously rules out an SSH connection right now.

    Returns:
        "state" if the instance is neither running nor pending;
            otherwise a comma-separated list of whichever of
            "ip"/"uname"/"key" are None; otherwise False (meaning the
            instance looks ready).
    """
    state_allows_ssh = self.state in ("running", "pending")
    if not state_allows_ssh:
        return "state"
    required = ("ip", "uname", "key")
    missing = ", ".join(
        filter(lambda attr: getattr(self, attr) is None, required)
    )
    # an empty string means nothing is missing
    return missing if missing else False
_drop_ssh()

close an existing SSH connection and attempt to purge it from memory. should only be called by other methods of Instance.

Source code in hostess/aws/ec2.py
638
639
640
641
642
643
644
645
def _drop_ssh(self):
    """
    Close the current SSH connection object and discard this Instance's
    reference to it, leaving `self._ssh` set to None. Assumes `self._ssh`
    is not None; should only be called by other methods of Instance.
    """
    self._ssh.close()
    # remove the attribute outright, then recreate it as None
    delattr(self, "_ssh")
    setattr(self, "_ssh", None)
_prep_connection(*, lazy=True, maxtries=5, delay=1)

try to prep, and optionally establish, a SSH connection to the instance. if no closed / latent connection exists, create one; otherwise, use the existing one. if the instance isn't running, automatically replace any existing connection (which will be closed anyway by then, or should be).

Parameters:

Name Type Description Default
lazy bool

don't establish the connection immediately; wait until some method needs it. other arguments do nothing if this is True.

True
maxtries int

maximum times to re-attempt failed connections

5
delay float

how many seconds to wait after subsequent attempts

1
Source code in hostess/aws/ec2.py
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
def _prep_connection(
    self, *, lazy: bool = True, maxtries: int = 5, delay: float = 1
):
    """
    try to prep, and optionally establish, a SSH connection to the
    instance. if no closed / latent connection exists, create one;
    otherwise, use the existing one. if the instance isn't running,
    automatically replace any existing connection (which will be closed
    anyway by then, or should be).

    Args:
        lazy: don't establish the connection immediately; wait until some
            method needs it. other arguments do nothing if this is True.
        maxtries: maximum times to re-attempt failed connections
        delay: how many seconds to wait after each failed attempt

    Raises:
        NoKeyError: if `self.key_errstring` is set after the key search.
        ConnectionError: if `lazy` is False and no attempt succeeded; the
            last underlying error, if any, is attached as its cause.
    """
    if self.is_connected:  # nothing to do
        return
    self._maybe_find_key()
    if self.key_errstring is not None:
        # we want to raise this error immediately
        raise NoKeyError(self.key_errstring)
    self._update_ssh_info()
    if self._ssh is None:
        self._ssh = SSH.connect(self.ip, self.uname, self.key)
    if lazy is True:
        return
    connection_error = None
    for _ in range(maxtries):
        try:
            self._ssh.conn.open()
            return
        except (
            AttributeError,
            SSHException,
            NoValidConnectionsError,
        ) as ce:
            connection_error = ce
            time.sleep(delay)
            # refresh instance info before the next attempt
            self.update()
    # chain the last failure so its traceback is not lost
    raise ConnectionError(
        "Unable to establish SSH connection to instance. It may not yet "
        "be ready to accept SSH connections, or something might be wrong "
        f"with configuration. Reported error: {connection_error}"
    ) from connection_error
_update_ssh_info()

update SSH connectability info about Instance. raise an error if required info is not available. automatically remove any existing prepped connection if instance is not running.

Source code in hostess/aws/ec2.py
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
def _update_ssh_info(self):
    """
    update SSH connectability info about Instance. raise an error if
    required info is not available. automatically remove any existing
    prepped connection if instance is not running.

    Raises:
        ConnectionError: if `_check_unready()` reports a problem. The
            message lists each problem found as a numbered item.
    """
    self.update()
    if (unready := self._check_unready()) is False:
        return
    problems = []
    if "state" in unready:
        problems.append(
            "It is currently not running. Try starting "
            "the instance with .start()."
        )
        if self._ssh is not None:
            self._drop_ssh()
    # only mention missing IP if instance is running -- we don't expect
    # a stopped instance to have an IP.
    elif "ip" in unready:
        problems.append(
            "Cannot find IP for instance. It "
            "may be in the process of IP assignment; try waiting a "
            "moment. It may also be configured to have no appropriate IP."
        )
    if "uname" in unready:
        # previously a missing username produced no explanation at all
        problems.append("No username for SSH access has been specified.")
    if "key" in unready:
        problems.append(f"{self.key_errstring}")
    # number the items and separate them with spaces so they don't run
    # together in the final message
    numbered = " ".join(
        f"{n}. {text}" for n, text in enumerate(problems, start=1)
    )
    raise ConnectionError(
        f"Unable to execute commands on {self.instance_id}. {numbered}"
    )
call_python(module, func=None, payload=None, *, compression=None, serialization=None, interpreter=None, env=None, splat='', payload_encoded=False, print_result=True, filter_kwargs=True, **command_kwargs)

call a Python function on the instance. See hostess.caller.generic_python_endpoint() for more verbose documentation and technical discussion.

Parameters:

Name Type Description Default
module str

name of, or path to, the target module

required
func Optional[str]

name of the function to call. must be a member of the target module (or explicitly imported by that module).

None
payload Any

object from which to construct func's call arguments. must specify appropriate serialization if it cannot be reconstructed from its string representation.

None
interpreter Optional[str]

path to Python interpreter that should be specified in the shell command.

None
env Optional[str]

optional name of conda environment. both interpreter and env cannot be specified. If neither are specified, simply uses the first python binary on the remote user's $PATH, if any.

None
compression CallerCompressionType

compression for payload. 'gzip' or None.

None
serialization CallerSerializationType

how to serialize payload. 'json' means serialize to JSON; 'pickle' means serialize using pickle; None means just use the string representation of payload.

None
splat CallerUnpackingOperator

Operator for splatting the payload into the function call. "*" means func(*payload), "**" means func(**payload); None means func(payload).

''
payload_encoded bool

set to True if you have already compressed/serialized payload with the specified methods.

False
print_result bool

if True, the function call will print its result to stdout, so it will be available in the .out attribute of the returned Viewer.

True
filter_kwargs bool

Attempt to filter func-inappropriate kwargs from payload? Does nothing if splat != "**".

True
**command_kwargs bool

additional kwargs to pass to self.command(). Note that _viewer=False is invalid; this function always returns a Viewer. Only RunCommand meta-options are valid; you can't pass extra command-line-type kwargs. If you try, it will break the call.

{}

Returns:

Type Description
Viewer

Viewer wrapping executed Python process.

Source code in hostess/aws/ec2.py
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
@connectwrap
def call_python(
    self,
    module: str,
    func: Optional[str] = None,
    payload: Any = None,
    *,
    compression: CallerCompressionType = None,
    serialization: CallerSerializationType = None,
    interpreter: Optional[str] = None,
    env: Optional[str] = None,
    splat: CallerUnpackingOperator = "",
    payload_encoded: bool = False,
    print_result: bool = True,
    filter_kwargs: bool = True,
    **command_kwargs: bool,
) -> Viewer:
    """
    Run a Python function on the instance by building a shell command
    with `hostess.caller.generic_python_endpoint()` and executing it via
    `self.command()`. See that function for more verbose documentation
    and technical discussion.

    Args:
        module: name of, or path to, the target module
        func: name of the function to call. must be a member of the
            target module (or explicitly imported by that module).
        payload: object from which to construct func's call arguments.
            must specify appropriate `serialization` if it cannot be
            reconstructed from its string representation.
        compression: compression for payload. 'gzip' or None.
        serialization: how to serialize `payload`. 'json' means
            serialize to JSON; 'pickle' means serialize using pickle;
            None means just use the string representation of `payload`.
        interpreter: path to the Python interpreter to name in the shell
            command. Mutually exclusive with `env`.
        env: optional name of a conda environment. If given, the
            interpreter used is `bin/python` under the directory returned
            by `self.conda_env(env)`. If neither `env` nor `interpreter`
            is given, plain `python` is used (i.e. whichever `python`
            the remote shell resolves, if any).
        splat: operator for splatting the payload into the function
            call. `"*"` means `func(*payload)`, `"**"` means
            `func(**payload)`; no operator (the default) means
            `func(payload)`.
        payload_encoded: set to True if you have already
            compressed/serialized `payload` with the specified methods.
        print_result: if True, the function call will print its result
            to stdout, so it will be available in the `.out` attribute
            of the returned Viewer.
        filter_kwargs: attempt to filter `func`-inappropriate kwargs
            from `payload`? Does nothing if `splat != "**"`.
        **command_kwargs: additional kwargs to pass to `self.command()`.
            Note that `_viewer=False` is invalid; this function always
            returns a `Viewer`. Only `RunCommand` meta-options are
            valid; you can't pass extra command-line-type kwargs. If you
            try, it will break the call.

    Returns:
        `Viewer` wrapping executed Python process.

    Raises:
        TypeError: if both `interpreter` and `env` are passed.
    """
    if not (interpreter is None or env is None):
        raise TypeError(
            "Please pass either the name of a conda environment or the "
            "path to a Python interpreter (one or the other, not both)."
        )
    if env is not None:
        # derive the interpreter path from the environment's root dir
        env_root = self.conda_env(env)
        if not env_root.endswith("/"):
            env_root += "/"
        interpreter = f"{env_root}bin/python"
    elif interpreter is None:
        interpreter = "python"
    endpoint_options = {
        "compression": compression,
        "serialization": serialization,
        "splat": splat,
        "payload_encoded": payload_encoded,
        "return_result": print_result,
        "filter_kwargs": filter_kwargs,
        "interpreter": interpreter,
        "for_bash": True,
    }
    shell_command = generic_python_endpoint(
        module, func, payload, **endpoint_options
    )
    return self.command(shell_command, _viewer=True, **command_kwargs)
command(command, *args, _viewer=True, _wait=False, _quiet=True, **kwargs)

run a command in the instance's default interpreter.

Parameters:

Name Type Description Default
command str

command name or full text of command (see hostess.subutils.RunCommand.__call__() for details on calling conventions).

required
*args Union[int, str, float]

args to pass to self._ssh.__call__().

()
_viewer bool

if True, return a Viewer object. otherwise return unwrapped result from self._ssh.__call__().

True
_wait bool

if True, block until command terminates (or connection fails). _w is an alias.

False
_quiet bool

if False, print stdout and stderr, should the process return any before this function terminates. Generally best used with _wait=True.

True
**kwargs Union[int, str, float, bool]

kwargs to pass to self._ssh.__call__().

{}

Returns:

Type Description
Processlike

object representing executed process.

Source code in hostess/aws/ec2.py
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
@connectwrap
def command(
    self,
    command: str,
    *args: Union[int, str, float],
    _viewer: bool = True,
    _wait: bool = False,
    _quiet: bool = True,
    **kwargs: Union[int, str, float, bool],
) -> Processlike:
    """
    Execute a command on the instance by calling `self._ssh` with the
    given arguments.

    Args:
        command: command name or full text of command
            (see `hostess.subutils.RunCommand.__call__()` for details on
            calling conventions).
        *args: args to pass to `self._ssh.__call__()`.
        _viewer: if `True`, return a `Viewer` object. otherwise return
            unwrapped result from `self._ssh.__call__()`.
        _wait: if `True`, block until command terminates (or connection
            fails). _w is an alias.
        _quiet: if `False`, print stdout and stderr, should the process
            return any before this function terminates. Generally best
            used with _wait=True.
        **kwargs: kwargs to pass to `self._ssh.__call__()`.

    Returns:
        object representing executed process.
    """
    # meta-options are always passed explicitly, ahead of the caller's kwargs
    meta_options = {"_viewer": _viewer, "_wait": _wait, "_quiet": _quiet}
    run = self._ssh
    return run(command, *args, **meta_options, **kwargs)
commands(commands, op='then', _con=False, **kwargs)

Remotely run a multi-part shell command. Convenience method for constructing long shell instructions like this && that && theother && etcetera.

Parameters:

Name Type Description Default
commands Sequence[str]

commands to chain together.

required
op Literal['and', 'xor', 'then']

logical operator to connect commands.

'then'
_con bool

run 'console-style', pretty-printing rather than returning output

False

Returns:

Type Description
Optional[Processlike]

abstraction representing executed process, or None if _con is True.

Source code in hostess/aws/ec2.py
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
@connectwrap
def commands(
    self,
    commands: Sequence[str],
    op: Literal["and", "xor", "then"] = "then",
    _con: bool = False,
    **kwargs,
) -> Optional[Processlike]:
    """
    Run several shell commands on the instance as one chained instruction,
    e.g. `this && that && theother && etcetera`. The chain is built by
    `hs.chain(commands, op)`.

    Args:
        commands: commands to chain together.
        op: logical operator to connect commands.
        _con: run 'console-style' via `self.con()`, pretty-printing
            rather than returning output. Otherwise run via
            `self.command()`.
        **kwargs: passed through to whichever of `self.con()` /
            `self.command()` is used.

    Returns:
        abstraction representing executed process, or None if
            _con is True.
    """
    chained = hs.chain(commands, op)
    runner = self.con if _con is True else self.command
    return runner(chained, **kwargs)
compile_env()
Source code in hostess/aws/ec2.py
1024
1025
1026
def compile_env(self):
    """Placeholder: currently does nothing and returns None."""
    return None
con(command, *args, _poll=0.05, _timeout=None, _return_viewer=False, **kwargs)

pretend you are running a command on the instance while looking at a terminal emulator. pauses for output and pretty-prints it to stdout.

Does not return a process abstraction by default (pass _return_viewer=True if you want one). Fun in interactive environments.

Parameters:

Name Type Description Default
command str

command name or full text of command (see hostess.subutils.RunCommand.__call__() for details on calling conventions).

required
_poll float

polling rate for process output, in seconds

0.05
_timeout Optional[float]

if not None, raise a TimeoutError if this many seconds pass before receiving additional output from process (or process exit).

None
_return_viewer bool

if True, return a Viewer for the process once it exits. Otherwise, return None.

False
**kwargs Union[int, str, float, bool]

kwargs to pass to Instance.command().

{}

Returns:

Type Description
Optional[Viewer]

A Viewer if _return_viewer is True; otherwise None.

Source code in hostess/aws/ec2.py
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
@connectwrap
def con(
    self,
    command: str,
    *args: Union[int, str, float],
    _poll: float = 0.05,
    _timeout: Optional[float] = None,
    _return_viewer: bool = False,
    **kwargs: Union[int, str, float, bool],
) -> Optional[Viewer]:
    """
    Run a command on the instance 'console-style', as if watching a
    terminal emulator: pauses for output and pretty-prints it to stdout.
    Delegates to `self._ssh.con()`.

    Does not return a process abstraction by default (pass
    _return_viewer=True if you want one). Fun in interactive environments.

    Args:
        command: command name or full text of command
            (see `hostess.subutils.RunCommand.__call__()` for details on
            calling conventions).
        *args: additional positional args, forwarded to `self._ssh.con()`.
        _poll: polling rate for process output, in seconds
        _timeout: if not None, raise a TimeoutError if this many seconds
            pass before receiving additional output from process (or
            process exit).
        _return_viewer: if True, return a Viewer for the process once it
            exits. Otherwise, return None.
        **kwargs: additional kwargs, forwarded to `self._ssh.con()`.

    Returns:
        A Viewer if _return_viewer is True; otherwise None.
    """
    # console options are always passed explicitly, ahead of caller kwargs
    console_options = {
        "_poll": _poll,
        "_timeout": _timeout,
        "_return_viewer": _return_viewer,
    }
    ssh = self._ssh
    return ssh.con(command, *args, **console_options, **kwargs)
conda_env(env='base') cached

Find the root directory of a named conda environment on the instance.

Parameters:

Name Type Description Default
env str

name of conda environment

'base'

Returns:

Type Description
str

absolute path to root directory of conda environment.

Raises:

Type Description
FileNotFoundError

if environment cannot be found.

Source code in hostess/aws/ec2.py
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
@cache
@connectwrap
def conda_env(self, env: str = "base") -> str:
    """
    Locate the root directory of a named conda environment on the
    instance, via `find_conda_env()`. Results are memoized by `@cache`.

    Args:
        env: name of conda environment

    Returns:
        absolute path to root directory of conda environment.

    Raises:
        FileNotFoundError: if environment cannot be found.
    """
    # NOTE(review): @cache on a method keys on `self`, so cached entries
    # keep the Instance alive and persist across reconnects -- confirm
    # this is intended.
    env_root = find_conda_env(self._ssh, env)
    return env_root
connect(maxtries=5, delay=1)

establish SSH connection to the instance, prepping a new connection if none currently exists, but not replacing an existing one.

Parameters:

Name Type Description Default
maxtries int

maximum times to re-attempt failed connections

5
delay float

how many seconds to wait after failed attempts

1
Source code in hostess/aws/ec2.py
488
489
490
491
492
493
494
495
496
497
def connect(self, maxtries: int = 5, delay: float = 1):
    """
    Make sure there is an SSH connection to the instance. A new connection
    is prepared if none currently exists; an existing one is not replaced.

    Args:
        maxtries: maximum times to re-attempt failed connections
        delay: how many seconds to wait after failed attempts
    """
    # lazy=False is passed so the connection is prepared eagerly.
    retry_policy = {"maxtries": maxtries, "delay": delay}
    self._prep_connection(lazy=False, **retry_policy)
find(identifier=None, states=('running', 'pending', 'stopping', 'stopped'), raw_filters=None, client=None, session=None, long=False, tag_regex=True, uname=GENERAL_DEFAULTS['uname'], *, key=None, resource=None, use_private_ip=False, pick_first=False, verbose=True, **tag_filters) classmethod

Forwards relevant passed arguments to ls_instances() and returns an Instance constructed from the first match, passing relevant arguments to Instance.__init__(). It also accepts one unique argument, pick_first; if True, it will return the first instance found in the case of multiple matches; otherwise (default) raise a ValueError.

Arguments passed to ls_instances(): identifier, states, raw_filters, client, session, long, tag_regex, tag_filters

Arguments passed to Instance.__init__(): uname, key, client, resource, session, use_private_ip, verbose

See documentation for those functions for full discussion of those arguments.

Raises:

Type Description
KeyError

If ls_instances() finds no matching instances.

ValueError

If ls_instances finds more than one matching instance and pick_first is not True.

Source code in hostess/aws/ec2.py
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
@classmethod
def find(
    cls,
    identifier: Optional[InstanceIdentifier] = None,
    states: Sequence[str] = ("running", "pending", "stopping", "stopped"),
    raw_filters: Optional[Sequence[Mapping[str, str]]] = None,
    client: Optional[botocore.client.BaseClient] = None,
    session: Optional[boto3.Session] = None,
    long: bool = False,
    tag_regex: bool = True,
    uname: str = GENERAL_DEFAULTS["uname"],
    *,
    key: Optional[Path] = None,
    resource: Optional[boto3.resources.base.ServiceResource] = None,
    use_private_ip: bool = False,
    pick_first: bool = False,
    verbose: bool = True,
    **tag_filters: str
) -> "Instance":
    """
    Search for an instance with `ls_instances()` and wrap the first match
    in an `Instance`.

    Arguments forwarded to `ls_instances()`:
        `identifier`, `states`, `raw_filters`, `client`, `session`,
        `long`, `tag_regex`, `tag_filters`

    Arguments forwarded to `Instance.__init__()`:
        `uname`, `key`, `client`, `resource`, `session`, `use_private_ip`,
        `verbose`

    See the documentation of those functions for a full discussion of
    those arguments. The one argument unique to this method is
    `pick_first`: if `True`, the first match is used when the search
    returns several instances; otherwise (default) multiple matches are
    treated as an error.

    Returns:
        an Instance constructed from the first search result.

    Raises:
        KeyError: If `ls_instances()` finds no matching instances.
        ValueError: If `ls_instances` finds more than one matching
            instance and `pick_first` is not True.
    """
    matches = ls_instances(
        identifier,
        states,
        raw_filters,
        client,
        session,
        long,
        tag_regex,
        **tag_filters,
    )
    n_matches = len(matches)
    if n_matches == 0:
        raise KeyError("No matching instances found")
    ambiguous = n_matches > 1
    if ambiguous and pick_first is not True:
        raise ValueError(
            "More than one matching instance found. Pass pick_first=True "
            "to select the first matching instance. Note that instance "
            "order in search results is undefined and may not be "
            "consistent across multiple calls to this function."
        )
    constructor_options = dict(
        uname=uname,
        key=key,
        resource=resource,
        use_private_ip=use_private_ip,
        client=client,
        session=session,
        verbose=verbose,
    )
    return Instance(matches[0], **constructor_options)
find_package(package, env=None) cached

Find the location of an installed Python package on the instance using pip show.

Parameters:

Name Type Description Default
package str

name of package (e.g. 'requests')

required
env Optional[str]

optional name of conda environment. If None, uses whatever pip is on the remote user's $PATH, if any.

None

Returns:

Type Description
str

Absolute path to parent directory of package (e.g. "/home/ubuntu/miniforge3/lib/python3.12/site-packages")

Raises:

Type Description
OSError

if unable to execute pip show

FileNotFoundError

if package doesn't appear to be installed

Source code in hostess/aws/ec2.py
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
@cache
@connectwrap
def find_package(self, package: str, env: Optional[str] = None) -> str:
    """
    Find the location of an installed Python package on the instance using
    `pip show`.

    Args:
        package: name of package (e.g. 'requests')
        env: optional name of conda environment. If None, uses whatever
            `pip` is on the remote user's $PATH, if any.

    Returns:
        Absolute path to parent directory of package (e.g.
            "/home/ubuntu/miniforge3/lib/python3.12/site-packages")

    Raises:
        OSError: if `pip show` wrote anything to stderr
        FileNotFoundError: if the output of `pip show` contains no
            "Location:" line
    """
    if env is None:
        pip = "pip"
    else:
        # NOTE(review): assumes conda_env() returns a path that already
        # ends with a path separator -- confirm against find_conda_env().
        pip = f"{self.conda_env(env)}bin/pip"
    result = self.command(f"{pip} show {package}", _wait=True)
    if len(result.stderr) > 0:
        raise OSError(
            f"pip show did not run successfully: {result.stderr[0]}"
        )
    location = re.search(r"Location:\s+(.*?)\n", "".join(result.stdout))
    # Check the match explicitly rather than catching AttributeError around
    # the whole call: that handler also swallowed unrelated AttributeErrors
    # raised while running the command and misreported them as a missing
    # package.
    if location is None:
        raise FileNotFoundError("package not found")
    return location.group(1)
get(source, target, *args, **kwargs)

copy file from instance to local.

Parameters:

Name Type Description Default
source Union[str, Path]

path to file on instance

required
target Union[str, Path, IO]

path to local file, or a filelike object (such as io.BytesIO)

required
*args Any

args to pass to underlying get method

()
**kwargs Any

kwargs to pass to underlying get method

{}

Returns:

Type Description
dict

dict giving transfer metadata: local, remote, host, port, and this instance's name, if any

Source code in hostess/aws/ec2.py
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
@connectwrap
def get(
    self,
    source: Union[str, Path],
    target: Union[str, Path, IO],
    *args: Any,
    **kwargs: Any,
) -> dict:
    """
    download a file from the instance to the local machine.

    Args:
        source: path to file on instance
        target: path to local file, or a filelike object (such as
            io.BytesIO)
        *args: args to pass to underlying get method
        **kwargs: kwargs to pass to underlying get method

    Returns:
        dict giving transfer metadata: local, remote, host, port, and this
            instance's name, if any
    """
    transfer_info = self._ssh.get(source, target, *args, **kwargs)
    if self.name is None:
        # unnamed instance: return the SSH-level metadata untouched
        return transfer_info
    transfer_info["name"] = self.name
    return transfer_info
install_conda(installer_url=CONDA_DEFAULTS['installer_url'], prefix=CONDA_DEFAULTS['prefix'], **kwargs)

install a Conda Python distribution on the instance.

Parameters:

Name Type Description Default
installer_url str

url of install script; by default, the latest miniforge3 Linux x86_64 installer.

CONDA_DEFAULTS['installer_url']
prefix str

path for Conda installation. If a Conda installation already exists at this path, it will be updated. Defaults to $HOME/miniforge3.

CONDA_DEFAULTS['prefix']
kwargs bool

kwargs to pass to self.commands(). Only meta-options are recommended.

{}

Returns:

Type Description
Processlike

Output of self.commands() for installer script fetch/execution.

Source code in hostess/aws/ec2.py
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
@connectwrap
def install_conda(
    self,
    installer_url: str = CONDA_DEFAULTS['installer_url'],
    prefix: str = CONDA_DEFAULTS['prefix'],
    **kwargs: bool
) -> Processlike:
    """
    download and run a Conda installer script on the instance.

    Args:
        installer_url: url of install script; by default, the latest
            miniforge3 Linux x86_64 installer.
        prefix: path for Conda installation. If a Conda installation
            already exists at this path, it will be updated. Defaults
            to $HOME/miniforge3.
        kwargs: kwargs to pass to `self.commands()`. Only meta-options are
            recommended.

    Returns:
        Output of `self.commands()` for installer script fetch/execution.
    """
    # the script is run under the final path component of the URL it was
    # downloaded from.
    installer_name = Path(installer_url).name
    steps = [
        f"wget {installer_url}",
        f"sh {installer_name} -b -u -p {prefix}",
    ]
    # noinspection PyArgumentList
    return self.commands(steps, "and", **kwargs)
launch(template=None, options=None, tags=None, client=None, session=None, wait=True, connect=False, maxtries=40, **instance_kwargs) classmethod

launch a single instance. This is a thin wrapper for Cluster.launch() with count=1. See that function for full documentation.

Returns:

Type Description
Instance

an Instance associated with a newly-launched EC2 instance.

Source code in hostess/aws/ec2.py
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
@classmethod
def launch(
    cls,
    template=None,
    options=None,
    tags=None,
    client=None,
    session=None,
    wait=True,
    connect=False,
    maxtries: int = 40,
    **instance_kwargs: Union[
        str,
        botocore.client.BaseClient,
        boto3.resources.base.ServiceResource,
        boto3.Session,
        Path,
        bool,
    ],
) -> "Instance":
    """
    launch exactly one instance. This simply calls `Cluster.launch()` with
    a count of 1 and returns the first element of the result; see
    `Cluster.launch()` for full documentation of every argument.

    Returns:
        an Instance associated with a newly-launched EC2 instance.
    """
    # all arguments are forwarded positionally, in Cluster.launch()'s
    # order, after the leading instance count.
    launched = Cluster.launch(
        1,
        template,
        options,
        tags,
        client,
        session,
        wait,
        connect,
        maxtries,
        **instance_kwargs,
    )
    return launched[0]
make_ssh_string()

Convenience method that returns a terminal command a local user might run in a shell, assuming correct system configuration, in order to start an interactive SSH session on this instance. This performs no system-level verification that this command will actually work (for instance, it does not check to see if ssh is locally installed). This does not imply that Instance executed, or will execute, this command at any point (it did not and will not).

Returns:

Type Description
str

A string that, if run in a local shell, may start an interactive SSH session.

Source code in hostess/aws/ec2.py
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
def make_ssh_string(self) -> str:
    """
    Build the shell command a local user could run to open an interactive
    SSH session on this instance, assuming correct system configuration.

    This is purely a convenience: no system-level verification is done
    (for instance, it does not check whether `ssh` is locally installed),
    and `Instance` itself never executes this command.

    Returns:
        A string that, if run in a local shell, may start an interactive
            SSH session.

    Raises:
        ValueError: if the keyfile or ip of the instance is not yet known.
    """
    connection_known = self.key is not None and self.ip is not None
    if not connection_known:
        raise ValueError(
            "No connection established to instance. Cannot confirm "
            "keyfile or ip. Run self.connect() first."
        )
    return f"ssh -i {self.key} {self.uname}@{self.ip}"
notebook(**connect_kwargs)

execute a Jupyter Notebook on the instance and establish a tunnel for local access.

Parameters:

Name Type Description Default
connect_kwargs Union[int, str, bool]

arguments for notebook execution/connection. see ssh.jupyter_connect() for complete signature.

{}

Returns:

Type Description
NotebookConnection

structure containing results of tunneled Notebook execution.

Source code in hostess/aws/ec2.py
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
@connectwrap
def notebook(
    self, **connect_kwargs: Union[int, str, bool]
) -> NotebookConnection:
    """
    start a Jupyter Notebook on the instance and tunnel it so that it can
    be reached locally.

    Args:
        connect_kwargs: arguments for notebook execution/connection. see
            `ssh.jupyter_connect()` for complete signature.

    Returns:
        structure containing results of tunneled Notebook execution.
    """
    # prepare the connection explicitly before handing the SSH object on
    self._prep_connection()
    ssh_connection = self._ssh
    return jupyter_connect(ssh_connection, **connect_kwargs)
put(source, target, *args, literal_str=False, **kwargs)

write local file or in-memory data to target file on instance.

Parameters:

Name Type Description Default
source Union[str, Path, IO, bytes]

filelike object or path to local file. note that filelike objects will be closed during put operation.

required
target Union[str, Path]

write path on instance

required
args Any

additional arguments to pass to underlying put method

()
literal_str bool

if True and source is a str, write source into target as text rather than interpreting source as a path to a local file

False
kwargs Any

additional kwargs to pass to underlying put command

{}

Returns:

Type Description
dict

dict giving transfer metadata: local, remote, host, and port

Source code in hostess/aws/ec2.py
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
@connectwrap
def put(
    self,
    source: Union[str, Path, IO, bytes],
    target: Union[str, Path],
    *args: Any,
    literal_str: bool = False,
    **kwargs: Any,
) -> dict:
    """
    upload a local file, or data held in memory, to a file on the instance.

    Args:
        source: filelike object or path to local file. note that filelike
            objects will be closed during put operation.
        target: write path on instance
        args: additional arguments to pass to underlying put method
        literal_str: if True and `source` is a `str`, write `source`
            into `target` as text rather than interpreting `source` as a
            path to a local file
        kwargs: additional kwargs to pass to underlying put command

    Returns:
        dict giving transfer metadata: local, remote, host, and port
    """
    # straight delegation to the SSH object's put()
    upload = self._ssh.put
    transfer_info = upload(
        source, target, *args, literal_str=literal_str, **kwargs
    )
    return transfer_info
read(source, mode='r', encoding='utf-8', as_buffer=False)

read a file from the instance directly into memory.

Parameters:

Name Type Description Default
source Union[str, Path]

path to file on instance

required
mode Literal['r', 'rb']

'r' to read file as text; 'rb' to read file as bytes

'r'
encoding str

encoding for text, used only if mode is 'r'

'utf-8'
as_buffer bool

if True, return BytesIO/StringIO; if False, return bytes/str

False

Returns:

Type Description
Union[BytesIO, StringIO, bytes, str]

Buffer containing contents of remote file

Source code in hostess/aws/ec2.py
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
@connectwrap
def read(
    self,
    source: Union[str, Path],
    mode: Literal["r", "rb"] = "r",
    encoding: str = "utf-8",
    as_buffer: bool = False,
) -> Union[io.BytesIO, io.StringIO, bytes, str]:
    """
    load the contents of a file on the instance straight into memory.

    Args:
        source: path to file on instance
        mode: 'r' to read file as text; 'rb' to read file as bytes
        encoding: encoding for text, used only if `mode` is 'r'
        as_buffer: if True, return BytesIO/StringIO; if False, return
            bytes/str

    Returns:
        Contents of the remote file, as a buffer or as bytes/str depending
            on `as_buffer`
    """
    # arguments are forwarded positionally, in this signature's order
    read_args = (source, mode, encoding, as_buffer)
    return self._ssh.read(*read_args)
read_csv(source, encoding='utf-8', **csv_kwargs)

read a CSV-like file from the instance into a pandas DataFrame.

Parameters:

Name Type Description Default
source Union[str, Path]

path to CSV-like file on instance

required
encoding str

encoding for text

'utf-8'
csv_kwargs Any

kwargs to pass to pd.read_csv

{}

Returns:

Type Description
DataFrame

DataFrame created from contents of remote CSV file

Source code in hostess/aws/ec2.py
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
@connectwrap
def read_csv(
    self,
    source: Union[str, Path],
    encoding: str = "utf-8",
    **csv_kwargs: Any,
) -> pd.DataFrame:
    """
    read a CSV-like file from the instance into a pandas DataFrame.

    Args:
        source: path to CSV-like file on instance
        encoding: encoding for text
        csv_kwargs: kwargs to pass to pd.read_csv

    Returns:
        DataFrame created from contents of remote CSV file
    """
    # Unpack csv_kwargs so they reach the underlying reader as keyword
    # arguments (as `get`/`put` do with their kwargs); handing the dict over
    # as a single third positional argument does not match the documented
    # "kwargs to pass to pd.read_csv" contract.
    return self._ssh.read_csv(source, encoding, **csv_kwargs)
rebase_ssh_ingress_ip(ip=None, force=False, revoke=True, verbose=True)

Modify this instance's security group(s) to permit SSH access from an IP (by default, the caller's external IP). IMPORTANT: by default, this method revokes all other inbound access permissions, because it is good security practice to not slowly whitelist the entire world. Pass revoke=False if there are permissions you need to retain.

Parameters:

Name Type Description Default
ip Optional[str]

permit SSH access from this IP. if None, use the caller's external IP.

None
force bool

if True, will force modification even of default security groups.

False
revoke bool

if True, will revoke all other inbound permissions.

True
verbose bool

if True, print actions to stdout

True
Source code in hostess/aws/ec2.py
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
def rebase_ssh_ingress_ip(
    self,
    ip: Optional[str] = None,
    force: bool = False,
    revoke: bool = True,
    verbose: bool = True
):
    """
    Reconfigure this instance's security group(s) so that SSH access is
    permitted from a single IP (by default, the caller's external IP).

    IMPORTANT: by default, this method revokes all other inbound access
    permissions, because it is good security practice to not slowly
    whitelist the entire world. Pass `revoke=False` if there are
    permissions you need to retain.

    Args:
        ip: permit SSH access from this IP. if None, use the caller's
            external IP.
        force: if True, will force modification even of default security
            groups.
        revoke: if True, will revoke all other inbound permissions.
        verbose: if True, print actions to stdout

    Raises:
        DefaultSecurityGroupError: if a helper refuses to modify a default
            security group (re-raised with a hint about `force=True`).
    """
    for group_record in self.instance_.security_groups:
        group_id = group_record["GroupId"]
        group = init_resource("ec2").SecurityGroup(group_id)
        try:
            # existing permissions are revoked first, then the new rule
            # is added
            if revoke is True:
                revoke_ingress(
                    group,
                    verbose=verbose,
                    force_modification_of_default_sg=force,
                )
            authorize_ssh_ingress_from_ip(
                group,
                ip=ip,
                force_modification_of_default_sg=force,
                verbose=verbose,
            )
        except DefaultSecurityGroupError:
            # replace the helper's error with one that tells the caller
            # how to override the refusal
            raise DefaultSecurityGroupError(
                "\tRefusing to modify permissions of a default security "
                "group. Pass force=True to override."
            )
reboot(wait=True, hard=False, timeout=65)

Reboot or hard-restart the instance. Note that a hard restart will change the instance's ip unless it has been assigned a static ip. The Instance object will automatically handle this, but other code/processes using it will need to be informed.

Parameters:

Name Type Description Default
wait bool

if True, block until instance state reaches 'running' again.

True
hard bool

if True, perform a 'hard' restart: fully shut the instance down and start it up again. if False, perform a 'soft' restart. Note that AWS will automatically switch to a 'hard' restart if its attempt at a soft restart fails.

False
timeout float

seconds to wait for state transitions before timing out.

65
Source code in hostess/aws/ec2.py
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
def reboot(
    self, wait: bool = True, hard: bool = False, timeout: float = 65
):
    """
    Reboot or hard-restart the instance. Note that a hard
    restart will change the instance's ip unless it has been assigned a
    static ip. The Instance object will automatically handle this, but
    other code/processes using it will need to be informed.

    Args:
        wait: if True, block until instance state reaches 'running' again.
            if False, return as soon as the restart has been requested
            (a hard restart still has to block until the instance has
            stopped, because it cannot be started before then).
        hard: if True, perform a 'hard' restart: fully shut the instance
            down and start it up again. if False, perform a 'soft' restart.
            Note that AWS will automatically switch to a 'hard' restart if
            its attempt at a soft restart fails.
        timeout: seconds to wait for state transitions before timing out.
    """
    if hard is True:
        self.stop()
        self.wait_until_stopped(timeout)
        # start() waits for 'running' by default, using its own default
        # timeout. Pass wait=False so that `wait` and `timeout` given to
        # this method are honored by the single wait below.
        self.start(wait=False)
    else:
        self.instance_.reboot()
    if wait is True:
        self.wait_until_running(timeout)
reconnect(maxtries=5, delay=1)

create and attempt to establish a new SSH connection to the instance, closing any existing one. Note that this will immediately terminate any non-daemonized processes previously executed over the existing connection.

Parameters:

Name Type Description Default
maxtries int

maximum times to re-attempt failed connections

5
delay float

how many seconds to wait after failed attempts

1
Source code in hostess/aws/ec2.py
499
500
501
502
503
504
505
506
507
508
509
510
511
512
def reconnect(self, maxtries: int = 5, delay: float = 1):
    """
    Discard any existing SSH connection to the instance, then create and
    attempt to establish a fresh one.

    Note that this will immediately terminate any non-daemonized processes
    previously executed over the existing connection.

    Args:
        maxtries: maximum times to re-attempt failed connections
        delay: how many seconds to wait after failed attempts
    """
    has_connection = self._ssh is not None
    if has_connection:
        self._drop_ssh()
    retry_policy = {"maxtries": maxtries, "delay": delay}
    self._prep_connection(lazy=False, **retry_policy)
rename(name)

Rename the instance. Does not rename volumes or network interfaces. Updates local instance state cache when called.

Parameters:

Name Type Description Default
name str

new name for instance.

required
Source code in hostess/aws/ec2.py
343
344
345
346
347
348
349
350
351
352
353
354
def rename(self, name: str):
    """
    Give the instance a new name by setting its "Name" tag. Volumes and
    network interfaces are not renamed. The local instance state cache is
    refreshed afterwards.

    Args:
        name: new name for instance.
    """
    name_tag = {"Key": "Name", "Value": name}
    self.client.create_tags(Resources=[self.instance_id], Tags=[name_tag])
    # pull the new tag back into self.tags / self.name
    self.update()
restart(wait=True, hard=False, timeout=65)

alias for Instance.reboot().

Source code in hostess/aws/ec2.py
1115
1116
1117
1118
1119
def restart(
    self, wait: bool = True, hard: bool = False, timeout: float = 65
):
    """
    alias for Instance.reboot(); all arguments are passed through
    unchanged.
    """
    self.reboot(wait=wait, hard=hard, timeout=timeout)
start(return_response=False, wait=True, connect=False, maxtries=40)

Start the instance.

Parameters:

Name Type Description Default
return_response bool

if True, return API response.

False
wait bool

if True, wait until instance is running.

True
connect bool

if True, wait until the instance is connectable via SSH or we have tried to connect maxtries times.

False
maxtries int

max number of times to attempt connection (5s delay in between).

40

Returns:

Type Description
Optional[dict]

API response if return_response is True; otherwise None.

Source code in hostess/aws/ec2.py
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
def start(
    self,
    return_response: bool = False,
    wait: bool = True,
    connect: bool = False,
    maxtries: int = 40,
) -> Optional[dict]:
    """
    Start the instance.

    Args:
        return_response: if True, return API response.
        wait: if True, wait until instance is running.
        connect: if True, wait until the instance is connectable via SSH
            or we have tried to connect `maxtries` times.
        maxtries: max number of times to attempt connection (5s delay in
            between).

    Returns:
        API response if `return_response` is True; otherwise None.
    """
    response = self.instance_.start()
    self.update()
    if connect is True:
        # connecting implies waiting for 'running' first, whatever `wait`
        # says; no progress messages are printed on this path.
        self.wait_until_running()
        self.wait_on_connection(maxtries)
    elif (connect is False) and (wait is True):
        if self.verbose is True:
            print("waiting until instance is running...", end="")
        self.wait_until_running()
        if self.verbose is True:
            print("running.")
    return response if return_response is True else None
stop(return_response=False)

Stop the instance.

Parameters:

Name Type Description Default
return_response bool

if True, return API response.

False
Returns:

Type Description
Optional[dict]

API response if return_response is True; otherwise None.

Source code in hostess/aws/ec2.py
840
841
842
843
844
845
846
847
848
849
850
851
852
853
def stop(self, return_response: bool = False) -> Optional[dict]:
    """
    Stop the instance, then refresh the locally cached instance state.

    Args:
        return_response: if True, return API response.

    Returns:
        API response if `return_response` is True; otherwise None.
    """
    api_response = self.instance_.stop()
    self.update()
    return api_response if return_response is True else None
terminate(return_response=False)

Terminate (aka delete) the instance. The royal road to cloud cost management. Please note that this action is permanent and cannot be undone.

Parameters:

Name Type Description Default
return_response bool

if True, return the API response.

False

Returns:

Type Description
Optional[dict]

API response if return_response is True; otherwise None.

Source code in hostess/aws/ec2.py
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
def terminate(self, return_response: bool = False) -> Optional[dict]:
    """
    Terminate (aka delete) the instance, then refresh the locally cached
    instance state. The royal road to cloud cost management. Please note
    that this action is permanent and cannot be undone.

    Args:
        return_response: if True, return the API response.

    Returns:
        API response if `return_response` is True; otherwise None.
    """
    api_response = self.instance_.terminate()
    self.update()
    return api_response if return_response is True else None
tunnel(local_port, remote_port)

create an SSH tunnel between a local port and a remote port.

Parameters:

Name Type Description Default
local_port int

port number for local end of tunnel.

required
remote_port int

port number for remote end of tunnel.

required

Returns:

Name Type Description
signaler Callable

function to shut down tunnel

tunnel_metadata dict[str, Union[int, str, Path]]

dict of metadata about the tunnel

Source code in hostess/aws/ec2.py
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
@connectwrap
def tunnel(
    self, local_port: int, remote_port: int
) -> tuple[Callable, dict[str, Union[int, str, Path]]]:
    """
    create an SSH tunnel between a local port and a remote port.

    Args:
        local_port: port number for local end of tunnel.
        remote_port: port number for remote end of tunnel.

    Returns:
        signaler: function to shut down tunnel
        tunnel_metadata: dict of metadata about the tunnel
    """
    self._ssh.tunnel(local_port, remote_port)
    return self._ssh.tunnels[-1]
update()

Refresh basic state and identification information.

Source code in hostess/aws/ec2.py
1028
1029
1030
1031
1032
1033
1034
def update(self):
    """
    Refresh basic state and identification information: reloads the
    underlying instance resource, then re-reads `state`, `ip`, `tags`,
    and `name` from it.
    """
    self.instance_.load()
    self.state = self.instance_.state["Name"]
    # which address is cached depends on self.address_type
    ip_attribute = f"{self.address_type}_ip_address"
    self.ip = getattr(self.instance_, ip_attribute)
    self.tags = tag_dict(self.instance_.tags)
    # name is None when the instance has no "Name" tag
    self.name = self.tags.get("Name")
wait_on_connection(maxtries)

block until an SSH connection to the instance is established

Source code in hostess/aws/ec2.py
475
476
477
478
479
480
481
482
483
484
def wait_on_connection(self, maxtries: int):
    """
    block until an SSH connection to the instance is established.

    `self.connect()` is retried for as long as it raises ConnectionError;
    nothing in this loop limits the number of retries.

    Args:
        maxtries: passed to each `self.connect()` call.
    """
    while not self.is_connected:
        try:
            self.connect(maxtries=maxtries)
        except ConnectionError:
            continue
        else:
            break
    if self.verbose is True:
        print("connection established")
wait_until(state, timeout=65)

Pause execution until the instance reaches the specified state. Automatically updates state and ip attributes.

Parameters:

Name Type Description Default
state Literal[InstanceState]

name of target instance state

required
timeout float

how long, in seconds, to wait before timing out

65
Source code in hostess/aws/ec2.py
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
def wait_until(self, state: Literal[InstanceState], timeout: float = 65):
    """
    Block until the instance reaches the specified state, refreshing
    cached instance information (including `state` and `ip`) on every
    check.

    Args:
        state: name of target instance state
        timeout: how long, in seconds, to wait before timing out
    """
    # noinspection PyUnresolvedReferences
    valid_states = InstanceState.__args__
    assert state in valid_states
    # presumably `tick` raises once `timeout` seconds have elapsed --
    # confirm against timeout_factory()
    tick, _ = timeout_factory(timeout=timeout)
    while self.state != state:
        tick()
        self.update()
wait_until_running(timeout=65)

Alias for Instance.wait_until('running')

Parameters:

Name Type Description Default
timeout float

how long, in seconds, to wait until timing out

65
Source code in hostess/aws/ec2.py
1053
1054
1055
1056
1057
1058
1059
1060
def wait_until_running(self, timeout: float = 65):
    """
    Shorthand for `Instance.wait_until('running', timeout)`.

    Args:
        timeout: how long, in seconds, to wait until timing out
    """
    self.wait_until("running", timeout=timeout)
wait_until_started(timeout=65)

Additional alias for Instance.wait_until('running')

Parameters:

Name Type Description Default
timeout float

how long, in seconds, to wait until timing out

65
Source code in hostess/aws/ec2.py
1062
1063
1064
1065
1066
1067
1068
1069
def wait_until_started(self, timeout: float = 65):
    """
    Second shorthand for `Instance.wait_until('running', timeout)`: block
    until the instance reports the 'running' state.

    Args:
        timeout: number of seconds to wait before timing out
    """
    target_state = "running"
    self.wait_until(target_state, timeout)
wait_until_stopped(timeout=65)

Alias for Instance.wait_until('stopped')

Parameters:

Name Type Description Default
timeout float

how long, in seconds, to wait before timing out

65
Source code in hostess/aws/ec2.py
1071
1072
1073
1074
1075
1076
1077
1078
def wait_until_stopped(self, timeout: float = 65):
    """
    Block until the instance reports the 'stopped' state; shorthand for
    `Instance.wait_until('stopped', timeout)`.

    Args:
        timeout: number of seconds to wait before timing out
    """
    target_state = "stopped"
    self.wait_until(target_state, timeout)
wait_until_terminated(timeout=65)

Alias for Instance.wait_until('terminated')

Parameters:

Name Type Description Default
timeout float

how long, in seconds, to wait before timing out

65
Source code in hostess/aws/ec2.py
1080
1081
1082
1083
1084
1085
1086
1087
def wait_until_terminated(self, timeout: float = 65):
    """
    Block until the instance reports the 'terminated' state; shorthand for
    `Instance.wait_until('terminated', timeout)`.

    Args:
        timeout: number of seconds to wait before timing out
    """
    target_state = "terminated"
    self.wait_until(target_state, timeout)

NoKeyError

Bases: OSError

we're trying to do things over SSH, but can't find a valid keyfile.

Source code in hostess/aws/ec2.py
98
99
class NoKeyError(OSError):
    """
    Raised when an operation needs SSH access but no valid keyfile can be
    found.
    """

_ebs_device_mapping(volume_type, volume_size, index=0, iops=None, throughput=None, device_name=None)

reformat the passed specification as a legal EC2 API LaunchTemplateBlockDeviceMappingRequest object

Parameters:

Name Type Description Default
volume_type str

EBS volume type, e.g. 'gp3'

required
volume_size int

volume size in GB

required
index int

index of volume among all specified volumes (0 means root)

0
iops Optional[int]

I/O operations per second, if other than default and volume type supports IOPS specification

None
throughput Optional[int]

throughput in MiB/s, if other than default and volume type supports throughput specification

None
device_name Optional[str]

optional block device name; if not specified, will automatically assign one based on index

None

Returns:

Type Description
dict

LaunchTemplateBlockDeviceMappingRequest-formatted dict

Source code in hostess/aws/ec2.py
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
def _ebs_device_mapping(
    volume_type: str,
    volume_size: int,
    index: int = 0,
    iops: Optional[int] = None,
    throughput: Optional[int] = None,
    device_name: Optional[str] = None,
) -> dict:
    """
    Build a dict shaped like an EC2 API
    [LaunchTemplateBlockDeviceMappingRequest object](https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_LaunchTemplateBlockDeviceMappingRequest.html)
    from a single volume specification.

    Args:
        volume_type: EBS volume type, e.g. 'gp3'
        volume_size: volume size in GB
        index: position of this volume among all specified volumes (0 means
            root); used to pick a device name when none is given
        iops: I/O operations per second, if other than default and the volume
            type supports IOPS specification
        throughput: throughput in MiB/s, if other than default and the volume
            type supports throughput specification
        device_name: optional block device name; when omitted, one is derived
            from `index` ('/dev/sda', '/dev/sdb', ...)

    Returns:
        LaunchTemplateBlockDeviceMappingRequest-formatted dict

    Raises:
        ValueError: if `volume_type` is not in EBS_VOLUME_TYPES, or if `iops`
            or `throughput` is given for a type that does not accept them
        NotImplementedError: for 'io*' volume types
    """
    if volume_type not in EBS_VOLUME_TYPES:
        raise ValueError(f"{volume_type} is not a recognized EBS volume type.")
    if volume_type.startswith("io"):
        raise NotImplementedError(
            "Handling for io1 and io2 volumes is not yet supported."
        )
    has_performance_spec = not (iops is None and throughput is None)
    if has_performance_spec and volume_type not in ("io1", "io2", "gp3"):
        raise ValueError(
            f"{volume_type} does not support specifying explicit values for "
            f"throughput or IOPS."
        )
    if device_name is None:
        device_name = f"/dev/sd{ascii_lowercase[index]}"
    # the bare first-device name is always rewritten to its '1' form
    if device_name == "/dev/sda":
        device_name = "/dev/sda1"
    ebs_spec = {"VolumeType": volume_type, "VolumeSize": volume_size}
    # optional performance settings are only included when explicitly given
    for api_key, value in (("Iops", iops), ("Throughput", throughput)):
        if value is not None:
            ebs_spec[api_key] = value
    return {"DeviceName": device_name, "Ebs": ebs_spec}

_hostess_placeholder()

create a random hostess placeholder name

Source code in hostess/aws/ec2.py
2685
2686
2687
def _hostess_placeholder() -> str:
    """
    Generate a random placeholder name: 'hostess-' followed by ten random
    lowercase ASCII letters.
    """
    suffix = "".join(choices(ascii_lowercase, k=10))
    return f"hostess-{suffix}"

_instances_from_ids(ids, resource=None, session=None, **instance_kwargs)

helper function for cluster launch.

Source code in hostess/aws/ec2.py
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
def _instances_from_ids(
    ids,
    resource: Optional[boto3.resources.base.ServiceResource] = None,
    session: Optional[boto3.Session] = None,
    **instance_kwargs: Union[
        str,
        botocore.client.BaseClient,
        boto3.resources.base.ServiceResource,
        boto3.Session,
        Path,
        bool,
    ],
):
    """
    Helper function for cluster launch: construct one `Instance` per id, all
    sharing a single ec2 resource.

    Args:
        ids: iterable of instance identifiers, each passed as the first
            argument to `Instance`
        resource: optional preexisting boto ec2 resource
        session: optional preexisting boto session
        **instance_kwargs: extra keyword arguments forwarded unchanged to
            every `Instance` constructor call

    Returns:
        list of `Instance` objects, in the same order as `ids`
    """
    resource = init_resource("ec2", resource, session)
    # TODO: make this asynchronous
    return [
        Instance(instance_id, resource=resource, **instance_kwargs)
        for instance_id in ids
    ]

_interpret_ebs_args(volume_type=None, volume_size=None, iops=None, throughput=None, volume_list=None)

helper function for create_launch_template(). Parse user-provided volume specifications into a list of dicts formatted like EC2 API LaunchTemplateBlockDeviceMappingRequest objects.

This function permits specification of either a volume_list or volume_type + volume_size + (optional) iops + (optional) throughput, which all implicitly refer to a single boot volume.

Parameters:

Name Type Description Default
volume_type Optional[Literal['gp2', 'gp3', 'io1', 'io2']]

EBS volume type for instance boot volume.

None
volume_size Optional[int]

Size in GB for instance boot volume.

None
iops Optional[int]

IOPS for instance boot volume, if other than default

None
throughput Optional[int]

throughput for instance boot volume, if other than default

None
volume_list Optional[Sequence[dict[str, Union[int, str]]]]

sequence of dicts that could be passed as kwargs to this function, giving all volumes that will be attached to the instance at creation. the first one is the boot volume. If specified, volume_type and volume_size must be None.

None

Returns:

Type Description
list[dict]

LaunchTemplateBlockDeviceMappingRequest objects from specs

Source code in hostess/aws/ec2.py
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
def _interpret_ebs_args(
    volume_type: Optional[Literal["gp2", "gp3", "io1", "io2"]] = None,
    volume_size: Optional[int] = None,
    iops: Optional[int] = None,
    throughput: Optional[int] = None,
    volume_list: Optional[Sequence[dict[str, Union[int, str]]]] = None,
) -> list[dict]:
    """
    Helper function for `create_launch_template()`. Turn user-provided volume
    specifications into a list of dicts formatted like EC2 API
    [LaunchTemplateBlockDeviceMappingRequest objects](https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_LaunchTemplateBlockDeviceMappingRequest.html).

    Callers give _either_ `volume_list`, _or_ `volume_type` + `volume_size`
    (+ optional `iops` / `throughput`), which together describe a single boot
    volume.

    Args:
        volume_type: EBS volume type for instance boot volume.
        volume_size: Size in GB for instance boot volume.
        iops: IOPS for instance boot volume, if other than default
        throughput: throughput for instance boot volume, if other than default
        volume_list: sequence of dicts, each usable as keyword arguments for
            `_ebs_device_mapping()`, describing every volume to attach at
            creation. The first one is the boot volume. If specified,
            volume_type and volume_size must be None.

    Returns:
        LaunchTemplateBlockDeviceMappingRequest objects from specs

    Raises:
        TypeError: if both styles of specification are mixed, or if neither
            is complete
    """
    if volume_list is not None:
        if not (volume_type is None and volume_size is None):
            raise TypeError(
                "Please pass either a list of volumes (volume_list) or "
                "volume_type and _size, not both."
            )
        # position in the list doubles as the device index
        return [
            _ebs_device_mapping(index=position, **spec)
            for position, spec in enumerate(volume_list)
        ]
    if volume_type is None or volume_size is None:
        raise TypeError(
            "If a list of volumes (volume_list) is not specified, "
            "volume_type and volume_size cannot be None."
        )
    boot_volume = _ebs_device_mapping(
        volume_type, volume_size, 0, iops, throughput
    )
    return [boot_volume]

_retrieve_instance_type_info(client=None, session=None, reset_cache=False)

Retrieve full descriptions of all instance types available in the client's AWS Region, either from the API or from an on-disk cache, and cache them to disk if retrieved from API.

This is primarily intended to be used as a helper function for higher-level instance type summarization and tabulation functions.

Parameters:

Name Type Description Default
client Optional[BaseClient]

optional preexisting boto ec2 client

None
session Optional[Session]

optional preexisting boto session

None
reset_cache bool

if True, always retrieve all descriptions from the API, even if 'fresh' cached descriptions are available on disk. If False, do so only if there are no cached descriptions or they are more than 5 days old.

False

Returns:

Type Description
tuple[dict]

dicts produced from EC2 InstanceTypeInfo API objects, one for every instance type available in the AWS Region.

Source code in hostess/aws/ec2.py
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
def _retrieve_instance_type_info(
    client: Optional[botocore.client.BaseClient] = None,
    session: Optional[boto3.Session] = None,
    reset_cache: bool = False,
) -> tuple[dict]:
    """
    Retrieve full descriptions of all instance types available in the client's
    AWS Region, either from the API or from an on-disk cache, and cache them
    to disk if retrieved from API.

    This is primarily intended to be used as a helper function for higher-level
    instance type summarization and tabulation functions.

    Args:
        client: optional preexisting boto ec2 client
        session: optional preexisting boto session
        reset_cache: if True, always retrieve all descriptions from the API,
            even if 'fresh' cached descriptions are available on disk. If
            False, do so only if there are no cached descriptions or they are
            more than 5 days old.

    Returns:
        dicts produced from [EC2 InstanceTypeInfo](https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_InstanceTypeInfo.html)
            API objects, one for every instance type available in the AWS
            Region.
    """
    client = init_client("ec2", client, session)
    region = clarify_region(None, client)
    cache_path = Path(GENERAL_DEFAULTS["cache_path"])
    # cache files are keyed by region
    prefix = f"instance_types_{region}"
    if reset_cache is False:
        cached_results = _check_cached_results(cache_path, prefix, max_age=5)
        if cached_results is not None:
            # read inside a context manager so the file handle is closed
            # promptly instead of being left for the garbage collector.
            # NOTE: pickle executes arbitrary code on load; this is only
            # acceptable because the cache directory is expected to contain
            # files written by the dump below.
            with cached_results.open("rb") as stream:
                return pickle.load(stream)
    results = autopage(client, "describe_instance_types")
    # drop older cache files for this prefix before writing the fresh one
    _clear_cached_results(cache_path, prefix)
    with Path(cache_path, f"{prefix}_{filestamp()}.pkl").open("wb") as stream:
        pickle.dump(results, stream)
    return results

authorize_ssh_ingress_from_ip(sg, ip=None, force_modification_of_default_sg=False, verbose=True)

Parameters:

Name Type Description Default
sg SecurityGroup

ec2.SecurityGroup object to apply authorization to

required
ip Optional[str]

ip to authorize

None
force_modification_of_default_sg bool

apply modification even to a default security group?

False
verbose bool

if True, print actions to stdout

True
Source code in hostess/aws/ec2.py
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
def authorize_ssh_ingress_from_ip(
    sg: "SecurityGroup",
    ip: Optional[str] = None,
    force_modification_of_default_sg: bool = False,
    verbose: bool = True
):
    """
    Add an ingress rule to a security group permitting TCP traffic on port 22
    (SSH) from a single IP address. If an identical rule already exists, the
    function reports that (when verbose) and returns normally.

    Args:
        sg: ec2.SecurityGroup object to apply authorization to
        ip: ip to authorize. If None, the caller's external IP (as reported
            by `my_external_ip()`) is used.
        force_modification_of_default_sg: apply modification even to a default
            security group?
        verbose: if True, print actions to stdout

    Raises:
        DefaultSecurityGroupError: if the group looks like a default security
            group and `force_modification_of_default_sg` is not set
        botocore.client.ClientError: for any API error other than a duplicate
            permission
    """
    print_ = print if verbose is True else lambda *_, **__: None
    if ip is None:
        # automatically select the user's ip
        ip = my_external_ip()
    print_(f"Authorizing SSH ingress from {ip} for security group {sg.id}")
    # NOTE(review): this inspects `sg.id`; EC2 group ids normally look like
    # 'sg-...', so confirm whether the group *name* was the intended target.
    if "default" in sg.id and not force_modification_of_default_sg:
        # the message must name the real keyword argument, otherwise users
        # who follow the hint get a TypeError
        raise DefaultSecurityGroupError(
            "\tRefusing to modify permissions of a default security group. "
            "Pass force_modification_of_default_sg=True to override."
        )
    try:
        sg.authorize_ingress(
            IpPermissions=[
                {
                    "FromPort": 22,
                    "ToPort": 22,
                    "IpProtocol": "tcp",
                    "IpRanges": [
                        {
                            # /32: exactly one address
                            "CidrIp": f"{ip}/32",
                            "Description": "SSH access from specified IP",
                        }
                    ],
                },
            ],
        )
    except botocore.client.ClientError as ce:
        # an already-present identical rule is not an error for our purposes
        if "InvalidPermission.Duplicate" in str(ce):
            print_(f"** {ip} already authorized for SSH ingress to {sg.id} **")
        else:
            raise

connectwrap(func)

Decorator for methods of Instance that require an SSH connection to work. Causes them to check for an open connection, and, if they do not find one, to attempt to open one.

Source code in hostess/aws/ec2.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
def connectwrap(func: Callable[[I, P], R]) -> Callable[[I, P], R]:
    """
    Decorator for Instance methods that need a working SSH connection.

    The wrapped method first prepares the instance's connection, then runs.
    If the call fails with an SSH-level error, the connection is reopened and
    the method is attempted exactly one more time; a failure on that second
    attempt propagates to the caller.
    """
    @wraps(func)
    def with_connection(self: I, *args: P.args, **kwargs: P.kwargs):
        # noinspection PyProtectedMember
        self._prep_connection()
        try:
            result = func(self, *args, **kwargs)
        except (SSHException, NoValidConnectionsError):
            # reconnect, then retry once
            self.connect()
            result = func(self, *args, **kwargs)
        return result

    return with_connection

create_ec2_key(key_name=None, save_key=True, resource=None, session=None)

Create a new EC2 SSH key pair in the caller's AWS account. Optionally also save the key material to disk.

Parameters:

Name Type Description Default
key_name Optional[str]

optional name for key pair (if not specified, a name is randomly assigned)

None
save_key bool

if True, save the key material to disk in ~/.ssh, in a .pem file with the same filename stem as the key pair

True
resource Optional[ServiceResource]

optional preexisting boto ec2 resource

None
session Optional[Session]

optional preexisting boto session

None

Returns:

Type Description
KeyPair

a boto3 KeyPair resource providing an interface to the newly-created key pair.

Source code in hostess/aws/ec2.py
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
def create_ec2_key(
    key_name: Optional[str] = None,
    save_key: bool = True,
    resource: Optional[boto3.resources.base.ServiceResource] = None,
    session: Optional[boto3.Session] = None,
) -> "KeyPair":
    """
    Create a new EC2 SSH key pair in the caller's AWS account. Optionally also
    save the key material to disk.

    Args:
        key_name: optional name for key pair (if not specified, a name is
            randomly assigned)
        save_key: if True, save the key material to disk in ~/.ssh, in a .pem
            file with the same filename stem as the key pair. The file is
            restricted to owner read/write (mode 0o600) before any key
            material is written to it.
        resource: optional preexisting boto ec2 resource
        session: optional preexisting boto session

    Returns:
        a boto3 KeyPair resource providing an interface to the newly-created
            key pair.
    """
    if key_name is None:
        key_name = _hostess_placeholder()
    resource = init_resource("ec2", resource, session)
    key = resource.create_key_pair(KeyName=key_name)
    keydir = Path(os.path.expanduser("~/.ssh"))
    # `mode` only applies when the directory is actually created here
    keydir.mkdir(mode=0o700, exist_ok=True)
    if save_key is True:
        keyfile = Path(keydir, f"{key_name}.pem")
        # many programs will not permit you to use a key file with read/write
        # permissions for other users. Lock the file down *before* writing so
        # the private key is never on disk with umask-default permissions;
        # the chmod also covers a preexisting file, whose mode touch() would
        # leave unchanged.
        keyfile.touch(mode=0o600)
        keyfile.chmod(0o600)
        with keyfile.open("w") as stream:
            stream.write(key.key_material)
    return key

create_launch_template(template_name=None, instance_type=EC2_DEFAULTS['instance_type'], volume_type=None, volume_size=None, image_id=None, iops=None, throughput=None, volume_list=None, instance_name=None, security_group_name=None, tags=None, key_name=None, client=None, session=None, verbose=True)

Create a new EC2 launch template in the caller's AWS account (see https://docs.aws.amazon.com/autoscaling/ec2/userguide/launch-templates.html).

Parameters:

Name Type Description Default
template_name Optional[str]

optional name for template. if none is specified, a random name is assigned.

None
instance_type str

instance type name (e.g. 'm6i.large')

EC2_DEFAULTS['instance_type']
volume_type Optional[Literal['gp2', 'gp3', 'io1', 'io2']]

EBS volume type for boot volume (e.g. 'gp3'). If not specified and volume_list is not passed, defaults to EC2_DEFAULTS['volume_type']

None
volume_size Optional[int]

volume size in GB for boot volume. If not specified and volume_list is not passed, defaults to EC2_DEFAULTS['volume_size']

None
image_id Optional[str]

ID for Amazon Machine Image (AMI) to create instance from. if not specified, uses the most recently-released Ubuntu Server LTS AMI.

None
iops Optional[int]

I/O operations per second for boot volume, if other than default and volume type supports IOPS specification; ignored if volume_list passed

None
throughput Optional[int]

throughput in MiB/s for boot volume, if other than default and volume type supports throughput specification; ignored if volume_list passed

None
volume_list Optional[list[dict]]

alternative to specifying volume_type/volume_size (and optional iops/throughput). a list of dicts with 'volume_type', 'volume_size', and optionally 'iops' and 'throughput' keys. Each of these dicts specifies a separate EBS volume for the instance; the first will be the boot volume. It must not be passed along with volume_size or volume_type.

None
instance_name Optional[str]

optional name for instances created from template

None
security_group_name Optional[str]

optional name of preexisting security group. if not specified, a new security group will be created.

None
tags Optional[dict]

optional dict like {tag name: tag value} specifying resource tags for instances and volumes created from this template.

None
key_name Optional[str]

optional name of preexisting EC2 key pair object known to AWS. if not specified, a new EC2 key pair will be created and a corresponding key file will be saved to disk in ~/.ssh.

None
client Optional[BaseClient]

optional preexisting boto ec2 client

None
session Optional[Session]

optional preexisting boto session

None
verbose bool

if True, print launch template decision process to stdout

True

Returns:

Type Description
dict

dict created from an AWS LaunchTemplate API response.

Source code in hostess/aws/ec2.py
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
def create_launch_template(
    template_name: Optional[str] = None,
    instance_type: str = EC2_DEFAULTS["instance_type"],
    volume_type: Optional[Literal["gp2", "gp3", "io1", "io2"]] = None,
    volume_size: Optional[int] = None,
    image_id: Optional[str] = None,
    iops: Optional[int] = None,
    throughput: Optional[int] = None,
    volume_list: Optional[list[dict]] = None,
    instance_name: Optional[str] = None,
    security_group_name: Optional[str] = None,
    tags: Optional[dict] = None,
    key_name: Optional[str] = None,
    client: Optional[botocore.client.BaseClient] = None,
    session: Optional[boto3.Session] = None,
    verbose: bool = True
) -> dict:
    """
    Create a new EC2 launch template in the caller's AWS account (see
    https://docs.aws.amazon.com/autoscaling/ec2/userguide/launch-templates.html).

    Args:
        template_name: optional name for template. if none is specified, a
            random name is assigned.
        instance_type: instance type name (e.g. 'm6i.large')
        volume_type: EBS volume type for boot volume (e.g. 'gp3'). If not
            specified and `volume_list` is not passed, defaults to
            `EC2_DEFAULTS['volume_type']`
        volume_size: volume size in GB for boot volume. If not specified and
            `volume_list` is not passed, defaults to
            `EC2_DEFAULTS['volume_size']`
        image_id: ID for Amazon Machine Image (AMI) to create instance from.
            A value that does not start with 'ami-' is treated as an image
            name and looked up. if not specified, uses the most
            recently-released Ubuntu Server LTS AMI.
        iops: I/O operations per second for boot volume, if other than default
            and volume type supports IOPS specification; ignored if volume_list
            passed
        throughput: throughput in MiB/s for boot volume, if other than default
            and volume type supports throughput specification; ignored if
            volume_list passed
        volume_list: alternative to specifying volume_type/volume_size (and
            optional iops/throughput). a list of dicts with 'volume_type',
            'volume_size', and optionally 'iops' and 'throughput' keys. Each
            of these dicts specifies a separate EBS volume for the instance;
            the first will be the boot volume. It must not be passed along
            with volume_size or volume_type.
        instance_name: optional name for instances created from template
        security_group_name: optional name of preexisting security group. if
            not specified, a new security group will be created.
        tags: optional dict like {tag name: tag value} specifying resource
            tags for instances and volumes created from this template. The
            passed object is never modified.
        key_name: optional name of preexisting EC2 key pair object known to
            AWS. if not specified, a new EC2 key pair will be created and a
            corresponding key file will be saved to disk in ~/.ssh.
        client: optional preexisting boto ec2 client
        session: optional preexisting boto session
        verbose: if True, print launch template decision process to stdout

    Returns:
        dict created from an AWS
            [LaunchTemplate API response](https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_LaunchTemplate.html).

    Raises:
        ValueError: if `image_id` is given as a name and no matching image
            can be found
    """
    print_ = print if verbose is True else lambda *_, **__: None
    # one random name, reused for whichever of the security group, key pair
    # and template the caller left unnamed
    default_name = _hostess_placeholder()
    if volume_list is None:
        volume_type = (
            EC2_DEFAULTS["volume_type"] if volume_type is None else volume_type
        )
        volume_size = (
            EC2_DEFAULTS["volume_size"] if volume_size is None else volume_size
        )
    client = init_client("ec2", client, session)
    if (image_id is not None) and not image_id.startswith("ami-"):
        try:
            image_id = client.describe_images(
                Filters=[{"Name": "name", "Values": [image_id]}]
            )["Images"][0]["ImageId"]
        except (KeyError, IndexError) as lookup_error:
            # an empty "Images" list (no match) surfaces as IndexError, which
            # must be translated just like a malformed response
            raise ValueError(
                f"Can't find an image corresponding to the name {image_id}."
            ) from lookup_error
    if image_id is None:
        description = describe_instance_type(
            instance_type, pricing=False, ec2_client=client
        )
        image_id = get_stock_ubuntu_image(description["architecture"], client)
        print_(
            f"No AMI specified, using most recent Ubuntu Server LTS "
            f"image from Canonical ({image_id})."
        )
    block_device_mappings = _interpret_ebs_args(
        volume_type, volume_size, iops, throughput, volume_list
    )
    if tags is None:
        tags = []
    elif isinstance(tags, Mapping):
        tags = [{"Key": k, "Value": v} for k, v in tags.items()]
    else:
        # already a sequence of tag dicts: copy it so the append below does
        # not mutate the caller's object
        tags = list(tags)
    tags.append({"Key": "hostess-generated", "Value": "True"})
    # instances/volumes additionally get a Name tag; the template does not
    resource_tags = tags.copy()
    if instance_name is not None:
        resource_tags.append({"Key": "Name", "Value": instance_name})
    if security_group_name is not None:
        # NOTE(review): confirm that describe_security_groups returns an empty
        # list (rather than raising a ClientError) for an unknown group name;
        # if it raises, the creation branch below is never reached.
        sg_response = client.describe_security_groups(
            GroupNames=[security_group_name]
        )["SecurityGroups"]
        if len(sg_response) == 0:
            create_security_group(security_group_name, None, client)
            print_(
                f"No security group named {security_group_name} exists; "
                f"created one."
            )
    else:
        security_group_name = create_security_group(
            default_name, client=client
        ).group_name
        print_(
            f"No security group specified; created a new one named "
            f"{default_name}."
        )
    if key_name is not None:
        # NOTE(review): same question as above for describe_key_pairs and an
        # unknown key name.
        key_response = client.describe_key_pairs(KeyNames=[key_name])[
            "KeyPairs"
        ]
        if len(key_response) == 0:
            create_ec2_key(key_name)
            print_(f"No key pair named {key_name} exists; created one.")
    else:
        key_name = create_ec2_key(default_name).key_name
        print_(
            f"No key pair specified; created one named {default_name} "
            "and saved key material to disk in ~/.ssh."
        )
    launch_template_data = {
        "BlockDeviceMappings": block_device_mappings,
        "ImageId": image_id,
        "TagSpecifications": [
            {"ResourceType": "instance", "Tags": resource_tags},
            {"ResourceType": "volume", "Tags": resource_tags},
        ],
        "SecurityGroups": [security_group_name],
        "InstanceType": instance_type,
        "KeyName": key_name,
    }
    if template_name is None:
        template_name = default_name
    return client.create_launch_template(
        LaunchTemplateName=template_name,
        LaunchTemplateData=launch_template_data,
        TagSpecifications=[{"ResourceType": "launch-template", "Tags": tags}],
    )["LaunchTemplate"]

create_security_group(name=None, description=None, client=None, resource=None, session=None)

Create a new EC2 security group in the caller's AWS account with default hostess settings.

Parameters:

Name Type Description Default
name Optional[str]

optional name for new security group. If not specified, will randomly assign a name.

None
description Optional[str]

optional description for new security group. If not specified, a default description is used.

None
client Optional[BaseClient]

optional preexisting boto ec2 client

None
resource Optional[ServiceResource]

optional preexisting boto ec2 resource

None
session Optional[Session]

optional preexisting boto session

None

Returns:

Type Description
SecurityGroup

A boto3 SecurityGroup resource providing an interface to the newly-created security group.

Source code in hostess/aws/ec2.py
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
def create_security_group(
    name: Optional[str] = None,
    description: Optional[str] = None,
    client: Optional[botocore.client.BaseClient] = None,
    resource: Optional[boto3.resources.base.ServiceResource] = None,
    session: Optional[boto3.Session] = None,
) -> "SecurityGroup":
    """
    Create a security group in the account's default VPC and give it the
    standard hostess ingress rules: SSH (TCP port 22) from the caller's
    current external IP, plus ingress from members of the group itself.

    Args:
        name: name for the new security group. If None, a placeholder name
            is generated.
        description: description for the new security group. If None, a
            default description is used.
        client: optional preexisting boto ec2 client
        resource: optional preexisting boto ec2 resource
        session: optional preexisting boto session

    Returns:
        A boto3 SecurityGroup resource for the newly-created group.

    Raises:
        EnvironmentError: if the account has no default VPC.
    """
    client = init_client("ec2", client, session)
    only_default_vpc = {"Name": "is-default", "Values": ["true"]}
    try:
        vpcs = client.describe_vpcs(Filters=[only_default_vpc])["Vpcs"]
        default_vpc_id = vpcs[0]["VpcId"]
    except IndexError:
        # an empty "Vpcs" list means there is no default VPC to attach to
        raise EnvironmentError(
            "Could not find a default VPC for automated security group "
            "creation."
        )
    resource = init_resource("ec2", resource, session)
    group_name = _hostess_placeholder() if name is None else name
    group_description = (
        "hostess-generated security group"
        if description is None
        else description
    )
    hostess_tags = {
        "ResourceType": "security-group",
        "Tags": [{"Key": "hostess-generated", "Value": "True"}],
    }
    sg = resource.create_security_group(
        Description=group_description,
        GroupName=group_name,
        VpcId=default_vpc_id,
        TagSpecifications=[hostess_tags],
    )
    # the /32 mask restricts SSH access to exactly the creating machine's IP
    creator_ip_range = {
        "CidrIp": f"{my_external_ip()}/32",
        "Description": "SSH access from creating IP",
    }
    ssh_permission = {
        "FromPort": 22,
        "ToPort": 22,
        "IpProtocol": "tcp",
        "IpRanges": [creator_ip_range],
    }
    sg.authorize_ingress(IpPermissions=[ssh_permission])
    # also permit traffic between members of this same security group
    sg.authorize_ingress(SourceSecurityGroupName=sg.group_name)
    return sg

describe_instance_type(instance_type, pricing=True, ec2_client=None, pricing_client=None, session=None)

Retrieve a succinct description of an EC2 instance type.

NOTE: this function will report i386 architecture for the very limited number of instance types that support both i386 and x86_64.

Parameters:

Name Type Description Default
instance_type str

instance type name, e.g. 'm6i.large'

required
pricing bool

if True, also retrieve pricing information

True
ec2_client Optional[BaseClient]

optional preexisting boto ec2 client

None
pricing_client Optional[BaseClient]

optional preexisting boto pricing client

None
session Optional[Session]

optional preexisting boto session

None

Returns:

Type Description
dict

dict containing summary information for this instance type

Source code in hostess/aws/ec2.py
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
def describe_instance_type(
    instance_type: str,
    pricing: bool = True,
    ec2_client: Optional[botocore.client.BaseClient] = None,
    pricing_client: Optional[botocore.client.BaseClient] = None,
    session: Optional[boto3.Session] = None,
) -> dict:
    """
    Retrieve a succinct description of an EC2 instance type.

    NOTE: this function will report i386 architecture for the
    very limited number of instance types that support both i386
    and x86_64.

    Args:
        instance_type: instance type name, e.g. 'm6i.large'
        pricing: if True, also retrieve pricing information. The on-demand
            price is always added; a CPU credit price is added only for
            burstable T-family types.
        ec2_client: optional preexisting boto ec2 client
        pricing_client: optional preexisting boto pricing client
        session: optional preexisting boto session

    Returns:
        dict containing summary information for this instance type
    """
    ec2_client = init_client("ec2", ec2_client, session)
    response = ec2_client.describe_instance_types(
        InstanceTypes=[instance_type]
    )
    summary = summarize_instance_type_response(response)[0]
    if pricing is False:
        return summary
    pricing_client = init_client("pricing", pricing_client, session)
    summary["on_demand_price"] = get_on_demand_price(
        instance_type, None, pricing_client
    )
    # The 't' prefix alone is not enough to identify a burstable type: other
    # families (e.g. 'trn1') share it but have no CPU credits. The summary's
    # "cpu_surcharge" field carries the API's BurstablePerformanceSupported
    # flag, so require both. Defaulting to True keeps the previous
    # prefix-only behavior should the field ever be missing.
    supports_cpu_credits = summary.get("cpu_surcharge", True)
    if instance_type.startswith("t") and supports_cpu_credits:
        summary["cpu_credit_price"] = get_cpu_credit_price(
            instance_type, None, pricing_client
        )
    return summary

get_canonical_images(architecture='x86_64', client=None, session=None)

fetch the subset of official (we refuse to make the obvious pun) Ubuntu Amazon Machine Images from Canonical that we might plausibly want to offer as defaults to users. This will generally return hundreds of images and take > 1.5 seconds because of the number of unsupported daily builds available, so we cache the results with a one-week shelf life.

Parameters:

Name Type Description Default
architecture Literal['x86_64', 'arm64', 'i386']

get images for this system architecture

'x86_64'
client Optional[BaseClient]

optional preexisting boto client

None
session Optional[Session]

optional preexisting boto session

None

Returns:

Type Description
list[dict]

list of metadata dictionaries for all matching Canonical AMIs

Source code in hostess/aws/ec2.py
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
def get_canonical_images(
    architecture: Literal["x86_64", "arm64", "i386"] = "x86_64",
    client: Optional[botocore.client.BaseClient] = None,
    session: Optional[boto3.Session] = None,
) -> list[dict]:
    """
    fetch the subset of official (we refuse to make the obvious pun) Ubuntu
    Amazon Machine Images from Canonical that we might plausibly want to offer
    as defaults to users: images owned by Canonical, for the requested
    architecture, with an 8 GB block device, created in the current calendar
    month or one of the six preceding ones. This will generally return
    hundreds of images and take > 1.5 seconds because of the number of
    unsupported daily builds available.

    NOTE(review): earlier documentation said results are cached with a
    one-week shelf life; no caching happens inside this function -- confirm
    whether it is applied elsewhere.

    Args:
        architecture: get images for this system architecture
        client: optional preexisting boto client
        session: optional preexisting boto session

    Returns:
        list of metadata dictionaries for all matching Canonical AMIs
    """
    client = init_client("ec2", client, session)
    # this perhaps excessive-looking optimization is intended to reduce not
    # only call time but also the chance that we will pick a 'bad' image --
    # Canonical generally drops LTS images at a quicker cadence than 7 months.
    #
    # Count months as a single integer so that stepping backwards across a
    # year boundary is plain arithmetic. (Stepping back in 30-day increments
    # instead can skip a whole month, e.g. February when run on March 31.)
    now = dt.datetime.now()
    current_month_index = now.year * 12 + (now.month - 1)
    month_globs = []
    for months_back in range(7):
        year, month_offset = divmod(current_month_index - months_back, 12)
        month_globs.append(f"{year:04d}-{month_offset + 1:02d}*")
    return client.describe_images(
        Filters=[
            {"Name": "creation-date", "Values": month_globs},
            {"Name": "block-device-mapping.volume-size", "Values": ["8"]},
            {"Name": "architecture", "Values": [architecture]},
        ],
        Owners=["099720109477"],
    )["Images"]

get_stock_ubuntu_image(architecture='x86_64', client=None, session=None)

retrieve image ID of the most recent officially-supported Canonical Ubuntu Server LTS AMI for the provided architecture.

Parameters:

Name Type Description Default
architecture Literal['x86_64', 'arm64', 'i386']

get images for this system architecture

'x86_64'
client Optional[BaseClient]

optional preexisting boto client

None
session Optional[Session]

optional preexisting boto session

None

Returns:

Type Description
str

AWS image ID for most recent matching Ubuntu Server LTS AMI.

Source code in hostess/aws/ec2.py
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
def get_stock_ubuntu_image(
    architecture: Literal["x86_64", "arm64", "i386"] = "x86_64",
    client: Optional[botocore.client.BaseClient] = None,
    session: Optional[boto3.Session] = None,
) -> str:
    """
    retrieve image ID of the most recent officially-supported
    Canonical Ubuntu Server LTS AMI for the provided architecture.

    Candidates come from `get_canonical_images()`; an image qualifies if its
    description mentions "LTS" and mentions neither "UNSUPPORTED" nor "FIPS".

    Args:
        architecture: get images for this system architecture
        client: optional preexisting boto client
        session: optional preexisting boto session

    Returns:
        AWS image ID for most recent matching Ubuntu Server LTS AMI.

    Raises:
        ValueError: if no candidate image qualifies.
    """
    available_images = get_canonical_images(architecture, client, session)
    supported_lts_images = []
    for image in available_images:
        # an image without a description cannot be identified as LTS
        description = image.get("Description") or ""
        if (
            "LTS" in description
            and "UNSUPPORTED" not in description
            and "FIPS" not in description
        ):
            supported_lts_images.append(image)
    if not supported_lts_images:
        raise ValueError(
            "No supported Ubuntu LTS images found for architecture "
            f"{architecture!r}."
        )
    # max() returns the first of several equally-recent images
    newest = max(
        supported_lts_images,
        key=lambda image: dtp.parse(image["CreationDate"]),
    )
    return newest["ImageId"]

instance_catalog(family=None, client=None, session=None)

Construct a catalog of available instance types, including their technical specifications and on-demand pricing.

Parameters:

Name Type Description Default
family Optional[str]

optional name of instance family, e.g. 'm6i'. If not specified, catalog includes all available instance types.

None
client Optional[BaseClient]

optional preexisting boto ec2 client

None
session Optional[Session]

optional preexisting boto session

None

Returns:

Type Description
DataFrame

Instance catalog DataFrame.

Source code in hostess/aws/ec2.py
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
def instance_catalog(
    family: Optional[str] = None,
    client: Optional[botocore.client.BaseClient] = None,
    session: Optional[boto3.Session] = None,
) -> pd.DataFrame:
    """
    Build a table of instance types: one row per type, holding its summarized
    technical specifications joined with on-demand pricing.

    Args:
        family: optional instance family name, e.g. 'm6i'. When given, only
            types whose name before the first '.' equals this are included;
            otherwise all retrieved types are included.
        client: optional preexisting boto ec2 client
        session: optional preexisting boto session

    Returns:
        Instance catalog DataFrame. Types with no pricing entry are kept
        (left join on "instance_type").
    """
    type_structures = _retrieve_instance_type_info(client, session)
    summaries = gmap(summarize_instance_type_structure, type_structures)
    if family is not None:
        summaries = list(
            filter(
                lambda s: s["instance_type"].split(".")[0] == family,
                summaries,
            )
        )
    specs = pd.DataFrame(summaries)
    on_demand = get_ec2_basic_price_list(session=session)["ondemand"]
    prices = pd.DataFrame(on_demand)
    return specs.merge(prices, on="instance_type", how="left")

ls_instances(identifier=None, states=('running', 'pending', 'stopping', 'stopped'), raw_filters=None, client=None, session=None, long=False, tag_regex=True, **tag_filters)

ls for EC2 instances.

Parameters:

Name Type Description Default
identifier Optional[InstanceIdentifier]

string specifying a particular instance. may be a stringified IP address or instance id.

None
states Sequence[str]

strings specifying legal states for listed instances.

('running', 'pending', 'stopping', 'stopped')
raw_filters Optional[Sequence[Mapping[str, str]]]

search filters to pass directly to the EC2 API.

None
client Optional[BaseClient]

optional boto3 Client object

None
session Optional[Session]

optional boto3 Session object

None
long bool

if not True, return InstanceDescriptions, including only the most regularly-pertinent information, flattened, with succinct field names. Otherwise, return a flattened version of the full API response.

False
tag_regex bool

regex patterns for tag matching.

True
tag_filters str

filters to interpret as tag name / value pairs before passing to the EC2 API.

{}

Returns:

Type Description
tuple[Union[InstanceDescription, dict]]

tuple of records describing all matching EC2 instances owned by caller.

Source code in hostess/aws/ec2.py
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
def ls_instances(
    identifier: Optional[InstanceIdentifier] = None,
    states: Sequence[str] = ("running", "pending", "stopping", "stopped"),
    raw_filters: Optional[Sequence[Mapping[str, str]]] = None,
    client: Optional[botocore.client.BaseClient] = None,
    session: Optional[boto3.Session] = None,
    long: bool = False,
    tag_regex: bool = True,
    **tag_filters: str,
) -> tuple[Union[InstanceDescription, dict]]:
    """
    `ls` for EC2 instances.

    Args:
        identifier: string specifying a particular instance. may be a
            stringified IP address or instance id. An identifier containing
            a '.' is treated as an IP address.
        states: strings specifying legal states for listed instances.
        raw_filters: search filters to pass directly to the EC2 API. This
            sequence is copied, never modified.
        client: optional boto3 Client object
        session: optional boto3 Session object
        long: if not True, return InstanceDescriptions, including only the
            most regularly-pertinent information, flattened, with succinct
            field names. Otherwise, return the instance records from the
            API response.
        tag_regex: forwarded to `tagfilter` as its `regex` argument;
            presumably enables regex matching of tag values -- confirm
            against `tagfilter`.
        tag_filters: filters to interpret as tag name / value pairs. These
            are applied locally to the API response rather than sent to the
            EC2 API.

    Returns:
        tuple of records describing all matching EC2 instances owned by
            caller.
    """
    client = init_client("ec2", client, session)
    # copy, so that appending below neither mutates the caller's sequence
    # nor fails when the caller passed an immutable one (e.g. a tuple)
    filters = [] if raw_filters is None else list(raw_filters)
    if identifier is not None:
        identifiers = listify(identifier)
        filter_name = "ip-address" if "." in identifiers[0] else "instance-id"
        filters.append({"Name": filter_name, "Values": identifiers})
    filters.append({"Name": "instance-state-name", "Values": list(states)})
    response = client.describe_instances(Filters=filters)
    descriptions = chain.from_iterable(
        map(get("Instances"), response["Reservations"])
    )
    # TODO: examine newer Filter API functionality (see
    # https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_Filter.html)

    # the EC2 api does not support string inclusion or other fuzzy filters.
    # we would like to be able to fuzzy-filter, so we apply our tag filters to
    # the structure returned by boto3 from its DescribeInstances call.
    descriptions = filter(
        partial(tagfilter, filters=tag_filters, regex=tag_regex), descriptions
    )
    if long is False:
        return gmap(summarize_instance_description, descriptions)
    return tuple(descriptions)

revoke_ingress(sg, force_modification_of_default_sg=False, ports=(22,), protocols=('tcp',), verbose=True)

Remove inbound permission rules from a security group. The default settings revoke permissions on the default SSH port.

Parameters:

Name Type Description Default
sg SecurityGroup

boto3 SecurityGroup resource object

required
force_modification_of_default_sg bool

if True, modify rules even if sg is a default security group

False
ports Collection[int]

revoke only those rules granting ingress on one of these ports

(22,)
protocols Collection[Literal['tcp', 'udp', 'icmp']]

revoke only those rules granting ingress via one of these protocols

('tcp',)
verbose bool

if True, print actions to stdout

True
Source code in hostess/aws/ec2.py
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
def revoke_ingress(
    sg: "SecurityGroup",
    force_modification_of_default_sg: bool = False,
    ports: Collection[int] = (22,),
    protocols: Collection[Literal["tcp", "udp", "icmp"]] = ("tcp",),
    verbose: bool = True
):
    """
    Remove IP-based inbound permission rules from a security group. The
    default settings revoke permissions on the default SSH port. Permissions
    granted to other security groups or prefix lists are left in place.

    Args:
        sg: boto3 SecurityGroup resource object
        force_modification_of_default_sg: if True, modify rules even if
            `sg` is a default security group
        ports: revoke only those rules whose "FromPort" is one of these ports
        protocols: revoke only those rules granting ingress via one of these
            protocols
        verbose: if True, print actions to stdout

    Raises:
        DefaultSecurityGroupError: if `sg` is a default security group and
            `force_modification_of_default_sg` is not set.
    """
    # Security group ids have the form 'sg-<hex>', so a default group has to
    # be recognized by its name; the id check is retained for compatibility.
    is_default_sg = sg.group_name == "default" or "default" in sg.id
    if is_default_sg and not force_modification_of_default_sg:
        raise DefaultSecurityGroupError(
            "\tRefusing to modify permissions of a default security group. "
            "Pass force_modification_of_default_sg=True to override."
        )
    print_ = print if verbose is True else lambda *_, **__: None
    print_(
        f"Revoking ingress from all IPs on port(s) {ports} to security group "
        f"{sg.id}"
    )
    for rule in sg.ip_permissions:
        # rules covering all protocols carry no "FromPort" key at all, so
        # look it up tolerantly rather than raising KeyError on them
        if not (
            rule.get("FromPort") in ports and rule["IpProtocol"] in protocols
        ):
            continue  # irrelevant rule
        ip_ranges = rule.get("IpRanges", []) + rule.get("Ipv6Ranges", [])
        if not ip_ranges:
            # nothing IP-based to revoke; do not remove ingress permissions
            # based on security group (and not just IP).
            continue
        sg.revoke_ingress(IpPermissions=[rule])
        # these keys may be present but empty, so test for actual entries
        if rule.get("UserIdGroupPairs") or rule.get("PrefixListIds"):
            # to do this for ingress rules that contain both security group
            # and IP permissions, you must revoke, modify, and then
            # reauthorize the rule.
            non_ip_rule = {**rule, "IpRanges": [], "Ipv6Ranges": []}
            sg.authorize_ingress(IpPermissions=[non_ip_rule])

summarize_instance_description(description)

convert a dictionary produced from an EC2 API Instance object to a more concise format. Likely sources for this dictionary include boto3 or parsing JSON responses from the AWS CLI or HTTP API.

Source code in hostess/aws/ec2.py
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def summarize_instance_description(
    description: Mapping,
) -> InstanceDescription:
    """
    convert a dictionary produced from an EC2 API Instance object to a more
    concise format. Likely sources for this dictionary include `boto3` or
    parsing JSON responses from the AWS CLI or HTTP API.

    Args:
        description: mapping with the fields of an EC2 API Instance
            structure. Any field may be absent.

    Returns:
        dict with keys "name" (value of the "Name" tag), "ip", "id",
            "state", "type", "ip_private", and "keyname". A value is None
            when the corresponding field is absent from `description`.
    """
    # "State" is a nested structure; treat a missing one like any other
    # missing field rather than failing on None["Name"]
    state = description.get("State") or {}
    return {
        "name": tag_dict(description.get("Tags", {})).get("Name"),
        "ip": description.get("PublicIpAddress"),
        "id": description.get("InstanceId"),
        "state": state.get("Name"),
        "type": description.get("InstanceType"),
        "ip_private": description.get("PrivateIpAddress"),
        "keyname": description.get("KeyName"),
    }

summarize_instance_type_response(response)

extract a series of EC2 InstanceTypeInfo from a boto3-wrapped DescribeInstanceTypes or DescribeInstanceTypeOfferings call and produce succinct summaries of them

Parameters:

Name Type Description Default
response dict

boto3-wrapped DescribeInstanceType* API response

required

Returns:

Type Description
tuple[dict]

summaries of each described instance type

Source code in hostess/aws/ec2.py
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
def summarize_instance_type_response(response: dict) -> tuple[dict]:
    """
    Summarize every [EC2 InstanceTypeInfo](https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_InstanceTypeInfo.html)
    structure listed under the "InstanceTypes" key of a boto3-wrapped
    DescribeInstanceTypes-style response.

    Args:
        response: boto3-wrapped API response containing an "InstanceTypes"
            list of InstanceTypeInfo structures

    Returns:
        one succinct summary per listed instance type, in response order
    """
    return gmap(summarize_instance_type_structure, response["InstanceTypes"])

summarize_instance_type_structure(itinfo)

summarize an EC2 InstanceTypeInfo object.

Parameters:

Name Type Description Default
itinfo dict

dict created from an InstanceTypeInfo structure, as returned by boto3 functions like describe_instance_types()

required

Returns:

Type Description
dict

succinct version of InstanceTypeInfo structure

Source code in hostess/aws/ec2.py
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
def summarize_instance_type_structure(itinfo: dict) -> dict:
    """
    Flatten an [EC2 InstanceTypeInfo](https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_InstanceTypeInfo.html)
    structure into a short dict of its most useful fields.

    Args:
        itinfo: dict created from an InstanceTypeInfo structure, as returned
            by boto3 functions like `describe_instance_types()`

    Returns:
        dict with keys "instance_type", "architecture" (the first supported
            architecture listed), "cpus", "cpu_speed" (None if not
            reported), "ram" (in GiB), "bw", "disks", "local_storage", and
            "cpu_surcharge"; plus "ebs_bw_min", "ebs_bw_max", "ebs_iops_min",
            and "ebs_iops_max" when EBS-optimized info is present.
    """
    processor = itinfo["ProcessorInfo"]
    summary = {
        "instance_type": itinfo["InstanceType"],
        "architecture": processor["SupportedArchitectures"][0],
        "cpus": itinfo["VCpuInfo"]["DefaultVCpus"],
        "cpu_speed": processor.get("SustainedClockSpeedInGhz"),
        # reported in MiB; convert to GiB
        "ram": itinfo["MemoryInfo"]["SizeInMiB"] / 1024,
        "bw": itinfo["NetworkInfo"]["NetworkPerformance"],
    }
    ebs_info = itinfo["EbsInfo"]
    if "EbsOptimizedInfo" in ebs_info:
        optimized = ebs_info["EbsOptimizedInfo"]
        ebs_fields = (
            ("ebs_bw_min", "BaselineThroughputInMBps"),
            ("ebs_bw_max", "MaximumThroughputInMBps"),
            ("ebs_iops_min", "BaselineIops"),
            ("ebs_iops_max", "MaximumIops"),
        )
        for summary_key, api_key in ebs_fields:
            summary[summary_key] = optimized[api_key]
    if itinfo["InstanceStorageSupported"] is True:
        storage = itinfo["InstanceStorageInfo"]
        summary["disks"] = storage["Disks"]
        summary["local_storage"] = storage["TotalSizeInGB"]
    else:
        # no instance store: report an empty disk list and zero capacity
        summary["disks"] = []
        summary["local_storage"] = 0
    summary["cpu_surcharge"] = itinfo["BurstablePerformanceSupported"]
    return summary

aws.pricing

HOURS_PER_MONTH = 730 module-attribute

canonical hours-to-month conversion used in AWS rate quotes

get_ec2_basic_price_list(region=None, client=None, session=None, reset_cache=False)

on-demand rates are USD / instance-hour. EBS volume is USD / GB-month. EBS throughput is USD / Gibps-month. EBS IOPS is USD / IOPS-month. CPU credits are too complicated to explain in this margin.

Source code in hostess/aws/pricing.py
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
def get_ec2_basic_price_list(
    region=None, client=None, session=None, reset_cache=False
):
    """
    Fetch basic EC2 price lists for a region, using an on-disk pickle cache.

    on-demand rates are USD / instance-hour.
    EBS volume is USD / GB-month.
    EBS throughput is USD / Gibps-month.
    EBS IOPS is USD / IOPS-month.
    CPU credits are too complicated to explain in this margin.

    Args:
        region: region to fetch prices for. If None, it is resolved by
            `clarify_region`.
        client: optional preexisting boto pricing client
        session: optional preexisting boto session
        reset_cache: if True, ignore any cached price list and fetch a
            fresh one.

    Returns:
        dict with keys "ondemand", "credits", and "ebs".
    """
    if region is None:
        client = init_client("pricing", client, session)
        region = clarify_region(region, client)
    cache_path = Path(GENERAL_DEFAULTS["cache_path"])
    prefix = f"ec2_basic_price_list_{region}"
    if reset_cache is False:
        # max_age is presumably in days -- confirm in _check_cached_results
        cached_results = _check_cached_results(cache_path, prefix, max_age=7)
        if cached_results is not None:
            # close the cache file promptly rather than leaking the handle
            with cached_results.open("rb") as stream:
                return pickle.load(stream)
    if client is None:
        client = init_client("pricing", client, session)
    prices = {
        "ondemand": get_on_demand_rates(region, client),
        "credits": get_cpu_credit_rates(region, client),
        "ebs": get_ebs_rates(region, client),
    }
    # drop superseded cache files before writing the fresh one
    _clear_cached_results(cache_path, prefix)
    with Path(cache_path, f"{prefix}_{filestamp()}.pkl").open("wb") as stream:
        pickle.dump(prices, stream)
    return prices

get_on_demand_price(instance_type, region=None, client=None, session=None)

fetch on-demand pricing information for a specific instance type.

Source code in hostess/aws/pricing.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
def get_on_demand_price(instance_type, region=None, client=None, session=None):
    """
    Look up the on-demand price of one instance type, querying for Linux
    with no preinstalled software in the given region.

    Args:
        instance_type: instance type name, e.g. 'm6i.large'
        region: region to price in; resolved by `clarify_region`
        client: optional preexisting boto pricing client
        session: optional preexisting boto session

    Returns:
        the "USD" value dug out of the first matching product, as a float
    """
    client = init_client("pricing", client, session)
    region = clarify_region(region, client)
    # NOTE(review): usagetype values may carry a region prefix outside
    # us-east-1 -- confirm this exact-match filter works in other regions.
    criteria = (
        ("regionCode", region),
        ("operatingSystem", "Linux"),
        ("usagetype", f"BoxUsage:{instance_type}"),
        ("preInstalledSw", "NA"),
    )
    term_filters = [
        {"Type": "TERM_MATCH", "Field": field, "Value": value}
        for field, value in criteria
    ]
    response = client.get_products(
        ServiceCode="AmazonEC2", Filters=term_filters
    )
    # each price list entry is a JSON document; only the first match is used
    first_product = json.loads(response["PriceList"][0])
    return float(dig_for_value(first_product, "USD"))

aws.s3

This module provides managed operations on AWS S3 objects. Its centerpiece is a Bucket object providing a high-level interface to operations on a single S3 bucket. This module was originally motivated by the need to quickly construct pandas DataFrames containing inventories of large buckets.

Much of this module wraps lower-level methods of boto3, so it is in some sense an alternative implementation of boto3's own high-level managed S3 methods and objects, designed for cases in which deeper indexing, more response introspection, or more flexible I/O stream manipulation are required. We also like the syntax better.

AZ_IDPAT = re.compile('[a-z]{3}\\d-az(?P<number>\\d)') module-attribute

Legal AZ id

AZ_NAMEPAT = re.compile('[a-z]{2}-[a-z]{3,10}-\\d(?P<letter>[a-z])') module-attribute

Legal AZ name

BUCKET_NAMEPAT = re.compile('[a-z0-9-.]{3,63}') module-attribute

Legal name for general-purpose bucket

BUCKET_TYPE = Literal['general', 'directory'] module-attribute

Bucket types supported by Bucket: general-purpose ('general') or directory ('directory')

DIRECTORY_BUCKET_NAMEPAT = re.compile('[a-z0-9-.]{1,45}--[a-z]{3}\\d-az\\d--x-s3') module-attribute

Legal name for zonal directory bucket

Puttable = Union[str, Path, IOBase, bytes, None] module-attribute

type alias for Python objects Bucket will write to S3

Bucket

Interface to and representation of an S3 bucket.

Note

Bucket supports only general-purpose and directory buckets, not table buckets.

Source code in hostess/aws/s3.py
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
class Bucket:
    """
    Interface to and representation of an S3 bucket.

    Note:
        Bucket supports only general-purpose and directory buckets, not
        table buckets.
    """

    def __init__(
        self,
        bucket_name: str,
        client: Optional[botocore.client.BaseClient] = None,
        resource: Optional[boto3.resources.base.ServiceResource] = None,
        session: Optional[boto3.session.Session] = None,
        config: Optional[boto3.s3.transfer.TransferConfig] = None,
        n_threads: Optional[int] = 4,
    ):
        """
        Bind this object to a named bucket and set up its S3 handles.

        Args:
            bucket_name: name of bucket
            client: optional boto3 s3 Client. if omitted, a default client
                is created.
            resource: optional boto3 s3 Resource. if omitted, a default
                resource is created.
            session: optional boto3 Session. if omitted, a default session
                is created.
            config: optional boto3 TransferConfig. if omitted, one is built
                from `S3_DEFAULTS["config"]`.
            n_threads: if not None, automatically multithread some operations.
                this is not a hard cap on the number of threads used by a
                single bucket operation: it caps concurrency across
                operations on multiple objects, not concurrency within an
                operation on one object. to cap the latter, modify the
                `max_concurrency` attribute of `config`.
        """
        self.client = init_client("s3", client, session)
        self.resource = init_resource("s3", resource, session)
        self.name = bucket_name
        self.session = session
        self.n_threads = n_threads
        # manifest of known objects; populated by update_contents()
        self.contents = []
        self.config = (
            boto3.s3.transfer.TransferConfig(**S3_DEFAULTS["config"])
            if config is None
            else config
        )

    @classmethod
    def create(
        cls,
        name: str,
        client: botocore.client.BaseClient | None = None,
        session: boto3.session.Session = None,
        *,
        bucket_type: BUCKET_TYPE = "general",
        az: str | int | None = None,
        tags: dict[str, str] | None = None,
        bucket_config: Mapping | None = None,
        **bucket_kwargs
    ):
        """
        Create a new bucket on S3 and return it as a Bucket object.

        Args:
            name: Name of bucket. If creating a directory bucket, do not
                include the AZ suffix (e.g. pass "something" instead of
                "something--use1-az4--x-s3"). `Bucket` will automatically add
                the correct suffix.
            client: optional boto3 s3 Client. if not specified, creates a
                default client.
            session: optional boto3 Session. if not specified, creates a
                default session. Also passed on to the returned object.
            bucket_type: "general" (default, meaning a general-purpose bucket)
                or "directory" (meaning a directory bucket). Note that this
                method only supports zonal directory buckets.
            az: Name, letter, ID, or number of the Availability Zone (AZ) in
                which to create a directory bucket. For instance, if `session`
                is associated with the us-east-1 region, 'us-east-1c',
                `'use1-az4'`, `'c'`, and `4` all refer to the same AZ. Note
                that creating a bucket in a region other than the one `client`
                (or `session`, if `client` is not passed) is associated with
                is not supported.

                This argument is ignored for general-purpose buckets.

                Note that not all AZs support directory buckets, and there is
                no mechanism to discover which do and do not via the API
                (other than actually attempting to create one). See:
                https://docs.aws.amazon.com/AmazonS3/latest/userguide/endpoint-directory-buckets-AZ.html
            tags: Keys and values of bucket tags to set (via `set_tags()`)
                after successful bucket creation. If not None, there must be
                at least one item in this dict, and all values must be
                strings; this is checked before the bucket is created.
                NOTE(review): earlier documentation stated that tags on a
                directory bucket raise a ValueError; `create()` itself
                performs no such check -- confirm against `set_tags()`.
            bucket_config: passed to the botocore `create_bucket()` method as
                the 'CreateBucketConfiguration' argument.
            bucket_kwargs: passed directly to `Bucket.__init__()`.

        Returns:
            Object of this class representing the newly-created bucket.

        Raises:
            ValueError: if `bucket_type` is unrecognized, the (suffixed) name
                does not match the relevant bucket-name pattern, `tags` is
                empty, `bucket_config` tries to set 'Location' or 'Bucket'
                for a directory bucket, or S3 rejects a directory bucket
                name with InvalidBucketName.
            TypeError: if `az` is None for a directory bucket, or any tag
                value is not a string.
            ClientError: any other error raised by the `create_bucket` call.

        Caution:
            In all regions other than us-east-1, the S3 API returns a
            'bucket already exists and is owned by you' error if a user
            attempts to create a bucket with the same name as a bucket they
            already own. In us-east-1, it instead returns a standard success
            response and silently erases all ACLs associated with that bucket.
            We are unwilling to spring this on users, and for this reason,
            `Bucket.create()` behaves slightly differently in us-east-1. It
            checks whether the user already owns a bucket with the requested
            name, and raises an exception if so. This means that an account
            must have the ListBuckets permission to use `Bucket.create()` in
            us-east-1.
        """
        client = init_client("s3", client, session)
        if bucket_type not in ("general", "directory"):
            raise ValueError("'bucket_type' must be 'general' or 'directory'.")
        # Validate tags up front: set_tags() would reject these same inputs,
        # but only after the bucket had already been created.
        if tags is not None:
            if len(tags) == 0:
                raise ValueError("No tags to set")
            if not all(isinstance(v, str) for v in tags.values()):
                raise TypeError("Tag values must be strings")
        if bucket_type == "directory":
            if az is None:
                raise TypeError(
                    "'az' must not be None for a directory bucket."
                )
            azid = check_az(az, region_name=client.meta.region_name)
            name = _attach_directory_bucket_suffix(name, azid)
            pat = DIRECTORY_BUCKET_NAMEPAT
        else:
            pat, azid = BUCKET_NAMEPAT, None
        if pat.match(name) is None:
            raise ValueError(f"{name} is not a valid bucket name.")
        if client.meta.region_name == 'us-east-1':
            _raise_for_owned_use1_bucket(client, name, bucket_type)
        conf = dict(bucket_config) if bucket_config is not None else {}
        if bucket_type == "directory":
            if len({'Location', 'Bucket'}.intersection(conf.keys())) > 0:
                raise ValueError(
                    "Please do not specify custom 'Location' or 'Bucket' "
                    "values in bucket config for directory buckets."
                )
            conf |= {
                'Location': {'Type': "AvailabilityZone", 'Name': azid},
                'Bucket': {
                    'Type': 'Directory',
                    'DataRedundancy': 'SingleAvailabilityZone'
                }
            }

        # note that we're just relying on botocore for exceptions at this
        # stage. If it doesn't raise one, we assume it worked.
        kwargs = {'Bucket': name}
        if len(conf) > 0:
            kwargs['CreateBucketConfiguration'] = conf
        try:
            client.create_bucket(**kwargs)
        except ClientError as ce:
            if "InvalidBucketName" in str(ce) and bucket_type == "directory":
                raise ValueError(
                    f"Although {name} is a valid directory bucket name, the "
                    f"S3 API returned an InvalidBucketName error. This "
                    f"typically indicates that Availability Zone {az} in "
                    f"{client.meta.region_name} does not support directory "
                    f"buckets."
                ) from ce
            raise
        # forward the session so the new object's `session` attribute (used
        # when it builds further clients) matches the one used here
        bucket = cls(name, client=client, session=session, **bucket_kwargs)
        if tags is not None:
            bucket.set_tags(**tags)
        return bucket

    def delete(self):
        """
        Issue a DeleteBucket request for this bucket.

        Notes:
            S3 refuses to delete a bucket that still contains objects, and
            `hostess` offers no 'force'-style operation that empties a
            bucket automatically before deleting it.
        """
        request = {"Bucket": self.name}
        self.client.delete_bucket(**request)

    def update_contents(
        self,
        prefix: Optional[str] = None,
        cache: Optional[Union[str, Path, IOBase]] = None,
        fetch_owner: bool = False
    ):
        """
        Run a recursive listing of the bucket via `self.ls()` and keep the
        result in `self.contents`.

        Args:
            prefix: prefix at which to start the scan. if omitted, the whole
                bucket is scanned.
            cache: optional file or filelike object that scan results are
                also written to, in addition to `self.contents`.
            fetch_owner: if True, include owner of objects in response.
        """
        scan_options = {
            "recursive": True,
            "prefix": prefix,
            "cache": cache,
            "formatting": "contents",
            "fetch_owner": fetch_owner,
        }
        self.contents = self.ls(**scan_options)

    def chunk_putter_factory(
        self,
        key: str,
        upload_threads: Optional[int] = 4,
        download_threads: Optional[int] = None,
        verbose: bool = False,
    ):
        """
        Build a callable chunk uploader for a new multipart upload. It can be
        used fairly directly or handed to complex pipelines as a callback.

        Args:
            key: key of the object the multipart upload will create.
            upload_threads: size of the thread pool used for part uploads;
                None means parts are uploaded in the calling thread.
            download_threads: must be None (asynchronous downloads are not
                implemented).
            verbose: forwarded to the chunk uploader.

        Returns:
            A 3-tuple: the chunk uploader (a partial of
            `_put_stream_chunk`), the dict it records uploaded parts in, and
            the object returned by `create_multipart_upload()`.

        Raises:
            NotImplementedError: if `download_threads` is not None.
        """
        if download_threads is not None:
            raise NotImplementedError(
                "Asynchronous downloads are not yet implemented; "
                "please pass download_threads=None"
            )
        upload_parts = {}
        upload = self.create_multipart_upload(key)
        executor = (
            None
            if upload_threads is None
            else ThreadPoolExecutor(upload_threads)
        )
        putter = partial(
            self._put_stream_chunk,
            config=self.config,
            # one-element list used as a mutable byte buffer
            download_cache=[b""],
            multipart=upload,
            upload_numerator=naturals(),
            parts=upload_parts,
            exc=executor,
            verbose=verbose,
        )
        return putter, upload_parts, upload

    def df(self) -> pd.DataFrame:
        """
        Build a pandas DataFrame manifest of every object this Bucket knows
        about. When `self.contents` is empty (e.g. update_contents() has
        never been called), scan the bucket first instead of returning an
        empty DataFrame.

        Returns:
            Manifest of all known objects in bucket.
        """
        manifest = self.contents
        if len(manifest) == 0:
            self.update_contents()
            manifest = self.contents
        return pd.DataFrame(manifest)

    def put_stream(
        self,
        obj: Union[Iterator, IO, str, Path],
        key: str,
        config: Optional[boto3.s3.transfer.TransferConfig] = None,
        upload_threads: Optional[int] = 4,
        # download_threads: Optional[int] = None,
        verbose: bool = False,
        explicit_length: Optional[int] = None,
        # TODO: overrides chunksize in config -- maybe make an easier interface
        #  to this
        chunksize: Optional[int] = None,
    ) -> Optional[dict]:
        """
        Create an S3 object from a byte stream via a managed multipart upload.
        Intended primarily for intermittent streams, incremental writes of
        larger-than-memory data, direct streams from remote resources, and
        streams of unknown length. If you are just uploading on-disk files
        or discrete in-memory objects, Bucket.put() is usually preferable.

        Args:
            obj: source of stream to upload. May be a path, a URL, a filelike
                object, or any iterator that yields `bytes` objects.
            key: key of object to create from stream (fully-qualified 'path'
                relative to bucket root)
            config: optional transfer config
            upload_threads: number of threads to use for upload (None
                means upload serially)
            verbose: print and log progress of streaming upload
            explicit_length: optional explicit length specification, for
                streams of known length
            chunksize: size of individual upload chunks; overrides any setting
                in config. if stream length is explicitly specified or inferred
                to be less than chunksize, this function will fall back to a
                simple put operation.

        Returns:
            API response to multipart upload completion, or None if stream
                length < chunksize and we fell back to simple upload

        Raises:
            NotImplementedError: if `obj` is an s3:// URL.
            TypeError: if `obj` offers neither `read` nor `__next__`.
            ValueError: if the stream yields no data at all.

        Caution:
            `chunksize` is assigned onto the config object in use. When
            `config` is None that object is this Bucket's own `self.config`,
            so the override persists after the call.
        """
        if config is None:
            config = self.config
        if chunksize is not None:
            config.multipart_chunksize = chunksize
        stream = obj
        if isinstance(stream, str):
            if stream.startswith("http") and ("//" in stream):
                stream = requests.get(stream, stream=True)
            elif stream.startswith("s3") and ("//" in stream):
                raise NotImplementedError(
                    "dispatch for handling s3 urls is not yet implemented. "
                    "please use something else for now."
                )
        if isinstance(stream, requests.Response):
            stream.raise_for_status()
        target_chunksize = config.multipart_chunksize
        if explicit_length is not None:
            length = explicit_length
        else:
            length = infer_stream_length(stream)
        if (length is not None) and (length < config.multipart_chunksize):
            if verbose:
                console_and_log(
                    "Stream shorter than chunksize, falling back to basic put."
                )
            return self.put(obj=stream, key=key, config=config)
        if isinstance(stream, (str, Path)):
            stream = Path(stream).open("rb")
        if isinstance(stream, requests.Response):
            stream = stream.iter_content(chunk_size=target_chunksize)
        if "read" in dir(stream):
            reader = partial(stream.read, target_chunksize)
        elif "__next__" in dir(stream):
            reader = stream.__next__
        else:
            raise TypeError(
                "can't determine how to consume bytes from stream."
            )
        put_chunk, parts, multipart_upload = self.chunk_putter_factory(
            key, upload_threads, None, verbose
        )
        received_any = False
        try:
            chunk = reader()
            while len(chunk) > 0:
                received_any = True
                # hand over the chunk we already hold *before* reading the
                # next one, so the first chunk of the stream is not dropped
                put_chunk(chunk)
                chunk = reader()
        except StopIteration:
            # iterator-style sources signal exhaustion this way
            pass
        except Exception:
            # don't leave a dangling multipart upload behind on failure
            self.abort_multipart_upload(multipart=multipart_upload)
            raise
        if not received_any:
            # covers both an empty first read and an empty iterator
            self.abort_multipart_upload(multipart=multipart_upload)
            raise ValueError("Empty stream.")
        # release our reference to the last chunk read
        chunk = None
        # upload whatever remains in the buffer as the final part
        put_chunk(b"", flush=True)
        # drop our reference to the partial (and, through it, to the
        # executor and byte buffer it holds)
        del put_chunk
        if upload_threads is not None:
            while not all(f.done() for f in parts.values()):
                time.sleep(0.05)
            parts = {number: f.result() for number, f in parts.items()}
        return self.complete_multipart_upload(
            multipart=multipart_upload, parts=parts
        )

    def _put_stream_chunk(
        self,
        blob: bytes,
        download_cache: list[bytes],
        parts: MutableMapping,
        multipart: Mapping,
        upload_numerator: Iterator[int],
        config: boto3.s3.transfer.TransferConfig,
        exc: Optional[ThreadPoolExecutor],
        verbose: bool = False,
        flush: bool = False,
    ):
        """
        Helper for Bucket.put_stream(): buffer `blob` and, once the buffer
        reaches `config.multipart_chunksize` (or `flush` is set), upload the
        buffer as the next part of `multipart`.

        The upload result -- or, when `exc` is given, the Future for it -- is
        stored in `parts` under the part number.
        """
        download_cache[0] += blob
        buffer_full = len(download_cache[0]) >= config.multipart_chunksize
        if not buffer_full and flush is False:
            # keep accumulating until a full chunk is available
            return
        part_number = next(upload_numerator)
        if verbose is True:
            action = "received" if flush is False else "flushing buffer as"
            console_and_log(
                f"{stamp()}: {action} chunk {part_number} for "
                f"{multipart['Key']}, initiating upload"
            )
        upload_args = dict(
            Body=download_cache.pop(),
            Bucket=self.name,
            Key=multipart["Key"],
            PartNumber=part_number,
            UploadId=multipart["UploadId"],
        )
        # start a fresh, empty buffer for subsequent blobs
        download_cache.append(b"")
        if exc is None:
            parts[part_number] = self.client.upload_part(**upload_args)
        else:
            parts[part_number] = exc.submit(
                self.client.upload_part, **upload_args
            )

    @splitwrap(seq_arity=1)
    def freeze(
        self,
        key: Union[str, Sequence[str]],
        storage_class: str = "DEEP_ARCHIVE",
    ) -> Union[str, list[Union[str, Exception]]]:
        """
        Change the storage class of one or more objects by copying each
        object over itself with `cp()`. Mainly meant for moving objects from
        S3 Standard into one of the Glacier classes.

        Args:
            key: object key(s) (fully-qualified 'path' relative to bucket root)
            storage_class: target storage class

        Returns:
            uri: URI of frozen object, or list containing URI of each
                frozen object if its freeze succeeded and an Exception if not
        """
        frozen_uri = self.cp(key, StorageClass=storage_class)
        return frozen_uri

    @splitwrap(seq_arity=1)
    def restore(
        self,
        key: Union[str, Sequence[str]],
        tier: Literal["Expedited", "Standard", "Bulk"] = "Bulk",
        days: int = 5,
    ) -> Union[dict, list[Union[dict, Exception]]]:
        """
        Request temporary restoration of one or more objects from S3
        Glacier Flexible Retrieval or Deep Archive to S3 Standard. Restoration
        is not instantaneous: depending on retrieval tier and storage class,
        AWS guarantees retrieval times ranging from 5 minutes to 48 hours.
        See https://docs.aws.amazon.com/AmazonS3/latest
        /userguide/restoring-objects-retrieval-options.html for details.

        Progress of restore requests can be checked with Bucket.head().

        Args:
            key: key(s) of object(s) to restore (fully-qualified 'paths')
            tier: retrieval tier. From fastest and most expensive to slowest
                and cheapest, the options are "Expedited", "Standard", and
                "Bulk". "Expedited" is not available for Deep Archive.
            days: number of days object(s) should stay restored before
                reverting to Glaciered state

        Returns:
            RestoreObject API response, or list of responses and/or Exceptions
        """
        return self.client.restore_object(
            Bucket=self.name,
            Key=key,
            RestoreRequest={
                "Days": days,
                "GlacierJobParameters": {"Tier": tier},
            },
        )

    @splitwrap(seq_arity=2, splittable=["checksum"])
    def put(
        self,
        obj: Union[Puttable, Sequence[Puttable]] = b"",
        key: Optional[Union[str, Sequence[str]]] = None,
        literal_str: bool = False,
        config: Optional[boto3.s3.transfer.TransferConfig] = None,
        checksum: Optional[Union[str, Sequence[str]]] = None,
        **extra_args: str
    ) -> Union[None, list[Optional[Exception]]]:
        """
        Upload files or in-memory buffers to this bucket.

        Args:
            obj: A single str, Path, or filelike / buffer object to upload,
                or a sequence of such objects. None uploads an empty object
                ('touch'-type behavior).
            key: S3 key (fully-qualified 'path' from bucket root); or, if `obj`
                is a sequence, a sequence of keys of the same length as `obj`.
                When `key` is omitted, key(s) are generated from the string
                representation(s) of the uploaded object(s), truncated to
                1024 characters (maximum length of an S3 key).
            literal_str: If True, and `obj` is a string or a sequence
                containing strings, write those strings directly to objects.
                Otherwise, treat them as paths to local files.
            config: boto3.s3.transfer.TransferConfig; bucket's default if None
            checksum: Optional base64-encoded raw 4-byte CRC32 checksum of
                object, or sequence of such checksums for each object. Used
                for full-object S3 checksum verification. Other checksum
                types and algorithms are not supported.
            extra_args: ExtraArgs for boto3 bucket object

        Returns:
            None, or, for multi-upload, a list containing None for each
                successful put and an Exception for each failed one

        Raises:
            TypeError: if `obj` is not a string, bytes, or an object with a
                `read` attribute (and is not treated as a file path).
        """
        if config is None:
            config = self.config
        if key is None:
            # no key given: derive one from the object's string form,
            # capped at 1024 characters
            key = str(obj)[:1024]
        if checksum is not None:
            extra_args.update(
                ChecksumAlgorithm="CRC32",
                ChecksumCRC32=checksum,
                ChecksumType="FULL_OBJECT",
            )
        transfer_kwargs = dict(
            Bucket=self.name, Key=key, Config=config, ExtraArgs=extra_args
        )
        if obj is None:
            # 'touch'-type behavior
            obj = BytesIO()
        if _should_be_file(obj, literal_str):
            # upload straight from local storage
            return self.client.upload_file(
                Filename=str(obj), **transfer_kwargs
            )
        # otherwise upload an in-memory object; a str reaching this point is
        # literal content rather than a path, so encode it
        if isinstance(obj, (str, bytes)):
            payload = obj.encode("utf-8") if isinstance(obj, str) else obj
            obj = BytesIO(payload)
        elif not hasattr(obj, "read"):
            # anything else has to be buffer/file-like. this is not a
            # perfect heuristic, of course!
            raise TypeError(f"Cannot put object of type {type(obj)}")
        return self.client.upload_fileobj(Fileobj=obj, **transfer_kwargs)

    @splitwrap(seq_arity=2)
    def get(
        self,
        key: Union[str, Sequence[str]],
        destination: Union[
            Union[str, Path, IOBase, None],
            Sequence[Union[str, Path, IOBase, None]],
        ] = None,
        config: Optional[boto3.s3.transfer.TransferConfig] = None,
        start_byte: Optional[int] = None,
        end_byte: Optional[int] = None,
        **extra_args: str
    ) -> Union[Path, str, IOBase] | list[Union[Path, str, IOBase, Exception]]:
        """
        Download S3 object(s) into file(s) or filelike object(s).

        Args:
            key: object key(s) (fully-qualified 'path(s)' from root)
            destination: where to write the retrieved object(s). May be path(s)
                or filelike object(s). If omitted, new BytesIO buffer(s) are
                constructed. Missing parent directories of a path destination
                are created, and removed again if the download fails.
            config: optional transfer config
            start_byte: Byte index at which to begin read. None (default) or
                0 means the first byte of the object. Negative integers are
                interpreted as Python-style negative slice indices. e.g.,
                `start_byte=0` and `end_byte=-1` means 'read all the bytes
                but the last one', analogous to `my_list[0:-1]`.
            end_byte: Byte index at which to end read. None (default) means
                the last byte of the object.
            extra_args: passed directly to `TransferManager.download()`
        Returns:
            outpath: the path, string, or buffer we wrote the object to, or,
                for multi-get, a list containing one such outpath for each
                successful write and an Exception for each failed write

        Caution:
            This does not currently support specifying different byte ranges
            for different objects. In other words, this is not legal:
            ```
            Bucket.get(
                [k1, k2], [f1, f2], start_byte=[s1, s2], end_byte=[e1, e2]
            )
            ```
            If specified in a multi-object call to `get()`, `start_byte` and
            `end_byte` will fetch the same range from each object.

            This may change in the future.
        """
        # TODO: add more useful error messages for streams opened in text mode
        if config is None:
            config = self.config
        dest = BytesIO() if destination is None else destination
        if isinstance(dest, Path):
            dest = str(dest)
        if start_byte == 0:
            # starting at byte 0 is the same as not specifying a start
            start_byte = None
        download_kwargs = {'extra_args': extra_args}
        if start_byte is None and end_byte is None:
            manager_class = TransferManager
        else:
            manager_class = TransferManagerWithRange
            download_kwargs.update(start_byte=start_byte, end_byte=end_byte)
        with manager_class(self.client, config) as manager:
            created_dirs = []
            if not isinstance(dest, IOBase):
                # create missing parents, outermost first
                for parent in reversed(Path(dest).parents):
                    if not parent.exists():
                        parent.mkdir()
                        created_dirs.append(parent)
            future = manager.download(self.name, key, dest, **download_kwargs)
            try:
                # this call is strictly intended to raise exceptions,
                # e.g. attempts to get objects that don't exist.
                future.result()
            except BaseException:
                # undo directory creation, innermost first
                for made in reversed(created_dirs):
                    made.rmdir()
                raise
        if hasattr(dest, "seek"):
            dest.seek(0)
        return dest

    def read(
        self,
        key: str,
        mode: Literal["r", "rb"] = "r",
        return_buffer: bool = False,
        start_byte: Optional[int] = None,
        end_byte: Optional[int] = None
    ) -> Union[bytes, str, BytesIO, StringIO]:
        """
        Fetch an S3 object into memory via `get()`.

        Args:
            key: fully-qualified 'path' to object
            mode: 'rb' to read as bytes; any other value reads as text
                (decoded with `bytes.decode()` defaults)
            return_buffer: if True, return the object as a StringIO/BytesIO;
                if False, return it as str/bytes
            start_byte: if not None, start reading at this byte
            end_byte: if not None, stop reading at this byte

        Returns:
            Contents of object, in format specified by `mode` and
                `return_buffer`.
        """
        buf = self.get(key, start_byte=start_byte, end_byte=end_byte)
        if mode == "rb":
            return buf if return_buffer is True else buf.read()
        text = buf.read().decode()
        if return_buffer is True:
            return StringIO(text)
        return text

    # TODO, maybe: rewrite this using lower-level methods. This may not be
    #  required, because flexibility with S3 -> S3 copies is less often useful
    #  than with uploads.
    @splitwrap(seq_arity=2)
    def cp(
        self,
        source: Union[str, Sequence[str]],
        destination: Union[Optional[str], Sequence[Optional[str]]] = None,
        destination_bucket: Optional[str] = None,
        config: Optional[boto3.s3.transfer.TransferConfig] = None,
        **extra_args: str
    ) -> Union[str, list[Union[str, Exception]]]:
        """
        Copy S3 object(s) to another location on S3.

        Args:
            source: key(s) of object(s) to copy (fully-qualified 'path(s)')
            destination: key(s) to copy object(s) to (by default, uses
                source key(s))
            destination_bucket: bucket to copy object(s) to. if not
                specified, uses source bucket. this means that if destination
                and destination_bucket are both None, it overwrites the object
                inplace. this is useful for things like storage class changes.
            config: optional transfer config
            extra_args: ExtraArgs for boto3 bucket object

        Returns:
            uri: S3 URI of newly-created copy (`s3://<bucket>/<key>`), or,
                for multi-copy, a list containing an S3 URI for each
                successful copy and an Exception for each failed
        """
        config = self.config if config is None else config
        if destination_bucket is None:
            destination_bucket = self.name
        # supporting copy-self-to-self for storage class changes etc.
        destination = source if destination is None else destination

        # use boto3's high-level Bucket object to perform a managed transfer
        # (in order to easily support objects > 5 GB)
        destination_bucket_object = self.resource.Bucket(destination_bucket)
        copy_source = {"Bucket": self.name, "Key": source}
        destination_bucket_object.copy(
            copy_source, destination, ExtraArgs=extra_args, Config=config
        )
        # S3 URIs separate bucket and key with '/', not ':'
        return f"s3://{destination_bucket}/{destination}"

    def append(
        self,
        obj: Puttable,
        key: str,
        literal_str: bool = False,
        offset: int | None = None
    ) -> dict:
        """
        Write data at an offset to an S3 Express One Zone object. If no offset
        is specified, appends data to the end of the object.

        Args:
            obj: string, Path, bytes, or filelike / buffer object to write.
                If None, an empty payload is written; when `key` does not
                yet exist this gives "touch"-type behavior (creates an empty
                object).
            key: key of S3 object in this bucket to write `obj` to.
            literal_str: if True and `obj` is a `str`, write that string to
                `key`. Otherwise, interpret it as a path to a local file.
            offset: byte offset sent as `WriteOffsetBytes` when `key` already
                exists. If None or 0, the object's current `ContentLength`
                is used, i.e. the data is appended at the end. Ignored when
                `key` does not yet exist.

        Returns:
            Response of the underlying `put_object` call.

        Raises:
            TypeError: if `obj` is not None, a path, `str`, or `bytes`, and
                does not offer both `read` and `seek`.
            ClientError: if the initial head request fails for any reason
                other than the object not being found, or if the write fails.

        Cautions:
            `append()` does not currently perform managed multipart uploads.
            This means that if `obj` is > 5 GB, the operation will fail, and
            also that `append()` is generally inefficient for large writes.
            In its current state, it is primarily intended for tasks like
            tail-writes to logs. This may change in the future.

            The source buffer is closed after the write, including a
            filelike object passed in as `obj`.
        """
        touch = True
        try:
            head = self.head(key)
        except ClientError as ce:
            # a missing object just means we are creating it; anything else
            # is a real failure
            if "not found" not in str(ce).lower():
                raise
        else:
            if offset in (None, 0):
                offset = head["ContentLength"]
            touch = False
        file = None
        try:
            if obj is None:
                file = BytesIO()
            elif _should_be_file(obj, literal_str):
                file = Path(obj).open("rb")
            elif isinstance(obj, str):
                file = BytesIO(obj.encode('utf-8'))
            elif isinstance(obj, bytes):
                file = BytesIO(obj)
            elif not (hasattr(obj, "read") and hasattr(obj, "seek")):
                # both attributes are required: we seek() below and the
                # object is sent as the request body
                raise TypeError(f"Cannot put object of type {type(obj)}")
            else:
                file = obj
            file.seek(0)
            kwargs = {"Bucket": self.name, "Body": file, "Key": key}
            if touch is False:
                kwargs["WriteOffsetBytes"] = offset
            resp = self.client.put_object(**kwargs)
        finally:
            if file is not None:
                file.close()
        return resp

    @cached_property
    def _buckethead(self):
        """
        HeadBucket response for this bucket, fetched once and then cached.
        Not currently formatted in an interesting way for public use.
        """
        response = self.client.head_bucket(Bucket=self.name)
        return response

    @cached_property
    def bucket_type(self) -> BUCKET_TYPE:
        """
        Top-level bucket type: 'directory' when the HeadBucket response
        contains a 'BucketLocationType' entry, otherwise 'general'. Directory
        bucket subtypes are not distinguished.
        """
        is_directory = 'BucketLocationType' in self._buckethead
        return 'directory' if is_directory else 'general'

    def _s3c_kwargs(self):
        """
        Build the kwargs dict s3control actions want: the bucket's ARN (from
        the HeadBucket response) and the account id extracted from that ARN.
        """
        bucket_arn = self._buckethead["BucketArn"]
        account_match = DIRECTORY_BUCKET_ARN_ACCOUNTPAT.search(bucket_arn)
        return {
            'AccountId': account_match.group(1),
            'ResourceArn': bucket_arn,
        }

    def _directory_bucket_tagrecs(self):
        """
        Fetch the tag records of a directory bucket through s3control.

        Returns:
            the 'Tags' entry of the ListTagsForResource response, or an
            empty list if the response has no such entry.

        Raises:
            ValueError: if this bucket is not a directory bucket.
        """
        if self.bucket_type != "directory":
            raise ValueError(
                "Don't call this method on general-purpose buckets"
            )
        control_client = init_client("s3control", None, self.session)
        listing = control_client.list_tags_for_resource(**self._s3c_kwargs())
        return listing.get('Tags', [])

    @cached_property
    def tags(self) -> dict:
        """
        Bucket tags represented as a dict. If there are no tags, returns
        an empty dict.

        Directory buckets are read through s3control; general-purpose
        buckets through GetBucketTagging. The result is cached on the
        instance; `set_tags()` discards the cached value after writing.

        Raises:
            ClientError: for any GetBucketTagging failure other than the
                bucket simply having no tag set.
        """
        if self.bucket_type == "directory":
            recs = self._directory_bucket_tagrecs()
        else:
            try:
                response = self.client.get_bucket_tagging(Bucket=self.name)
            except ClientError as ce:
                # GetBucketTagging reports an untagged bucket as a
                # NoSuchTagSet error rather than as an empty TagSet, so
                # translate that one error into "no tags"
                code = ce.response.get("Error", {}).get("Code")
                if code != "NoSuchTagSet":
                    raise
                recs = []
            else:
                recs = response['TagSet']
        return {rec["Key"]: rec["Value"] for rec in recs}

    def set_tags(self, *, replace_all: bool = False, **tags: str):
        """
        Apply tags to this bucket.

        Args:
            replace_all: if True, the passed tags become the bucket's
                complete tag set and every other existing tag is dropped
                (which is how the PutBucketTagging API operation behaves by
                default). If False, existing tags are kept unless `tags`
                redefines them. Passing True for a directory bucket raises
                a ValueError: tagging a directory bucket goes through a
                different API call that behaves entirely differently, and
                replace-all behavior is deliberately not implemented for it.
            **tags: tag keys given as argument names, tag values given as
                argument values. At least one tag is required.

        Returns:
            API response for the tagging call (PutBucketTagging for
            general-purpose buckets, s3control TagResource for directory
            buckets).

        Raises:
            ValueError: if no tags are passed, or if `replace_all` is True
                for a directory bucket.
            TypeError: if any tag value is not a string.
        """
        if not tags:
            raise ValueError("No tags to set")
        for value in tags.values():
            if not isinstance(value, str):
                raise TypeError("Tag values must be strings")
        is_directory = self.bucket_type == 'directory'
        if replace_all is True and is_directory:
            raise ValueError(
                "replace_all=True not supported for directory buckets."
            )
        if replace_all is False:
            # layer the new values over the current tags so that keys not
            # mentioned in this call survive
            tags = self.tags | tags
        tagset = []
        for tag_key, tag_value in tags.items():
            tagset.append({'Key': tag_key, 'Value': tag_value})
        if is_directory:
            control_client = init_client("s3control", None, self.session)
            response = control_client.tag_resource(
                **self._s3c_kwargs(), Tags=tagset
            )
        else:
            response = self.client.put_bucket_tagging(
                Bucket=self.name, Tagging={'TagSet': tagset}
            )
        # discard the cached `tags` value (if any) so the next read refetches
        try:
            # noinspection PyPropertyAccess
            del self.tags
        except AttributeError:
            pass
        return response

    # TODO: verify types of returned dict values
    @splitwrap(seq_arity=1)
    def head(
        self, key: Union[str, Sequence[str]]
    ) -> Union[dict[str, str], list[Union[dict[str, str], Exception]]]:
        """
        Summarize an S3 object's HEAD response as a compact dict.

        Args:
            key: object key(s) (fully-qualified 'path(s)' from bucket root)

        Returns:
            dict holding a curated subset of the HeadObject response (size,
            type, ETag, modification time as an ISO-format string, metadata,
            restore / storage-class info, any 'Checksum*' fields) plus the
            restore-related HTTP headers when present; or, for a multi-object
            call, a list containing one such dict per successful head and an
            Exception per failed one
        """
        wanted_fields = (
            "ContentLength",
            "ContentType",
            "ETag",
            "LastModified",
            "Metadata",
            "Restore",
            "StorageClass",
        )
        wanted_headers = (
            "x-amz-restore-request-date",
            "x-amz-restore-expiry-days",
            "x-amz-restore-tier",
        )

        def _keep_field(name):
            return name in wanted_fields or name.startswith("Checksum")

        response = self.client.head_object(
            Bucket=self.name, Key=key, ChecksumMode="ENABLED"
        )
        http_headers = response["ResponseMetadata"].get("HTTPHeaders", {})
        summary = {}
        summary.update(keyfilter(_keep_field, response))
        summary.update(keyfilter(lambda k: k in wanted_headers, http_headers))
        if "LastModified" in summary:
            summary["LastModified"] = summary["LastModified"].isoformat()
        return summary

    def tail(
        self,
        key: str,
        destination: MutableSequence | BinaryIO | TextIO | IOBase | Path | str,
        start_pos: int | None = None,
        poll: float = 1,
        text_mode: bool = True,
        permit_missing: bool = False
    ):
        """
        Follow an S3 object asynchronously, in the manner of `tail -f`.

        Args:
            key: object key (fully-qualified 'path' from root)
            destination: where tail chunks are written. If it is a sequence,
                each chunk is appended as a new item.
            start_pos: position to start reading from. If None, starts at
                the length the object has when `tail()` is called.
            poll: poll rate in seconds
            text_mode: if True, each chunk is decoded as utf-8 text
            permit_missing: if True, a nonexistent object is not an error;
                instead, periodically check whether the object has come into
                existence, and start tailing it once it has.

        Returns:
            A `StoppableFuture` for the poll loop. Call its `stop()` method
                to stop tailing. Note that this future's result will always
                be `None`.

        Warnings:
            This method is primarily intended for following append-writes
            to S3 Express One Zone (SEOZ) objects. If the size of the object
            _decreases_ while it is being tailed, subsequent writes may no
            longer be fetched accurately. In the future, an option may be
            added to perform an additional HEAD request to check object
            length before each normal HEAD / GET pair.

        Notes:
            Exclusive of egress costs, running this for a full day at the
            default 1-second poll rate on a single S3 Standard object costs
            approximately $0.07. On a SEOZ object, it costs approximately
            $0.005. Egress costs in this application should be equal to
            egress for (size of object when tailing ends) minus (the smaller
            of start_pos or size of object when tailing starts)
        """
        poll_kwargs = {
            "bucket": self,
            "key": key,
            "start_pos": start_pos,
            "text_mode": text_mode,
            "destination": destination,
            "poll": poll,
            "permit_missing": permit_missing,
        }
        # the poll loop gets a single-worker executor of its own
        executor = ThreadPoolExecutor(1)
        return StoppableFuture.launch_into(executor, _poll_obj, **poll_kwargs)

    def ls(
        self,
        prefix: Optional[str] = None,
        recursive: bool = False,
        formatting: Literal["simple", "contents", "df", "raw"] = "simple",
        cache: Union[str, Path, IOBase, None] = None,
        start_after: Optional[str] = None,
        cache_only: bool = False,
        fetch_owner: bool = False
    ) -> Union[tuple, pd.DataFrame, None]:
        """
        List the objects in this bucket.

        Args:
            prefix: prefix ('folder') to list (bucket root if not given)
            recursive: list every object in the tree rooted at prefix?
            formatting: shape of the returned listing
                * "simple": tuple of object names (and common prefixes) only
                * "contents": tuple of dicts with object names,
                    modification times, etc.
                * "df": pandas DataFrame built from contents
                * "raw": API response pages as reported by boto3
            cache: optional file or filelike object to write results to.
            start_after: if given, list only objects "after" this prefix.
                intended principally for sequential calls.
            cache_only: write results into the specified cache _only_ and
                keep nothing in memory. intended mainly for listings that
                would be larger than available memory.
            fetch_owner: if True, include object owners in the output. Off
                by default due to permissioning and performance issues.

        Returns:
            Manifest of contents, format dependent on `formatting`; or None
                if `cache_only` is True.
        """
        request = {"Bucket": self.name}
        if recursive is False:
            # make prefixes behave like directories. the ListObjects*
            # methods are 'recursive' by default -- they treat a prefix as
            # nothing more than part of an object key. note that this also
            # differs from default awscli s3 behavior, though not from
            # awscli s3api.
            request["Delimiter"] = "/"
            if prefix is None:
                request["Prefix"] = ""
        if prefix is not None:
            request["Prefix"] = prefix.lstrip("/")
        if start_after is not None:
            request["StartAfter"] = start_after
        request["FetchOwner"] = fetch_owner
        # pagination is typically slightly faster than iteratively passing
        # StartAfter based on the last key of a truncated response
        paginator = self.client.get_paginator("list_objects_v2")
        if isinstance(cache, str):
            cache = Path(cache)
        self._maybe_prep_ls_cache(cache)
        retained = []
        for page in paginator.paginate(**request):
            self._maybe_write_ls_cache(page, cache)
            if cache_only is False:
                retained.append(page)
        if cache_only is True:
            return None
        if formatting not in ("raw", "simple", "df", "contents"):
            warnings.warn(
                f"invalid formatting '{formatting}', defaulting to 'simple'"
            )
            formatting = "simple"
        if formatting == "raw":
            return tuple(retained)
        objects = chain.from_iterable(
            page.get("Contents", []) for page in retained
        )
        prefixes = chain.from_iterable(
            page.get("CommonPrefixes", []) for page in retained
        )
        if formatting == "simple":
            # object keys first, then the common prefixes
            names = [rec["Key"] for rec in objects]
            names.extend(rec["Prefix"] for rec in prefixes)
            return tuple(names)
        if formatting == "contents":
            return tuple(objects)
        if fetch_owner is False:
            return pd.DataFrame(objects)
        # split each record's nested 'Owner' dict off into its own frame,
        # then lay the two frames side by side
        objects = tuple(objects)
        owner_info = pd.DataFrame([rec.pop('Owner') for rec in objects])
        frames = [pd.DataFrame(objects), pd.DataFrame(owner_info)]
        return pd.concat(frames, axis=1)

    @staticmethod
    def _maybe_prep_ls_cache(cache: Optional[Union[Path, IO]]):
        if cache is None:
            return
        columns = "Key,LastModified,ETag,Size,StorageClass\n"
        if isinstance(cache, Path):
            if cache.exists() and cache.stat().st_size > 0:
                return
            with cache.open("w") as stream:
                stream.write(columns)
        elif cache.tell() == 0:
            cache.write(columns)

    @staticmethod
    def _maybe_write_ls_cache(page: dict, cache: Optional[Union[Path, IO]]):
        """helper function for Bucket.ls()"""
        if (cache is None) or (objects := page.get("Contents")) is None:
            return
        stream = cache if not isinstance(cache, Path) else cache.open("a+")
        try:
            for rec in objects:
                stream.write(",".join(map(_dtstr, rec.values())) + "\n")
        finally:
            if isinstance(cache, Path):
                stream.close()

    @splitwrap(seq_arity=1)
    def rm(self, key: str) -> Union[None, list[Optional[None]]]:
        """
        Delete an object from this bucket.

        Args:
            key: key of object to delete (fully-qualified 'path' from root)

        Returns:
            the result of the underlying DeleteObject call is passed
                through unchanged; for multi-delete, a list with one entry
                per key, holding an Exception for each failed delete
        """
        outcome = self.client.delete_object(Bucket=self.name, Key=key)
        return outcome

    def ls_multipart(self) -> dict:
        """
        Ask S3 for the multipart uploads associated with this bucket.

        Returns:
            API response
        """
        response = self.client.list_multipart_uploads(Bucket=self.name)
        return response

    def create_multipart_upload(self, key: str) -> dict:
        """
        Set up an S3 multipart upload. This does not upload anything by
        itself! It only _prepares_ an upload operation.

        Args:
            key: upload target key (fully-qualified 'path' from bucket root)

        Returns:
            API response
        """
        request = {"Bucket": self.name, "Key": key}
        return self.client.create_multipart_upload(**request)

    def abort_multipart_upload(self, multipart: Mapping) -> dict:
        """
        Cancel a multipart upload operation.

        Args:
            multipart: API response received when the multipart upload
                operation was created; its "Key" and "UploadId" entries
                identify the upload to abort

        Returns:
            API response
        """
        target_key = multipart["Key"]
        upload_id = multipart["UploadId"]
        return self.client.abort_multipart_upload(
            Bucket=self.name, Key=target_key, UploadId=upload_id
        )

    def complete_multipart_upload(
        self,
        multipart: Mapping,
        parts: Mapping,
    ) -> dict:
        """
        Notify S3 that all parts of an object have been uploaded and it may
        close the multipart upload operation and actually create the object.

        Args:
            multipart: API response received when multipart upload was
                created; its "Key" and "UploadId" entries identify the upload
            parts: mapping of part number to the API response received after
                uploading that part (each must provide an "ETag"). It may be
                in any order.

        Returns:
            API response
        """
        # S3 requires the part list in ascending part-number order, so sort
        # rather than rely on the mapping's iteration order
        ordered = sorted(parts.items(), key=lambda item: item[0])
        part_records = [
            {"ETag": part["ETag"], "PartNumber": number}
            for number, part in ordered
        ]
        return self.client.complete_multipart_upload(
            Bucket=self.name,
            Key=multipart["Key"],
            UploadId=multipart["UploadId"],
            MultipartUpload={"Parts": part_records},
        )

    def __str__(self):
        return f"s3 bucket {self.name} ({self.client.meta.region_name})"

    def __repr__(self):
        return self.__str__()
_buckethead cached property

Cached response to HeadBucket call against this bucket. Not currently formatted in an interesting way for public use.

bucket_type cached property

Top-level type of bucket, 'general' or 'directory'. Does not distinguish directory bucket subtypes.

tags cached property

Bucket tags represented as a dict. If there are no tags, returns an empty dict.

__init__(bucket_name, client=None, resource=None, session=None, config=None, n_threads=4)

Parameters:

Name Type Description Default
bucket_name str

name of bucket

required
client Optional[BaseClient]

optional boto3 s3 Client. if not specified, creates a default client.

None
resource Optional[ServiceResource]

optional boto3 s3 Resource. if not specified, creates a default resource.

None
session Optional[Session]

optional boto3 Session. if not specified, creates a default session.

None
config Optional[TransferConfig]

optional boto3 TransferConfig. if not specified, creates a default config.

None
n_threads Optional[int]

if not None, automatically multithread some operations. note that this is not a hard cap on the number of threads used by a single bucket operation. it provides a cap on concurrency across operations on multiple objects, not on concurrency on operations per object. if you wish to cap concurrency within operations on individual objects, modify the max_concurrency attribute of config.

4
Source code in hostess/aws/s3.py
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
def __init__(
    self,
    bucket_name: str,
    client: Optional[botocore.client.BaseClient] = None,
    resource: Optional[boto3.resources.base.ServiceResource] = None,
    session: Optional[boto3.session.Session] = None,
    config: Optional[boto3.s3.transfer.TransferConfig] = None,
    n_threads: Optional[int] = 4,
):
    """
    Args:
        bucket_name: name of bucket
        client: optional boto3 s3 Client. a default client is created if
            none is given.
        resource: optional boto3 s3 Resource. a default resource is created
            if none is given.
        session: optional boto3 Session. a default session is created if
            none is given.
        config: optional boto3 TransferConfig. a default config is created
            if none is given.
        n_threads: if not None, automatically multithread some operations.
            note that this is not a hard cap on the number of threads used
            by a single bucket operation: it caps concurrency across
            operations on multiple objects, not concurrency within an
            operation on one object. to cap concurrency within operations
            on individual objects, modify the `max_concurrency` attribute
            of `config`.
    """
    self.client = init_client("s3", client, session)
    self.resource = init_resource("s3", resource, session)
    # the session is stored exactly as passed (possibly None)
    self.session = session
    self.name = bucket_name
    self.contents = []
    self.config = (
        boto3.s3.transfer.TransferConfig(**S3_DEFAULTS["config"])
        if config is None
        else config
    )
    self.n_threads = n_threads
_maybe_write_ls_cache(page, cache) staticmethod

helper function for Bucket.ls()

Source code in hostess/aws/s3.py
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
@staticmethod
def _maybe_write_ls_cache(page: dict, cache: Optional[Union[Path, IO]]):
    """
    helper function for Bucket.ls(): append one comma-joined line per record
    in the page's 'Contents' to `cache`. does nothing if there is no cache
    or the page has no 'Contents' entry.
    """
    if cache is None:
        return
    records = page.get("Contents")
    if records is None:
        return
    is_path = isinstance(cache, Path)
    # a Path is opened (and closed again) per page; a stream is used as-is
    # and left open
    stream = cache.open("a+") if is_path else cache
    try:
        for record in records:
            line = ",".join(map(_dtstr, record.values()))
            stream.write(line + "\n")
    finally:
        if is_path:
            stream.close()
_put_stream_chunk(blob, download_cache, parts, multipart, upload_numerator, config, exc, verbose=False, flush=False)

helper function for Bucket.put_stream()

Source code in hostess/aws/s3.py
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
def _put_stream_chunk(
    self,
    blob: bytes,
    download_cache: list[bytes],
    parts: MutableMapping,
    multipart: Mapping,
    upload_numerator: Iterator[int],
    config: boto3.s3.transfer.TransferConfig,
    exc: Optional[ThreadPoolExecutor],
    verbose: bool = False,
    flush: bool = False,
):
    """
    helper function for Bucket.put_stream(): add `blob` to the buffer held
    in `download_cache[0]` and, once the buffer has reached
    `config.multipart_chunksize` (or unconditionally when `flush` is set),
    upload the buffer as the next part and reset it. The part's result -- a
    future when an executor is given, otherwise the upload_part response --
    is stored in `parts` under its part number.
    """
    download_cache[0] += blob
    buffer_is_short = len(download_cache[0]) < config.multipart_chunksize
    if buffer_is_short and flush is False:
        # not enough data for a part yet; keep accumulating
        return
    part_number = next(upload_numerator)
    if verbose is True:
        infix = "received" if flush is False else "flushing buffer as"
        console_and_log(
            f"{stamp()}: {infix} chunk {part_number} for {multipart['Key']}, "
            f"initiating upload"
        )
    request = {
        # take the filled buffer out of the one-element cache...
        "Body": download_cache.pop(),
        "Bucket": self.name,
        "Key": multipart["Key"],
        "PartNumber": part_number,
        "UploadId": multipart["UploadId"],
    }
    # ...and put a fresh empty buffer in its place
    download_cache.append(b"")
    if exc is None:
        parts[part_number] = self.client.upload_part(**request)
    else:
        parts[part_number] = exc.submit(self.client.upload_part, **request)
_s3c_kwargs()

Produces a dict containing kwargs s3control actions want.

Source code in hostess/aws/s3.py
1251
1252
1253
1254
1255
def _s3c_kwargs(self):
    """
    Build the keyword arguments shared by the s3control calls made for this
    bucket: the account id and the bucket's ARN.
    """
    bucket_arn = self._buckethead["BucketArn"]
    # the account id is the first capture group of the module-level ARN
    # pattern
    match = DIRECTORY_BUCKET_ARN_ACCOUNTPAT.search(bucket_arn)
    return {'AccountId': match.group(1), 'ResourceArn': bucket_arn}
abort_multipart_upload(multipart)

Abort a multipart upload operation.

Parameters:

Name Type Description Default
multipart Mapping

API response received when multipart upload operation was created

required

Returns:

Type Description
dict

API response

Source code in hostess/aws/s3.py
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
def abort_multipart_upload(self, multipart: Mapping) -> dict:
    """
    Cancel a multipart upload operation.

    Args:
        multipart: API response received when the multipart upload
            operation was created; its "Key" and "UploadId" entries
            identify the upload to abort

    Returns:
        API response
    """
    target_key = multipart["Key"]
    upload_id = multipart["UploadId"]
    return self.client.abort_multipart_upload(
        Bucket=self.name, Key=target_key, UploadId=upload_id
    )
append(obj, key, literal_str=False, offset=None)

Write data at an offset to an S3 Express One Zone object. If no offset is specified, appends data to the end of the object.

Parameters:

Name Type Description Default
obj Puttable

string, Path, bytes, or filelike / buffer object to write. If None and key does not yet exist, performs "touch"-type behavior (creates an empty object).

required
key str

key of S3 object in this bucket to write obj to.

required
literal_str bool

if True and obj is a str, write that string to key. Otherwise, interpret it as a path to a local file.

False
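offset int | None

byte offset at which to write. If None or 0 and the object already exists, the object's current length is used, i.e. the data is appended to the end of the object.

None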

Returns:

Type Description
None

Despite the `None` annotation, the code returns the API response of the `put_object` call.

Cautions

append() does not currently perform managed multipart uploads. This means that if obj is > 5 GB, the operation will fail, and also that append() is generally inefficient for large writes. In its current state, it is primarily intended for tasks like tail-writes to logs. This may change in the future.

Source code in hostess/aws/s3.py
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
def append(
    self,
    obj: Puttable,
    key: str,
    literal_str: bool = False,
    offset: int | None = None
) -> dict:
    """
    Write data at an offset to an S3 Express One Zone object. If no offset
    is specified, appends data to the end of the object.

    Args:
        obj: string, Path, bytes, or filelike / buffer object to write.
            If None and `key` does not yet exist, performs "touch"-type
            behavior (creates an empty object).
        key: key of S3 object in this bucket to write `obj` to.
        literal_str: if True and `obj` is a `str`, write that string to
            `key`. Otherwise, interpret it as a path to a local file.
        offset: byte offset at which to write. If None or 0 and the object
            already exists, the object's current length is used (i.e. the
            data is appended). Ignored when the object does not exist yet.

    Returns:
        API response for the PutObject operation.

    Raises:
        TypeError: if `obj` is none of the supported types and does not
            offer both `read` and `seek`.
        ClientError: if the initial HEAD request fails for any reason other
            than the object not being found.

    Cautions:
        `append()` does not currently perform managed multipart uploads.
        This means that if `obj` is > 5 GB, the operation will fail, and
        also that `append()` is generally inefficient for large writes.
        In its current state, it is primarily intended for tasks like
        tail-writes to logs. This may change in the future.

        The stream that is uploaded is closed afterwards, including a
        filelike object passed in as `obj`.
    """
    # `touch` stays True when the object does not exist yet; in that case a
    # plain put (without a write offset) creates it
    touch = True
    try:
        head = self.head(key)
    except ClientError as ce:
        if "not found" not in str(ce).lower():
            raise
    else:
        if offset in (None, 0):
            offset = head["ContentLength"]
        touch = False
    file = None
    try:
        if obj is None:
            file = BytesIO()
        elif _should_be_file(obj, literal_str):
            file = Path(obj).open("rb")
        elif isinstance(obj, str):
            file = BytesIO(obj.encode('utf-8'))
        elif isinstance(obj, bytes):
            file = BytesIO(obj)
        elif hasattr(obj, "read") and hasattr(obj, "seek"):
            file = obj
        else:
            # anything else must be readable AND seekable; reject it here
            # instead of failing later on `seek` / `close`
            raise TypeError(f"Cannot put object of type {type(obj)}")
        file.seek(0)
        kwargs = {"Bucket": self.name, "Body": file, "Key": key}
        if touch is False:
            kwargs["WriteOffsetBytes"] = offset
        resp = self.client.put_object(**kwargs)
    finally:
        if file is not None:
            file.close()
    return resp
chunk_putter_factory(key, upload_threads=4, download_threads=None, verbose=False)

construct a callable chunk uploader. this can be used in relatively direct ways or passed to complex pipelines as a callback.

Source code in hostess/aws/s3.py
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
def chunk_putter_factory(
    self,
    key: str,
    upload_threads: Optional[int] = 4,
    download_threads: Optional[int] = None,
    verbose: bool = False,
):
    """
    build a callable chunk uploader for `key`. it can be called fairly
    directly or handed to a more complex pipeline as a callback.

    creates the multipart upload up front and returns a 3-tuple: the
    uploader (`_put_stream_chunk` with its bookkeeping arguments bound), the
    dict that will collect per-part results, and the create-multipart-upload
    response.

    Raises:
        NotImplementedError: if `download_threads` is not None.
    """
    if download_threads is not None:
        raise NotImplementedError(
            "Asynchronous downloads are not yet implemented; "
            "please pass download_threads=None"
        )
    parts = {}
    multipart = self.create_multipart_upload(key)
    # without an executor, parts are uploaded synchronously
    exc = None if upload_threads is None else ThreadPoolExecutor(upload_threads)
    putter = partial(
        self._put_stream_chunk,
        config=self.config,
        download_cache=[b""],
        multipart=multipart,
        upload_numerator=naturals(),
        parts=parts,
        exc=exc,
        verbose=verbose,
    )
    return putter, parts, multipart
complete_multipart_upload(multipart, parts)

Notify S3 that all parts of an object have been uploaded and it may close the multipart upload operation and actually create the object.

Parameters:

Name Type Description Default
multipart Mapping

API response received when multipart upload was created

required
parts Mapping

API responses received after uploading each part

required

Returns:

Type Description
dict

API response

Source code in hostess/aws/s3.py
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
def complete_multipart_upload(
    self,
    multipart: Mapping,
    parts: Mapping,
) -> dict:
    """
    Notify S3 that all parts of an object have been uploaded and it may
    close the multipart upload operation and actually create the object.

    Args:
        multipart: API response received when multipart upload was created;
            its "Key" and "UploadId" entries identify the upload
        parts: mapping of part number to the API response received after
            uploading that part (each must provide an "ETag"). It may be in
            any order.

    Returns:
        API response
    """
    # S3 requires the part list in ascending part-number order, so sort
    # rather than rely on the mapping's iteration order
    ordered = sorted(parts.items(), key=lambda item: item[0])
    part_records = [
        {"ETag": part["ETag"], "PartNumber": number}
        for number, part in ordered
    ]
    return self.client.complete_multipart_upload(
        Bucket=self.name,
        Key=multipart["Key"],
        UploadId=multipart["UploadId"],
        MultipartUpload={"Parts": part_records},
    )
cp(source, destination=None, destination_bucket=None, config=None, **extra_args)

Copy S3 object(s) to another location on S3.

Parameters:

Name Type Description Default
source Union[str, Sequence[str]]

key(s) of object(s) to copy (fully-qualified 'path(s)')

required
destination Union[Optional[str], Sequence[Optional[str]]]

key(s) to copy object(s) to (by default, uses source key(s))

None
destination_bucket Optional[str]

bucket to copy object(s) to. if not specified, uses source bucket. this means that if destination and destination_bucket are both None, it overwrites the object inplace. this is useful for things like storage class changes.

None
config Optional[TransferConfig]

optional transfer config

None
extra_args str

ExtraArgs for boto3 bucket object

{}

Returns:

Name Type Description
uri Union[str, list[Union[str, Exception]]]

S3 URI of newly-created copy, or, for multi-copy, a list containing an S3 URI for each successful copy and an Exception for each failed

Source code in hostess/aws/s3.py
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
@splitwrap(seq_arity=2)
def cp(
    self,
    source: Union[str, Sequence[str]],
    destination: Union[Optional[str], Sequence[Optional[str]]] = None,
    destination_bucket: Optional[str] = None,
    config: Optional[boto3.s3.transfer.TransferConfig] = None,
    **extra_args: str
) -> Union[str, list[Union[str, Exception]]]:
    """
    Copy S3 object(s) to another location on S3.

    Args:
        source: key(s) of object(s) to copy (fully-qualified 'path(s)')
        destination: key(s) to copy object(s) to (by default, uses
            source key(s))
        destination_bucket: bucket to copy object(s) to. if not
            specified, uses source bucket. this means that if destination
            and destination_bucket are both None, it overwrites the object
            inplace. this is useful for things like storage class changes.
        config: optional transfer config; defaults to this bucket's config
        extra_args: ExtraArgs for boto3 bucket object

    Returns:
        uri: S3 URI (`s3://bucket/key`) of newly-created copy, or, for
            multi-copy, a list containing an S3 URI for each successful
            copy and an Exception for each failed
    """
    config = self.config if config is None else config
    if destination_bucket is None:
        destination_bucket = self.name
    # supporting copy-self-to-self for storage class changes etc.
    destination = source if destination is None else destination

    # use boto3's high-level Bucket object to perform a managed transfer
    # (in order to easily support objects > 5 GB)
    destination_bucket_object = self.resource.Bucket(destination_bucket)
    copy_source = {"Bucket": self.name, "Key": source}
    destination_bucket_object.copy(
        copy_source, destination, ExtraArgs=extra_args, Config=config
    )
    # bucket and key are separated by '/' in an S3 URI
    return f"s3://{destination_bucket}/{destination}"
create(name, client=None, session=None, *, bucket_type='general', az=None, tags=None, bucket_config=None, **bucket_kwargs) classmethod

Create a new bucket on S3 and return it as a Bucket object.

Parameters:

Name Type Description Default
name str

Name of bucket. If creating a directory bucket, do not include the AZ suffix (e.g. pass "something" instead of "something--use1-az4--x-s3"). Bucket will automatically add the correct suffix.

required
client BaseClient | None

optional boto3 s3 Client. if not specified, creates a default client.

None
session Session

optional boto3 Session. if not specified, creates a default session.

None
bucket_type BUCKET_TYPE

"general" (default, meaning a general-purpose bucket) or "directory" (meaning a directory bucket). Note that this method only supports zonal directory buckets.

'general'
az str | int | None

Name, letter, ID, or number of the Availability Zone (AZ) in which to create a directory bucket. For instance, if session is associated with the us-east-1 region, 'us-east-1c', 'use1-az4', 'c', and 4 all refer to the same AZ. Note that creating a bucket in a region other than the one client (or session, if client is not passed) is associated with is not supported.

This argument is ignored for general-purpose buckets.

Note that not all AZs support directory buckets, and there is no mechanism to discover which do and do not via the API (other than actually attempting to create one). See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/endpoint-directory-buckets-AZ.html

None
tags dict[str, str] | None

Keys and values of bucket tags to set after successful bucket creation. If not None, there must be at least one item in this dict. Note that directory buckets do not support tags; this method will raise a ValueError if provided tags for a directory bucket.

None
bucket_config Mapping | None

passed to the botocore create_bucket() method as the 'CreateBucketConfiguration' argument.

None
bucket_kwargs

passed directly to Bucket.__init__().

{}
Caution

In all regions other than us-east-1, the S3 API returns a 'bucket already exists and is owned by you' error if a user attempts to create a bucket with the same name as a bucket they already own. In us-east-1, it instead returns a standard success response and silently erases all ACLs associated with that bucket. We are unwilling to spring this on users, and for this reason, Bucket.create() behaves slightly differently in us-east-1. It checks whether the user already owns a bucket with the requested name, and raises an exception if so. This means that an account must have the ListBuckets permission to use Bucket.create() in us-east-1.

Source code in hostess/aws/s3.py
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
@classmethod
def create(
    cls,
    name: str,
    client: botocore.client.BaseClient | None = None,
    session: boto3.session.Session = None,
    *,
    bucket_type: BUCKET_TYPE = "general",
    az: str | int | None = None,
    tags: dict[str, str] | None = None,
    bucket_config: Mapping | None = None,
    **bucket_kwargs
):
    """
    Create a new bucket on S3 and return it as a Bucket object.

    Args:
        name: Name of bucket. If creating a directory bucket, do not
            include the AZ suffix (e.g. pass "something" instead of
            "something--use1-az4--x-s3"). `Bucket` will automatically add
            the correct suffix.
        client: optional boto3 s3 Client. if not specified, creates a
            default client.
        session: optional boto3 Session. if not specified, creates a
            default session.
        bucket_type: "general" (default, meaning a general-purpose bucket)
            or "directory" (meaning a directory bucket). Note that this
            method only supports zonal directory buckets.
        az: Name, letter, ID, or number of the Availability Zone (AZ) in
            which to create a directory bucket. For instance, if `session`
            is associated with the us-east-1 region, 'us-east-1c',
            `'use1-az4'`, `'c'`, and `4` all refer to the same AZ. Note
            that creating a bucket in a region other than the one `client`
            (or `session`, if `client` is not passed) is associated with
            is not supported.

            This argument is ignored for general-purpose buckets.

            Note that not all AZs support directory buckets, and there is
            no mechanism to discover which do and do not via the API
            (other than actually attempting to create one). See:
            https://docs.aws.amazon.com/AmazonS3/latest/userguide/endpoint-directory-buckets-AZ.html
        tags: Keys and values of bucket tags to set after successful
              bucket creation. If not None, there must be at least one
              item in this dict. Note that directory buckets do not
              support tags; this method will raise a ValueError if
              provided tags for a directory bucket.
        bucket_config: passed to the botocore `create_bucket()` method as
            the 'CreateBucketConfiguration' argument.
        bucket_kwargs: passed directly to `Bucket.__init__()`.

    Returns:
        `Bucket` object associated with the newly-created bucket.

    Raises:
        ValueError: if `bucket_type` is not 'general' or 'directory'; if
            `tags` are passed for a directory bucket; if the (suffixed)
            name does not match the naming pattern for its bucket type;
            if `bucket_config` for a directory bucket contains 'Location'
            or 'Bucket'; or if the S3 API rejects an apparently-valid
            directory bucket name.
        TypeError: if `bucket_type` is 'directory' and `az` is None.
        ClientError: any other error reported by the S3 API during
            bucket creation.

    Caution:
        In all regions other than us-east-1, the S3 API returns a
        'bucket already exists and is owned by you' error if a user
        attempts to create a bucket with the same name as a bucket they
        already own. In us-east-1, it instead returns a standard success
        response and silently erases all ACLs associated with that bucket.
        We are unwilling to spring this on users, and for this reason,
        `Bucket.create()` behaves slightly differently in us-east-1. It
        checks whether the user already owns a bucket with the requested
        name, and raises an exception if so. This means that an account
        must have the ListBuckets permission to use `Bucket.create()` in
        us-east-1.
    """
    client = init_client("s3", client, session)
    if bucket_type not in ("general", "directory"):
        raise ValueError("'bucket_type' must be 'general' or 'directory'.")
    # reject unsupported tags _before_ touching S3, so that we never create
    # a bucket and then fail while tagging it.
    if tags is not None and bucket_type == "directory":
        raise ValueError("Directory buckets do not support tags.")
    if bucket_type == "directory" and az is None:
        raise TypeError("'az' must not be None for a directory bucket.")
    elif bucket_type == "directory":
        azid = check_az(az, region_name=client.meta.region_name)
        name = _attach_directory_bucket_suffix(name, azid)
        pat = DIRECTORY_BUCKET_NAMEPAT
    else:
        pat, azid = BUCKET_NAMEPAT, None
    if pat.match(name) is None:
        raise ValueError(f"{name} is not a valid bucket name.")
    if client.meta.region_name == 'us-east-1':
        # see 'Caution' in docstring
        _raise_for_owned_use1_bucket(client, name, bucket_type)
    # copy so that we never mutate the caller's mapping
    conf = dict(bucket_config) if bucket_config is not None else {}
    if bucket_type == "directory":
        if not {'Location', 'Bucket'}.isdisjoint(conf):
            raise ValueError(
                "Please do not specify custom 'Location' or 'Bucket' "
                "values in bucket config for directory buckets."
            )
        conf |= {
            'Location': {'Type': "AvailabilityZone", 'Name': azid},
            'Bucket': {
                'Type': 'Directory',
                'DataRedundancy': 'SingleAvailabilityZone'
            }
        }

    # note that we're just relying on botocore for exceptions at this
    # stage. If it doesn't raise one, we assume it worked.
    kwargs = {'Bucket': name}
    if len(conf) > 0:
        kwargs['CreateBucketConfiguration'] = conf
    try:
        client.create_bucket(**kwargs)
    except ClientError as ce:
        if "InvalidBucketName" in str(ce) and bucket_type == "directory":
            # keep the API error attached as the cause of our friendlier one
            raise ValueError(
                f"Although {name} is a valid directory bucket name, the "
                f"S3 API returned an InvalidBucketName error. This "
                f"typically indicates that Availability Zone {az} in "
                f"{client.meta.region_name} does not support directory "
                f"buckets."
            ) from ce
        # anything else: re-raise untouched, preserving the traceback
        raise
    bucket = Bucket(name, client=client, **bucket_kwargs)
    if tags is not None:
        bucket.set_tags(**tags)
    return bucket
create_multipart_upload(key)

Prepare an S3 multipart upload. Note that this is not itself an upload operation! It merely sets up an upload.

Parameters:

Name Type Description Default
key str

upload target key (fully-qualified 'path' from bucket root)

required

Returns:

Type Description
dict

API response

Source code in hostess/aws/s3.py
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
def create_multipart_upload(self, key: str) -> dict:
    """
    Open (but do not perform) a multipart upload to this bucket. No object
    data is transferred by this call; it only asks S3 to _set up_ the
    upload operation.

    Args:
        key: upload target key (fully-qualified 'path' from bucket root)

    Returns:
        API response
    """
    request = {"Bucket": self.name, "Key": key}
    return self.client.create_multipart_upload(**request)
delete()

Delete this bucket.

Notes

S3 will not delete a bucket that contains any objects, and hostess does not provide a 'force'-type operation that auto-empties a bucket before deletion.

Source code in hostess/aws/s3.py
653
654
655
656
657
658
659
660
661
662
def delete(self):
    """
    Ask S3 to delete this bucket.

    Notes:
        S3 will not delete a bucket that contains any objects, and
        `hostess` does not provide a 'force'-type operation that
        auto-empties a bucket before deletion.
    """
    bucket_name = self.name
    self.client.delete_bucket(Bucket=bucket_name)
df()

Construct a manifest of all known objects in bucket as a pandas DataFrame. If update_contents() has never been called, greedily scan the contents of the bucket rather than returning an empty DataFrame.

Returns:

Type Description
DataFrame

Manifest of all known objects in bucket.

Source code in hostess/aws/s3.py
722
723
724
725
726
727
728
729
730
731
732
733
def df(self) -> pd.DataFrame:
    """
    Return a manifest of all known objects in this bucket as a pandas
    DataFrame. If no contents are currently known (`self.contents` is
    empty), call `update_contents()` first rather than returning an empty
    DataFrame.

    Returns:
        Manifest of all known objects in bucket.
    """
    nothing_known = len(self.contents) == 0
    if nothing_known:
        # populate self.contents before building the manifest
        self.update_contents()
    manifest = pd.DataFrame(self.contents)
    return manifest
freeze(key, storage_class='DEEP_ARCHIVE')

Modify the storage class of an object or objects. Intended primarily for moving objects from S3 Standard to one of the Glacier classes.

Parameters:

Name Type Description Default
key Union[str, Sequence[str]]

object key(s) (fully-qualified 'path' relative to bucket root)

required
storage_class str

target storage class

'DEEP_ARCHIVE'

Returns:

Name Type Description
uri Union[str, list[Union[str, Exception]]]

URI of frozen object, or list containing URI of each frozen object if its freeze succeeded and an Exception if not

Source code in hostess/aws/s3.py
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
@splitwrap(seq_arity=1)
def freeze(
    self,
    key: Union[str, Sequence[str]],
    storage_class: str = "DEEP_ARCHIVE",
) -> Union[str, list[Union[str, Exception]]]:
    """
    Change the storage class of an object or objects by copying each
    object onto itself with a new storage class. Intended primarily for
    moving objects from S3 Standard to one of the Glacier classes.

    Args:
        key: object key(s) (fully-qualified 'path' relative to bucket root)
        storage_class: target storage class

    Returns:
        uri: URI of frozen object, or list containing URI of each
            frozen object if its freeze succeeded and an Exception if not
    """
    # cp() with no destination copies the object over itself; the storage
    # class rides along as an extra argument to the copy.
    copy_options = {"StorageClass": storage_class}
    return self.cp(key, **copy_options)
get(key, destination=None, config=None, start_byte=None, end_byte=None, **extra_args)

write S3 object(s) into file(s) or filelike object(s).

Parameters:

Name Type Description Default
key Union[str, Sequence[str]]

object key(s) (fully-qualified 'path(s)' from root)

required
destination Union[Union[str, Path, IOBase, None], Sequence[Union[str, Path, IOBase, None]]]

where to write the retrieved object(s). May be path(s) or filelike object(s). If not specified, constructs new BytesIO buffer(s).

None
config Optional[TransferConfig]

optional transfer config

None
start_byte Optional[int]

Byte index at which to begin read. None (default) or 0 means the first byte of the object. Negative integers are interpreted as Python-style negative slice indices. e.g., start_byte=0 and end_byte=-1 means 'read all the bytes but the last one', analogous to my_list[0:-1].

None
end_byte Optional[int]

Byte index at which to end read. None (default) means the last byte of the object.

None
extra_args str

passed directly to TransferManager.download()

{}

Returns: outpath: the path, string, or buffer we wrote the object to, or, for multi-get, a list containing one such outpath for each successful write and an Exception for each failed write

Caution

This does not currently support specifying different byte ranges for different objects. In other words, this is not legal:

Bucket.get(
    [k1, k2], [f1, f2], start_byte=[s1, s2], end_byte=[e1, e2]
)

If specified in a multi-object call to get(), start_byte and end_byte will fetch the same range from each object.

This may change in the future.

Source code in hostess/aws/s3.py
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
@splitwrap(seq_arity=2)
def get(
    self,
    key: Union[str, Sequence[str]],
    destination: Union[
        Union[str, Path, IOBase, None],
        Sequence[Union[str, Path, IOBase, None]],
    ] = None,
    config: Optional[boto3.s3.transfer.TransferConfig] = None,
    start_byte: Optional[int] = None,
    end_byte: Optional[int] = None,
    **extra_args: str
) -> Union[Path, str, IOBase] | list[Union[Path, str, IOBase, Exception]]:
    """
    Download S3 object(s) into file(s) or filelike object(s).

    Args:
        key: object key(s) (fully-qualified 'path(s)' from root)
        destination: where to write the retrieved object(s). May be path(s)
            or filelike object(s). If not specified, constructs new BytesIO
            buffer(s).
        config: optional transfer config; bucket's default if None
        start_byte: Byte index at which to begin read. None (default) or
            0 means the first byte of the object. Negative integers are
            interpreted as Python-style negative slice indices. e.g.,
            `start_byte=0` and `end_byte=-1` means 'read all the bytes
            but the last one', analogous to `my_list[0:-1]`.
        end_byte: Byte index at which to end read. None (default) means
            the last byte of the object.
        extra_args: passed directly to `TransferManager.download()`

    Returns:
        outpath: the path, string, or buffer we wrote the object to, or,
            for multi-get, a list containing one such outpath for each
            successful write and an Exception for each failed write

    Caution:
        This does not currently support specifying different byte ranges
        for different objects. In other words, this is not legal:
        ```
        Bucket.get(
            [k1, k2], [f1, f2], start_byte=[s1, s2], end_byte=[e1, e2]
        )
        ```
        If specified in a multi-object call to `get()`, `start_byte` and
        `end_byte` will fetch the same range from each object.

        This may change in the future.
    """
    # TODO: add more useful error messages for streams opened in text mode
    if config is None:
        config = self.config
    # normalize the write target: fresh buffer, string path, or passthrough
    if destination is None:
        dest = BytesIO()
    elif isinstance(destination, Path):
        dest = str(destination)
    else:
        dest = destination
    # a start of 0 is the same as no start at all
    if start_byte == 0:
        start_byte = None
    download_kwargs = {'extra_args': extra_args}
    if start_byte is None and end_byte is None:
        manager_class = TransferManager
    else:
        # only ranged reads need the range-aware manager
        manager_class = TransferManagerWithRange
        download_kwargs['start_byte'] = start_byte
        download_kwargs['end_byte'] = end_byte
    with manager_class(self.client, config) as manager:
        created_dirs = []
        if not isinstance(dest, IOBase):
            # create missing parent directories, outermost first, and
            # remember which ones are ours
            for parent in reversed(Path(dest).parents):
                if not parent.exists():
                    parent.mkdir()
                    created_dirs.append(parent)
        future = manager.download(self.name, key, dest, **download_kwargs)
        try:
            # this call is strictly intended to raise exceptions,
            # e.g. attempts to get objects that don't exist.
            future.result()
        except BaseException:
            # failed download: remove the directories we created,
            # innermost first, then let the error propagate
            for made in reversed(created_dirs):
                made.rmdir()
            raise
    if hasattr(dest, "seek"):
        # rewind so the caller can read from the start
        dest.seek(0)
    return dest
head(key)

Get basic information about S3 objects in a nicely formatted dict.

Parameters:

Name Type Description Default
key Union[str, Sequence[str]]

object key(s) (fully-qualified 'path(s)' from bucket root)

required

Returns:

Type Description
Union[dict[str, str], list[Union[dict[str, str], Exception]]]

dict containing a curated selection of object headers, or, for a multi-object call, a list containing a dict for each successful head and an Exception for each failed

Source code in hostess/aws/s3.py
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
@splitwrap(seq_arity=1)
def head(
    self, key: Union[str, Sequence[str]]
) -> Union[dict[str, str], list[Union[dict[str, str], Exception]]]:
    """
    Fetch headers for S3 object(s) and return the interesting ones in a
    flat dict.

    Args:
        key: object key(s) (fully-qualified 'path(s)' from bucket root)

    Returns:
        dict containing a curated selection of object headers, or, for
        a multi-object call, a list containing a dict for each
        successful head and an Exception for each failed
    """
    wanted_fields = (
        "ContentLength",
        "ContentType",
        "ETag",
        "LastModified",
        "Metadata",
        "Restore",
        "StorageClass",
    )
    wanted_headers = (
        "x-amz-restore-request-date",
        "x-amz-restore-expiry-days",
        "x-amz-restore-tier",
    )

    def _is_wanted_field(name):
        # keep the curated fields plus every checksum field
        return name in wanted_fields or name.startswith("Checksum")

    def _is_wanted_header(name):
        return name in wanted_headers

    response = self.client.head_object(
        Bucket=self.name, Key=key, ChecksumMode="ENABLED"
    )
    http_headers = response["ResponseMetadata"].get("HTTPHeaders", {})
    # raw HTTP headers are merged second, so they win on any name clash
    summary = {
        **keyfilter(_is_wanted_field, response),
        **keyfilter(_is_wanted_header, http_headers),
    }
    if "LastModified" in summary:
        # replace the timestamp object with its ISO-format string
        summary["LastModified"] = summary["LastModified"].isoformat()
    return summary
ls(prefix=None, recursive=False, formatting='simple', cache=None, start_after=None, cache_only=False, fetch_owner=False)

list objects in a bucket.

Parameters:

Name Type Description Default
prefix Optional[str]

prefix ('folder') to list (if not specified, defaults to bucket root)

None
recursive bool

recursively list all objects in tree rooted at prefix?

False
formatting Literal['simple', 'contents', 'df', 'raw']

how to format list results * "simple": tuple containing object names only * "contents": tuple of dicts containing object names, modification times, etc. * "df": pandas DataFrame produced from contents * "raw": API response as reported by boto3

'simple'
cache Union[str, Path, IOBase, None]

optional file or filelike object to write results to.

None
start_after Optional[str]

if specified, begin listing objects only "after" this prefix. intended principally for sequential calls.

None
cache_only bool

only write results into the specified cache; do not retain them in memory. intended mainly for cases in which the full list would be larger than available memory.

False
fetch_owner bool

if True, include owner of objects in output. Not enabled by default due to permissioning and performance issues.

False

Returns:

Type Description
Union[tuple, DataFrame, None]

Manifest of contents, format dependent on formatting; or None if cache_only is True.

Source code in hostess/aws/s3.py
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
def ls(
    self,
    prefix: Optional[str] = None,
    recursive: bool = False,
    formatting: Literal["simple", "contents", "df", "raw"] = "simple",
    cache: Union[str, Path, IOBase, None] = None,
    start_after: Optional[str] = None,
    cache_only: bool = False,
    fetch_owner: bool = False
) -> Union[tuple, pd.DataFrame, None]:
    """
    List objects in this bucket.

    Args:
        prefix: prefix ('folder') to list (if not specified, defaults to
            bucket root). Leading slashes are stripped.
        recursive: recursively list all objects in tree rooted at prefix?
        formatting: how to format list results
            * "simple": tuple containing object names, followed by any
                common prefixes
            * "contents": tuple of dicts containing object names,
                modification times, etc.
            * "df": pandas DataFrame produced from contents
            * "raw": tuple of API response pages as reported by boto3
            Any other value triggers a warning and is treated as "simple".
        cache: optional file or filelike object to write results to.
        start_after: if specified, begin listing objects only "after" this
            prefix. intended principally for sequential calls.
        cache_only: _only_ write results into the specified cache; do not
            retain them in memory. intended mainly for cases in which the
            full list would be larger than available memory.
        fetch_owner: if True, include owner of objects in output. Not
            enabled by default due to permissioning and performance issues.

    Returns:
        Manifest of contents, format dependent on `formatting`; or None
            if `cache_only` is True.
    """
    request = {"Bucket": self.name}
    if recursive is False:
        # try to treat prefixes like directories.
        # note that 'recursive' behavior is default -- the
        # ListObjects* methods don't treat prefixes as anything but parts
        # of an object key by default. _also_ note that this is different
        # from the default awscli s3 behavior, but not awscli s3api.
        request["Delimiter"] = "/"
        if prefix is None:
            request["Prefix"] = ""
    if prefix is not None:
        request["Prefix"] = prefix.lstrip("/")
    if start_after is not None:
        request["StartAfter"] = start_after
    request["FetchOwner"] = fetch_owner
    # pagination is typically slightly faster than iteratively passing
    # StartAfter based on the last key of a truncated response
    paginator = self.client.get_paginator("list_objects_v2")
    if isinstance(cache, str):
        cache = Path(cache)
    self._maybe_prep_ls_cache(cache)
    keep_in_memory = cache_only is False
    pages = []
    for page in paginator.paginate(**request):
        self._maybe_write_ls_cache(page, cache)
        if keep_in_memory:
            pages.append(page)
    if cache_only is True:
        return None
    if formatting not in ("raw", "simple", "df", "contents"):
        warnings.warn(
            f"invalid formatting '{formatting}', defaulting to 'simple'"
        )
        formatting = "simple"
    if formatting == "raw":
        return tuple(pages)
    # flatten the per-page lists into single streams
    objects = chain.from_iterable(p.get("Contents", []) for p in pages)
    prefixes = chain.from_iterable(
        p.get("CommonPrefixes", []) for p in pages
    )
    if formatting == "simple":
        # object keys first, then 'directory'-like common prefixes
        keys = [rec["Key"] for rec in objects]
        keys.extend(rec["Prefix"] for rec in prefixes)
        return tuple(keys)
    if formatting == "contents":
        return tuple(objects)
    # only "df" remains
    if fetch_owner is False:
        return pd.DataFrame(objects)
    records = tuple(objects)
    # split each record's nested 'Owner' dict out into its own columns
    owner_info = pd.DataFrame([rec.pop('Owner') for rec in records])
    return pd.concat(
        [pd.DataFrame(records), pd.DataFrame(owner_info)], axis=1
    )
ls_multipart()

List all multipart uploads associated with this bucket.

Returns:

Type Description
dict

API response

Source code in hostess/aws/s3.py
1548
1549
1550
1551
1552
1553
1554
1555
def ls_multipart(self) -> dict:
    """
    Ask S3 for the multipart uploads associated with this bucket.

    Returns:
        API response
    """
    list_uploads = self.client.list_multipart_uploads
    return list_uploads(Bucket=self.name)
put(obj=b'', key=None, literal_str=False, config=None, checksum=None, **extra_args)

Upload files or buffers to an S3 bucket

Parameters:

Name Type Description Default
obj Union[Puttable, Sequence[Puttable]]

An individual str, Path, or filelike / buffer object to upload, or a sequence of such objects

b''
key Optional[Union[str, Sequence[str]]]

S3 key (fully-qualified 'path' from bucket root); or, if obj is a sequence, a sequence of keys of the same length of obj. If key is not specified, key(s) are generated from the string representation(s) of the uploaded object(s), truncated to 1024 characters (maximum length of an S3 key).

None
literal_str bool

If True, and obj is a string or a sequence containing strings, write all such strings directly to objects. Otherwise, interpret them as paths to local files

False
config Optional[TransferConfig]

boto3.s3.transfer.TransferConfig; bucket's default if None

None
checksum Optional[Union[str, Sequence[str]]]

Optional base64-encoded raw 4-byte CRC32 checksum of object, or sequence of such checksums for each object. Used for full-object S3 checksum verification. Other checksum types and algorithms are not supported.

None
extra_args str

ExtraArgs for boto3 bucket object

{}

Returns:

Type Description
Union[None, list[Optional[Exception]]]

None, or, for multi-upload, a list containing None for each successful put and an Exception for each failed one

Source code in hostess/aws/s3.py
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
@splitwrap(seq_arity=2, splittable=["checksum"])
def put(
    self,
    obj: Union[Puttable, Sequence[Puttable]] = b"",
    key: Optional[Union[str, Sequence[str]]] = None,
    literal_str: bool = False,
    config: Optional[boto3.s3.transfer.TransferConfig] = None,
    checksum: Optional[Union[str, Sequence[str]]] = None,
    **extra_args: str
) -> Union[None, list[Optional[Exception]]]:
    """
    Upload a file or an in-memory object / buffer to this bucket.

    Args:
        obj: what to upload: a str, a Path, or a filelike / buffer object
            (or a sequence of them). `None` produces an empty object
            ('touch'-style behavior).
        key: destination S3 key (fully-qualified 'path' from bucket root),
            or a sequence of keys as long as `obj` when `obj` is a
            sequence. When omitted, the key is the string representation
            of `obj`, cut to 1024 characters (the maximum S3 key length).
        literal_str: if True, strings in `obj` are written to S3 as
            content; otherwise they are interpreted as local file paths.
        config: boto3.s3.transfer.TransferConfig to use; defaults to the
            bucket's own config when None.
        checksum: optional base64-encoded raw 4-byte CRC32 checksum of the
            object (or one per object). When given, it is forwarded for
            full-object checksum verification. No other checksum types
            or algorithms are supported.
        extra_args: forwarded as the boto3 `ExtraArgs` mapping.

    Returns:
        None, or, for multi-upload, a list containing None for each
            successful put and an Exception for each failed one

    Raises:
        TypeError: if `obj` is neither str, bytes, nor something with a
            `read` attribute (and is not treated as a file path).
    """
    if config is None:
        config = self.config
    if key is None:
        # fall back to the object's string form, capped at the S3 limit
        key = str(obj)[:1024]
    if checksum is not None:
        extra_args.update(
            {
                "ChecksumAlgorithm": "CRC32",
                "ChecksumCRC32": checksum,
                "ChecksumType": "FULL_OBJECT"
            }
        )
    shared_kwargs = dict(
        Bucket=self.name, Key=key, Config=config, ExtraArgs=extra_args
    )
    # None means "create an empty object"
    payload = BytesIO() if obj is None else obj
    if _should_be_file(payload, literal_str):
        # upload straight from local storage
        return self.client.upload_file(
            Filename=str(payload), **shared_kwargs
        )
    # from here on we're uploading from memory; a str at this point is
    # literal content rather than a path, so it must become bytes first
    if isinstance(payload, str):
        payload = payload.encode("utf-8")
    if isinstance(payload, bytes):
        payload = BytesIO(payload)
    elif not hasattr(payload, "read"):
        # imperfect heuristic: anything else must look like a buffer
        raise TypeError(f"Cannot put object of type {type(payload)}")
    return self.client.upload_fileobj(Fileobj=payload, **shared_kwargs)
put_stream(obj, key, config=None, upload_threads=4, verbose=False, explicit_length=None, chunksize=None)

Create an S3 object from a byte stream via a managed multipart upload. Intended primarily for intermittent streams, incremental writes of larger-than-memory data, direct streams from remote resources, and streams of unknown length. If you are just uploading on-disk files or discrete in-memory objects, Bucket.put() is usually preferable.

Parameters:

Name Type Description Default
obj Union[Iterator, IO, str, Path]

source of stream to upload. May be a path, a URL, a filelike object, or any iterator that yields bytes objects.

required
key str

key of object to create from stream (fully-qualified 'path' relative to bucket root)

required
config Optional[TransferConfig]

optional transfer config

None
upload_threads Optional[int]

number of subprocesses to use for upload (None means upload serially)

4
verbose bool

print and log progress of streaming upload

False
explicit_length Optional[int]

optional explicit length specification, for streams of known length

None
chunksize Optional[int]

size of individual upload chunks; overrides any setting in config. if stream length is explicitly specified or inferred to be less than chunksize, this function will fall back to a simple put operation.

None

Returns:

Type Description
Optional[dict]

API response to multipart upload completion, or None if stream length < chunksize and we fell back to simple upload

Source code in hostess/aws/s3.py
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
def put_stream(
    self,
    obj: Union[Iterator, IO, str, Path],
    key: str,
    config: Optional[boto3.s3.transfer.TransferConfig] = None,
    upload_threads: Optional[int] = 4,
    # download_threads: Optional[int] = None,
    verbose: bool = False,
    explicit_length: Optional[int] = None,
    # TODO: overrides chunksize in config -- maybe make an easier interface
    #  to this
    chunksize: Optional[int] = None,
) -> Optional[dict]:
    """
    Create an S3 object from a byte stream via a managed multipart upload.
    Intended primarily for intermittent streams, incremental writes of
    larger-than-memory data, direct streams from remote resources, and
    streams of unknown length. If you are just uploading on-disk files
    or discrete in-memory objects, Bucket.put() is usually preferable.

    Args:
        obj: source of stream to upload. May be a path, a URL, a filelike
            object, or any iterator that yields `bytes` objects.
        key: key of object to create from stream (fully-qualified 'path'
            relative to bucket root)
        config: optional transfer config; the bucket's config is used if
            None
        upload_threads: number of subprocesses to use for upload (None
            means upload serially)
        verbose: print and log progress of streaming upload
        explicit_length: optional explicit length specification, for
            streams of known length
        chunksize: size of individual upload chunks; overrides any setting
            in config for this call only (the override is applied to a
            copy, so neither the bucket's config nor a passed `config`
            is modified). If stream length is explicitly specified or
            inferred to be less than chunksize, this function will fall
            back to a simple put operation.

    Returns:
        API response to multipart upload completion, or None if stream
            length < chunksize and we fell back to simple upload

    Raises:
        NotImplementedError: if `obj` is a string that looks like an s3 URL.
        TypeError: if `obj` has neither a `read` nor a `__next__` attribute
            once paths and HTTP responses have been unwrapped.
        ValueError: if the stream yields no data at all. The multipart
            upload is aborted before the error propagates.
    """
    import copy

    if config is None:
        config = self.config
    if chunksize is not None:
        # apply the override to a shallow copy so it does not leak into
        # the bucket's default config or the caller's config object
        config = copy.copy(config)
        config.multipart_chunksize = chunksize
    stream = obj
    if isinstance(stream, str):
        if stream.startswith("http") and ("//" in stream):
            stream = requests.get(stream, stream=True)
        elif stream.startswith("s3") and ("//" in stream):
            raise NotImplementedError(
                "dispatch for handling s3 urls is not yet implemented. "
                "please use something else for now."
            )
    if isinstance(stream, requests.Response):
        stream.raise_for_status()
    target_chunksize = config.multipart_chunksize
    if explicit_length is not None:
        length = explicit_length
    else:
        length = infer_stream_length(stream)
    if (length is not None) and (length < target_chunksize):
        if verbose:
            console_and_log(
                "Stream shorter than chunksize, falling back to basic put."
            )
        return self.put(obj=stream, key=key, config=config)
    if isinstance(stream, (str, Path)):
        stream = Path(stream).open("rb")
    if isinstance(stream, requests.Response):
        stream = stream.iter_content(chunk_size=target_chunksize)
    # pick a zero-argument callable that yields the next chunk of bytes
    if "read" in dir(stream):
        reader = partial(stream.read, target_chunksize)
    elif "__next__" in dir(stream):
        reader = stream.__next__
    else:
        raise TypeError(
            "can't determine how to consume bytes from stream."
        )
    put_chunk, parts, multipart_upload = self.chunk_putter_factory(
        key, upload_threads, None, verbose
    )
    uploaded_any = False
    try:
        while True:
            try:
                chunk = reader()
            except StopIteration:
                # iterator-style stream is exhausted
                break
            if len(chunk) == 0:
                # read()-style stream is exhausted
                break
            # every chunk that was read is uploaded, including the first
            put_chunk(chunk)
            uploaded_any = True
        if not uploaded_any:
            raise ValueError("Empty stream.")
    except ValueError:
        self.abort_multipart_upload(multipart=multipart_upload)
        raise
    # drop our reference to the last chunk before flushing
    chunk = None
    put_chunk(b"", flush=True)
    del put_chunk
    if upload_threads is not None:
        # wait for all outstanding part uploads, then collect their results
        while not all(f.done() for f in parts.values()):
            time.sleep(0.05)
        parts = {number: f.result() for number, f in parts.items()}
    return self.complete_multipart_upload(
        multipart=multipart_upload, parts=parts
    )
read(key, mode='r', return_buffer=False, start_byte=None, end_byte=None)

Read an S3 object into memory.

Parameters:

Name Type Description Default
key str

fully-qualified 'path' to object

required
mode Literal['r', 'rb']

'r' to read as text, 'rb' as bytes

'r'
return_buffer bool

if True, return object as a StringIO/BytesIO; if False, return it as str/bytes

False
start_byte Optional[int]

if not None, start reading at this byte

None
end_byte Optional[int]

if not None, stop reading at this byte

None

Returns:

Type Description
Union[bytes, str, BytesIO, StringIO]

Contents of object, in format specified by mode and return_buffer.

Source code in hostess/aws/s3.py
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
def read(
    self,
    key: str,
    mode: Literal["r", "rb"] = "r",
    return_buffer: bool = False,
    start_byte: Optional[int] = None,
    end_byte: Optional[int] = None
) -> Union[bytes, str, BytesIO, StringIO]:
    """
    Fetch an S3 object (or a byte range of it) into memory.

    Args:
        key: fully-qualified 'path' to object
        mode: 'rb' to get bytes; any other value decodes the content
            as text
        return_buffer: if True, wrap the result in a buffer (the fetched
            buffer itself for 'rb', a StringIO for text); if False,
            return plain bytes / str
        start_byte: if not None, start reading at this byte
        end_byte: if not None, stop reading at this byte

    Returns:
        Contents of object, in format specified by `mode` and
            `return_buffer`.
    """
    fetched = self.get(key, start_byte=start_byte, end_byte=end_byte)
    want_bytes = mode == "rb"
    if return_buffer is True:
        if want_bytes:
            return fetched
        # a StringIO built from its initial value starts at position 0
        return StringIO(fetched.read().decode())
    raw = fetched.read()
    return raw if want_bytes else raw.decode()
restore(key, tier='Bulk', days=5)

Issue a request to temporarily restore one or more objects from S3 Glacier Flexible Retrieval or Deep Archive to S3 Standard. Note that object restoration is not instantaneous. Depending on retrieval tier and storage class, AWS guarantees retrieval times ranging from 5 minutes to 48 hours. See https://docs.aws.amazon.com/AmazonS3/latest/userguide/restoring-objects-retrieval-options.html for details.

You can check the progress of restore requests using Bucket.head().

Parameters:

Name Type Description Default
key Union[str, Sequence[str]]

key(s) of object(s) to restore (fully-qualified 'paths')

required
tier Literal['Expedited', 'Standard', 'Bulk']

retrieval tier. In order of speed and expense, high to low, options are "Expedited", "Standard", and "Bulk". "Expedited" is not available for Deep Archive.

'Bulk'
days int

number of days object(s) should remain restored before reverting to Glaciered state

5

Returns:

Type Description
Union[dict, list[Union[dict, Exception]]]

RestoreObject API response, or list of responses and/or Exceptions

Source code in hostess/aws/s3.py
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
@splitwrap(seq_arity=1)
def restore(
    self,
    key: Union[str, Sequence[str]],
    tier: Literal["Expedited", "Standard", "Bulk"] = "Bulk",
    days: int = 5,
) -> Union[dict, list[Union[dict, Exception]]]:
    """
    Request a temporary restoration of one or more objects from S3
    Glacier Flexible Retrieval or Deep Archive to S3 Standard.
    Restoration is not instantaneous: depending on retrieval tier and
    storage class, AWS guarantees retrieval times ranging from 5 minutes
    to 48 hours. See https://docs.aws.amazon.com/AmazonS3/latest
    /userguide/restoring-objects-retrieval-options.html for details.

    Progress of restore requests can be checked with Bucket.head().

    Args:
        key: key(s) of object(s) to restore (fully-qualified 'paths')
        tier: retrieval tier. From fastest / most expensive to slowest /
            cheapest: "Expedited", "Standard", "Bulk". "Expedited" is
            not available for Deep Archive.
        days: how many days the object(s) should stay restored before
            reverting to Glaciered state

    Returns:
        RestoreObject API response, or list of responses and/or Exceptions
    """
    return self.client.restore_object(
        Bucket=self.name,
        Key=key,
        RestoreRequest={
            "Days": days,
            "GlacierJobParameters": {"Tier": tier},
        },
    )
rm(key)

Delete an S3 object.

Parameters:

Name Type Description Default
key str

key of object to delete (fully-qualified 'path' from root)

required

Returns:

Type Description
Union[None, list[Optional[None]]]

None, or, for multi-delete, a list containing None for successful deletes and Exceptions for failed

Source code in hostess/aws/s3.py
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
@splitwrap(seq_arity=1)
def rm(self, key: str) -> Union[None, list[Optional[None]]]:
    """
    Delete an S3 object by issuing a `delete_object` call on this
    bucket's client.

    Args:
        key: key of object to delete (fully-qualified 'path' from root)

    Returns:
        None, or, for multi-delete, a list containing None for successful
            deletes and Exceptions for failed
    """
    delete_object = self.client.delete_object
    return delete_object(Bucket=self.name, Key=key)
set_tags(*, replace_all=False, **tags)

Set tags for this bucket.

Parameters:

Name Type Description Default
replace_all bool

if True, removes all existing tags along with setting passed tag values (this is the default behavior of the PutBucketTagging API operation). If False, leaves existing tags unchanged unless they are redefined in tags. Raises a ValueError if passed for a directory bucket, because the API call required to set tags behaves entirely differently, and we do not wish to implement that behavior here.

False
**tags str

Argument names are tag keys; argument values are tag values. At least one tag must be defined.

{}

Returns:

Type Description

API response for PutBucketTagging operation.

Source code in hostess/aws/s3.py
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
def set_tags(self, *, replace_all: bool = False, **tags: str):
    """
    Set tags for this bucket.

    Args:
        replace_all: if True, removes _all_ existing tags along with
            setting passed tag values (this is the default behavior of the
            PutBucketTagging API operation). If False, existing tags are
            kept unless `tags` redefines them. Passing True for a
            directory bucket raises a ValueError, because the API call
            required to set tags there behaves entirely differently and
            that behavior is deliberately not implemented here.
        **tags: Argument names are tag keys; argument values are tag
            values. At least one tag must be defined.

    Returns:
        API response for the tagging operation.

    Raises:
        ValueError: if no tags are passed, or if `replace_all` is True
            for a directory bucket.
        TypeError: if any tag value is not a string.
    """
    if not tags:
        raise ValueError("No tags to set")
    for tag_value in tags.values():
        if not isinstance(tag_value, str):
            raise TypeError("Tag values must be strings")
    if replace_all is True:
        if self.bucket_type == 'directory':
            raise ValueError(
                "replace_all=True not supported for directory buckets."
            )
    elif replace_all is False:
        # layer the new tags over the existing ones
        tags = self.tags | tags
    tagset = [
        {'Key': tag_key, 'Value': tag_value}
        for tag_key, tag_value in tags.items()
    ]
    if self.bucket_type == "directory":
        # directory buckets are tagged through the s3control client
        control_client = init_client("s3control", None, self.session)
        response = control_client.tag_resource(
            **self._s3c_kwargs(), Tags=tagset
        )
    else:
        tagging = {'TagSet': tagset}
        response = self.client.put_bucket_tagging(
            Bucket=self.name, Tagging=tagging
        )
    # drop any stored `tags` attribute now that tags have changed;
    # there may be nothing to drop
    try:
        # noinspection PyPropertyAccess
        del self.tags
    except AttributeError:
        pass
    return response
tail(key, destination, start_pos=None, poll=1, text_mode=True, permit_missing=False)

Provides asynchronous tail -f-like functionality for S3 objects.

Parameters:

Name Type Description Default
key str

object key (fully-qualified 'path' from root)

required
destination MutableSequence | BinaryIO | TextIO | IOBase | Path | str

where to write the tail chunks. If a sequence, appends each chunk as a new item.

required
start_pos int | None

start reading from where? If None, start at the length of the object when tail() is called.

None
poll float

poll rate in seconds

1
text_mode bool

if True, decode each chunk as utf-8 text

True
permit_missing bool

if True, don't raise an error if the object doesn't exist; instead, periodically check to see if the object comes into existence, and if it does, start tailing it.

False

Returns:

Type Description

A StoppableFuture for the poll loop. Call its stop() method to stop tailing. Note that this future's result will always be None.

Notes

Exclusive of egress costs, running this for a full day at the default 1-second poll rate on a single S3 Standard object costs approximately $0.07. On a SEOZ object, it costs approximately $0.005. Egress costs in this application should be equal to egress for (size of object when tailing ends) minus (the smaller of start_pos or size of object when tailing starts)

Source code in hostess/aws/s3.py
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
def tail(
    self,
    key: str,
    destination: MutableSequence | BinaryIO | TextIO | IOBase | Path | str,
    start_pos: int | None = None,
    poll: float = 1,
    text_mode: bool = True,
    permit_missing: bool = False
):
    """
    Asynchronously follow an S3 object, in the manner of `tail -f`.

    The polling itself is done by `_poll_obj`, launched on a dedicated
    single-worker thread pool.

    Args:
        key: object key (fully-qualified 'path' from root)
        destination: where to write the tail chunks. If a sequence,
            each chunk is appended as a new item.
        start_pos: position to start reading from. If None, start at the
            length of the object when `tail()` is called.
        poll: poll rate in seconds
        text_mode: if True, decode each chunk as utf-8 text
        permit_missing: if True, a missing object is not an error;
            instead, periodically check whether the object has come
            into existence, and start tailing it if it has.

    Returns:
        A `StoppableFuture` for the poll loop. Call its `stop()` method to
            stop tailing. Note that this future's result will always be
            `None`.

    Warnings:
        This function is primarily intended for following append-writes
        to SEOZ objects, and if the size of the object _decreases_ while
        tailing, the method may no longer accurately fetch subsequent
        writes to the file. In the future, we may add an option to
        perform an additional HEAD request to check object length before
        each normal HEAD / GET pair.

    Notes:
        Exclusive of egress costs, running this for a full day at the
        default 1-second poll rate on a single S3 Standard object costs
        approximately $0.07. On a SEOZ object, it costs approximately
        $0.005. Egress costs in this application should be equal to
        egress for (size of object when tailing ends) minus (the smaller
        of start_pos or size of object when tailing starts)
    """
    poll_settings = {
        "bucket": self,
        "key": key,
        "start_pos": start_pos,
        "text_mode": text_mode,
        "destination": destination,
        "poll": poll,
        "permit_missing": permit_missing,
    }
    executor = ThreadPoolExecutor(1)
    return StoppableFuture.launch_into(executor, _poll_obj, **poll_settings)
update_contents(prefix=None, cache=None, fetch_owner=False)

recursively scan the contents of the bucket and store the result in self.contents.

Parameters:

Name Type Description Default
prefix Optional[str]

prefix at which to begin scan. if not passed, scans the entire bucket.

None
cache Optional[Union[str, Path, IOBase]]

optional file or filelike object to write scan results to in addition to storing them in self.contents.

None
fetch_owner bool

if True, include owner of objects in response.

False
Source code in hostess/aws/s3.py
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
def update_contents(
    self,
    prefix: Optional[str] = None,
    cache: Optional[Union[str, Path, IOBase]] = None,
    fetch_owner: bool = False
):
    """
    Run a recursive `ls` of the bucket in "contents" formatting and keep
    the result in `self.contents`.

    Args:
        prefix: prefix at which to begin scan. if not passed, scans the
            entire bucket.
        cache: optional file or filelike object to write scan results to
            in addition to storing them in self.contents.
        fetch_owner: if True, include owner of objects in response.
    """
    scan_options = {
        "recursive": True,
        "prefix": prefix,
        "cache": cache,
        "formatting": "contents",
        "fetch_owner": fetch_owner,
    }
    self.contents = self.ls(**scan_options)

_clean_putter_process_records(records, block=True, threshold=1, poll_delay=0.05)

helper function for multithreaded put_stream()

Source code in hostess/aws/s3.py
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
def _clean_putter_process_records(
    records, block=True, threshold=1, poll_delay=0.05
):
    """
    helper function for multithreaded put_stream().

    Sweeps `records` for entries that carry a "process": for each one
    that is no longer alive, receives its result from the entry's "pipe"
    (if not already stored under "result"), closes the pipe, and closes
    the process. If `block` is True, repeats the sweep every `poll_delay`
    seconds for as long as at least `threshold` processes are alive.
    """
    while True:
        with_process = valfilter(lambda v: "process" in v.keys(), records)
        n_alive = 0
        for entry in with_process.values():
            process = entry["process"]
            # a closed process can't be asked whether it's alive
            if not process._closed and process.is_alive():
                n_alive += 1
                continue
            if "result" not in entry.keys():
                entry["result"] = entry["pipe"].recv()
                entry["pipe"].close()
            if not process._closed:
                process.close()
        keep_blocking = (n_alive >= threshold) and block
        if keep_blocking is not True:
            return
        time.sleep(poll_delay)

_dtstr(thing)

convert thing to its isoformat() if it's a datetime, otherwise return its string representation. helper function for Bucket.ls() in cached mode.

Source code in hostess/aws/s3.py
480
481
482
483
484
485
486
487
def _dtstr(thing: Any):
    """
    return `thing.isoformat()` when `thing` is a datetime, and `str(thing)`
    for anything else. helper function for Bucket.ls() in cached mode.
    """
    return thing.isoformat() if isinstance(thing, dt.datetime) else str(thing)

check_az(az, region_name)

Validate existence of an AvailabilityZone in a particular region, and return its canonical Zone ID.

Parameters:

Name Type Description Default
az str | int

identifier for Availability Zone. May be its Zone Name, Zone ID, number, or letter.

required
region_name str

canonical, unabbreviated region name (e.g. 'us-east-1')

required

Returns:

Type Description

Unabbreviated Zone ID for referenced Availability Zone.

Source code in hostess/aws/s3.py
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
def check_az(az: str | int, region_name: str):
    """
    Validate existence of an AvailabilityZone in a particular region, and
    return its canonical Zone ID.

    Args:
        az: identifier for Availability Zone. May be its Zone Name, Zone ID,
            number, or letter.
        region_name: canonical, unabbreviated region name (e.g. 'us-east-1')

    Returns:
        Unabbreviated Zone ID for referenced Availability Zone.
    """
    if isinstance(az, int):
        az, ident = str(az), "number"
    elif not isinstance(az, str):
        raise TypeError(f"'az_name' must be int or string, got {type(az)}")
    elif len(az) == 1:
        ident = "number" if re.match(r"\d", az) else "letter"
    elif AZ_NAMEPAT.match(az):
        ident = "name"
    elif AZ_IDPAT.match(az):
        ident = "id"
    else:
        raise ValueError(f"Unrecognized pattern for 'az'.")
    ec2 = init_client("ec2", region=region_name)
    zones = ec2.describe_availability_zones()["AvailabilityZones"]
    if ident == "number":
        match = [z for z in zones if z['ZoneId'][-1] == az]
    elif ident == "name":
        match = [z for z in zones if z['ZoneName'] == az]
    elif ident == "letter":
        match = [z for z in zones if z['ZoneName'][-1] == az]
    else:
        match = [z for z in zones if z['ZoneId'] == az]
    if len(match) == 0:
        raise ValueError(f"No AZ in {region_name} found matching {az}")
    elif len(match) > 1:
        raise ValueError(
            "Multiple AZ matches. This may indicate a bug or an API error."
        )
    return match[0]['ZoneId']

split_optional_kwargs(bound, splittable, split_len)

Remove splittable kwargs from bound and return an iterator of per-call kwarg patches.

Scalar values repeat. Sequence values split item-by-item. Missing values remain missing, so normal defaults still work.

Source code in hostess/aws/s3.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
def split_optional_kwargs(bound, splittable, split_len):
    """
    Pop every splittable kwarg present in `bound` and return an iterator
    of `split_len` per-call kwarg patches (dicts).

    A scalar value is repeated in every patch.
    A sequence value contributes one item per patch, and must have
    exactly `split_len` items.
    A name absent from `bound` is absent from every patch, so normal
    defaults still work.

    Raises:
        ValueError: if a sequence value's length is not `split_len`.
    """
    columns = {}
    for name in splittable:
        if name in bound:
            value = bound.pop(name)
            if not is_split_sequence(value):
                columns[name] = repeat(value, split_len)
                continue
            if len(value) != split_len:
                raise ValueError(f"Mismatched {name!r} sequence length")
            columns[name] = iter(value)

    if len(columns) == 0:
        # nothing to split: every call gets an empty patch
        return ({} for _ in range(split_len))

    names = tuple(columns)
    return (dict(zip(names, row)) for row in zip(*columns.values()))

splitwrap(*, seq_arity, splittable=())

splitwrap(*, seq_arity: Literal[1], splittable: Iterable[str] = ()) -> Callable[[BMethOne], BMethOneList]
splitwrap(*, seq_arity: Literal[2], splittable: Iterable[str] = ()) -> Callable[[BMethTwo], BMethTwoList]

Decorator for methods of Bucket that permits them to accept either single source and/or destination arguments or sequences of them. Automatically maps sequences into a thread pool, unless instructed to run serially, and returns all results in a list. Fails gracefully, returning Exceptions raised by any individual function call rather than raising them.

Extra named arguments listed in splittable may be either scalar or sequence. Scalars are repeated for every split call. Sequences are split item-by-item.

Source code in hostess/aws/s3.py
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
def splitwrap(
    *,
    seq_arity: Literal[1, 2],
    splittable: Iterable[str] = (),
) -> (
    Callable[[BMethOne], BMethOneList]
    | Callable[[BMethTwo], BMethTwoList]
):
    """
    Decorator factory for methods of Bucket that lets them accept either
    single source and/or destination arguments or sequences of them.
    Sequences are automatically mapped into a thread pool, unless
    instructed to run serially, and all results are returned in a list.
    Fails gracefully: Exceptions raised by individual calls are returned
    rather than raised.

    Extra named arguments listed in splittable may be either scalar or
    sequence. Scalars are repeated for every split call. Sequences are
    split item-by-item.

    The returned decorator hands the method to `splitwrap_arity_1` or
    `splitwrap_arity_2`, according to `seq_arity`.

    Raises:
        ValueError: if `seq_arity` is neither 1 nor 2.
    """
    if seq_arity == 1:

        def decorate(method):
            return splitwrap_arity_1(method, splittable=splittable)

    elif seq_arity == 2:

        def decorate(method):
            return splitwrap_arity_2(method, splittable=splittable)

    else:
        raise ValueError(f"seq_arity must be 1 or 2, not {seq_arity!r}")

    return decorate

aws.utilities

_check_cached_results(path, prefix, max_age=5)

check for a 'fresh' cached API call by string-matching against filename prefix and our standardized in-filename timestamp format.

Parameters:

Name Type Description Default
path Path

path to check for results

required
prefix str

filename prefix for results

required
max_age float

age, in days, after which a result is no longer 'fresh'

5

Returns:

Type Description
Optional[Path]

Path for first matching result, if it is 'fresh'. None if it is not fresh or there is no matching result.

Source code in hostess/aws/utilities.py
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
def _check_cached_results(
    path: Path, prefix: str, max_age: float = 5
) -> Optional[Path]:
    """
    check for a 'fresh' cached API call by string-matching against filename
    prefix and our standardized in-filename timestamp format
    (YYYY_MM_DDTHH_MM_SS).

    Only the first file found whose name starts with `prefix` is examined.

    Args:
        path: path to check for results
        prefix: filename prefix for results
        max_age: age, in days, after which a result is no longer 'fresh'.
            May be fractional.

    Returns:
        Path for first matching result, if it is 'fresh'. None if it is not
            fresh, if its name contains no timestamp, or if there is no
            matching result.
    """
    candidates = (p for p in path.iterdir() if p.name.startswith(prefix))
    result = next(candidates, None)
    if result is None:
        return None
    timestamp = re.search(
        r"(\d{4})_(\d{2})_(\d{2})T(\d{2})_(\d{2})_(\d{2})", result.name
    )
    if timestamp is None:
        # without a timestamp we can't establish freshness
        return None
    cache_age = dt.datetime.now() - dt.datetime(
        *map(int, timestamp.groups())
    )
    # compare full timedeltas, not whole days, so fractional ages and
    # fractional max_age values are honored
    if cache_age > dt.timedelta(days=max_age):
        return None
    return result

_clear_cached_results(folder, pre)

quick way to delete all files in folder whose names begin with pre.

Parameters:

Name Type Description Default
folder Path

folder to clear.

required
pre str

filename prefix that triggers deletion.

required
Source code in hostess/aws/utilities.py
406
407
408
409
410
411
412
413
414
415
def _clear_cached_results(folder: Path, pre: str):
    """
    Delete every entry directly inside `folder` whose name starts with `pre`.

    Args:
        folder: folder to clear.
        pre: filename prefix that triggers deletion.
    """
    for entry in folder.iterdir():
        if entry.name.startswith(pre):
            entry.unlink()

autopage(client, operation, agg=None, **api_kwargs)

Perform an AWS API call that returns paginated results, greedily page through all of them, and return the aggregated results.

Parameters:

Name Type Description Default
client BaseClient

boto Client object to make API call

required
operation str

name of API call to perform

required
agg Optional[Union[str, Sequence[str], Callable]]

Optional special aggregator. If agg is a str, it means: 'concatenate the values of the key named agg from all pages of the response'. If agg is a sequence of str, it means: 'concatenate the values of each of the named keys in agg from all pages of the response in separate lists'. If agg is callable, it means: 'just feed the pager to agg and let it do its thing'. If not specified, aggregate the values of the key whose value is longest in the first response. (This is a heuristic for naively getting the actual responses and ignoring pagination metadata.)

None
**api_kwargs Any

kwargs to pass to the API call.

{}

Returns:

Type Description
tuple

Tuple of aggregated responses, format depending on agg.

Source code in hostess/aws/utilities.py
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
def autopage(
    client: botocore.client.BaseClient,
    operation: str,
    agg: Optional[Union[str, Sequence[str], Callable]] = None,
    **api_kwargs: Any,
) -> tuple:
    """
    Run a paginated AWS API call, eagerly walk every page, and return the
    aggregated results as a tuple.

    Args:
        client: boto Client object to make API call
        operation: name of API call to perform
        agg: optional special aggregator. A `str` or a sequence of `str`
            is turned into `mapcat(get(agg))`, i.e. the values stored under
            the named key(s) are pulled from every page and concatenated.
            A callable is handed the page iterator directly and its output
            is wrapped in a tuple. If omitted, the key of the first page
            whose list value is longest is chosen and that key is
            aggregated across all pages. (This is a heuristic for naively
            getting the actual responses and ignoring pagination
            metadata.)
        **api_kwargs: kwargs to pass to the API call.

    Returns:
        Tuple of aggregated responses, format depending on `agg`.
    """
    assert client.can_paginate(operation)
    # `str` is itself a Sequence, so one check covers both key-style forms
    if isinstance(agg, (str, Sequence)):
        agg = mapcat(get(agg))
    pages = iter(client.get_paginator(operation).paginate(**api_kwargs))
    if agg is not None:
        return tuple(agg(pages))
    # no aggregator given: inspect the first page to guess the payload key
    first_page = next(pages)
    list_sizes = [
        (key, len(value))
        for key, value in first_page.items()
        if isinstance(value, list)
    ]
    longest = max((size for _, size in list_sizes), default=None)
    aggkey = [key for key, size in list_sizes if size == longest][0]
    head = get(aggkey)(first_page)
    tail = list(mapcat(get(aggkey))(pages))
    return tuple(head + tail)

clarify_region(region=None, boto_obj=None)

attempt to determine the AWS region associated with an object (presumably some kind of boto object).

Parameters:

Name Type Description Default
region Optional[str]

if not None, this acts as a strict override: the function simply returns region.

None
boto_obj Any

object whose AWS region to determine.

None

Returns:

Type Description
str

name of AWS region (e.g. 'us-east-2').

Raises:

Type Description
AttributeError

if region is None and we can't figure out how to read a region from boto_obj.

Source code in hostess/aws/utilities.py
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
def clarify_region(region: Optional[str] = None, boto_obj: Any = None) -> str:
    """
    Work out which AWS region goes with an object (presumably some kind of
    boto object).

    Args:
        region: strict override. If this is not None it is returned as-is
            and `boto_obj` is never inspected.
        boto_obj: object whose AWS region to determine. Its `region_name`
            attribute is preferred; failing that, the `region_name` of its
            `_client_config` attribute is used.

    Returns:
        name of AWS region (e.g. 'us-east-2').

    Raises:
        AttributeError: if `region` is None and `boto_obj` exposes neither
            `region_name` nor `_client_config`.
    """
    if region is None:
        attribute_names = dir(boto_obj)
        if "region_name" in attribute_names:
            return boto_obj.region_name
        if "_client_config" in attribute_names:
            return boto_obj._client_config.region_name
        raise AttributeError(
            f"Don't know how to read region from {boto_obj}."
        )
    return region

crc32_base64(path)

Checksum a file the way S3 likes: base64-encoded 32-bit CRC32 bytes, MSB.

Parameters:

Name Type Description Default
path Union[Path, str]

Path to file to checksum.

required

Returns:

Type Description
str

Base64-encoded raw CRC32 bytes, suitable for passing as Bucket.put()'s checksum argument.

Source code in hostess/aws/utilities.py
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
def crc32_base64(path: Union[Path, str]) -> str:
    """
    Checksum a file the way S3 likes: the 32-bit CRC32 of its contents,
    packed as four big-endian bytes and then base64-encoded.

    The file is read in 1 MiB chunks, so it is never held in memory whole.

    Args:
        path: Path to file to checksum.

    Returns:
        Base64-encoded raw CRC32 bytes, suitable for passing as
            `Bucket.put()`'s `checksum` argument.
    """
    chunk_size = 1 << 20
    checksum = 0
    with open(path, "rb") as stream:
        while chunk := stream.read(chunk_size):
            checksum = zlib.crc32(chunk, checksum)
    raw = (checksum & 0xFFFFFFFF).to_bytes(4, "big")
    return base64.b64encode(raw).decode("ascii")

init_client(service, client=None, session=None, **client_kwargs)

Utility function used throughout hostess.aws to selectively initialize boto clients.

Parameters:

Name Type Description Default
service str

service to produce client for (e.g. "ec2")

required
client Optional[BaseClient]

if not None, simply return client

None
session Optional[Session]

if not None and client is None, initialize newly-made client using this session. Otherwise use a default session. Does nothing if client is not None.

None
client_kwargs

passed to make_boto_client() or boto client constructor

{}

Returns:

Type Description
BaseClient

boto client for service.

Source code in hostess/aws/utilities.py
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
def init_client(
    service: str,
    client: Optional[botocore.client.BaseClient] = None,
    session: Optional[boto3.Session] = None,
    **client_kwargs
) -> botocore.client.BaseClient:
    """
    Helper used throughout `hostess.aws` to obtain a boto client, creating
    one only when the caller has not supplied it.

    Args:
        service: service to produce client for (e.g. "ec2")
        client: if not None, it is returned untouched and every other
            argument is ignored
        session: if not None and `client` is None, the new client is made
            from this session. If both are None, the client is built with
            `make_boto_client()`.
        client_kwargs: passed to make_boto_client() or, when `session` is
            given, to `session.client()`. In the latter case a `region`
            entry is renamed to `region_name` first.

    Returns:
        boto client for `service`.
    """
    if client is not None:
        return client
    if session is None:
        return make_boto_client(service, **client_kwargs)
    # session.client() spells the region keyword 'region_name'
    if "region" in client_kwargs:
        client_kwargs["region_name"] = client_kwargs.pop("region")
    return session.client(service, **client_kwargs)

init_resource(service, resource=None, session=None)

Utility function used throughout hostess.aws to selectively initialize boto resources.

Parameters:

Name Type Description Default
service str

service to produce resource for (e.g. "ec2")

required
resource Optional[ServiceResource]

if not None, simply return resource

None
session Optional[Session]

if not None and resource is None, initialize newly-made resource using this session. Otherwise use a default session. Does nothing if resource is not None.

None

Returns:

Type Description
ServiceResource

boto resource for service.

Source code in hostess/aws/utilities.py
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
def init_resource(
    service: str,
    resource: Optional[boto3.resources.base.ServiceResource] = None,
    session: Optional[boto3.Session] = None,
) -> boto3.resources.base.ServiceResource:
    """
    Helper used throughout `hostess.aws` to obtain a boto resource,
    creating one only when the caller has not supplied it.

    Args:
        service: service to produce resource for (e.g. "ec2")
        resource: if not None, it is returned untouched and `session` is
            ignored
        session: if not None and `resource` is None, the new resource is
            made from this session. If both are None, the resource is
            built with `make_boto_resource()`.

    Returns:
        boto resource for `service`.
    """
    if resource is None and session is None:
        return make_boto_resource(service)
    if resource is None:
        return session.resource(service)
    return resource

make_boto_client(service, profile=None, credential_file=None, region=None, **client_kwargs)

Create a new boto client.

Parameters:

Name Type Description Default
service str

service to create client for, e.g. "ec2"

required
profile Optional[str]

optional name of AWS profile to use (default profile if not specified)

None
credential_file Optional[Union[str, Path]]

optional path to credential file (looks in default credential path if not specified)

None
region Optional[str]

optional name of AWS region, e.g. "us-east-1". (uses profile's default region if not specified)

None
client_kwargs

passed directly to botocore client constructor

{}

Returns:

Type Description
BaseClient

boto client for service.

Source code in hostess/aws/utilities.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
def make_boto_client(
    service: str,
    profile: Optional[str] = None,
    credential_file: Optional[Union[str, Path]] = None,
    region: Optional[str] = None,
    **client_kwargs
) -> botocore.client.BaseClient:
    """
    Build a fresh boto session and return a client for `service` from it.

    Args:
        service: service to create client for, e.g. "ec2"
        profile: optional name of AWS profile to use (default profile if not
            specified)
        credential_file: optional path to credential file (looks in default
            credential path if not specified)
        region: optional name of AWS region, e.g. "us-east-1". (uses profile's
            default region if not specified)
        client_kwargs: passed directly to the session's `client()` method

    Returns:
        boto client for service.
    """
    # a throwaway session is created for each client; slightly redundant
    # but keeps this function self-contained
    return make_boto_session(profile, credential_file, region).client(
        service, **client_kwargs
    )

make_boto_resource(service, profile=None, credential_file=None, region=None, **resource_kwargs)

Create a new boto resource.

Parameters:

Name Type Description Default
service str

service to create resource for, e.g. "ec2"

required
profile Optional[str]

optional name of AWS profile to use (default profile if not specified)

None
credential_file Optional[Union[str, Path]]

optional path to credential file (looks in default credential path if not specified)

None
region Optional[str]

optional name of AWS region, e.g. "us-east-1". (uses profile's default region if not specified)

None
resource_kwargs

passed directly to boto resource constructor

{}

Returns:

Type Description
ServiceResource

boto resource for service.

Source code in hostess/aws/utilities.py
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
def make_boto_resource(
    service: str,
    profile: Optional[str] = None,
    credential_file: Optional[Union[str, Path]] = None,
    region: Optional[str] = None,
    **resource_kwargs
) -> boto3.resources.base.ServiceResource:
    """
    Build a fresh boto session and return a resource for `service` from it.

    Args:
        service: service to create resource for, e.g. "ec2"
        profile: optional name of AWS profile to use (default profile if not
            specified)
        credential_file: optional path to credential file (looks in default
            credential path if not specified)
        region: optional name of AWS region, e.g. "us-east-1". (uses profile's
            default region if not specified)
        resource_kwargs: passed directly to the session's `resource()`
            method, except for `region_name`, which is removed and used as
            the session's region instead.

    Returns:
        boto resource for service.

    Raises:
        ValueError: if `region` and `resource_kwargs["region_name"]` are
            both given and disagree.
    """
    if "region_name" in resource_kwargs:
        kwarg_region = resource_kwargs.pop("region_name")
        if region is not None and kwarg_region != region:
            raise ValueError(
                "Please do not pass conflicting regions in 'resource_kwargs' "
                "and 'region'."
            )
        # the region is applied at the session level, not the resource level
        region = kwarg_region
    session = make_boto_session(profile, credential_file, region)
    return session.resource(service, **resource_kwargs)

make_boto_session(profile=None, credential_file=None, region=None, **session_kwargs)

Create a new boto session.

Parameters:

Name Type Description Default
profile Optional[str]

name of AWS profile to use (default profile if not specified)

None
credential_file Optional[Union[str, Path]]

path to credential file (looks in default credential path if not specified)

None
region Optional[str]

name of AWS region, e.g. "us-east-1". (uses profile's default region if not specified)

None
session_kwargs

passed directly to botocore Session constructor

{}

Returns:

Type Description
Session

boto session.

Source code in hostess/aws/utilities.py
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
def make_boto_session(
    profile: Optional[str] = None,
    credential_file: Optional[Union[str, Path]] = None,
    region: Optional[str] = None,
    **session_kwargs
) -> boto3.Session:
    """
    Create a new boto session.

    Args:
        profile: name of AWS profile to use (default profile if not specified)
        credential_file: path to credential file (looks in default credential
            path if not specified)
        region: name of AWS region, e.g. "us-east-1". (uses profile's default
            region if not specified)
        session_kwargs: passed directly to the `boto3.Session` constructor.
            An explicitly-given `region` or `profile`, and any values parsed
            from `credential_file`, take precedence over same-named entries
            here.

    Returns:
        boto session.
    """
    # previously session_kwargs was accepted but never forwarded; merge it in
    # while letting the explicit arguments win over duplicate keys
    if region is not None:
        session_kwargs["region_name"] = region
    if credential_file is None:
        if profile is not None:
            session_kwargs["profile_name"] = profile
        return boto3.Session(**session_kwargs)
    creds = parse_aws_identity_file(credential_file, profile)
    # these fields appear in downloaded IAM secrets files and are not
    # accepted by the Session constructor
    for disliked_kwarg in ("user_name", "password"):
        creds.pop(disliked_kwarg, None)
    return boto3.Session(**{**session_kwargs, **creds})

parse_aws_identity_file(path, profile=None)

Parse an AWS config, credentials, or downloaded IAM secrets file.

Parameters:

Name Type Description Default
path Union[str, Path]

path to file

required
profile Optional[str]

if specified, look for settings for this profile specifically. Otherwise, use the first profile in the file. Ignored if file appears to be an IAM secrets file.

None

Returns:

Type Description
dict[str, str]

dict of parsed key-value pairs from identity file.

Raises:

Type Description
OSError

if profile is specified but not in identity file, or if identity file is obviously malformatted.

Source code in hostess/aws/utilities.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
def _identity_section_name(line: str) -> Optional[str]:
    """Return the name inside a `[section]` header, or None for other lines."""
    stripped = line.strip()
    if not stripped.startswith("["):
        return None
    return stripped[1:].partition("]")[0].strip()


def parse_aws_identity_file(
    path: Union[str, Path], profile: Optional[str] = None
) -> dict[str, str]:
    """
    Parse an AWS config, credentials, or downloaded IAM secrets file.

    A file whose first line contains a comma is treated as a downloaded
    secrets CSV: its first data row is returned with header names lowercased
    and spaces replaced by underscores. Anything else is treated as an
    INI-style file and the `key = value` lines of one section are returned.

    Args:
        path: path to file
        profile: if specified, look for settings for this profile
            specifically (a section headed `[name]` or `[profile name]`).
            Otherwise, use the first profile in the file.
            Ignored if file appears to be an IAM secrets file.

    Returns:
        `dict` of parsed key-value pairs from identity file.

    Raises:
        OSError: if `profile` is specified but not in identity file, or if
            identity file is empty or obviously malformatted.
    """
    with open(path) as config_file:
        lines = config_file.readlines()
    if not lines:
        raise OSError("Identity file empty or malformatted")
    if "," in lines[0]:
        # this is a downloaded secret key file.
        row = next(csv.DictReader(iter(lines)), None)
        if row is None:
            raise OSError("Identity file empty or malformatted")
        return {"_".join(k.lower().split(" ")): v for k, v in row.items()}
    section_names = [_identity_section_name(line) for line in lines]
    if profile is not None:
        err = f"{profile} not described in identity file"
        # config files head non-default profiles as "[profile name]"
        wanted = (profile, f"profile {profile}")
        matches = (i for i, n in enumerate(section_names) if n in wanted)
    else:
        err = "Identity file empty or malformatted"
        matches = (i for i, n in enumerate(section_names) if n is not None)
    lineno = next(matches, None)
    if lineno is None:
        raise OSError(err)
    parsed = {}
    for line in lines[lineno + 1 :]:
        stripped = line.strip()
        if stripped.startswith("["):
            # next section begins; we are done with the requested one
            break
        if stripped.startswith(("#", ";")):
            continue
        # split on the first '=' only, so values that themselves contain
        # '=' (e.g. base64 padding in tokens) are kept intact
        parameter, separator, value = stripped.partition("=")
        if not separator:
            continue
        parsed[parameter.strip()] = value.strip()
    return parsed

tagfilter(description, filters, regex=True)

Simple predicate function that permits resource matching based on Arrays of Tags in API responses related to that resource. See https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_Tag.html.

Parameters:

Name Type Description Default
description dict[str, Any]

dict produced from an AWS API response.

required
filters dict[str, str]

dict of {tag_name: tag_value}. All tag names must be present, and their values must match the associated tag_values.

required
regex bool

If True, treat the values of filters as regular expressions. If False, treat them as required substrings of tag values.

True

Returns:

Type Description
bool

True if all filters pass; False if any fail. Note that a filter will always fail if the API response lacks a Tags/tags array, but also that this function will always return True if filters has length 0.

Source code in hostess/aws/utilities.py
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
def tagfilter(
    description: dict[str, Any], filters: dict[str, str], regex: bool = True
) -> bool:
    """
    Predicate for matching a resource by the array of Tags found in an API
    response that describes it.
    See https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_Tag.html.

    Tag names are compared case-insensitively: the response's tags are
    converted with `tag_dict(..., lower=True)` and each filter name is
    lowercased before lookup.

    Args:
        description: dict produced from an AWS API response.
        filters: dict of {tag_name: tag_value}. All tag names must be present,
            and their values must match the associated tag_values.
        regex: unless this is exactly False, each filter value is used as a
            pattern for `re.search` against the tag value. If False, it
            must simply be contained in the tag value.

    Returns:
        True if all filters pass; False if any fail. Note that a filter will
            always fail if the API response lacks a Tags/tags array, but also
            that this function will always return True if filters has length 0.
    """
    for tag_field in ("Tags", "tags"):
        if tag_field in description:
            tags = tag_dict(description.get(tag_field), lower=True)
            break
    else:
        # no tag array at all: only an empty filter set can pass
        return len(filters) == 0
    # noinspection PyArgumentList
    matcher = re.search if regex is not False else flip(contains)
    return all(
        name.lower() in tags.keys() and matcher(wanted, tags[name.lower()])
        for name, wanted in filters.items()
    )

whoami(client=None, session=None)

Make an STS GetCallerIdentity request and return just the essentials from the response.

Note that every AWS account is allowed to make this request: attempts to deny access to the sts:GetCallerIdentity action don't do anything. That means that if other AWS operations are failing, you can call this function to rule out a couple of very basic failure modes. If it fails, it means that either your credentials for the account are invalid or you can't connect to AWS at all (your network is down or blocking access to the API endpoint).

Parameters:

Name Type Description Default
client Optional[BaseClient]

optional STS client.

None
session Optional[Session]

optional boto Session.

None

Returns:

Type Description
dict[str, str]

dict with keys 'user_id', 'account', and 'arn'.

Source code in hostess/aws/utilities.py
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
def whoami(
    client: Optional[botocore.client.BaseClient] = None,
    session: Optional[boto3.Session] = None,
) -> dict[str, str]:
    """
    Issue an STS GetCallerIdentity request and boil the response down to
    its essentials.

    Note that _every_ AWS account is allowed to make this request: attempts to
    deny access to the sts:GetCallerIdentity action don't do anything. That
    makes this a handy first diagnostic when other AWS operations fail: if
    this call fails too, either your credentials for the account are
    invalid or you can't connect to AWS at all (your network is down or
    blocking access to the API endpoint).

    Args:
        client: optional STS client.
        session: optional boto Session.

    Returns:
        `dict` with keys 'user_id', 'account', and 'arn'.
    """
    identity = init_client("sts", client, session).get_caller_identity()
    # output name -> key in the API response
    field_map = {"user_id": "UserId", "account": "Account", "arn": "Arn"}
    return {name: identity[key] for name, key in field_map.items()}

caller

ad-hoc RPC functionality

CallerCompressionType = Literal['gzip', None] module-attribute

code for payload compression method

CallerSerializationType = Literal['json', 'pickle', None] module-attribute

code for payload serialization method

CallerUnpackingOperator = Literal['', '*', '**'] module-attribute

string representation of unpacking operator, if any, used to insert reconstructed payload into called function

_check_mode(serialization, compression)

should we think of our output as in text or binary mode?

Source code in hostess/caller.py
208
209
210
211
212
213
214
def _check_mode(
    serialization: CallerSerializationType, compression: CallerCompressionType
) -> Literal["text", "binary"]:
    """
    Decide whether a payload should be thought of as text or binary.

    Output is "text" only when it is uncompressed and either unserialized
    or JSON-serialized; every other combination is "binary".
    """
    is_text = (compression is None) and serialization in (None, "json")
    return "text" if is_text else "binary"

_check_reconstructable(typeobj, serialization, compression)

Raise an error if we are attempting to transfer a compressed, unserialized in-memory object with no stable binary representation, which is, for our purposes, anything but a string.

Parameters:

Name Type Description Default
typeobj type

type of payload object

required
serialization CallerSerializationType

name of serialization method used

required
compression CallerCompressionType

name of compression method used

required
Source code in hostess/caller.py
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
def _check_reconstructable(
    typeobj: type,
    serialization: CallerSerializationType,
    compression: CallerCompressionType,
):
    """
    Refuse to transfer a compressed but unserialized in-memory object that
    has no stable binary representation -- which, for our purposes, means
    anything that is not a string.

    Args:
        typeobj: type of payload object
        serialization: name of serialization method used
        compression: name of compression method used

    Raises:
        ValueError: if `typeobj` is not `str`, compression is requested,
            and no serialization is requested.
    """
    # any one of these makes the payload safe to reconstruct
    if typeobj is str or compression is None or serialization is not None:
        return
    raise ValueError(
        "non-string compressed objects will not reconstruct correctly "
        "unless serialized. try compress='gzip', serialization='json' "
        "or serialize='pickle'"
    )

encode_payload(obj, serialization, compression, b64)

encode the 'payload' of a remote procedure call.

Parameters:

Name Type Description Default
obj Any

object to encode

required
serialization CallerSerializationType

serialization method for obj

required
compression CallerCompressionType

how to compress the serialized object (None means uncompressed)

required
b64 bool

base64-encode 'binary' objects?

required

Returns:

Type Description
Union[str, bytes]

string or bytes containing encoded payload.

Source code in hostess/caller.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def encode_payload(
    obj: Any,
    serialization: CallerSerializationType,
    compression: CallerCompressionType,
    b64: bool
) -> Union[str, bytes]:
    """
    Encode the 'payload' of a remote procedure call.

    The object is serialized first. If the serialization/compression pair
    counts as "text" mode (see `_check_mode()`), the serialized text is
    returned right away, with JSON wrapped in triple double quotes.
    Otherwise it is converted to bytes, optionally gzip-compressed, and
    optionally base64-encoded.

    Args:
        obj: object to encode
        serialization: serialization method for obj (None means use the
            object's `__repr__()`)
        compression: how to compress the serialized object (None means
            uncompressed)
        b64: base64-encode 'binary' objects?

    Returns:
        string or bytes containing encoded payload.

    Raises:
        NotImplementedError: for an unrecognized serialization or
            compression method.
    """
    # stage 1: serialize
    if serialization == "json":
        import json

        encoded = json.dumps(obj)
    elif serialization == "pickle":
        import pickle

        encoded = pickle.dumps(obj)
    elif serialization is None:
        encoded = obj.__repr__()
    else:
        raise NotImplementedError
    # stage 2: text-mode payloads need no further processing
    text_mode = _check_mode(serialization, compression) == "text"
    if text_mode and serialization == "json":
        # NOTE(review): JSON containing backslash escapes or a run of three
        # double quotes may not survive being read back as a triple-quoted
        # literal -- confirm against how the generated script is built.
        return f'"""{encoded}"""'
    if text_mode:
        return encoded
    # stage 3: binary pipeline (bytes -> compress -> base64)
    if isinstance(encoded, str):
        encoded = encoded.encode("ascii")
    if compression == "gzip":
        import gzip

        encoded = gzip.compress(encoded)
    elif compression is not None:
        raise NotImplementedError
    if b64 is not False:
        import base64

        encoded = base64.b64encode(encoded)
    return encoded

format_decompressor(serialized, serialization, compression, b64)

create decompression section of RPC script.

Parameters:

Name Type Description Default
serialized Union[str, bytes]

serialized payload

required
serialization CallerSerializationType

name of serialization method used

required
compression CallerCompressionType

name of compression method used

required
b64 bool

are we expecting a base64-encoded payload?

required

Returns:

Type Description
str

decompression source code block

Source code in hostess/caller.py
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
def format_decompressor(
    serialized: Union[str, bytes],
    serialization: CallerSerializationType,
    compression: CallerCompressionType,
    b64: bool
) -> str:
    """
    Generate the decompression section of an RPC script.

    The generated source always assigns to a variable named `payload`.

    Args:
        serialized: serialized payload, interpolated into the generated
            source as-is
        serialization: name of serialization method used
        compression: name of compression method used
        b64: are we expecting a base64-encoded payload?

    Returns:
        decompression source code block

    Raises:
        NotImplementedError: if the payload is binary and `compression` is
            neither None nor "gzip".
    """
    assignment = f"payload = {serialized}\n"
    if _check_mode(serialization, compression) == "text":
        return assignment
    if b64 is True:
        assignment = (
            "import base64\n"
            f"payload = base64.b64decode({serialized})\n"
        )
    if compression is None:
        return assignment
    if compression != "gzip":
        raise NotImplementedError(
            "only gzip compression is currently supported"
        )
    return assignment + "import gzip\npayload = gzip.decompress(payload)\n"

format_deserializer(serialization)

create deserialization section of RPC script.

Parameters:

Name Type Description Default
serialization CallerSerializationType

serialization method used

required

Returns:

Type Description
str

deserialization source code block

Source code in hostess/caller.py
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
def format_deserializer(serialization: CallerSerializationType) -> str:
    """
    Generate the deserialization section of an RPC script.

    Args:
        serialization: serialization method used

    Returns:
        deserialization source code block: empty when `serialization` is
            None, otherwise an import of the relevant module followed by a
            `loads()` call that rebinds `payload`.

    Raises:
        NotImplementedError: if `serialization` is not None, 'json', or
            'pickle'.
    """
    if serialization is None:
        return ""
    # both supported serializers expose the same `loads` entry point
    for module_name in ("json", "pickle"):
        if serialization == module_name:
            return (
                f"import {module_name}\n"
                f"payload = {module_name}.loads(payload)\n"
            )
    raise NotImplementedError("Unknown serializer. use 'json' or 'pickle'")

format_importer(module, func)

formatting function for import section of RPC script.

Parameters:

Name Type Description Default
module Optional[str]

name of or path to module

required
func str

name of function in module

required

Returns:

Type Description
str

import source code block

Source code in hostess/caller.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
def format_importer(module: Optional[str], func: str) -> str:
    """
    Generate the import section of an RPC script.

    Args:
        module: name of or path to module. A value ending in ".py" is
            treated as a file path and loaded via `importlib.util`; any
            other value is imported by name. None means `func` is used
            directly as the target expression.
        func: name of function in module

    Returns:
        import source code block
    """
    if module is None:
        return f"target = {func}\n"
    if module.endswith(".py"):
        stem = Path(module).stem
        statements = [
            "import importlib.util",
            "import sys",
            "spec = importlib.util.spec_from_file_location(",
            f'    "{stem}", "{module}"',
            ")",
            "module = importlib.util.module_from_spec(spec)",
            f'sys.modules["{stem}"] = module',
            "spec.loader.exec_module(module)",
        ]
    else:
        statements = [f"import {module}", f"module = {module}"]
    if func is not None:
        statements.append(f'target = getattr(module, "{func}")')
    return "\n".join(statements) + "\n"

format_kwarg_filter(filter_kwargs, splat)

generate kwarg filter section of RPC script, if necessary and requested

Parameters:

Name Type Description Default
filter_kwargs bool

should we filter unwanted kwargs or not?

required
splat CallerUnpackingOperator

unpacking operator we're using. if it's not '**', never generate this block -- it's unnecessary.

required

Returns:

Type Description
str

kwarg-filtering source code block.

Source code in hostess/caller.py
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
def format_kwarg_filter(
    filter_kwargs: bool, splat: CallerUnpackingOperator
) -> str:
    """
    build the kwarg-filtering section of an RPC script, when applicable.

    Args:
        filter_kwargs: whether unwanted kwargs should be filtered at all.
        splat: unpacking operator used for the call. filtering code is only
            generated for '**'; any other operator yields an empty block.

    Returns:
        source code block that drops every key of `payload` that is not
            among the positional or keyword-only argument names of
            `target`, or "" if no filtering applies.
    """
    filtering_applies = (filter_kwargs is True) and (splat == "**")
    if not filtering_applies:
        return ""
    return (
        "from inspect import getfullargspec\n"
        "spec = getfullargspec(target)\n"
        "payload = {\n"
        "    k: v for k, v in payload.items() \n"
        "    if k in spec.args + spec.kwonlyargs\n"
        "}\n"
    )

format_returner(return_result, return_compression, return_serialization, b64, sep)

format return section of RPC script.

Source code in hostess/caller.py
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
def format_returner(
    return_result: bool,
    return_compression: CallerCompressionType,
    return_serialization: CallerSerializationType,
    b64: bool,
    sep: Optional[str]
) -> str:
    """
    format return section of RPC script.

    Args:
        return_result: if False, generate no return section at all.
        return_compression: 'gzip' to make the generated script
            gzip-compress the return value; None for no compression.
        return_serialization: 'json' or 'pickle' to make the generated
            script serialize `result`; None to print `result` as-is.
        b64: currently unused by this function.
        sep: if not None, the generated script prints this string on its
            own line immediately before printing the return value.

    Returns:
        source code block that prints the (optionally serialized and
            compressed) value of `result`, or "" if `return_result` is
            False.

    Raises:
        NotImplementedError: if `return_serialization` or
            `return_compression` is not a supported value.
    """
    if return_result is False:
        return ""
    if return_serialization is None:
        returnval = "\nreturnval = result\n"
    elif return_serialization == "json":
        returnval = "\nimport json\nreturnval = json.dumps(result)\n"
    elif return_serialization == "pickle":
        returnval = "\nimport pickle\nreturnval = pickle.dumps(result)\n"
    else:
        raise NotImplementedError("Unknown serializer. use 'json' or 'pickle'")
    if return_compression == "gzip":
        returnval += "\nimport gzip\n"
        if return_serialization == "json":
            # json.dumps() produces str; gzip needs bytes
            returnval += (
                "\nreturnval = gzip.compress(returnval.encode('ascii'))\n"
            )
        else:
            returnval += "\nreturnval = gzip.compress(returnval)\n"
    elif return_compression is not None:
        raise NotImplementedError("Unsupported compression.")
    if sep is not None:
        # embed the separator as a quoted literal. interpolating it bare
        # would make the generated script evaluate it as an expression,
        # which fails for ordinary separator strings.
        returnval += f"print({sep!r})\n"
    return returnval + "print(returnval)"

generic_python_endpoint(module, func=None, payload=None, *, compression=None, serialization=None, splat='', payload_encoded=False, return_result=True, filter_kwargs=False, interpreter=None, for_bash=True, literal_none=False, return_serialization=None, return_compression=None, b64=True, sep=None)

dynamically construct a Python source code snippet that imports a module and calls a function from it with a given 'payload' (effectively, an argument or arguments, possibly in serialized and/or compressed form). by default, wrap it in a shell script that executes the snippet from bash. this can be used to perform remote procedure calls, inject code into existing applications, etc.

Parameters:

Name Type Description Default
module str

name of, or path to, the target module

required
func Optional[str]

name of the function to call. must be a member of the target module (or explicitly imported by that module). If not specified, the generated code simply imports module (which is sometimes enough, depending on what module does when imported). If func is None, only the import block is returned and the remaining arguments do not alter it; passing a payload without func raises a ValueError.

None
payload Any

object from which to construct func's call arguments. In many cases, this can simply be a Python object or objects you'd like to pass to func. If the payload is not well-defined by its string representation, an appropriate serialization must be specified for the call to work. For instance, [1, 2, 3] is a fine payload without serialization; np.random.poisson(5, (100, 100)) is not.

None
compression CallerCompressionType

how to compress the payload. 'gzip' or None. 'gzip' is good for jamming larger payloads into a shell command without breaking the shell.

None
serialization CallerSerializationType

how to serialize payload. 'json' means serialize to JSON; 'pickle' means serialize using pickle; None means just use the string representation of payload. None is only suitable for objects that can be reconstructed from their string representations.

None
splat CallerUnpackingOperator

Operator for splatting payload into the function call. Allows you to use payload as multiple arguments or keyword arguments. "*" means func(*payload), "**" means func(**payload); '' means func(payload).

''
payload_encoded bool

set to True if you have already serialized and/or compressed the payload using the specified methods, so the generated script should decode it, but this function should not re-encode it.

False
return_result bool

if True, the generated script also prints the return value of the called function to stdout, with serialization and compression specified by return_serialization and return_compression.

True
filter_kwargs bool

if True, the generated script will attempt to filter func-inappropriate kwargs from the payload. Not guaranteed to work on functions with complex signatures. Does nothing if splat != '**'.

False
interpreter Optional[str]

path to Python interpreter that should be specified in the shell command. can either be a fully-qualified path or any name you expect to be on the calling user's $PATH -- e.g., if you expect there to be a system Python and want to use it, just 'python'. If None, assume the path to the desired interpreter is the same as the path to the interpreter that is running this function. Does nothing if for_bash is False.

None
for_bash bool

if True (the default), return a bash command that runs the Python script in the specified interpreter. otherwise, simply return the generated script.

True
literal_none bool

if False (the default), interpret payload=None as meaning "run module.func()". otherwise, interpret it as meaning "run module.func(None)".

False
return_serialization CallerSerializationType

serialization for return value. does nothing if return_result is False.

None
return_compression CallerCompressionType

compression for return value. does nothing if return_result is False.

None
b64 bool

if True, base64-encode any 'binary' values. For insertion into bash or transmission over HTTP.

True
sep Optional[str]

if not None, generated script prints this string prior to printing the return value, to facilitate parsing return values from scripts that might generate other output while running.

None

Returns: Bash command that executes function call in specified interpreter, or, if for_bash is False, just Python source code for function call.

Source code in hostess/caller.py
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
def generic_python_endpoint(
    module: str,
    func: Optional[str] = None,
    payload: Any = None,
    *,
    compression: CallerCompressionType = None,
    serialization: CallerSerializationType = None,
    splat: CallerUnpackingOperator = "",
    payload_encoded: bool = False,
    return_result: bool = True,
    filter_kwargs: bool = False,
    interpreter: Optional[str] = None,
    for_bash: bool = True,
    literal_none: bool = False,
    return_serialization: CallerSerializationType = None,
    return_compression: CallerCompressionType = None,
    b64: bool = True,
    sep: Optional[str] = None
) -> str:
    """
    assemble Python source that imports a module and calls one of its
    functions on a 'payload' (an argument or arguments, possibly serialized
    and/or compressed). by default the source is wrapped in a shell command
    that feeds it to an interpreter through a heredoc. useful for remote
    procedure calls, injecting code into existing applications, etc.

    Args:
        module: name of, or path to, the target module.
        func: name of the function to call; the generated code fetches it
            as an attribute of the target module. if None, only the import
            block is returned (importing a module is sometimes enough,
            depending on what it does on import).
        payload: object from which to construct `func`'s call arguments.
            often simply the Python object(s) to pass to `func`. if the
            payload is not well-defined by its string representation, an
            appropriate `serialization` must be given: `[1, 2, 3]` is fine
            without serialization; `np.random.poisson(5, (100, 100))` is
            not.
        compression: how to compress the payload: 'gzip' or None. 'gzip'
            helps fit larger payloads into a shell command.
        serialization: how to serialize `payload`: 'json', 'pickle', or
            None. None means "use the string representation", and only
            suits objects that can be rebuilt from that representation.
        splat: operator for unpacking `payload` into the call. `"*"` means
            `func(*payload)`, `"**"` means `func(**payload)`, and `''`
            means `func(payload)`.
        payload_encoded: True if `payload` has already been serialized
            and/or compressed with the specified methods; the generated
            script still decodes it, but this function does not encode it
            again.
        return_result: if True, the generated script also prints the
            called function's return value, encoded as specified by
            `return_serialization` and `return_compression`.
        filter_kwargs: if True, the generated script tries to drop kwargs
            from the payload that `func` does not accept. not guaranteed to
            work on functions with complex signatures. does nothing if
            `splat != '**'`.
        interpreter: Python interpreter to name in the shell command;
            either a full path or a name expected to be on the calling
            user's $PATH (e.g. 'python'). if None, the interpreter running
            this function is used. does nothing if `for_bash` is False.
        for_bash: if True (the default), return a shell command that runs
            the script in `interpreter`; otherwise return just the script.
        literal_none: if False (the default), `payload=None` means "no
            payload"; otherwise None is itself treated as the payload.
        return_serialization: serialization for the return value. does
            nothing if `return_result` is False.
        return_compression: compression for the return value. does nothing
            if `return_result` is False.
        b64: if True, base64-encode any 'binary' values, for insertion into
            `bash` or transmission over HTTP.
        sep: if not None, passed to the return section so that the
            generated script emits a separator before the return value,
            which helps parse return values out of scripts that produce
            other output while running.

    Returns:
        shell command that executes the function call in the specified
        interpreter or, if `for_bash` is False, just the Python source for
        the call. if `func` is None, the bare import block.

    Raises:
        ValueError: if a payload is given (or `literal_none` is True) while
            `func` is None.
    """
    payload_requested = (payload is not None) or (literal_none is True)
    if payload_requested and (func is None):
        raise ValueError("Must pass a function name to pass a payload.")
    no_payload = payload is None and literal_none is False
    _check_reconstructable(type(payload), serialization, compression)
    import_ = format_importer(module, func)
    if func is None:
        # nothing to call: the import block is the whole result
        return import_
    if no_payload is True:
        encoded = ""
    elif payload_encoded is True:
        # caller did the encoding; embed its representation verbatim
        encoded = repr(payload)
    else:
        encoded = encode_payload(payload, serialization, compression, b64)
    # script sections, in execution order
    sections = (
        import_,
        format_decompressor(encoded, serialization, compression, b64),
        format_deserializer(serialization),
        format_kwarg_filter(filter_kwargs, splat),
        f"result = target({splat}payload)\n",
        format_returner(
            return_result, return_compression, return_serialization, b64, sep
        ),
    )
    # indent everything one level so it can sit under the __main__ guard
    indented = "\n".join(
        f"    {line}" for line in "".join(sections).splitlines()
    )
    script = f'if __name__ == "__main__":\n{indented}'
    if for_bash is not True:
        return script
    executable = sys.executable if interpreter is None else interpreter
    return f"{executable} <<{to_heredoc(script)}"

make_python_endpoint_factory(module, func=None, **endpoint_kwargs)

factory function for endpoint factory functions. use this to create callables that generate shell scripts that call either a specific Python function or Python functions from a specific named module, using specific application-correct configurations.

Parameters:

Name Type Description Default
module str

name of, or path to, module to use as quasi-namespace of endpoint factory.

required
func Optional[str]

optional name of function from module. If this is None, the returned function can be used to call any function from module.

None
endpoint_kwargs Union[bool, str, CallerCompressionType, CallerSerializationType, CallerUnpackingOperator]

kwargs to partially evaluate / bind to the endpoint factory. see generic_python_endpoint() for a full description of options.

{}

Returns:

Type Description
Union[Callable[[str, Any], str], Callable[[Any], str]]

a function that, when called, produces shell scripts. If the func argument was None, this function's call signature is (function_name: str, payload: Any). If it was not None, this function's call signature is (payload: Any); it always generates scripts that call module.func.

Source code in hostess/caller.py
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
def make_python_endpoint_factory(
    module: str,
    func: Optional[str] = None,
    **endpoint_kwargs: Union[
        bool,
        str,
        CallerCompressionType,
        CallerSerializationType,
        CallerUnpackingOperator,
    ],
) -> Union[Callable[[str, Any], str], Callable[[Any], str]]:
    """
    build a callable that generates endpoint scripts for one module (and
    optionally one function of it) with a fixed set of options, by
    delegating to generic_python_endpoint().

    Args:
        module: name of, or path to, the module the factory is bound to.
        func: optional name of a function from `module`. if None, the
            returned callable takes the function name as its first
            argument.
        endpoint_kwargs: options to bind to every call; see
            generic_python_endpoint() for the available options. 'payload'
            may not be bound.

    Returns:
        a function that produces endpoint scripts. if `func` was None, its
            signature is (function_name, payload); otherwise its signature
            is (payload) and it always targets `module.func`.

    Raises:
        ValueError: if `endpoint_kwargs` contains 'payload'.
    """
    if "payload" in endpoint_kwargs:
        raise ValueError("cannot bind a payload to the endpoint factory")

    if func is None:
        # module-level factory: the function is chosen at call time
        def endpoint_factory(function_name, payload):
            return generic_python_endpoint(
                module, function_name, payload=payload, **endpoint_kwargs
            )

        return endpoint_factory

    # function-level factory: only the payload varies between calls
    def endpoint_factory(payload):
        return generic_python_endpoint(
            module, func=func, payload=payload, **endpoint_kwargs
        )

    return endpoint_factory

to_heredoc(heredoc_content, addition='', identifier='__BOUNDARYTAG__')

create a bash heredoc statement.

Parameters:

Name Type Description Default
heredoc_content str

content of the heredoc.

required
addition str

optional additional statement between heredoc identifier and body

''
identifier str

heredoc delimiting identifier.

'__BOUNDARYTAG__'

Returns:

Type Description
str

bash heredoc statement.

Source code in hostess/caller.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
def to_heredoc(
    heredoc_content: str,
    addition: str = "",
    identifier: str = "__BOUNDARYTAG__",
) -> str:
    """
    build the text of a bash heredoc: the delimiter line, the content, and
    the closing delimiter. the leading `<<` is not included.

    Args:
        heredoc_content: content of the heredoc.
        addition: optional extra statement placed on the opening line,
            after the delimiter.
        identifier: heredoc delimiting identifier.

    Returns:
        bash heredoc statement.
    """
    # a space always follows the identifier, even when `addition` is empty
    opening = f"{identifier} {addition}\n"
    body = f"{heredoc_content}\n"
    closing = f"{identifier}\n"
    return opening + body + closing

config

config.config

desired settings should be placed in hostess/user_config/user_config.py.

config.user_config

settings in this module override settings in config.py, and may also be used to define settings that do not exist in config.py, none of which are currently imagined.

directory

utilities for indexing and summarizing filesystems

LSFrame = pd.DataFrame module-attribute

DataFrame suitable for use by several functions in this module. typically produced by calling the DataFrame constructor on a list of LSRecords.

LSRecord = dict[str, Union[str, float, bool, dt.datetime]] module-attribute

a record containing identifying information about a file. produced by lsdashl and used by other functions in this module. has keys:

  • "path": str (string version of relative path)
  • "size": float (file size in MB, rounded to 3 places)
  • "excluded": bool (placeholder for exclusions, always False)
  • "directory": bool (is it a directory?)
  • "suffix": str (last filename suffix)
  • "ATIME", "CTIME", "MTIME": datetime (UNIX file times)

TreeFrame = pd.DataFrame module-attribute

DataFrame containing hierarchical 'tree' information. Produced by calling make_treeframe on an LSFrame.

_make_levelframe(group, squish)

helper function for make_treeframe()

Source code in hostess/directory.py
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
def _make_levelframe(group: pd.DataFrame, squish: bool) -> pd.DataFrame:
    """helper function for make_treeframe()"""
    levels = {}
    join = group.dropna(axis=1).copy()
    if squish is True:
        levelframe = _squishlevels(join, levels)
    else:
        levelframe = join.rename(columns={join.columns[-2]: "filename"})
    try:
        # TODO: probably superfluous
        levelframe["suffix"] = levelframe["filename"].str.split(
            ".", expand=True, n=1
        )[1]
    except KeyError:
        levelframe["suffix"] = ""
    levelframe["size"] = group["size"]
    return levelframe

_parse_fileinfo(magic_viewer)

parses stdout from the POSIX file utility.

Source code in hostess/directory.py
140
141
142
143
144
145
146
147
148
149
150
def _parse_fileinfo(magic_viewer: Viewer) -> list[dict[str, str]]:
    """parses stdout from the POSIX `file` utility."""
    fileinfo = []
    for line in "".join(magic_viewer.out).split("\n"):
        if line == "":
            continue
        fn, result = re.split(":", line, maxsplit=1)
        fileinfo.append(
            {"path": fn.strip(), "info": result.strip().replace(",", ";")}
        )
    return fileinfo

_squishlevels(join, levels)

helper function for _make_levelframe()

Source code in hostess/directory.py
185
186
187
188
189
190
191
192
193
def _squishlevels(join: pd.DataFrame, levels: dict) -> pd.DataFrame:
    """helper function for _make_levelframe()"""
    for ix in range(0, len(join.columns) - 2):
        join.iloc[:, ix] += "/"
    for ix in range(1, len(join.columns) - 1):
        levels[ix] = join.iloc[:, :ix].sum(axis=1)
    levelframe = pd.DataFrame(levels)
    levelframe["filename"] = levelframe.iloc[:, -2]
    return levelframe

check_inclusion(record, skip_directories=())

simple prefiltering function. sets an LSRecord's "excluded" value to True if the record represents a directory and its path matches (from the start of the string, as with re.match) any of the regex patterns in skip_directories.

Parameters:

Name Type Description Default
record LSRecord

record to prefilter

required
skip_directories Collection[Union[str, Pattern]]

list of regex expressions that define exclusions

()
Source code in hostess/directory.py
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def check_inclusion(
    record: LSRecord, skip_directories: Collection[Union[str, re.Pattern]] = ()
) -> LSRecord:
    """
    simple prefiltering function. marks a record as excluded when it
    represents a directory and its "path" matches, via `re.match`, any of
    the patterns in `skip_directories`.

    Args:
        record: record to prefilter; modified in place.
        skip_directories: regex patterns (strings or compiled) that define
            exclusions.

    Returns:
        the same `record` object that was passed in.
    """
    path = record["path"]
    if record["directory"] is not True:
        # only directories can be excluded
        return record
    if any(re.match(pattern, string=path) for pattern in skip_directories):
        record["excluded"] = True
    return record

do_magic(manifest, log=zero)

Adds 'magic' info from the POSIX file utility to a DataFrame of file / directory listings.

Parameters:

Name Type Description Default
manifest LSFrame

dataframe of file information, probably produced via pd.DataFrame(index_breadth_first(something))

required
log Callable[[str], Any]

logger function (by default just throws log info away)

zero

Returns:

Type Description
LSFrame

manifest with an 'info' column added in-place.

Source code in hostess/directory.py
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
def do_magic(manifest: LSFrame, log: Callable[[str], Any] = zero) -> LSFrame:
    """
    Adds 'magic' info from the POSIX `file` utility to a DataFrame of
    file / directory listings.

    Args:
        manifest: dataframe of file information, probably produced via
            `pd.DataFrame(index_breadth_first(something))`
        log: logger function (by default just throws log info away)

    Returns:
        `manifest` with an 'info' column added in-place. rows for which
            `file` reported nothing (including all directories) hold None.
    """
    files = manifest.loc[manifest["directory"] == False]["path"].to_list()
    log(f"performing magic on {len(files)} files")
    # one `file` invocation per chunk of up to 100 paths
    viewers = [
        Viewer.from_command("file", *chunk) for chunk in chunked(files, 100)
    ]
    # poll until every invocation has finished
    while any(not v.done for v in viewers):
        time.sleep(0.05)
    infoframe = pd.DataFrame(
        list(chain.from_iterable(map(_parse_fileinfo, viewers)))
    )
    manifest["info"] = None
    if infoframe.empty:
        # no files, or no parseable output: an empty frame has no "path"
        # column, so the lookups below would raise KeyError
        return manifest
    infoframe.index = infoframe["path"]
    # only fill rows whose path actually appears in the `file` output
    available = manifest["path"].loc[manifest["path"].isin(infoframe.index)]
    manifest.loc[available.index, "info"] = (
        infoframe["info"].loc[available].to_numpy()
    )
    return manifest

index_breadth_first(root)

Recursively index all directories under root.

Parameters:

Name Type Description Default
root Union[str, Path]

top directory of index

required

Returns:

Type Description
list[LSRecord]

list of LSRecords describing contents of all directories under and including root.

Source code in hostess/directory.py
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
def index_breadth_first(
    root: Union[str, Path],
    skip_directories: Collection[Union[str, re.Pattern]] = (),
) -> list[LSRecord]:
    """
    Recursively index all directories under `root`, breadth-first.

    Args:
        root: top directory of index
        skip_directories: regex patterns passed to `check_inclusion()`.
            directories it marks as excluded are still listed, but are
            not descended into. by default, nothing is excluded.

    Returns:
        list of LSRecords describing contents of all directories under and
            including `root`. directories whose listing raises
            PermissionError are skipped.
    """
    discoveries = []
    search_targets = deque([root])
    while search_targets:
        # take from the left so directories are visited in the order they
        # were discovered (FIFO); pop() from the right would make this a
        # depth-first traversal
        target = search_targets.popleft()
        try:
            contents = [
                check_inclusion(record, skip_directories)
                for record in lsdashl(target)
            ]
        except PermissionError:
            continue
        discoveries += contents
        search_targets.extend(
            record["path"]
            for record in contents
            if (record["directory"] is True) and (record["excluded"] is False)
        )
    return discoveries

lsdashl(directory, include_directories=True)

a kind of ls -l. returns a list of records containing identifying information about the contents of directory.

Parameters:

Name Type Description Default
directory Union[str, Path]

directory to list

required
include_directories bool

include or omit subdirectories

True
Source code in hostess/directory.py
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def lsdashl(
    directory: Union[str, Path], include_directories: bool = True
) -> list[LSRecord]:
    """
    a kind of `ls -l`. lists the immediate contents of `directory` as
    records of identifying information.

    Args:
        directory: directory to list
        include_directories: include or omit subdirectories

    Returns:
        one record per entry, holding its path, size, an "excluded" flag
            (always False here), whether it is a directory, its suffix,
            and the timestamps returned by `mtimes()`. entries whose
            `stat()` raises FileNotFoundError are omitted.
    """
    records = []
    for entry in Path(directory).iterdir():
        if (include_directories is False) and entry.is_dir():
            continue
        try:
            info = entry.stat()
        except FileNotFoundError:
            # entry could not be stat-ed (no longer resolvable); skip it
            continue
        basics = {
            "path": str(entry),
            "size": mb(info.st_size, 3),
            "excluded": False,
            "directory": entry.is_dir(),
            "suffix": entry.suffix,
        }
        records.append(basics | mtimes(info))
    return records

make_level_table(treeframe)

Make a DataFrame of summary information about directory sizes, file count, etc. from a TreeFrame.

Parameters:

Name Type Description Default
treeframe TreeFrame

dataframe containing path information created using make_treeframe

required
Source code in hostess/directory.py
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
def make_level_table(treeframe: TreeFrame) -> pd.DataFrame:
    """
    Summarize a TreeFrame: for every level column, the total size and the
    number of rows under each distinct value of that level.

    Args:
        treeframe: dataframe containing path information created using
            `make_treeframe`

    Returns:
        concatenation of the per-level summaries, with columns "size"
            (rounded to 2 places), "count", and "level". within each
            level, rows are sorted by descending count.
    """
    non_level_columns = ("filename", "size", "suffix")
    summaries = []
    for column in treeframe.columns:
        if column in non_level_columns:
            continue
        summary = treeframe.pivot_table(
            values="size", index=column, aggfunc=["sum", len]
        )
        summary["level"] = column
        # flatten the column index produced by the two aggregations
        summary.columns = ["size", "count", "level"]
        summaries.append(summary.sort_values(by="count", ascending=False))
    combined = pd.concat(summaries)
    combined["size"] = combined["size"].round(2)
    return combined

make_treeframe(manifest, squish=False)

Takes a DataFrame of file and directory listings and produces a new DataFrame with additional columns representing hierarchical components of the contents' paths.

Parameters:

Name Type Description Default
manifest LSFrame

file/directory DataFrame

required
squish bool

squish levels together in output?

False
Source code in hostess/directory.py
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
def make_treeframe(manifest: LSFrame, squish: bool = False) -> TreeFrame:
    """
    Build a DataFrame whose columns are the "/"-separated components of
    each non-directory path in `manifest`, plus the columns added by
    `_make_levelframe()`.

    Args:
        manifest: file/directory DataFrame
        squish: squish levels together in output?
    """
    is_file = ~manifest["directory"]
    paths = manifest.loc[is_file, "path"].copy()
    parts = paths.str.split("/", expand=True)
    parts["size"] = manifest["size"]
    # rows with the same number of missing cells are processed together
    missing_per_row = parts.isna().sum(axis=1)
    frames = [
        _make_levelframe(group, squish)
        for _, group in parts.groupby(missing_per_row)
    ]
    return pd.concat(frames)

mtimes(stat)

Formats filesystem timestamps into a dictionary of datetimes.

Parameters:

Name Type Description Default
stat stat_result

os.stat_result object (typically from Path.stat or os.stat).

required

Returns:

Type Description
dict[str, datetime]

dictionary whose values are datetimes and whose keys are 'ATIME', 'CTIME', and 'MTIME'.

Source code in hostess/directory.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def mtimes(stat: os.stat_result) -> dict[str, dt.datetime]:
    """
    Collect a stat result's access, change, and modification times into a
    dictionary, converting each with `unix2dt`.

    Args:
        stat: os.stat_result object (typically from `Path.stat` or `os.stat`).

    Returns:
        dictionary with keys 'ATIME', 'CTIME', and 'MTIME', holding the
            converted `st_atime`, `st_ctime`, and `st_mtime` values.
    """
    times = {}
    for letter in ("a", "c", "m"):
        raw_timestamp = getattr(stat, f"st_{letter}time")
        times[f"{letter.upper()}TIME"] = unix2dt(raw_timestamp)
    return times

monitors

tracking, logging, and synchronization objects

DEFAULT_TICKER = Ticker() module-attribute

convenient shared Ticker

AbstractMonitor

Bases: ABC

base monitor class

Source code in hostess/monitors.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
class AbstractMonitor(ABC):
    """
    base monitor class.

    Polls `instrument` and tracks its (formatted) output in five registers:

    * `first`: reading taken when the monitor was (re)started
    * `last`: reading taken at the most recent lap (or at start)
    * `absolute`: most recent reading
    * `interval`: `absolute` minus `last`
    * `total`: `absolute` minus `first`, or, if `cumulative` is True, the
      running sum of `interval` over every update

    If `instrument` returns a tuple or Mapping, every register is a dict
    keyed by the keys of `qualities`; otherwise every register is a scalar.
    """

    def __init__(
        self,
        *,
        digits: Optional[int] = None,
        qualities: Optional[Mapping[str, str]] = None,
        instrument: Callable[[], Union[float, int, Mapping]] = constant(0),
        formatter: Callable[[float], float] = identity,
        name: Optional[str] = None,
    ):
        """

        Args:
            digits: number of digits to round output to. if None, don't round.
            qualities: dictionary of subtypes of monitored quantity. keys are
                display names; values are the keys used to look each quality
                up in the instrument's reading. a list or tuple of names is
                also accepted and maps each name to itself. if None,
                the Monitor only measures one thing.
            instrument: function used to perform monitoring.
            formatter: function used to format `instrument`'s output.
            name: default name of the monitor.
        """
        if isinstance(qualities, (list, tuple)):
            qualities = {i: i for i in qualities}
        self.digits = digits
        self.interval: Union[int, dict] = 0
        self.last: Union[int, dict] = 0
        self.absolute: Union[int, dict] = 0
        self.total: Union[int, dict] = 0
        self.first: Union[int, dict] = 0
        self.lap = 0
        self.qualities = qualities
        self.instrument = instrument
        self.formatter = formatter
        self.started = False
        self.paused = False
        self.unitstring = f" {self.units}" if len(self.units) > 0 else ""
        self.name = self.__class__.__name__ if name is None else name

    def _round(self, val: Union[float, Mapping[str, float]]):
        """round a measurement if set to do so."""
        if self.digits is None:
            return val
        if isinstance(val, Mapping):
            return {k: round(v, self.digits) for k, v in val.items()}
        return round(val, self.digits)

    def _unpack_reading(
        self, reading: Union[tuple, Mapping]
    ) -> dict[Union[str, int], float]:
        """
        unpack an instrument reading. for monitors with multiple qualities.

        Args:
            reading: a named tuple, a plain tuple, or a Mapping containing
                measurements for various qualities.

        Returns:
            formatted dictionary of readings for each quality. keyed by
                field name for named tuples, by position for plain tuples,
                and by the Mapping's own keys otherwise.
        """
        if isinstance(reading, tuple):
            if hasattr(reading, "_asdict"):
                # noinspection PyProtectedMember,PyUnresolvedReferences
                reading = reading._asdict()
            else:
                # plain tuples have no field names; key them by position so
                # that `qualities` may refer to integer indices
                reading = dict(enumerate(reading))
        return {k: self.formatter(v) for k, v in reading.items()}

    def _update_plural(self, reading: Union[tuple, Mapping], lap: bool):
        """
        update registers for each quality from a reading.

        Args:
            reading: instrument output
            lap: record this as a 'lap/split' (i.e., reset interval)?
        """
        self.absolute = {k: reading[v] for k, v in self.qualities.items()}
        if len(self.interval) == 0:
            # first update after (re)start: nothing has elapsed yet
            self.interval = {t: 0 for t in self.qualities}
            self.total = {t: 0 for t in self.qualities}
        else:
            self.interval = {
                k: self.absolute[k] - self.last[k] for k in self.qualities
            }
            if self.cumulative is True:
                self.total = {
                    k: self.interval[k] + self.total[k] for k in self.qualities
                }
            else:
                self.total = {
                    k: self.absolute[k] - self.first[k] for k in self.qualities
                }
        if lap is True:
            self.last = self.absolute

    def _update_single(self, reading: float, lap: bool):
        """
        update registers from a reading.

        Args:
            reading: instrument output
            lap: record this as a 'lap/split' (i.e., reset interval)?
        """
        self.absolute = reading
        self.interval = self.absolute - self.last
        if self.cumulative is True:
            self.total = self.total + self.interval
        else:
            self.total = self.absolute - self.first
        if lap is True:
            self.last = self.absolute

    def update(self, lap: bool = False):
        """
        update the monitor, starting it if necessary. ignored if monitor is
        paused.

        Args:
            lap: record this update as a 'lap/split' (i.e., reset interval)?
        """
        if self.paused is True:
            return
        if self.started is False:
            self.start()
        reading = self.instrument()
        if isinstance(reading, (tuple, Mapping)):
            reading = self._unpack_reading(reading)
            return self._update_plural(reading, lap)
        self._update_single(self.formatter(reading), lap)

    def _display_simple(self, _which):
        """internal formatting function; unsupported unless overridden."""
        raise TypeError(
            f"_display_simple() not supported for {self.__class__.__name__}"
        )

    def _display_single(self, value, which):
        """internal formatting function: rounded value, units, suffix."""
        return f"{self._round(value)}{self.unitstring}{which}"

    def _display_plural(self, register, which):
        """internal formatting function: one ';'-separated entry per quality,
        followed by `which` if it is not empty."""
        values = [
            f"{quality} {self._display_single(register[quality], '')}"
            for quality in self.qualities
        ] + [which]
        return ";".join(filter(None, values))

    def display(
        self,
        which: Optional[str] = None,
        say: bool = False,
        simple: bool = False,
    ) -> str:
        """
        return string displaying the contents of one or all registers.

        Args:
            which: which register to print, or "all" for all. None prints
                register defined in self.default_display
            say: include name of register in output?
            simple: format output tersely?

        Raises:
            AttributeError: if `which` does not name an attribute of this
                monitor.
        """
        which = self.default_display if which is None else which
        if which == "all":
            return "\n".join(
                [self.display(this, say, simple) for this in self.registers]
            )
        if simple is True:
            return self._display_simple(which)
        # no getattr default: an unknown register name is a caller error and
        # should not be rendered as if it were a measurement
        register = getattr(self, which)
        whichprint = f" {which}" if say is True else ""
        if isinstance(register, Mapping):
            return self._display_plural(register, whichprint)
        return self._display_single(register, whichprint)

    def rec(
        self, which: Optional[str] = None
    ) -> Union[float, dict[str, float]]:
        """
        return value of one or all registers in numeric form.

        Args:
            which: name of register. self.default_display by default. "all"
                for all.
        """
        which = self.default_display if which is None else which
        if which == "all":
            return {this: self.rec(this) for this in self.registers}
        val = getattr(self, which)
        return self._round(val)

    def peek(
        self,
        which: Optional[str] = None,
        say: bool = False,
        simple: bool = False,
    ) -> str:
        """
        peek at one or all registers. managed shorthand for self.update()
        followed by self.display().

        Args:
            which: which register (default self.default_click, "all" for all)
            say: include name of register in output?
            simple: format output tersely?
        """
        which = self.default_click if which is None else which
        self.update()
        return self.display(which, say, simple)

    def click(self):
        """shorthand for self.update(True)"""
        self.update(True)

    def clickpeek(
        self,
        which: Optional[str] = None,
        say: bool = False,
        simple: bool = False,
    ) -> str:
        """
        click the lap button and look at the monitor. managed shorthand for
        self.update(True) followed by self.display().

        Args:
            which: register to look at (default default_click, "all" for all)
            say: include name of register in output?
            simple: format output tersely?
        """
        which = self.default_click if which is None else which
        self.click()
        return self.display(which, say, simple)

    def start(self, restart: bool = False):
        """
        start the monitor. unpauses if monitor is paused.

        Args:
            restart: if monitor is already started, restart it, clearing all
                entries?
        """
        self.paused = False
        if (restart is False) and (self.started is True):
            return
        self.started = True
        reading = self.instrument()
        if isinstance(reading, (tuple, Mapping)):
            reading = self._unpack_reading(reading)
        else:
            reading = self.formatter(reading)
        if isinstance(reading, Mapping):
            self.first = {k: reading[v] for k, v in self.qualities.items()}
            self.total, self.interval, self.absolute = {}, {}, {}
        else:
            self.first = reading
            # clear scalar registers too; otherwise a cumulative total
            # would carry over across a restart
            self.total, self.interval, self.absolute = 0, 0, 0
        self.last = self.first
        self.update()

    def pause(self):
        """pause the monitor (after taking one final update)."""
        self.update()
        self.paused = True

    def restart(self):
        """restart the monitor."""
        self.start(restart=True)

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return f"{self.display()}"

    # suffix appended to displayed values (with a leading space) if nonempty
    units = ""
    fake = True
    # if True, `total` is a running sum of intervals rather than
    # `absolute - first`
    cumulative = False
    # register shown by display() / rec() when none is named
    default_display = "total"
    # register shown by peek() / clickpeek() when none is named
    default_click = "interval"
    registers = ("first", "last", "absolute", "interval", "total")

__init__(*, digits=None, qualities=None, instrument=constant(0), formatter=identity, name=None)

Parameters:

Name Type Description Default
digits Optional[int]

number of digits to round output to. if None, don't round.

None
qualities Optional[Mapping[str, str]]

dictionary of subtypes of monitored quantity. if None, the Monitor only measures one thing.

None
instrument Callable[[], Union[float, int, Mapping]]

function used to perform monitoring.

constant(0)
formatter Callable[[float], float]

function used to format instrument's output.

identity
name Optional[str]

default name of the monitor.

None
Source code in hostess/monitors.py
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
def __init__(
    self,
    *,
    digits: Optional[int] = None,
    qualities: Optional[Mapping[str, str]] = None,
    instrument: Callable[[], Union[float, int, Mapping]] = constant(0),
    formatter: Callable[[float], float] = identity,
    name: Optional[str] = None,
):
    """
    Set up measurement configuration and zero every register.

    Args:
        digits: number of digits to round output to. if None, don't round.
        qualities: dictionary of subtypes of monitored quantity. a list or
            tuple of names is converted to a dictionary mapping each name
            to itself. if None, the Monitor only measures one thing.
        instrument: function used to perform monitoring.
        formatter: function used to format `instrument`'s output.
        name: default name of the monitor. if None, the class name is used.
    """
    # a bare sequence of names becomes an identity mapping
    if isinstance(qualities, (list, tuple)):
        qualities = dict(zip(qualities, qualities))

    # measurement configuration
    self.digits = digits
    self.qualities = qualities
    self.instrument = instrument
    self.formatter = formatter

    # registers: all start at zero until the monitor is started
    self.first: Union[int, dict] = 0
    self.last: Union[int, dict] = 0
    self.absolute: Union[int, dict] = 0
    self.interval: Union[int, dict] = 0
    self.total: Union[int, dict] = 0
    self.lap = 0

    # run state
    self.started = False
    self.paused = False

    # display configuration
    if len(self.units) > 0:
        self.unitstring = f" {self.units}"
    else:
        self.unitstring = ""
    if name is None:
        name = self.__class__.__name__
    self.name = name

_display_plural(register, which)

internal formatting function

Source code in hostess/monitors.py
276
277
278
279
280
281
282
def _display_plural(self, register, which):
    """
    internal formatting function. renders one "<quality> <value>" entry per
    quality, appends `which` when it is not empty, and joins with ';'.
    """
    parts = []
    for quality in self.qualities:
        shown = self._display_single(register[quality], "")
        parts.append(f"{quality} {shown}")
    # an empty suffix is left out rather than producing a trailing ';'
    if which:
        parts.append(which)
    return ";".join(parts)

_display_simple(_which)

internal formatting function

Source code in hostess/monitors.py
266
267
268
269
270
def _display_simple(self, _which):
    """
    internal formatting function. always raises TypeError here, naming the
    class it was called on.
    """
    owner = self.__class__.__name__
    raise TypeError(f"_display_simple() not supported for {owner}")

_display_single(value, which)

internal formatting function

Source code in hostess/monitors.py
272
273
274
def _display_single(self, value, which):
    """
    internal formatting function. concatenates the rounded value, the unit
    string, and the `which` suffix.
    """
    rounded = self._round(value)
    return f"{rounded}{self.unitstring}{which}"

_round(val)

round a measurement if set to do so.

Source code in hostess/monitors.py
178
179
180
181
182
183
184
def _round(self, val: Union[float, Mapping[str, float]]):
    """
    round a measurement to `self.digits` places. returns `val` untouched
    when `self.digits` is None; rounds each value of a Mapping separately.
    """
    places = self.digits
    if places is None:
        return val
    if not isinstance(val, Mapping):
        return round(val, places)
    return {key: round(item, places) for key, item in val.items()}

_unpack_reading(reading)

unpack an instrument reading. for monitors with multiple qualities.

Parameters:

Name Type Description Default
reading Union[tuple, Mapping]

a named tuple or a Mapping containing measurements for various qualities.

required

Returns:

Type Description
dict[str, float]

formatted dictionary of readings for each quality

Source code in hostess/monitors.py
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
def _unpack_reading(
    self, reading: Union[tuple, Mapping]
) -> dict[Union[str, int], float]:
    """
    unpack an instrument reading. for monitors with multiple qualities.

    Args:
        reading: a named tuple, a plain tuple, or a Mapping containing
            measurements for various qualities.

    Returns:
        formatted dictionary of readings for each quality. keyed by field
            name for named tuples, by position for plain tuples, and by the
            Mapping's own keys otherwise.
    """
    if isinstance(reading, tuple):
        if hasattr(reading, "_asdict"):
            # noinspection PyProtectedMember,PyUnresolvedReferences
            reading = reading._asdict()
        else:
            # plain tuples have no field names; key them by position so
            # that `qualities` may refer to integer indices
            reading = dict(enumerate(reading))
    return {k: self.formatter(v) for k, v in reading.items()}

_update_plural(reading, lap)

update registers for each quality from a reading.

Parameters:

Name Type Description Default
reading Union[tuple, Mapping]

instrument output

required
lap bool

record this as a 'lap/split' (i.e., reset interval)?

required
Source code in hostess/monitors.py
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
def _update_plural(self, reading: Union[tuple, Mapping], lap: bool):
    """
    refresh the per-quality registers from one unpacked reading.

    Args:
        reading: instrument output, indexed by the values of
            `self.qualities`
        lap: record this as a 'lap/split' (i.e., reset interval)?
    """
    names = tuple(self.qualities)
    current = {
        name: reading[source] for name, source in self.qualities.items()
    }
    self.absolute = current
    if len(self.interval) == 0:
        # first update since (re)start: nothing has elapsed yet
        self.interval = dict.fromkeys(names, 0)
        self.total = dict.fromkeys(names, 0)
    else:
        delta = {name: current[name] - self.last[name] for name in names}
        self.interval = delta
        if self.cumulative is True:
            # running sum of intervals
            self.total = {
                name: delta[name] + self.total[name] for name in names
            }
        else:
            # distance from the reading taken at start
            self.total = {
                name: current[name] - self.first[name] for name in names
            }
    if lap is True:
        self.last = current

_update_single(reading, lap)

update registers from a reading.

Parameters:

Name Type Description Default
reading float

instrument output

required
lap bool

record this as a 'lap/split' (i.e., reset interval)?

required
Source code in hostess/monitors.py
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
def _update_single(self, reading: float, lap: bool):
    """
    refresh the scalar registers from one reading.

    Args:
        reading: instrument output
        lap: record this as a 'lap/split' (i.e., reset interval)?
    """
    delta = reading - self.last
    self.absolute = reading
    self.interval = delta
    # cumulative monitors sum intervals; others measure from `first`
    self.total = (
        self.total + delta if self.cumulative is True
        else reading - self.first
    )
    if lap is True:
        self.last = reading

click()

shorthand for self.update(True)

Source code in hostess/monitors.py
344
345
346
def click(self):
    """record a lap: shorthand for self.update(True)"""
    record_lap = True
    self.update(record_lap)

clickpeek(which=None, say=False, simple=False)

click the lap button and look at the monitor. managed shorthand for self.update(True) followed by self.display().

Parameters:

Name Type Description Default
which Optional[str]

register to look at (default default_click, "all" for all)

None
say bool

include name of register in output?

False
simple bool

format output tersely?

False
Source code in hostess/monitors.py
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
def clickpeek(
    self,
    which: Optional[str] = None,
    say: bool = False,
    simple: bool = False,
) -> str:
    """
    record a lap, then return the display string for a register. managed
    shorthand for self.click() followed by self.display().

    Args:
        which: register to look at (default default_click, "all" for all)
        say: include name of register in output?
        simple: format output tersely?
    """
    if which is None:
        which = self.default_click
    self.click()
    shown = self.display(which, say, simple)
    return shown

display(which=None, say=False, simple=False)

return string displaying the contents of one or all registers.

Parameters:

Name Type Description Default
which str

which register to print, or "all" for all. None prints register defined in self.default_display

None
say bool

include name of register in output?

False
simple bool

format output tersely?

False
Source code in hostess/monitors.py
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
def display(
    self,
    which: Optional[str] = None,
    say: bool = False,
    simple: bool = False,
) -> str:
    """
    return string displaying the contents of one or all registers.

    Args:
        which: which register to print, or "all" for all. None prints
            register defined in self.default_display
        say: include name of register in output?
        simple: format output tersely?

    Raises:
        AttributeError: if `which` does not name an attribute of this
            monitor.
    """
    which = self.default_display if which is None else which
    if which == "all":
        return "\n".join(
            [self.display(this, say, simple) for this in self.registers]
        )
    if simple is True:
        return self._display_simple(which)
    # no getattr default: an unknown register name is a caller error and
    # should not be rendered as if it were a measurement
    register = getattr(self, which)
    whichprint = f" {which}" if say is True else ""
    if isinstance(register, Mapping):
        return self._display_plural(register, whichprint)
    return self._display_single(register, whichprint)

pause()

pause the monitor.

Source code in hostess/monitors.py
392
393
394
395
def pause(self):
    """take one last update, then mark the monitor as paused."""
    # update first: update() is ignored once `paused` is set
    self.update()
    self.paused = True

peek(which=None, say=False, simple=False)

peek at one or all registers. managed shorthand for self.update() followed by self.display().

Parameters:

Name Type Description Default
which Optional[str]

which register (default self.default_click, "all" for all)

None
say bool

include name of register in output?

False
simple bool

format output tersely?

False
Source code in hostess/monitors.py
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
def peek(
    self,
    which: Optional[str] = None,
    say: bool = False,
    simple: bool = False,
) -> str:
    """
    update the monitor (without recording a lap), then return the display
    string for a register. managed shorthand for self.update() followed by
    self.display().

    Args:
        which: which register (default self.default_click, "all" for all)
        say: include name of register in output?
        simple: format output tersely?
    """
    if which is None:
        which = self.default_click
    self.update()
    shown = self.display(which, say, simple)
    return shown

rec(which=None)

return value of one or all registers in numeric form.

Parameters:

Name Type Description Default
which Optional[str]

name of register. self.default_display by default. "all" for all.

None
Source code in hostess/monitors.py
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
def rec(
    self, which: Optional[str] = None
) -> Union[float, dict[str, float]]:
    """
    return the (rounded) numeric value of one register, or a dictionary of
    every register's value.

    Args:
        which: name of register. self.default_display by default. "all"
            for all.
    """
    register = which
    if register is None:
        register = self.default_display
    if register != "all":
        return self._round(getattr(self, register))
    return {name: self.rec(name) for name in self.registers}

restart()

restart the monitor.

Source code in hostess/monitors.py
397
398
399
def restart(self):
    """restart the monitor: shorthand for self.start(restart=True)."""
    self.start(restart=True)

start(restart=False)

start the monitor. unpauses if monitor is paused.

Parameters:

Name Type Description Default
restart bool

if monitor is already started, restart it, clearing all entries?

False
Source code in hostess/monitors.py
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
def start(self, restart: bool = False):
    """
    start the monitor. unpauses if monitor is paused.

    Takes a reading, stores it as `first` and `last`, clears the other
    registers, and then performs one update.

    Args:
        restart: if monitor is already started, restart it, clearing all
            entries?
    """
    self.paused = False
    if (restart is False) and (self.started is True):
        return
    self.started = True
    reading = self.instrument()
    if isinstance(reading, (tuple, Mapping)):
        reading = self._unpack_reading(reading)
    else:
        reading = self.formatter(reading)
    if isinstance(reading, Mapping):
        self.first = {k: reading[v] for k, v in self.qualities.items()}
        self.total, self.interval, self.absolute = {}, {}, {}
    else:
        self.first = reading
        # clear scalar registers too; otherwise a cumulative total would
        # carry over across a restart
        self.total, self.interval, self.absolute = 0, 0, 0
    self.last = self.first
    self.update()

update(lap=False)

update the monitor, starting it if necessary. ignored if monitor is paused.

Parameters:

Name Type Description Default
lap bool

record this update as a 'lap/split' (i.e., reset interval)?

False
Source code in hostess/monitors.py
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
def update(self, lap: bool = False):
    """
    take a reading and refresh the registers, starting the monitor first if
    it has not been started. does nothing while the monitor is paused.

    Args:
        lap: record this update as a 'lap/split' (i.e., reset interval)?
    """
    if self.paused is True:
        return None
    if self.started is False:
        self.start()
    reading = self.instrument()
    if not isinstance(reading, (tuple, Mapping)):
        # scalar reading: format it and update the scalar registers
        self._update_single(self.formatter(reading), lap)
        return None
    # multi-quality reading: unpack, then update per-quality registers
    unpacked = self._unpack_reading(reading)
    return self._update_plural(unpacked, lap)

Bouncer

Bases: FakeBouncer

simple blocking rate-limiter.

Source code in hostess/monitors.py
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
class Bouncer(FakeBouncer):
    """
    simple blocking rate-limiter.

    Keeps a list of event timestamps; `block()` sleeps until the number of
    events recorded within the last `window` seconds no longer exceeds
    `ratelimit`. Constructing with `fake=True` returns a `FakeBouncer`
    instead.
    """

    def __init__(
        self,
        ratelimit: float = 0.1,
        window: float = 1,
        blockdelay: Optional[float] = None,
        fake: bool = False,
    ):
        """
        Args:
            ratelimit: how many events to permit within a single window
            window: size of window in seconds
            blockdelay: poll rate when blocking (default window/ratelimit,
                or window if ratelimit is 0)
            fake: consumed by `__new__`; accepted here only so that passing
                `fake=False` explicitly does not raise TypeError
        """
        self.events = []
        self.ratelimit = ratelimit
        self.window = window
        if blockdelay is None:
            # avoid ZeroDivisionError when no events at all are permitted
            blockdelay = window / ratelimit if ratelimit else window
        self.blockdelay = blockdelay

    def __new__(cls, ratelimit=0.1, window=1, blockdelay=None, fake=False):
        # a FakeBouncer is not an instance of cls, so Python will not call
        # Bouncer.__init__ on it
        if fake is True:
            return FakeBouncer()
        return object.__new__(cls)

    def clean(self):
        """clean the list of events, dropping those older than the window"""
        now = time.time()
        self.events = list(
            filter(lambda t: (now - t) < self.window, self.events)
        )

    def block(self):
        """block until there are no longer too many events within window"""
        self.clean()
        while len(self.events) > self.ratelimit:
            time.sleep(self.blockdelay)
            self.clean()

    def click(self, block: bool = True):
        """
        record an event; optionally block

        Args:
            block: if True, call block() after recording event
        """
        self.clean()
        now = time.time()
        self.events.append(now)
        if block is True:
            self.block()

__init__(ratelimit=0.1, window=1, blockdelay=None)

Parameters:

Name Type Description Default
ratelimit float

how many events to permit within a single window

0.1
window float

size of window in seconds

1
blockdelay Optional[float]

poll rate when blocking (default window/ratelimit)

None
Source code in hostess/monitors.py
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
def __init__(
    self,
    ratelimit: float = 0.1,
    window: float = 1,
    blockdelay: Optional[float] = None,
    fake: bool = False,
):
    """
    Args:
        ratelimit: how many events to permit within a single window
        window: size of window in seconds
        blockdelay: poll rate when blocking (default window/ratelimit, or
            window if ratelimit is 0)
        fake: ignored here. accepted so that a `fake=False` keyword passed
            to the constructor does not raise TypeError when it is
            forwarded to `__init__`
    """
    self.events = []
    self.ratelimit = ratelimit
    self.window = window
    if blockdelay is None:
        # avoid ZeroDivisionError when no events at all are permitted
        blockdelay = window / ratelimit if ratelimit else window
    self.blockdelay = blockdelay

block()

block until there are no longer too many events within window

Source code in hostess/monitors.py
87
88
89
90
91
92
def block(self):
    """block until there are no longer too many events within window"""
    while True:
        self.clean()
        # done once the number of recent events is within the limit
        if not len(self.events) > self.ratelimit:
            return
        time.sleep(self.blockdelay)

clean()

clean the list of events

Source code in hostess/monitors.py
80
81
82
83
84
85
def clean(self):
    """drop every recorded event that is at least `window` seconds old"""
    cutoff_reference = time.time()
    self.events = [
        stamp_
        for stamp_ in self.events
        if (cutoff_reference - stamp_) < self.window
    ]

click(block=True)

record an event; optionally block

Parameters:

Name Type Description Default
block bool

if True, call block() after recording event

True
Source code in hostess/monitors.py
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
def click(self, block: bool = True):
    """
    record an event at the current time; optionally block afterwards

    Args:
        block: if True, call block() after recording event
    """
    # prune stale events before adding the new one
    self.clean()
    self.events.append(time.time())
    if block is not True:
        return
    self.block()

CPU

Bases: AbstractMonitor

simple CPU monitoring device

Source code in hostess/monitors.py
438
439
440
441
442
443
444
445
446
447
448
449
class CPU(AbstractMonitor):
    """CPU monitor: reports readings from `psutil.cpu_percent`."""

    units = "%"
    fake = False
    # both display() and peek() default to the latest reading
    default_display = "absolute"
    default_click = "absolute"

    def __init__(self, *, digits: Optional[int] = 3):
        """
        Args:
            digits: number of digits to round output to. if None, don't
                round.
        """
        super().__init__(digits=digits)
        self.formatter = identity
        self.instrument = psutil.cpu_percent

CPUTime

Bases: AbstractMonitor

simple CPU time monitoring device

Source code in hostess/monitors.py
452
453
454
455
456
457
458
459
460
461
462
463
464
465
class CPUTime(AbstractMonitor):
    """CPU time monitor: reports fields of `psutil.cpu_times`."""

    # NOTE(review): unlike the sibling monitors on this page, this class
    # does not override `fake` or `units` -- confirm that is intended.

    def __init__(self, *, digits: Optional[int] = 3):
        """
        Args:
            digits: number of digits to round output to. if None, don't
                round.
        """
        super().__init__(digits=digits)
        self.instrument = psutil.cpu_times
        # display name -> field name in the instrument's reading
        tracked = {name: name for name in ("user", "system", "idle")}
        # cpu_times() doesn't report iowait on MacOS
        if sys.platform == 'linux':
            tracked['iowait'] = 'iowait'
        self.qualities = tracked

DiskIO

Bases: AbstractMonitor

simple Disk io monitor

Source code in hostess/monitors.py
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
class DiskIO(AbstractMonitor):
    """disk I/O monitor: reports fields of `psutil.disk_io_counters`."""

    units = "MB"
    fake = False
    # total is accumulated interval-by-interval
    cumulative = True

    def __init__(self, *, digits: Optional[int] = 3):
        """
        Args:
            digits: number of digits to round output to. if None, don't
                round.
        """
        super().__init__(digits=digits)
        # display name -> field name in the instrument's reading
        self.qualities = {
            "read": "read_bytes",
            "write": "write_bytes",
            "read count": "read_count",
            "write count": "write_count",
        }
        # NOTE(review): `mb` is applied to every field, including the two
        # count fields -- confirm that is intended.
        self.formatter = mb
        self.instrument = psutil.disk_io_counters

FakeBouncer

fake blocking rate-limiter. Placeholder for a Bouncer in functions that don't actually want to debounce.

Source code in hostess/monitors.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
class FakeBouncer:
    """
    fake blocking rate-limiter. Placeholder for a `Bouncer` in functions that
    don't actually want to debounce. Every method is a no-op that returns
    None.
    """

    def clean(self):
        """do nothing."""
        pass

    def block(self):
        """do nothing; never blocks."""
        pass

    def click(self, block: bool = True):
        """
        do nothing.

        Args:
            block: ignored. accepted so that callers written against
                `Bouncer.click(block=...)` also work with a FakeBouncer.
        """
        pass

Load

Bases: AbstractMonitor

simple CPU load monitoring device

Source code in hostess/monitors.py
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
class Load(AbstractMonitor):
    """CPU load monitor: reports readings from `psutil.getloadavg`."""

    units = ""
    fake = False
    # both display() and peek() default to the latest reading
    default_display = "absolute"
    default_click = "absolute"

    def __init__(self, *, digits: Optional[int] = 3):
        """
        Args:
            digits: number of digits to round output to. if None, don't
                round.
        """
        super().__init__(digits=digits)
        # NOTE(review): these qualities look readings up by integer
        # position -- confirm the unpacked reading is keyed that way.
        self.qualities = dict(zip(("1m", "5m", "15m"), (0, 1, 2)))
        self.instrument = psutil.getloadavg

LogMB

simple text logger/printer for aggregate data volume

Source code in hostess/monitors.py
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
class LogMB:
    """simple text logger/printer for aggregate data volume"""

    def __init__(self, threshold_mb: float = 25):
        """

        Args:
            threshold_mb: at what interval of MB to log/print
        """

        self._threshold = threshold_mb
        self._seen_so_far = 0
        self._lock = threading.Lock()

    def __call__(self, bytes_amount: int):
        """
        Record a write/transfer/whatever. If this causes running volume to
        cross a multiple of self._threshold, print and log the running volume.

        Args:
            bytes_amount: number of bytes just written/transferred
        """
        with self._lock:
            previous = self._seen_so_far
            total = previous + bytes_amount
            if self._threshold > 0:
                # compare which threshold-sized bucket the running volume
                # falls in before and after this call. (assumes mb()
                # returns a plain number -- TODO confirm)
                crossed = (
                    mb(total) // self._threshold
                    > mb(previous) // self._threshold
                )
            else:
                # a non-positive threshold has no multiples to cross; fall
                # back to comparing this call's volume against it
                crossed = mb(bytes_amount) > self._threshold
            if crossed:
                console_and_log(
                    stamp() + f"transferred {mb(total)}MB", style="blue"
                )
            self._seen_so_far = total

__call__(bytes_amount)

Record a write/transfer/whatever. If this causes running volume to cross a multiple of self._threshold, print and log the running volume.

Parameters:

Name Type Description Default
bytes_amount int

number of bytes just written/transferred

required
Source code in hostess/monitors.py
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
def __call__(self, bytes_amount: int):
    """
    Record a write/transfer/whatever. If this causes running volume to
    cross a multiple of self._threshold, print and log the running volume.

    Args:
        bytes_amount: number of bytes just written/transferred
    """
    with self._lock:
        previous = self._seen_so_far
        total = previous + bytes_amount
        if self._threshold > 0:
            # compare which threshold-sized bucket the running volume falls
            # in before and after this call. (assumes mb() returns a plain
            # number -- TODO confirm)
            crossed = (
                mb(total) // self._threshold
                > mb(previous) // self._threshold
            )
        else:
            # a non-positive threshold has no multiples to cross; fall back
            # to comparing this call's volume against it
            crossed = mb(bytes_amount) > self._threshold
        if crossed:
            console_and_log(
                stamp() + f"transferred {mb(total)}MB", style="blue"
            )
        self._seen_so_far = total

__init__(threshold_mb=25)

Parameters:

Name Type Description Default
threshold_mb float

at what interval of MB to log/print

25
Source code in hostess/monitors.py
111
112
113
114
115
116
117
118
119
120
def __init__(self, threshold_mb: float = 25):
    """
    Set the logging threshold and start with nothing transferred.

    Args:
        threshold_mb: at what interval of MB to log/print
    """
    # guards the running byte count
    self._lock = threading.Lock()
    self._seen_so_far = 0
    self._threshold = threshold_mb

NetworkIO

Bases: AbstractMonitor

simple network I/O monitor

Source code in hostess/monitors.py
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
class NetworkIO(AbstractMonitor):
    """simple network I/O monitor"""

    # class-level settings shared by every instance
    units = "MB"
    fake = False

    def __init__(self, *, digits: Optional[int] = 3):
        """
        Args:
            digits: forwarded unchanged to AbstractMonitor's initializer
        """
        super().__init__(digits=digits)
        # readings come from psutil's network counters and are formatted
        # with `mb`
        self.instrument = psutil.net_io_counters
        self.formatter = mb
        # label -> attribute name; presumably looked up on the value the
        # instrument returns (confirm against AbstractMonitor)
        counter_fields = {
            "sent": "bytes_sent",
            "recv": "bytes_recv",
            "sent count": "packets_sent",
            "recv count": "packets_recv",
        }
        self.qualities = counter_fields

RAM

Bases: AbstractMonitor

simple memory monitoring device

Source code in hostess/monitors.py
426
427
428
429
430
431
432
433
434
435
class RAM(AbstractMonitor):
    """simple memory monitoring device"""

    # class-level settings shared by every instance
    units = "MB"
    fake = False

    def __init__(self, *, digits: Optional[int] = 3):
        """
        Args:
            digits: forwarded unchanged to AbstractMonitor's initializer
        """
        super().__init__(digits=digits)
        # readings come from the module-level `memory` function and are
        # formatted with `mb`
        self.instrument = memory
        self.formatter = mb

Recorder

wrapper class for arbitrary callable. makes its interface compatible with make_stat_records().

Source code in hostess/monitors.py
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
class Recorder:
    """
    Adapter that gives an arbitrary callable the `update` / `start` /
    `pause` / `rec` methods, so that it can be used with
    `make_stat_records()`.
    """

    def __init__(self, func: Callable):
        """
        Args:
            func: the callable to wrap
        """
        # most recent result stored by update(); None until then
        self.cache = None
        self.func = func

    def __call__(self, *args, **kwargs):
        """Call the wrapped function directly; does not touch the cache."""
        result = self.func(*args, **kwargs)
        return result

    def update(self, *args, **kwargs):
        """Call the wrapped function and store its result in the cache."""
        latest = self.func(*args, **kwargs)
        self.cache = latest

    def start(self):
        """Refresh the cache by calling the wrapped function with no arguments."""
        return self.update()

    def pause(self):
        """Refresh the cache by calling the wrapped function with no arguments."""
        return self.update()

    def rec(self, *_ignored, **_also_ignored):
        """Return the cached result; all arguments are ignored."""
        return self.cache

Stopwatch

Bases: AbstractMonitor

simple timekeeping device

Source code in hostess/monitors.py
415
416
417
418
419
420
421
422
423
class Stopwatch(AbstractMonitor):
    """simple timekeeping device"""

    # class-level settings shared by every instance
    units = "s"
    fake = False

    def __init__(self, *, digits: Optional[int] = 3):
        """
        Args:
            digits: forwarded unchanged to AbstractMonitor's initializer
        """
        super().__init__(digits=digits)
        # readings come from time.perf_counter
        self.instrument = time.perf_counter

TimeSwitcher

little object that tracks changing times

Source code in hostess/monitors.py
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
class TimeSwitcher:
    """
    little object that tracks changing times
    """

    def __init__(self, start_time: Optional[str] = None):
        """
        Args:
            start_time: optional start time for the timer, in any format
                recognized by dateutil. it is stored exactly as given
                (it is not parsed or normalized here).
        """
        if start_time is not None:
            self.times = [start_time]
        else:
            self.times = []

    def check_time(self, string: str) -> bool:
        """
        if the passed value is parseable as a time, append it to self.times.

        Args:
            string: stringified time (maybe)

        Returns:
            True if `string` could be parsed as a time, False if not.
        """
        try:
            # successfully-parsed times are stored in ISO format
            self.times.append(dtp.parse(string).isoformat())
            return True
        except dtp.ParserError:
            return False

    def __repr__(self) -> str:
        """
        Returns:
            the most recently recorded time, or the string "None" if no
            time has been recorded yet.
        """
        if len(self.times) > 0:
            return str(self.times[-1])
        # __repr__ / __str__ must return a str: returning the None object
        # makes repr(), str(), print() and f-strings raise TypeError.
        return "None"

    def __str__(self) -> str:
        return self.__repr__()

__init__(start_time=None)

Parameters:

Name Type Description Default
start_time Optional[str]

optional start time for the timer, in any format recognized by dateutil.

None
Source code in hostess/monitors.py
694
695
696
697
698
699
700
701
702
703
def __init__(self, start_time: Optional[str] = None):
    """
    Args:
        start_time: optional start time for the timer, in any format
            recognized by dateutil. When given, it becomes the first
            entry of `self.times`, stored as passed.
    """
    self.times = [] if start_time is None else [start_time]

check_time(string)

if the passed value is parseable as a time, append it to self.times.

Parameters:

Name Type Description Default
string str

stringified time (maybe)

required

Returns:

Type Description
bool

True if string could be parsed as a time, False if not.

Source code in hostess/monitors.py
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
def check_time(self, string: str) -> bool:
    """
    Try to parse `string` as a time; on success, record it.

    Args:
        string: stringified time (maybe)

    Returns:
        True if `string` could be parsed as a time (in which case its ISO
        format has been appended to self.times), False if not.
    """
    try:
        parsed = dtp.parse(string)
        self.times.append(parsed.isoformat())
    except dtp.ParserError:
        return False
    return True

Usage

Bases: AbstractMonitor

simple disk usage monitor

Source code in hostess/monitors.py
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
class Usage(AbstractMonitor):
    """simple disk usage monitor"""

    # class-level settings shared by every instance
    units = "MB"
    fake = False

    def __init__(
        self, *, digits: Optional[int] = 3, path: Union[str, Path] = "/"
    ):
        """
        Args:
            digits: number of digits (as in AbstractMonitor)
            path: root directory to monitor
        """
        super().__init__(digits=digits)
        # each quality's label is the same as the attribute name it maps to
        self.qualities = {name: name for name in ("total", "used", "free")}
        # bind the path now so the instrument can be called with no arguments
        self.instrument = partial(psutil.disk_usage, path)
        self.formatter = mb

__init__(*, digits=3, path='/')

Parameters:

Name Type Description Default
digits Optional[int]

number of digits (as in AbstractMonitor)

3
path Union[str, Path]

root directory to monitor

'/'
Source code in hostess/monitors.py
471
472
473
474
475
476
477
478
479
480
481
482
def __init__(
    self, *, digits: Optional[int] = 3, path: Union[str, Path] = "/"
):
    """
    Args:
        digits: number of digits (as in AbstractMonitor)
        path: root directory to monitor
    """
    super().__init__(digits=digits)
    # each quality's label is the same as the attribute name it maps to
    self.qualities = {name: name for name in ("total", "used", "free")}
    # bind the path now so the instrument can be called with no arguments
    self.instrument = partial(psutil.disk_usage, path)
    self.formatter = mb

log_factory(stamper, stat, log_fields, logfile)

Parameters:

Name Type Description Default
stamper Callable[[], Any]

line identifier function (i.e., a timestamper)

required
stat Callable[[], Any]

statistic-generating function

required
log_fields Sequence[str]

expected kwargs to log function -- this provides an ordering for columns in the output CSV

required
logfile Union[str, Path]

where to write the log

required

Returns:

Type Description
Callable[[Any, ...], None]

a function that, when called, creates, prints, and writes a comma-separated log line.

Source code in hostess/monitors.py
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
def log_factory(
    stamper: Callable[[], Any],
    stat: Callable[[], Any],
    log_fields: Sequence[str],
    logfile: Union[str, Path],
) -> Callable[..., None]:
    """
    Build a function that prints and appends comma-separated log lines.

    Args:
        stamper: line identifier function (i.e., a timestamper)
        stat: statistic-generating function
        log_fields: expected kwargs to log function -- this provides an
            ordering for columns in the output CSV
        logfile: where to write the log

    Returns:
        a function that, when called, creates, prints, and writes a
            comma-separated log line. it is called as
            `log(event, **kwargs)`; kwargs not named in `log_fields` are
            ignored, and fields with no matching kwarg are left empty.
    """

    def lprint(message):
        # echo the line to stdout, then append the same text to the log
        print(message)
        with open(logfile, "a") as stream:
            stream.write(message)

    def log(event, **kwargs):
        cells = [event, *(kwargs.get(field, "") for field in log_fields)]
        # stringify every cell: str.join raises TypeError on non-str
        # values such as numbers
        center = ",".join(str(cell) for cell in cells)
        lprint(f"{stamper()},{center},{stat()}\n")

    return log

make_monitors(*, digits=3)

make a default set of monitors

Source code in hostess/monitors.py
543
544
545
546
547
548
549
550
551
552
553
def make_monitors(*, digits: Optional[int] = 3) -> dict[str, AbstractMonitor]:
    """
    make a default set of monitors

    Args:
        digits: passed as `digits` to the constructor of every monitor

    Returns:
        dict mapping a short name to a freshly-constructed monitor
    """
    # (name, monitor class) pairs, in the order they are constructed
    defaults = (
        ("cpu", CPU),
        ("cputime", CPUTime),
        ("memory", RAM),
        ("disk", Usage),
        ("diskio", DiskIO),
        ("networkio", NetworkIO),
        ("time", Stopwatch),
    )
    return {name: monitor(digits=digits) for name, monitor in defaults}

make_stat_printer(monitors)

Parameters:

Name Type Description Default
monitors Mapping[str, AbstractMonitor]

dictionary of AbstractMonitors

required

Returns:

Type Description
Callable[[bool, bool, Any, ...], Union[str, Mapping[str, AbstractMonitor]]]

a function that holds monitors in enclosing scope.

when called, and eject (its second positional parameter) is False, updates all monitors, passing its first positional parameter (lap) to the update methods of all monitors, and kwargs to the display methods of all monitors. it returns a string containing the concatenated output of all monitor displays.

if eject is True, instead returns the dictionary of monitors.

Source code in hostess/monitors.py
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
def make_stat_printer(
    monitors: Mapping[str, AbstractMonitor]
) -> Callable[
    [bool, bool, Any, ...], Union[str, Mapping[str, AbstractMonitor]]
]:
    """
    Args:
        monitors: dictionary of AbstractMonitors

    Returns:
        a function, `printstats(lap=True, eject=False, **display_kwargs)`,
            that holds `monitors` in enclosing scope.

            if `eject` is True, it simply returns the dictionary of
            monitors.

            otherwise, it first calls `update(lap)` on every monitor, then
            calls `display(**display_kwargs)` on every monitor, and
            returns the display outputs joined by semicolons.
    """

    def printstats(lap=True, eject=False, **display_kwargs):
        if eject is True:
            return monitors
        # update every monitor before displaying any of them
        for monitor in monitors.values():
            monitor.update(lap)
        displays = []
        for monitor in monitors.values():
            displays.append(monitor.display(**display_kwargs))
        return ";".join(displays)

    return printstats

make_stat_records(monitors)

Parameters:

Name Type Description Default
monitors MutableMapping[str, Union[AbstractMonitor, Callable[[Any, ...], float]]]

dictionary of AbstractMonitors and/or functions that return floats.

required

Returns:

Type Description
Callable[[bool, bool, Any, ...], Union[Mapping[str, Union[AbstractMonitor, Recorder]], dict[str, Union[dict, float]]]]

a stat-recording function that works much like the function produced by make_stat_printer, but returns a dictionary of numerical values rather than simply returning strings.

Source code in hostess/monitors.py
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
def make_stat_records(
    monitors: MutableMapping[
        str, Union[AbstractMonitor, Callable[[Any, ...], float]]
    ]
) -> Callable[
    [bool, bool, Any, ...],
    Union[
        Mapping[str, Union[AbstractMonitor, Recorder]],
        dict[str, Union[dict, float]],
    ],
]:
    """
    Args:
        monitors: dictionary of AbstractMonitors and/or functions that return
            floats. note that it is modified in place: every value that is
            not an AbstractMonitor is replaced with a Recorder wrapping it.

    Returns:
        a stat-recording function that works much like the function
            produced by `make_stat_printer`, but returns a dictionary of
            the monitors' `rec()` results rather than a string.
    """
    # iterate over a snapshot of the keys, since values are reassigned below
    for name in list(monitors.keys()):
        candidate = monitors[name]
        if isinstance(candidate, AbstractMonitor):
            continue
        monitors[name] = Recorder(candidate)

    def recordstats(
        lap: bool = True, eject: bool = False, **display_kwargs
    ) -> Union[
        Mapping[str, Union[AbstractMonitor, Recorder]],
        dict[str, Union[dict, float]],
    ]:
        if eject is True:
            return monitors
        # update every monitor before reading any of them
        for monitor in monitors.values():
            monitor.update(lap)
        records = {}
        for name, monitor in monitors.items():
            records[name] = monitor.rec(**display_kwargs)
        return records

    return recordstats

memory()

alias for psutil.Process().memory_info().rss

Returns:

Type Description
int

current process's resident set size in bytes

Source code in hostess/monitors.py
27
28
29
30
31
32
33
34
def memory() -> int:
    """
    shorthand for psutil.Process().memory_info().rss

    Returns:
        current process's resident set size in bytes
    """
    current_process = psutil.Process()
    return current_process.memory_info().rss

ticked(func, label, ticker)

Modify func so that it records a tick on ticker whenever it's called. To use with @ syntax, do something like:

@ticked(label='login', ticker=DEFAULT_TICKER)
def handle_login(...

Parameters:

Name Type Description Default
func Callable

function to modify

required
label str

label to use for tick

required
ticker Ticker

Ticker to tick

required

Returns:

Type Description
Callable

modified version of func

Source code in hostess/monitors.py
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
@curry
def ticked(func: Callable, label: str, ticker: Ticker) -> Callable:
    """
    Wrap func so that every call first records a tick on ticker.
    To use with @ syntax, do something like:

    ```
    @ticked(label='login', ticker=DEFAULT_TICKER)
    def handle_login(...
    ```

    Args:
        func: function to modify
        label: label to use for tick
        ticker: Ticker to tick

    Returns:
        modified version of `func`
    """

    def tickoff(*args, **kwargs):
        # tick before delegating, so the call is counted even if func raises
        ticker.tick(label)
        return func(*args, **kwargs)

    # copy func's metadata onto the wrapper
    return wraps(func)(tickoff)

profilers

profiling and introspection utilities

DEFAULT_PROFILER = Profiler({'time': Stopwatch()}) module-attribute

convenient shared Profiler that measures execution times for code blocks.

IdentifyResult = dict[str, Union[int, type, str]] module-attribute

record representing information about a Python object, as produced by identify and functions that call it.

ScopeName = Literal['locals', 'globals', 'builtins'] module-attribute

string that gives the name of a Python scope, not including enclosing/nonlocal scope.

PContext

simple context manager for profiling. typically instantiated via a Profiler's context() method, though this is not mandatory.

Source code in hostess/profilers.py
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
class PContext:
    """
    simple context manager for profiling. typically instantiated via a
    Profiler's context() method, though this is not mandatory.
    """

    def __init__(self, profiler: Profiler, label: str = ""):
        """
        Args:
            profiler: associated Profiler; readings generated by this PContext
                will be stored in that Profiler's labels data structure.
            label: optional label for this PContext's profiling results.
        """
        self.label = label
        self.profiler = profiler

    def __enter__(self):
        """Restart the associated Profiler and return it."""
        profiler = self.profiler
        profiler.restart()
        return profiler

    def __exit__(self, *args):
        """
        Pause the associated Profiler, take a record of its stats, and add
        each reading to the Profiler's tallies for this context's label.
        """
        self.profiler.pause()
        for monitor, reading in self.profiler.recordstats().items():
            self._save_reading(monitor, reading)

    def _save_reading(self, monitor: str, reading: Union[int, float, Mapping]):
        """
        internal function called on context block exit. adds one monitor's
        reading to the associated Profiler's tallies for this label.
        """
        if not isinstance(reading, (float, int)):
            # mapping of quality name -> value: accumulate each separately
            for quality, value in reading.items():
                tallies = self.profiler.labels[self.label][monitor]
                tallies[quality] += value
            return
        # scalar reading: accumulate directly under the monitor's name
        self.profiler.labels[self.label][monitor] += reading

__init__(profiler, label='')

Parameters:

Name Type Description Default
profiler Profiler

associated Profiler; readings generated by this PContext will be stored in that Profiler's labels data structure.

required
label str

optional label for this PContext's profiling results.

''
Source code in hostess/profilers.py
139
140
141
142
143
144
145
146
147
def __init__(self, profiler: Profiler, label: str = ""):
    """
    Args:
        profiler: associated Profiler; readings generated by this PContext
            will be stored in that Profiler's labels data structure.
        label: optional label for this PContext's profiling results.
    """
    self.label = label
    self.profiler = profiler

_save_reading(monitor, reading)

internal function called on context block exit. saves profiling results to the associated Profiler.

Source code in hostess/profilers.py
159
160
161
162
163
164
165
166
167
168
def _save_reading(self, monitor: str, reading: Union[int, float, Mapping]):
    """
    internal function called on context block exit. adds one monitor's
    reading to the associated Profiler's tallies for this label.
    """
    if not isinstance(reading, (float, int)):
        # mapping of quality name -> value: accumulate each separately
        for quality, value in reading.items():
            tallies = self.profiler.labels[self.label][monitor]
            tallies[quality] += value
        return
    # scalar reading: accumulate directly under the monitor's name
    self.profiler.labels[self.label][monitor] += reading

Profiler

simple profiling object for specific sections of code.

Examples:

>>> from array import array
>>> from hostess.monitors import RAM, Stopwatch
>>> from hostess.profilers import Profiler
>>> prof = Profiler({'time': Stopwatch(), 'memory': RAM()})
>>> with prof.context("f"):
    >>> var1 = array("B", [0 for _ in range(1024**2 * 100)])
>>> with prof.context("g"):
    >>> var2 = array("B", [0 for _ in range(1024**2 * 250)])
>>> print(prof)

general form of expected output (exact results are system-dependent):

Profiler
f
  time: 2.935
  memory: 105.47
g
  time: 7.171
  memory: 261.76
Source code in hostess/profilers.py
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
class Profiler:
    """
    simple profiling object for specific sections of code.

    Examples:
        >>> from array import array
        >>> from hostess.monitors import RAM, Stopwatch
        >>> from hostess.profilers import Profiler

        >>> prof = Profiler({'time': Stopwatch(), 'memory': RAM()})
        >>> with prof.context("f"):
            >>> var1 = array("B", [0 for _ in range(1024**2 * 100)])
        >>> with prof.context("g"):
            >>> var2 = array("B", [0 for _ in range(1024**2 * 250)])
        >>> print(prof)

        general form of expected output (exact results are system-dependent):
        ```
        Profiler
        f
          time: 2.935
          memory: 105.47
        g
          time: 7.171
          memory: 261.76
        ```
    """

    def __init__(self, monitors: MutableMapping[str, AbstractMonitor]):
        """
        Args:
            monitors: dictionary of AbstractMonitor objects
                (see hostess.monitors for examples)
        """
        self.monitors = monitors
        self.printstats = make_stat_printer(self.monitors)
        self.recordstats = make_stat_records(self.monitors)
        # every monitor shows interval readings by default
        for monitor in self.monitors.values():
            monitor.default_display = "interval"
        # label -> per-monitor tallies, created on first access
        self.labels = defaultdict(self._newcaches)

    def start(self):
        """call start() on every monitor."""
        for monitor in self.monitors.values():
            monitor.start()

    def pause(self):
        """call pause() on every monitor."""
        for monitor in self.monitors.values():
            monitor.pause()

    def restart(self):
        """call restart() on every monitor."""
        for monitor in self.monitors.values():
            monitor.restart()

    def context(self, label: str = "") -> PContext:
        """
        create a context manager that profiles a section of code.

        Args:
            label: label for the code section, possibly shared between
                multiple sections. useful when it is desirable to distinguish
                specific steps of a pipeline, 'categories' of activity, etc.

        Returns:
            a PContext bound to this Profiler and `label`.
        """
        return PContext(self, label)

    def reset(self):
        """discard all existing readings by replacing `self.labels`."""
        self.labels = defaultdict(self._newcaches)

    def _newcaches(self) -> dict:
        """
        internal data structure initialization function. builds zeroed
        tallies: a dict of zeros for a monitor that has qualities, a bare
        0 for one that does not.
        """
        return {
            name: (
                0
                if monitor.qualities is None
                else {quality: 0 for quality in monitor.qualities}
            )
            for name, monitor in self.monitors.items()
        }

    def __str__(self):
        if not self.labels:
            return "Profiler (no readings)"
        # one line per label, followed by an indented line per monitor
        pieces = ["Profiler\n"]
        for label, readings in self.labels.items():
            pieces.append(f"{label}\n")
            for monitor, reading in readings.items():
                pieces.append(f"  {monitor}: {reading}\n")
        return "".join(pieces)

    def __repr__(self):
        return self.__str__()

__init__(monitors)

Parameters:

Name Type Description Default
monitors MutableMapping[str, AbstractMonitor]

dictionary of AbstractMonitor objects (see hostess.monitors for examples)

required
Source code in hostess/profilers.py
66
67
68
69
70
71
72
73
74
75
76
77
def __init__(self, monitors: MutableMapping[str, AbstractMonitor]):
    """
    Args:
        monitors: dictionary of AbstractMonitor objects
            (see hostess.monitors for examples)
    """
    self.monitors = monitors
    self.printstats = make_stat_printer(self.monitors)
    self.recordstats = make_stat_records(self.monitors)
    # every monitor shows interval readings by default
    for monitor in self.monitors.values():
        monitor.default_display = "interval"
    # label -> per-monitor tallies, created on first access
    self.labels = defaultdict(self._newcaches)

_newcaches()

internal data structure initialization function.

Source code in hostess/profilers.py
109
110
111
112
113
114
115
116
117
def _newcaches(self) -> dict:
    """
    internal data structure initialization function. builds zeroed
    tallies: a dict of zeros for a monitor that has qualities, a bare 0
    for one that does not.
    """
    return {
        name: (
            0
            if monitor.qualities is None
            else {quality: 0 for quality in monitor.qualities}
        )
        for name, monitor in self.monitors.items()
    }

context(label='')

create a context manager that profiles a section of code.

Parameters:

Name Type Description Default
label str

label for the code section, possibly shared between multiple sections. useful when it is desirable to distinguish specific steps of a pipeline, 'categories' of activity, etc.

''
Source code in hostess/profilers.py
 94
 95
 96
 97
 98
 99
100
101
102
103
def context(self, label: str = "") -> PContext:
    """
    create a context manager that profiles a section of code.

    Args:
        label: label for the code section, possibly shared between
            multiple sections. useful when it is desirable to distinguish
            specific steps of a pipeline, 'categories' of activity, etc.

    Returns:
        a PContext bound to this object and `label`.
    """
    profiling_context = PContext(self, label)
    return profiling_context

pause()

pause all the monitors.

Source code in hostess/profilers.py
84
85
86
87
def pause(self):
    """call pause() on every monitor."""
    for monitor in self.monitors.values():
        monitor.pause()

reset()

clear this Profiler, removing all existing readings.

Source code in hostess/profilers.py
105
106
107
def reset(self):
    """discard all existing readings by replacing `self.labels`."""
    fresh = defaultdict(self._newcaches)
    self.labels = fresh

restart()

restart all the monitors.

Source code in hostess/profilers.py
89
90
91
92
def restart(self):
    """call restart() on every monitor."""
    for monitor in self.monitors.values():
        monitor.restart()

start()

start all the monitors.

Source code in hostess/profilers.py
79
80
81
82
def start(self):
    """call start() on every monitor."""
    for monitor in self.monitors.values():
        monitor.start()

RefAlarm

Simple reference / dereference alarm.

Source code in hostess/profilers.py
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
class RefAlarm:
    """Simple reference / dereference alarm."""

    def __init__(
        self,
        getstack: bool = False,
        verbosity: Literal["warn", "print", "quiet"] = "print",
        warn_new: bool = False,
        ignore_dunder: bool = True,
    ):
        """
        Args:
            getstack: if True, check entire stack above frame that
                initializes self.context(); if False, just that frame
            verbosity: "warn" means issue RefAlarmWarnings; "print" means
                call `print`; "quiet" means no output (user must check
                `self.refcaches` to see results)
            warn_new: warn / print newly-assigned variables? does nothing
                if `verbosity == "quiet"`.
            ignore_dunder: ignore variables with "dunder" names?
        """
        self.verbosity = verbosity
        self.warn_new = warn_new
        self.getstack = getstack
        self.ignore_dunder = ignore_dunder
        # context name -> list of the reports received under that name
        self.refcaches = defaultdict(list)

    def context(self, name: str = "default") -> _RefAlarmContext:
        """
        Produce a context manager related to this object.

        Args:
            name: name for this context, used for verbose reports and
                `refcache` keys

        Returns:
            A `_RefAlarmContext` suitable for use in a `with` statement.
        """
        return _RefAlarmContext(
            self,
            name,
            getstack=self.getstack,
            ignore_dunder=self.ignore_dunder,
        )

    def receive_context_report(self, results: list[dict], name: str):
        """
        receive a context report. called by `_RefAlarmContexts` produced
        by this object's `context()` method. the report is always stored
        in `self.refcaches[name]`; unless verbosity is "quiet", one
        message per reported mismatch is then printed or issued as a
        warning.

        Args:
            results: `results` object from `_RefAlarmContext.__exit__()`
            name: name of context
        """
        self.refcaches[name].append(results)
        verbosity = self.verbosity
        if verbosity == "quiet":
            return
        # anything other than "print" is treated as "warn"
        printer = (
            print
            if verbosity == "print"
            else partial(warnings.warn, category=RefAlarmWarning)
        )
        # the default context name is not worth repeating on every line
        pre = f"{name}: " if name != "default" else ""
        for i, r in enumerate(results):
            fname, mismatches = r["name"], r["mismatches"]
            title = f"{i} ({fname})"
            for varname, change in mismatches.items():
                if change == "new":
                    # new variables are reported only when requested
                    if self.warn_new is not False:
                        printer(f"{pre}{title}: {varname} is new")
                elif change == "missing":
                    printer(f"{pre}{title}: {varname} is missing")
                else:
                    printer(
                        f"{pre}{title}: {varname} refcount changed by {change}"
                    )

__init__(getstack=False, verbosity='print', warn_new=False, ignore_dunder=True)

Parameters:

Name Type Description Default
getstack bool

if True, check entire stack above frame that initializes self.context(); if False, just that frame

False
verbosity Literal['warn', 'print', 'quiet']

"warn" means issue RefAlarmWarnings; "print" means call print; "quiet" means no output (user must check self.refcaches to see results)

'print'
warn_new bool

warn / print newly-assigned variables? does nothing if verbosity == "quiet".

False
ignore_dunder bool

ignore variables with "dunder" names?

True
Source code in hostess/profilers.py
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
def __init__(
    self,
    getstack: bool = False,
    verbosity: Literal["warn", "print", "quiet"] = "print",
    warn_new: bool = False,
    ignore_dunder: bool = True,
):
    """
    Args:
        getstack: if True, check entire stack above frame that
            initializes self.context(); if False, just that frame
        verbosity: "warn" means issue RefAlarmWarnings; "print" means
            call `print`; "quiet" means no output (user must check
            `self.refcaches` to see results)
        warn_new: warn / print newly-assigned variables? does nothing
            if `verbosity == "quiet"`.
        ignore_dunder: ignore variables with "dunder" names?
    """
    self.verbosity = verbosity
    self.warn_new = warn_new
    self.getstack = getstack
    self.ignore_dunder = ignore_dunder
    # context name -> list of the reports received under that name
    self.refcaches = defaultdict(list)

context(name='default')

Produce a context manager related to this object.

Parameters:

Name Type Description Default
name str

name for this context, used for verbose reports and refcache keys

'default'

Returns:

Type Description
_RefAlarmContext

A _RefAlarmContext suitable for use in a with statement.

Source code in hostess/profilers.py
825
826
827
828
829
830
831
832
833
834
835
836
def context(self, name: str = "default") -> _RefAlarmContext:
    """
    Produce a context manager related to this object.

    Args:
        name: name for this context, used for verbose reports and
            `refcache` keys

    Returns:
        A `_RefAlarmContext` suitable for use in a `with` statement.
    """
    return _RefAlarmContext(
        self,
        name,
        getstack=self.getstack,
        ignore_dunder=self.ignore_dunder,
    )

receive_context_report(results, name)

receive a context report. called by _RefAlarmContexts produced by this object's context() method.

Parameters:

Name Type Description Default
results list[dict]

results object from _RefAlarmContext.__exit__()

required
name str

name of context

required
Source code in hostess/profilers.py
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
def receive_context_report(self, results: list[dict], name: str):
    """
    receive a context report. called by `_RefAlarmContexts` produced
    by this object's `context()` method. the report is always stored in
    `self.refcaches[name]`; unless verbosity is "quiet", one message per
    reported mismatch is then printed or issued as a warning.

    Args:
        results: `results` object from `_RefAlarmContext.__exit__()`
        name: name of context
    """
    self.refcaches[name].append(results)
    verbosity = self.verbosity
    if verbosity == "quiet":
        return
    # anything other than "print" is treated as "warn"
    printer = (
        print
        if verbosity == "print"
        else partial(warnings.warn, category=RefAlarmWarning)
    )
    # the default context name is not worth repeating on every line
    pre = f"{name}: " if name != "default" else ""
    for i, r in enumerate(results):
        fname, mismatches = r["name"], r["mismatches"]
        title = f"{i} ({fname})"
        for varname, change in mismatches.items():
            if change == "new":
                # new variables are reported only when requested
                if self.warn_new is not False:
                    printer(f"{pre}{title}: {varname} is new")
            elif change == "missing":
                printer(f"{pre}{title}: {varname} is missing")
            else:
                printer(
                    f"{pre}{title}: {varname} refcount changed by {change}"
                )

RefAlarmWarning

Bases: UserWarning

warning issued by RefAlarms with "warn" verbosity

Source code in hostess/profilers.py
753
754
class RefAlarmWarning(UserWarning):
    """warning category used by a `RefAlarm` whose `verbosity` is "warn"."""

_RefAlarmContext

context manager for reference counting. should be initialized only by RefAlarm.context().

Source code in hostess/profilers.py
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
class _RefAlarmContext:
    """
    context manager that snapshots reference counts on entry, snapshots them
    again on exit, and hands the differences to the `RefAlarm` it belongs to.
    meant to be constructed only by `RefAlarm.context()`.
    """

    def __init__(
        self,
        refalarm: RefAlarm,
        name: str,
        getstack: bool = False,
        ignore_dunder: bool = True,
    ):
        self.refalarm = refalarm
        self.name = name
        self.getstack = getstack
        self.ignore_dunder = ignore_dunder
        # filled in by __enter__
        self.refcache = None

    def __enter__(self):
        # stepping back 2 frames drops refs from this frame
        self.refcache = refcounts(self.getstack, 2)

    def __exit__(self, *_):
        # take the count as a bare statement (paranoiacally, never inside a
        # larger expression); stepping back 2 frames drops refs from this
        # frame
        counts = refcounts(self.getstack, 2)
        # a different number of counted frames means something weird happened
        # and the two snapshots cannot be compared accurately
        if len(self.refcache) != len(counts):
            warnings.warn("stack changed during counting, bailing out")
            return
        results = []
        for before, after in zip(self.refcache, counts):
            oldname, oldcounts = before["name"], before["counts"]
            newname, newcounts = after["name"], after["counts"]
            # likewise if the code name of a frame changed
            if newname != oldname:
                warnings.warn("stack changed during counting, bailing out")
                return
            mismatches = {}
            for varname in set(oldcounts.keys()).union(newcounts.keys()):
                if self.ignore_dunder is True and varname.startswith("__"):
                    continue
                if varname not in oldcounts:
                    mismatches[varname] = "new"
                elif varname not in newcounts:
                    mismatches[varname] = "missing"
                else:
                    delta = newcounts[varname] - oldcounts[varname]
                    if delta != 0:
                        mismatches[varname] = delta
            results.append({"name": newname, "mismatches": mismatches})
        self.refalarm.receive_context_report(results, self.name)

_add_varnames(obj, sdict, rec, scopename)

helper function for yclept(). creates records describing the variables in a given scope.

Source code in hostess/profilers.py
419
420
421
422
423
424
425
426
427
def _add_varnames(obj, sdict, rec, scopename):
    """
    helper function for yclept(). creates records describing the variables in
    a given scope.
    """
    for varname, reference in sdict.items():
        if obj is reference:
            rec["names"].append(varname)
            rec["scopes"].append(scopename)

_filter_history(refs, globals_, lids)

helper function for analyze_references(). attempts to remove references to ipython/jupyter history variables.

Parameters:

Name Type Description Default
refs list

list of reference objects

required
globals_ Optional[dict]

optional specified globals dictionary. if not given, uses the globals of the parent frame of the caller.

required
lids Collection[int]

ids of all known copies of other frames' locals dicts.

required

Returns:

Type Description
list

filtered list of references.

Source code in hostess/profilers.py
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
def _filter_history(
    refs: list, globals_: Optional[dict], lids: Collection[int]
) -> list:
    """
    helper function for analyze_references(). attempts to remove references to
    ipython/jupyter history variables.

    Args:
        refs: list of reference objects
        globals_: optional specified globals dictionary. if not given, uses
            the globals of the parent frame of the caller.
        lids: ids of all known copies of other frames' locals dicts. a
            rejected reference whose id is in this collection is cleared.

    Returns:
        filtered list of references.
    """
    if globals_ is None:
        # two frames back from here, i.e. the caller of this function's caller
        globals_ = currentframe().f_back.f_back.f_globals
    # build the predicate once and apply it to each reference. (testing the
    # predicate *factory* instead is always truthy and filters nothing.)
    hfilt = history_filter(globals_)
    outrefs = []
    for ref in refs:
        if hfilt(ref):
            outrefs.append(ref)
        elif id(ref) in lids:
            # rejected copy of a locals dict: empty it so it does not keep
            # its contents alive
            ref.clear()
    return outrefs

_filter_ids(refs, permit, exclude, lids)

helper function for analyze_references(). provides blocklist/allowlist behavior based on object ids.

Parameters:

Name Type Description Default
refs list

list of reference objects

required
permit Collection[int]

list of allowed ids. if any are given, this functions as a strict allowlist.

required
exclude Collection[int]

list of excluded ids

required
lids Collection[int]

ids of all known copies of other frames' locals dicts

required

Returns:

Name Type Description
filtered_refs list[Any]

filtered list of references

refnoms list[Refnom]

Refnom dicts for each member of filtered_refs

Source code in hostess/profilers.py
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
def _filter_ids(
    refs: list,
    permit: Collection[int],
    exclude: Collection[int],
    lids: Collection[int],
) -> tuple[list[Any], list[Refnom]]:
    """
    helper function for analyze_references(). provides blocklist/allowlist
    behavior based on object ids.

    Args:
        refs: list of reference objects
        permit: list of allowed ids. if any are given, this functions as a
            strict allowlist.
        exclude: list of excluded ids
        lids: ids of all known copies of other frames' locals dicts

    Returns:
        filtered_refs: filtered list of references
        refnoms: Refnom dicts for each member of filtered_refs
    """
    outrefs, refnoms = [], []
    for ref in refs:
        try:
            assert id(ref) not in exclude
            assert (len(permit) == 0) or (id(ref) in permit)
            outrefs.append(ref)
            refnoms.append(yclept(ref, stepback=2))
        except AssertionError:
            if id(ref) in lids:
                ref.clear()
    return refnoms, outrefs

_filter_types(refs, permit, exclude, lids)

helper function for analyze_references(). provides blocklist/allowlist behavior based on object types.

Parameters:

Name Type Description Default
refs list

list of referencing objects

required
permit Collection[type]

types to explicitly permit. if there is at least one type, this functions as a strict allowlist.

required
exclude Collection[type]

types to explicitly exclude.

required
lids Collection[int]

ids of all known copies of other frames' locals dicts.

required

Returns:

Type Description
list

filtered list of references.

Source code in hostess/profilers.py
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
def _filter_types(
    refs: list,
    permit: Collection[type],
    exclude: Collection[type],
    lids: Collection[int],
) -> list:
    """
    helper function for analyze_references(). provides blocklist/allowlist
    behavior based on object types.

    Args:
        refs: list of referencing objects
        permit: types to explicitly permit. if there is at least one type,
            this functions as a strict allowlist.
        exclude: types to explicitly exclude.
        lids: ids of all known copies of other frames' `locals` dicts.

    Returns:
        filtered list of references.
    """
    outrefs = []
    for ref in refs:
        reftype = type(ref)
        try:
            assert reftype not in exclude
            assert not (len(permit) > 0 and (reftype not in permit))
            outrefs.append(ref)
        except AssertionError:
            if id(ref) in lids:
                ref.clear()
    return outrefs

_get_referencing_scopedicts(obj, existing_ids)

check for obj in the globals and locals dicts of all stack frames above the caller's. return all dicts in which obj has a name or names.

Parameters:

Name Type Description Default
obj Any

object to check for

required
existing_ids Collection[int]

ids of scopedicts that should be ignored

required

Returns:

Type Description
list[dict]

list of locals and globals dicts that reference obj

Source code in hostess/profilers.py
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
def _get_referencing_scopedicts(
    obj: Any, existing_ids: Collection[int]
) -> list[dict]:
    """
    check for `obj` in the globals and locals dicts of all stack frames above
    the caller's. return all dicts in which obj has a name or names.

    membership is decided by object identity (`id`), not equality, and the
    returned list holds the scopedict objects themselves, not copies.
    `scopedicts()` is called with its default scopes, so each frame's
    builtins dict is examined as well.

    Args:
        obj: object to check for
        existing_ids: ids of scopedicts that should be ignored

    Returns:
        list of locals and globals dicts that reference obj
    """
    outscopes = []
    # if you do NOT slice the stack to -2, it will create a reference cycle
    # from the local namespace of the caller to itself, preventing it from ever
    # being garbage collected.
    for scopedict in chain(*[scopedicts(s.frame) for s in stack()[:-2]]):
        # skip scopedicts the caller has told us to ignore
        if id(scopedict) in existing_ids:
            continue
        # identity check: is any value in this scope the very same object?
        if id(obj) in map(id, scopedict.values()):
            outscopes.append(scopedict)
    return outscopes

_maybe_release_locals(localdict, frame)

Possibly purge a dictionary, depending on the name of frame's code.

Tedious description of technical rationale:

the locals dict of the top-level module frame is the same as its globals dict. retrieving it from a frame gives us the actual globals dict, not a copy of it. we do NOT want to casually delete all members of the top-level module while pretending to merely inspect it.

conversely, locals dicts retrieved from lower frames are only copies. modifying them will not affect the actual namespaces of those frames. HOWEVER, references to everything in those copies will hang around forever until that frame fully dies, which, in most programs, will badly confuse the Python garbage collector and cause horrible memory leaks. clearing the copies is the only reliable way to prevent that from happening.

Parameters:

Name Type Description Default
localdict MutableMapping

a dict that might be a copy of frame's locals, or might be an actual view into its locals

required
frame FrameType

frame localdict came from

required

Returns:

Type Description
bool

True if we cleared localdict, False if we didn't

Note

Always skipped in Python>=3.13, as the introduction of FrameLocalsProxy makes it unnecessary.

Source code in hostess/profilers.py
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
def _maybe_release_locals(localdict: MutableMapping, frame: FrameType) -> bool:
    """
    Possibly purge a dictionary, depending on the name of `frame`'s code.

    Tedious description of technical rationale:

    the `locals` dict of the top-level module frame is the same as its
    `globals` dict. retrieving it from a frame gives us the _actual_ `globals`
    `dict`, not a copy of it. we do NOT want to casually delete all members
    of the top-level module while pretending to merely inspect it.

    conversely, `locals` `dicts` retrieved from lower frames are only copies.
    modifying them will not affect the actual namespaces of those frames.
    HOWEVER, references to everything in those copies will hang around forever
    until _that_ frame fully dies, which, in most programs, will badly confuse
    the Python garbage collector and cause horrible memory leaks. clearing the
    copies is the only reliable way to prevent that from happening.

    Args:
        localdict: a dict that might be a copy of `frame`'s locals, or might
            be an actual view into its locals
        frame: frame `localdict` came from

    Returns:
        True if we cleared `localdict`,, False we didn't

    Note:
        Always skipped in Python>=3.13, as the introduction of FrameLocalsProxy
            makes it unnecessary.
    """
    if frame.f_code.co_name != "<module>":
        if hasattr(localdict, "clear"):
            localdict.clear()
            return True
    return False

_yclept_framerec(frame)

return terse information about a frame's name and contents. for yclept.

Source code in hostess/profilers.py
392
393
394
395
396
397
398
399
400
401
def _yclept_framerec(frame: FrameType):
    """
    return terse information about a frame's name and contents. for yclept.
    """
    return {
        "co_names": frame.f_code.co_names,
        "func": frame.f_code.co_name,
        "qual": frame.f_code.co_qualname,
        "varnames": frame.f_code.co_varnames,
    }

analyze_references(obj, method, *, filter_primitive=True, filter_history=True, filter_scopedict=True, filter_reflexive=True, exclude_ids=frozenset(), exclude_types=frozenset(), permit_ids=frozenset(), permit_types=frozenset(), globals_=None, return_objects=False)

analyze 'references' to or from obj. designed for, but not limited to, analyzing references tracked by the Python garbage collector.

careful use is required to avoid memory leaks

TAKE SPECIAL CARE WHEN DECORATING THIS FUNCTION OR CALLING IT FROM A LAMBDA FUNCTION OR GENERATOR EXPRESSION, NO MATTER HOW HARMLESS- LOOKING. These operations may add references that are difficult to recognize or interpret. Calls that do not add context are much safer.

Notes
  1. All 'exclude', 'permit', and 'filter' operations are implicitly connected by boolean AND. Represented as a predicate:

    (~PRIMITIVE(REF) | ~FILTER_PRIMITIVE)
    & (~HISTORY(REF) | ~FILTER_HISTORY)
    & (~SCOPEDICT(REF) | ~FILTER_SCOPEDICT)
    & ((ID(REF) != ID(OBJ)) | ~FILTER_REFLEXIVE)
    & ~(ID(REF) ∈ EXCLUDE_IDS)
    & (ID(REF) ∈ PERMIT_IDS | PERMIT_IDS = ∅)
    & ~(TYPE(REF) ∈ EXCLUDE_TYPES)
    & (TYPE(REF) ∈ PERMIT_TYPES | PERMIT_TYPES = ∅)
    
  2. References from obj to itself are never included. This may change in the future.

  3. This function is only completely compatible with CPython.

Parameters:

Name Type Description Default
obj Any

object of referential analysis

required
method Callable[[Any], Collection]

Function whose return values define 'references' of obj. gc.get_referents and gc.get_referrers are the intended and tested values.

required
filter_primitive bool

ignore 'primitive' (str, bool, &c) objects?

True
filter_history bool

attempt to ignore ipython 'history' objects?

True
filter_scopedict bool

ignore direct references to the locals, globals, and builtins dicts of all frames in stack (not the values of these dictionaries)?

True
filter_reflexive bool

ignore references from obj to itself?

True
exclude_ids Collection[int]

denylist of reference ids.

frozenset()
exclude_types Collection[type]

denylist of reference types.

frozenset()
permit_ids Collection[int]

allowlist of reference ids.

frozenset()
permit_types Collection[type]

allowlist of reference types.

frozenset()
return_objects bool

return objects in set of references, or only descriptions of those objects?

False
globals_ Optional[dict[str, Any]]

optional dictionary of globals to use in filtering. currently only used in history filtering. If this argument is None, history filtering uses the globals of the calling frame.

None
Source code in hostess/profilers.py
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
def analyze_references(
    obj: Any,
    method: Callable[[Any], Collection],
    *,
    filter_primitive: bool = True,
    filter_history: bool = True,
    filter_scopedict: bool = True,
    filter_reflexive: bool = True,
    exclude_ids: Collection[int] = frozenset(),
    exclude_types: Collection[type] = frozenset(),
    permit_ids: Collection[int] = frozenset(),
    permit_types: Collection[type] = frozenset(),
    globals_: Optional[dict[str, Any]] = None,
    return_objects: bool = False,
) -> Union[tuple[list[Refnom], list[Any]], list[Refnom]]:
    """
    analyze 'references' to or from obj. designed for, but not limited to,
    analyzing references tracked by the Python garbage collector.

    Danger: careful use is required to avoid memory leaks
        TAKE SPECIAL CARE WHEN DECORATING THIS FUNCTION OR CALLING IT FROM
        A LAMBDA FUNCTION OR GENERATOR EXPRESSION, NO MATTER HOW HARMLESS-
        LOOKING. These operations may add references that are difficult to
        recognize or interpret. Calls that do not add context are much safer.

    Notes:
        1. All 'exclude', 'permit', and 'filter' operations are implicitly
            connected by boolean AND. Represented as a predicate:

                (~PRIMITIVE(REF) | ~FILTER_PRIMITIVE)
                & (~HISTORY(REF) | ~FILTER_HISTORY)
                & (~SCOPEDICT(REF) | ~FILTER_SCOPEDICT)
                & ((ID(REF) != ID(OBJ)) | ~FILTER_REFLEXIVE)
                & ~(ID(REF) ∈ EXCLUDE_IDS)
                & (ID(REF) ∈ PERMIT_IDS | PERMIT_IDS = ∅)
                & ~(TYPE(REF) ∈ EXCLUDE_TYPES)
                & (TYPE(REF) ∈ PERMIT_TYPES | PERMIT_TYPES = ∅)

        2. References from obj to itself are never included. This may change
           in the future. (NOTE(review): this reads as conflicting with
           `filter_reflexive=False`, which removes obj's id from the
           exclusion set -- confirm which is intended.)
        3. This function is only completely compatible with CPython.


    Args:
        obj: object of referential analysis
        method: Function whose return values define 'references' of
            obj. gc.get_referents and gc.get_referrers are the intended and
            tested values.
        filter_primitive: ignore 'primitive' (str, bool, &c) objects?
        filter_history: attempt to ignore ipython 'history' objects?
        filter_scopedict: ignore _direct_ references to the locals, globals,
            and builtins dicts of all frames in stack (_not_ the values of
            these dictionaries)?
        filter_reflexive: ignore references from obj to itself?
        exclude_ids: denylist of reference ids.
        exclude_types: denylist of reference types.
        permit_ids: allowlist of reference ids.
        permit_types: allowlist of reference types.
        return_objects: return objects in set of references, or only
            descriptions of those objects?
        globals_: optional dictionary of globals to use in filtering.
            currently only used in history filtering. If this argument is
            None, history filtering uses the globals of the calling frame.

    Returns:
        if `return_objects` is False, the list of descriptions produced by
            `_filter_ids()` for the references that survived filtering; if
            it is True, a tuple of that list and the list of surviving
            reference objects.
    """
    refs = list(method(obj))
    objid = id(obj)
    # work on private, mutable copies of the caller's type collections
    exclude_types, permit_types = set(exclude_types), set(permit_types)
    del obj  # explicitly releasing obj clears reference from frame faster
    if filter_primitive is True:
        exclude_types.update(LITERAL_TYPES)
    # ensure we can always clear copies of locals dicts we might have received
    # from the method(obj) call
    sids, lids = scopedict_ids(getstack=True, distinguish_locals=True)
    # type exclusions are easier to perform here. id exclusions need to come
    # at the end of the function in order to filter the objects in this
    # namespace.
    if len(exclude_types) + len(permit_types) > 0:
        refs = _filter_types(refs, permit_types, exclude_types, lids)
    if filter_history is True:
        refs = _filter_history(refs, globals_, lids)
    # private, mutable copy of the caller's id denylist
    exclude_ids = set(exclude_ids)
    if filter_scopedict is True:
        exclude_ids.update(sids)
    # the frame of this function, along with all objects in its namespace, are
    # always excluded from analysis
    exclude_ids.update(namespace_ids(include_frame_ids=True))
    # do this via the negative in case a copy of obj was hanging around here
    # somehow
    if filter_reflexive is False:
        exclude_ids.difference_update({objid})
    # TODO, maybe -- consider also allowing arguments to this function to be
    #  included in analysis
    refnoms, outrefs = _filter_ids(refs, permit_ids, exclude_ids, lids)
    if return_objects is True:
        return refnoms, outrefs
    return refnoms

def_lineno(obj)

Returns the line number where the object was defined, if available.

Source code in hostess/profilers.py
326
327
328
329
330
331
def def_lineno(obj) -> Optional[int]:
    """
    Returns the line number where the object was defined, if available.

    Returns None when `obj` is not a kind of object that has source code
    (`getsourcelines` raises TypeError) or when it is, but its source cannot
    be retrieved (`getsourcelines` raises OSError) -- for example, code
    created with `exec`.
    """
    try:
        return getsourcelines(obj)[1]
    except (TypeError, OSError):
        return None

describe_frame_contents(frame=None)

describe the contents of a frame

Source code in hostess/profilers.py
373
374
375
376
377
378
379
380
381
382
383
384
def describe_frame_contents(frame=None):
    """
    describe the contents of a frame (by default, the caller's frame): its
    code's filename and name, its current line number, and `identify()`
    applied to each of its local variables.
    """
    if frame is None:
        frame = currentframe().f_back
    try:
        description = {
            "filename": frame.f_code.co_filename,
            "lineno": frame.f_lineno,
            "name": frame.f_code.co_name,
            "locals": valmap(identify, frame.f_locals),
        }
        return description
    finally:
        # whether or not the description succeeded, let go of the locals we
        # fetched (a no-op for module-level frames)
        _maybe_release_locals(frame.f_locals, frame)

describe_stack_contents()

describe the contents of the stack

Source code in hostess/profilers.py
387
388
389
def describe_stack_contents():
    """
    describe the contents of the stack: a tuple holding the result of
    `describe_frame_contents()` for each entry `stack()` returns, in order.
    """
    frames = [frameinfo[0] for frameinfo in stack()]
    return tuple(map(describe_frame_contents, frames))

di(obj_id)

backwards id. Use with care! Can segfault.

Parameters:

Name Type Description Default
obj_id int

id of desired object, as returned by id(obj).

required

Returns:

Type Description
Any

Object corresponding to obj_id.

Source code in hostess/profilers.py
725
726
727
728
729
730
731
732
733
734
735
def di(obj_id: int) -> Any:
    """
    inverse of `id`: look an object up by its id. Use with care! Can
    segfault.

    Args:
        obj_id: id of desired object, as returned by `id(obj)`.

    Returns:
        Object corresponding to `obj_id`.
    """
    resolved = _ctypes.PyObj_FromPtr(obj_id)
    return resolved

history_filter(glb)

generate a predicate function that attempts to filter jupyter/ipython history-related objects in the context of a particular global namespace.

Parameters:

Name Type Description Default
glb dict[str, Any]

relevant globals dict

required

Returns:

Type Description
Callable[[Any], bool]

function that returns False if its single argument looks like it's an ipython/jupyter history-related object or internal, and True if not.

Source code in hostess/profilers.py
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
def history_filter(glb: dict[str, Any]) -> Callable[[Any], bool]:
    """
    build a predicate that tries to recognize jupyter/ipython history-related
    objects, judged against one particular global namespace.

    Args:
        glb: relevant globals dict

    Returns:
        function that returns False if its single argument looks like it's an
            ipython/jupyter history-related object or internal, and True if
            not.
    """

    def filterref(item):
        # shell internals are recognized by class name alone
        if item.__class__.__name__ in ("ZMQShellDisplayHook", "ExecutionResult"):
            return False
        # find the first global name bound to this exact object, if any
        for bound_name, value in glb.items():
            if value is item:
                break
        else:
            # not bound to any global name, so nothing marks it as history
            return True
        # names beginning with underscore(s), e.g. _, __, _i, _ii, _1, _i1
        if re.match(r"_+(i{1,3})?\d?", bound_name) is not None:
            return False
        return bound_name not in ("In", "Out", "_ih", "_oh", "_dh")

    return filterref

identify(obj, maxlen=25, getsize=False)

identify an object.

Parameters:

Name Type Description Default
obj Any

object to identify

required
maxlen int

maximum length of string representation of obj in return

25
getsize bool

if True, attempt to determine the size of obj. may be slow or unreliable.

False

Returns:

Type Description
IdentifyResult

dict giving id, type, string representation (possibly truncated), size in MB (if requested), and, if available, name, qualname, and module, and line number.

Source code in hostess/profilers.py
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
def identify(
    obj: Any, maxlen: int = 25, getsize: bool = False
) -> IdentifyResult:
    """
    build a short description of an object.

    Args:
        obj: object to identify
        maxlen: maximum length of string representation of `obj` in return
        getsize: if True, attempt to determine the size of `obj`. may be slow
            or unreliable.

    Returns:
        dict giving id ("id"), type ("type"), line number as reported by
            `def_lineno()` ("line"), string representation truncated to
            `maxlen` characters ("r"), size ("size", only if requested),
            and whichever of __name__, __qualname__, and __module__ the
            object has.
    """
    identifiers = dict(
        id=id(obj),
        type=type(obj),
        line=def_lineno(obj),
        r=str(obj)[:maxlen],
    )
    if getsize is True:
        identifiers["size"] = mb(asizeof(obj), 2)
    # copy over only the naming attributes the object actually has
    identifiers.update(
        {
            attr: getattr(obj, attr)
            for attr in ("__name__", "__qualname__", "__module__")
            if hasattr(obj, attr)
        }
    )
    return identifiers

lineno()

Returns the current line number in our program.

Source code in hostess/profilers.py
321
322
323
def lineno() -> int:
    """Return the number of the line this function was called from."""
    caller = currentframe().f_back
    return caller.f_lineno

namespace_ids(frames=None, include_frame_ids=False)

find ids of all top-level objects in the combined namespace(s) of a frame or frames.

Source code in hostess/profilers.py
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
def namespace_ids(
    frames: Union[FrameType, Collection[FrameType], None] = None,
    include_frame_ids=False,
) -> set[int]:
    """
    find ids of all top-level objects in the combined namespace(s) of
    a frame or frames.

    Args:
        frames: a single frame, a collection of frames, or None. if None,
            use the caller's frame.
        include_frame_ids: if True, also include the ids of the frame
            objects themselves.

    Returns:
        set of ids of every value in the locals, globals, and builtins of
            each frame (plus the frames' own ids, if requested).
    """
    from types import FrameType

    if frames is None:
        frames = [currentframe().f_back]
    elif isinstance(frames, FrameType):
        # a bare frame is not iterable; wrap it so the loop below works
        frames = [frames]
    ids = set()
    for frame in frames:
        localdict, globaldict, builtindict = scopedicts(frame)
        ids.update(chain(*map(val_ids, (localdict, globaldict, builtindict))))
        # don't leave a copy of the frame's locals hanging around
        _maybe_release_locals(localdict, frame)
    if include_frame_ids is True:
        ids.update(map(id, frames))
    return ids

refcounts(getstack=False, stepback=1)

get refcounts of named variables in one or all frames in the stack.

Parameters:

Name Type Description Default
getstack bool

check all frames in the stack above the starting frame?

False
stepback int

number of frames to step back from the frame of this function before counting

1

Returns:

Type Description
list[dict[str, Union[str, dict[str, int]]]]

A list of dicts, one for each counted frame (always length 1 if getstack is False). Keys are 'name' (co_name of frame code) and 'counts' (dict whose keys are variable names and values are refcounts of those variables).

Source code in hostess/profilers.py
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
def refcounts(
    getstack: bool = False, stepback: int = 1
) -> list[dict[str, Union[str, dict[str, int]]]]:
    """
    get refcounts of named variables in one or all frames in the stack.

    Args:
        getstack: check all frames in the stack above the starting frame?
        stepback: number of frames to step back from the frame of this
            function before counting

    Returns:
        A list of dicts, one for each counted frame (always length 1 if
            `getstack` is `False`). Keys are 'name' (co_name of frame code)
            and 'counts' (the result of `val_refs()` on the frame's locals:
            a dict whose keys are variable names and values are refcounts
            of those variables).
    """
    if getstack is True:
        # always ignore current frame
        frames = [s.frame for s in stack()[stepback:]]
    else:
        # walk back `stepback` frames, starting from this function's own frame
        frame = currentframe()
        for i in range(stepback):
            frame = frame.f_back
        frames = [frame]
    framerefs = []
    from types import FrameType

    frame: FrameType
    for i, frame in enumerate(frames):
        # TODO: optional globals; need to handle name collisions somehow
        localdict, _, _ = scopedicts(frame)
        counts = val_refs(localdict)
        # don't leave a copy of the frame's locals hanging around
        _maybe_release_locals(localdict, frame)
        framerefs.append({"name": frame.f_code.co_name, "counts": counts})
        del frame  # paranoia
    del frames  # paranoia
    return framerefs

scopedict_ids(frames=None, *, getstack=False, scopenames=('locals', 'globals', 'builtins'), distinguish_locals=True)

return ids of all 'scopedicts' (locals, globals, builtins) in frames (by default, just the caller's frame.) uses include: distinguishing references held by namespaces from references held by other objects; avoiding accidental 'direct' manipulation of namespaces.

Parameters:

Name Type Description Default
frames Union[FrameType, Collection[FrameType], None]

a single frame, a collection of frames, or None. if None, get ids of scopedicts of the caller's frame.

None
getstack bool

if True, ignore the frames argument and instead look at all levels of the stack above the caller's frame.

False
scopenames Sequence[ScopeName]

names of scopes to fetch.

('locals', 'globals', 'builtins')
distinguish_locals bool

if True, return a tuple whose elements are: [0] all ids [1] just local-scope ids below top level

True
Source code in hostess/profilers.py
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
def scopedict_ids(
    frames: Union[FrameType, Collection[FrameType], None] = None,
    *,
    getstack: bool = False,
    scopenames: Sequence[ScopeName] = ("locals", "globals", "builtins"),
    distinguish_locals: bool = True,
):
    """
    return ids of all 'scopedicts' (locals, globals, builtins) in frames (by
    default, just the caller's frame.) uses include: distinguishing
    references held by namespaces from references held by other objects;
    avoiding accidental 'direct' manipulation of namespaces.

    Args:
        frames: a single frame, a collection of frames, or None. if None,
            get ids of scopedicts of the caller's frame.
        getstack: if True, ignore the frames argument and instead look at
            the frames of the current call stack (every entry `stack()`
            returns except the last one).
        scopenames: names of scopes to fetch.
        distinguish_locals: if True, return a tuple whose elements are:
            [0] all ids
            [1] just local-scope ids below top level

    Returns:
        a set of ids, or, if `distinguish_locals` is True, a tuple of two
            sets of ids as described above.
    """
    from types import FrameType

    if getstack is True:
        frames = [s.frame for s in stack()[:-1]]
    elif frames is None:
        frames = [currentframe().f_back]
    elif isinstance(frames, FrameType):
        # a bare frame is not iterable; wrap it so the loop below works
        frames = [frames]
    ids, lids = set(), set()
    for frame in frames:
        _add_scopedict_ids(frame, ids, lids, scopenames)
    if distinguish_locals is True:
        return ids, lids
    return ids

scopedicts(frame, scopes=('locals', 'globals', 'builtins'))

fetch specified scopes from a frame and return them in a tuple. the elements of the tuple should be equivalent to the results of calling, e.g., locals() within the passed frame.

WARNING: caller is responsible for clearing references to locals, etc. calling this function with no cleanup deep in a call stack may lead to undesired dangling references.

Parameters:

Name Type Description Default
frame FrameType

frame (as generated by, e.g., inspect.currentframe()) from which to fetch scopes.

required
scopes Sequence[ScopeName]

names of scopes to fetch from frame.

('locals', 'globals', 'builtins')

Returns:

Type Description
tuple[dict, ...]

tuple of dictionaries representing the specified scopes of frame; their keys are variable names and their values are the associated objects.

Source code in hostess/profilers.py
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
def scopedicts(
    frame: FrameType,
    scopes: Sequence[ScopeName] = ("locals", "globals", "builtins"),
) -> tuple[dict, ...]:
    """
    look up the requested scope dictionaries of a frame and hand them back
    as a tuple, in the order given by `scopes`. each element should be
    equivalent to what the matching builtin (e.g. locals()) would return if
    called inside that frame.

    WARNING: the returned dictionaries hold references to the frame's
    objects. the caller is responsible for dropping them; calling this deep
    in a call stack without cleanup may leave undesired dangling references.

    Args:
        frame: frame (as generated by, e.g., inspect.currentframe()) from
            which to fetch scopes.
        scopes: names of scopes to fetch from frame.

    Returns:
        tuple of dictionaries representing the specified scopes of frame;
            their keys are variable names and their values are the associated
            objects.
    """
    # each scope name maps directly onto the frame attribute `f_<name>`
    return tuple(getattr(frame, f"f_{name}") for name in scopes)

val_ids(mapping)

get ids of all values in mapping.

Parameters:

Name Type Description Default
mapping Mapping[Hashable, Any]

mapping whose values are the objects to id.

required

Returns:

Type Description
set[int]

set of ids of all objects in mapping's values.

Source code in hostess/profilers.py
200
201
202
203
204
205
206
207
208
209
210
def val_ids(mapping: Mapping[Hashable, Any]) -> set[int]:
    """
    collect the id() of every value stored in mapping.

    Args:
        mapping: mapping whose values are the objects to id.

    Returns:
        set of ids of all objects in mapping's values. values that are the
            same object contribute a single id.
    """
    return {id(value) for value in mapping.values()}

val_refs(refmap)

produce dict containing refcounts of all values in refmap.

Parameters:

Name Type Description Default
refmap Mapping[str, Any]

dict to count.

required

Returns:

Type Description
dict[str, int]

dict whose keys are the same as refmap's and whose values are (sys.getrefcount(v) - 1) for the associated values of refmap.

Source code in hostess/profilers.py
738
739
740
741
742
743
744
745
746
747
748
749
750
def val_refs(refmap: Mapping[str, Any]) -> dict[str, int]:
    """
    produce dict containing refcounts of all values in refmap.

    Args:
        refmap: dict to count.

    Returns:
        `dict` whose keys are the same as `refmap`'s and whose
            values are (sys.getrefcount(v) - 1) for the associated
            values of `refmap`.
    """
    # NOTE(review): the -1 presumably discounts the temporary reference
    # created by passing v to sys.getrefcount(); the loop variable itself
    # also holds a reference while the count is taken -- confirm the offset.
    return {k: sys.getrefcount(v) - 1 for k, v in refmap.items()}

yclept(obj, terse=True, stepback=1)

Find basic identifiers for obj, along with any names for obj in all frames in stack, starting stepback frames back from the frame of this function.

Parameters:

Name Type Description Default
obj Any

object to name

required
terse bool

exclude extended information from output?

True
stepback int

how many frames to step back (from the frame of this function) before looking for obj?

1
Source code in hostess/profilers.py
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
def yclept(obj: Any, terse: bool = True, stepback: int = 1) -> Refnom:
    """
    Find basic identifiers for obj, along with any names for obj in all frames
    in stack, starting stepback frames back from the frame of this function.

    Args:
        obj: object to name
        terse: exclude extended information from output? if True, each frame
            record keeps only its "func", "qual", "names", and "scopes" keys.
        stepback: how many frames to step back (from the frame of this
            function) before looking for obj?

    Returns:
        a 2-tuple whose elements are:
            [0] the result of identify(obj, maxlen=55, getsize=False)
            [1] a list of frame records, one for each visited frame whose
                "names" entry ended up non-empty; frames without any name
                for obj are omitted. "names" and "scopes" are tuples in the
                returned records.
    """
    nytta = []
    frame = currentframe()
    # NOTE(review): no guard against stepback exceeding the stack depth;
    # frame would become None and the next .f_back access would fail.
    for _ in range(stepback):
        frame = frame.f_back
    # walk outward from the starting frame to the bottom of the stack
    while frame is not None:
        rec = _yclept_framerec(frame) | {"names": [], "scopes": []}
        if terse is True:
            rec = keyfilter(
                lambda k: k in ("func", "qual", "names", "scopes"), rec
            )
        localdict, globaldict, builtindict = scopedicts(frame)
        _add_varnames(obj, globaldict, rec, "globals")
        _add_varnames(obj, builtindict, rec, "builtins")

        if frame.f_code.co_name != "<module>":
            # don't bother adding redundant local varnames at top level
            _add_varnames(obj, localdict, rec, "locals")
            if hasattr(localdict, "clear"):
                localdict.clear()  # see _maybe_release_locals
        # drop our references to the scope dicts before moving on
        del globaldict, localdict, builtindict
        if len(rec["names"]) > 0:
            # freeze the collected names/scopes and keep only useful records
            rec["names"] = tuple(rec["names"])
            rec["scopes"] = tuple(rec["scopes"])
            nytta.append(rec)
        frame = frame.f_back
    res = identify(obj, maxlen=55, getsize=False), nytta
    del obj  # explicitly releasing obj clears ref faster
    return res

serverpool

ServerPool

Abstraction for a pool of asynchronous workers with hostess-compatible interfaces. Intended primarily for distributing tasks across remote hosts. Many alternatives are more appropriate for local tasks, such as the Python Standard Library's concurrent.futures.ThreadPoolExecutor and multiprocessing.Pool.

Source code in hostess/serverpool.py
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
class ServerPool:
    """
    Abstraction for a pool of asynchronous workers with hostess-compatible
    interfaces. Intended primarily for distributing tasks across remote hosts.
    Many alternatives are more appropriate for local tasks, such as the Python
    Standard Library's `concurrent.futures.ThreadPoolExecutor` and
    `multiprocessing.Pool`.
    """

    def __init__(
        self,
        hosts: Sequence[Union["Instance", RunCommand]],
        max_concurrent: int = 1,
        poll: float = 0.03,
        task_delay: float | None = None
    ):
        """
        Args:
            hosts: Objects that each have at least one method that returns a
                `Viewer`, most likely `hostess.aws.ec2.Instance` or
                `hostess.ssh.SSH` objects. They must all share one of the
                attributes `instance_id`, `ip`, or `host` (checked in that
                order); its value is used as the host's id.
            max_concurrent: Maximum number of tasks a single host may run
                concurrently. The maximum number of threads spawned by this
                object is thus `max_concurrent * len(hosts)`, +1 for its
                polling thread.
            poll: Polling rate, in seconds, for checking pending/running tasks.
            task_delay: If not None, a host that still has tasks tracked in
                `self.taskmap` is only offered a new task once more than this
                many seconds have passed since the most recent of them
                started.

        Raises:
            TypeError: if `hosts` do not all share an identifying attribute.
        """
        idattr = None
        for identifier in ("instance_id", "ip", "host"):
            if all(hasattr(h, identifier) for h in hosts):
                idattr = identifier
                break
        if idattr is None:
            raise TypeError("These do not appear to be appropriate hosts.")
        self.max_concurrent = max_concurrent
        self.hosts = {getattr(h, idattr): h for h in hosts}
        self.taskmap = {getattr(h, idattr): {} for h in hosts}
        self.idattr = idattr
        self.pending, self.completed = {}, {}
        self.completed_queue = Queue()
        self.closed, self.terminated = False, False
        self.pollthread, self.exc = None, ThreadPoolExecutor(1)
        self.task_ix, self.poll = 0, poll
        self.used = set()
        # each criterion is a zero-argument callable returning the set of
        # host ids that currently satisfy it
        self.assignment_criteria = [self._meets_max_concurrent]
        self.task_delay = task_delay
        if self.task_delay is not None:
            self.assignment_criteria.append(self._meets_task_delay)

    def _meets_max_concurrent(self):
        """
        Returns:
            `set` of ids of hosts tracking fewer than `self.max_concurrent`
                tasks.
        """
        return {
            i for i, t in self.taskmap.items() if len(t) < self.max_concurrent
        }

    def _meets_task_delay(self):
        """
        Returns:
            `set` of ids of hosts that either track no tasks or whose most
                recently started tracked task began more than
                `self.task_delay` seconds ago.
        """
        now, met = time.time(), set()
        for i, tasks in self.taskmap.items():
            if len(tasks) == 0:
                met.add(i)
            elif min(
                now - t.start_timestamp for t in tasks.values()
            ) > self.task_delay:
                met.add(i)
        return met

    def _rectify_call(self, kwargs):
        """
        helper function for `self.apply()`. don't allow new tasks when closed;
        don't let callers forbid `Viewers` or disown processes. Modifies
        `kwargs` in place.

        Raises:
            ValueError: if pool is closed.
        """
        if self.closed is True:
            raise ValueError("pool closed")
        if "_viewer" in kwargs and kwargs["_viewer"] is not True:
            kwargs.pop("_viewer")
        kwargs.pop('_disown', None)

    @property
    def available(self) -> dict[Hashable, Union["Instance", RunCommand]]:
        """
        Available hosts.

        Returns:
            `dict` of {host id: host} containing only non-busy hosts, i.e.
                hosts that satisfy every entry of `self.assignment_criteria`.
        """
        available = self.hosts
        for criterion in self.assignment_criteria:
            # evaluate each criterion once, not once per candidate host
            eligible = criterion()
            available = {i: h for i, h in available.items() if i in eligible}
            if len(available) == 0:
                break
        return available

    @property
    def next_available(
        self
    ) -> Optional[tuple[Hashable, Union["Instance", RunCommand]]]:
        """
        First available host, if any, preferring hosts that have not recently
        been assigned a task. Note that reading this property may update
        `self.used`, the record of recently-offered hosts.

        Returns:
            `tuple` of (host id, host), if one is available; None otherwise.
        """
        if len((ready := self.available)) == 0:
            return None
        if len(ready) == 1:
            return list(ready.items())[0]
        # once every host has had a turn, start a new round
        if len(self.used) == len(self.taskmap):
            self.used = set()
        options = list(ready.items())
        filtered = [o for o in options if o[0] not in self.used]
        if len(filtered) == 0:
            return options[0]
        self.used.add(filtered[0][0])
        return filtered[0]

    @property
    def running(self) -> tuple["ServerTask", ...]:
        """
        Returns:
            All currently-running tasks.
        """
        tasks = [
            [t for t in v.values() if t.running] for v in self.taskmap.values()
        ]
        # noinspection PyTypeChecker
        return tuple(chain(*tasks))

    def __poll_loop(self):
        """Process poll loop. Should only be called by `self.__start()`."""
        while True:
            rcount = 0
            for tasks in self.taskmap.values():
                # iterate over a snapshot: finished tasks are popped below
                for tix, task in tuple(tasks.items()):
                    if task.done:
                        self.completed[tix] = tasks.pop(tix).get()
                        self.completed_queue.put(self.completed[tix])
                    elif self.terminated is True:
                        task.kill()
                    else:
                        rcount += 1
            # note that terminate() immediately sets pending to {}
            for tix in tuple(self.pending.keys()):
                if (id_host := self.next_available) is None:
                    continue
                # NOTE(review): ServerTask runs immediately and re-raises
                # failures; an exception here would end this loop without
                # resetting self.pollthread, which join() waits on.
                self.taskmap[id_host[0]][
                    tix
                ] = ServerTask(id_host[1], *self.pending.pop(tix))
            if self.terminated is True or (
                (rcount + len(self.pending) == 0) and self.closed is True
            ):
                self.pollthread = None
                return
            time.sleep(self.poll)

    def __start(self):
        """
        Launch process polling loop, if necessary. Should only be called by
        `self.apply()`.
        """
        if self.pollthread is None:
            self.pollthread = self.exc.submit(self.__poll_loop)

    def apply(
        self,
        method: str,
        args: Sequence = (),
        kwargs: Mapping = None,
    ):
        """
        Submit a task to the host pool. Execute it immediately if a host is
        available; otherwise queue it for execution. Unlike some task pool
        methods of this type, `ServerPool.apply()` does not return a
        futurelike object, but instead relies on `ServerPool's` automated
        lifecycle management. `ServerPool` moves unexecuted tasks to
        `ServerPool.pending` as `tuples`; executed tasks to
        `ServerPool.taskmap` as futurelike `ServerTask` objects, and completed
        tasks to `ServerPool.completed` as `Viewers`.

        Args:
            method: Name of method of host to call with `args` and `kwargs`.
                This method must return a `Viewer`.
            args: Args to pass to the named method.
            kwargs: kwargs to pass to the named method. The mapping is
                copied, so the caller's object is never modified.

        Raises:
            ValueError: if the pool is closed.
        """
        # work on a copy: _rectify_call() strips keys in place, and a queued
        # task must not change if the caller later reuses their mapping
        kwargs = {} if kwargs is None else dict(kwargs)
        self._rectify_call(kwargs)
        self.__start()
        if (id_host := self.next_available) is None:
            self.pending[self.task_ix] = (method, args, kwargs)
        else:
            self.taskmap[id_host[0]][
                self.task_ix
            ] = ServerTask(id_host[1], method, args, kwargs)
        self.task_ix += 1

    def __str__(self):
        n_running = len(self.running)
        if self.terminated:
            infix = " (terminated) "
        elif self.closed:
            infix = " (closed) "
        else:
            infix = ""
        return (
            f"ServerPool{infix}: {len(self.taskmap)} hosts, {n_running} "
            f"running, {len(self.pending)} pending, {len(self.completed)} "
            f"completed"
        )

    def __repr__(self):
        return self.__str__()

    def close(self):
        """Close the pool, preventing submission of new tasks."""
        self.closed = True

    def join(self):
        """
        Block until all pending and running tasks are complete. The polling
        loop only stops once the pool is closed or terminated, so call
        `close()` first.
        """
        while self.pollthread is not None:
            time.sleep(self.poll)

    def terminate(self):
        """
        Terminate the `ServerPool`. This will cancel all pending tasks, kill
        all running tasks, and prevent submission of new tasks.
        """
        self.pending = {}
        self.closed, self.terminated = True, True
        self.exc.shutdown()

    def gather(self) -> list[Viewer]:
        """
        Block until all pending and running tasks are complete, then terminate
        self, then return the results of all completed tasks in a list. Useful
        for 'under-the-hood' uses of `ServerPool`.

        Returns:
            A `list` of `Viewers` for completed tasks, in order of submission.
        """
        self.join()
        output = [self.completed[i] for i in sorted(self.completed.keys())]
        self.terminate()
        return output

    def __del__(self):
        self.close()
        # __init__ may have raised before the executor was created
        exc = getattr(self, "exc", None)
        if exc is not None:
            exc.shutdown()

available property

Available hosts.

Returns:

Type Description
dict[Hashable, Union[Instance, RunCommand]]

dict of {host id: host} containing only non-busy hosts.

next_available property

First available host, if any, preferring hosts that have not recently been assigned a task.

Returns:

Type Description
Optional[tuple[Hashable, Union[Instance, RunCommand]]]

tuple of (host id, host), if one is available; None otherwise.

running property

Returns:

Type Description
tuple[Viewer]

All currently-running tasks.

__init__(hosts, max_concurrent=1, poll=0.03, task_delay=None)

Parameters:

Name Type Description Default
hosts Sequence[Union[Instance, RunCommand]]

An object that has at least one method that returns a Viewer, most likely a hostess.aws.ec2.Instance or hostess.ssh.SSH.

required
max_concurrent int

Maximum number of tasks a single host may run concurrently. The maximum number of threads spawned by this object is thus max_concurrent * len(hosts), +1 for its polling thread.

1
poll float

Polling rate, in seconds, for checking pending/running tasks.

0.03
Source code in hostess/serverpool.py
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
def __init__(
    self,
    hosts: Sequence[Union["Instance", RunCommand]],
    max_concurrent: int = 1,
    poll: float = 0.03,
    task_delay: float | None = None
):
    """
    Args:
        hosts: Objects that each have at least one method that returns a
            `Viewer`, most likely `hostess.aws.ec2.Instance` or
            `hostess.ssh.SSH` objects. They must all share one of the
            attributes `instance_id`, `ip`, or `host` (checked in that
            order); its value is used as the host's id.
        max_concurrent: Maximum number of tasks a single host may run
            concurrently. The maximum number of threads spawned by this
            object is thus `max_concurrent * len(hosts)`, +1 for its
            polling thread.
        poll: Polling rate, in seconds, for checking pending/running tasks.
        task_delay: If not None, `self._meets_task_delay` is added to the
            criteria a host must satisfy before it is given a new task.

    Raises:
        TypeError: if `hosts` do not all share an identifying attribute.
    """
    # pick the first identifying attribute that every host has
    idattr = next(
        (
            name
            for name in ("instance_id", "ip", "host")
            if all(hasattr(h, name) for h in hosts)
        ),
        None,
    )
    if idattr is None:
        raise TypeError("These do not appear to be appropriate hosts.")
    self.max_concurrent = max_concurrent
    self.hosts = {getattr(h, idattr): h for h in hosts}
    self.taskmap = {getattr(h, idattr): {} for h in hosts}
    self.idattr = idattr
    # task bookkeeping
    self.pending = {}
    self.completed = {}
    self.completed_queue = Queue()
    # lifecycle flags
    self.closed = False
    self.terminated = False
    # polling machinery; the loop is only launched on demand
    self.pollthread = None
    self.exc = ThreadPoolExecutor(1)
    self.task_ix = 0
    self.poll = poll
    self.used = set()
    criteria = [self._meets_max_concurrent]
    self.assignment_criteria = criteria
    self.task_delay = task_delay
    if task_delay is not None:
        criteria.append(self._meets_task_delay)

__poll_loop()

Process poll loop. Should only be called by self.__start().

Source code in hostess/serverpool.py
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
def __poll_loop(self):
    """
    Process poll loop. Should only be called by `self.__start()`.

    Each pass collects finished tasks into `self.completed` and
    `self.completed_queue`, kills unfinished tasks if the pool has been
    terminated, hands pending tasks to available hosts, and then sleeps for
    `self.poll` seconds. The loop ends, resetting `self.pollthread` to None,
    once the pool is terminated, or once it is closed and no running or
    pending tasks remain.
    """
    while True:
        # number of tracked tasks that are neither done nor just killed
        rcount = 0
        for iid, tasks in self.taskmap.items():
            # iterate over a snapshot: finished tasks are popped below
            for tix, task in tuple(tasks.items()):
                if task.done:
                    self.completed[tix] = tasks.pop(tix).get()
                    self.completed_queue.put(self.completed[tix])
                elif self.terminated is True:
                    task.kill()
                else:
                    rcount += 1
        # note that terminate() immediately sets pending to {}
        for tix in tuple(self.pending.keys()):
            if (id_host := self.next_available) is None:
                continue
            # NOTE(review): ServerTask runs immediately and re-raises
            # failures; an exception here would end this loop without
            # resetting self.pollthread, which join() waits on.
            self.taskmap[id_host[0]][
                tix
            ] = ServerTask(id_host[1], *self.pending.pop(tix))
        if self.terminated is True or (
            (rcount + len(self.pending) == 0) and self.closed is True
        ):
            # signal join() that polling has stopped
            self.pollthread = None
            return
        time.sleep(self.poll)

__start()

Launch process polling loop, if necessary. Should only be called by self.apply().

Source code in hostess/serverpool.py
272
273
274
275
276
277
278
def __start(self):
    """
    Make sure the polling loop is running: if no loop is currently recorded
    in `self.pollthread`, submit one to `self.exc` and record the result.
    Should only be called by `self.apply()`.
    """
    if self.pollthread is not None:
        # a polling loop has already been launched
        return
    self.pollthread = self.exc.submit(self.__poll_loop)

_rectify_call(kwargs)

helper function for self.apply(). don't allow new tasks when closed; don't let callers forbid Viewers or disown processes.

Raises:

Type Description
ValueError

if pool is closed.

Source code in hostess/serverpool.py
179
180
181
182
183
184
185
186
187
188
189
190
191
def _rectify_call(self, kwargs):
    """
    helper function for `self.apply()`. refuses new tasks once the pool is
    closed, and edits `kwargs` in place so that callers can neither forbid
    `Viewers` (any `_viewer` value other than True is removed) nor disown
    processes (`_disown` is always removed).

    Raises:
        ValueError: if pool is closed.
    """
    if self.closed is True:
        raise ValueError("pool closed")
    # an absent `_viewer` is fine as-is; only a non-True value is stripped
    if kwargs.get("_viewer", True) is not True:
        del kwargs["_viewer"]
    if "_disown" in kwargs:
        del kwargs["_disown"]

apply(method, args=(), kwargs=None)

Submit a task to the host pool. Execute it immediately if a host is available; otherwise queue it for execution. Unlike some task pool methods of this type, ServerPool.apply() does not return a futurelike object, but instead relies on ServerPool's automated lifecycle management. ServerPool moves unexecuted tasks to ServerPool.pending as tuples; executed tasks to ServerPool.taskmap as futurelike ServerTask objects, and completed tasks to ServerPool.completed as Viewers.

Parameters:

Name Type Description Default
method str

Name of method of host to call with args and kwargs. This method must return a Viewer.

required
args Sequence

Args to pass to the named method.

()
kwargs Mapping

kwargs to pass to the named method.

None
Source code in hostess/serverpool.py
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
def apply(
    self,
    method: str,
    args: Sequence = (),
    kwargs: Mapping = None,
):
    """
    Submit a task to the host pool. Execute it immediately if a host is
    available; otherwise queue it for execution. Unlike some task pool
    methods of this type, `ServerPool.apply()` does not return a
    futurelike object, but instead relies on `ServerPool's` automated
    lifecycle management. `ServerPool` moves unexecuted tasks to
    `ServerPool.pending` as `tuples`; executed tasks to
    `ServerPool.taskmap` as futurelike `ServerTask` objects, and completed
    tasks to `ServerPool.completed` as `Viewers`.

    Args:
        method: Name of method of host to call with `args` and `kwargs`.
            This method must return a `Viewer`.
        args: Args to pass to the named method.
        kwargs: kwargs to pass to the named method. The mapping is copied,
            so the caller's object is never modified.

    Raises:
        ValueError: if the pool is closed.
    """
    # work on a copy: _rectify_call() strips keys in place, and a queued
    # task must not change if the caller later reuses their mapping
    kwargs = {} if kwargs is None else dict(kwargs)
    self._rectify_call(kwargs)
    self.__start()
    if (id_host := self.next_available) is None:
        # no host is free: queue the call for the polling loop
        self.pending[self.task_ix] = (method, args, kwargs)
    else:
        self.taskmap[id_host[0]][
            self.task_ix
        ] = ServerTask(id_host[1], method, args, kwargs)
    self.task_ix += 1

close()

Close the pool, preventing submission of new tasks.

Source code in hostess/serverpool.py
330
331
332
def close(self):
    """
    Close the pool, preventing submission of new tasks.

    Only sets the `closed` flag; tasks that are already pending or running
    are left alone.
    """
    self.closed = True

gather()

Block until all pending and running tasks are complete, then terminate self, then return the results of all completed tasks in a list. Useful for 'under-the-hood' uses of ServerPool.

Returns:

Type Description
list[Viewer]

A list of Viewers for completed tasks, in order of submission.

Source code in hostess/serverpool.py
348
349
350
351
352
353
354
355
356
357
358
359
360
def gather(self) -> list[Viewer]:
    """
    Wait (via `self.join()`) for all pending and running tasks, collect the
    results of every completed task, terminate the pool, and return the
    results. Useful for 'under-the-hood' uses of `ServerPool`.

    Note that `join()` only returns once the polling loop has stopped, which
    requires the pool to be closed or terminated.

    Returns:
        A `list` of `Viewers` for completed tasks, in order of submission
            (i.e. sorted by task index).
    """
    self.join()
    results = self.completed
    ordered = [results[task_ix] for task_ix in sorted(results)]
    self.terminate()
    return ordered

join()

Block until all pending and running tasks are complete.

Source code in hostess/serverpool.py
334
335
336
337
def join(self):
    """
    Block until all pending and running tasks are complete, i.e. until the
    polling loop has reset `self.pollthread` to None. Checks every
    `self.poll` seconds.
    """
    while True:
        if self.pollthread is None:
            return
        time.sleep(self.poll)

terminate()

Terminate the ServerPool. This will cancel all pending tasks, kill all running tasks, and prevent submission of new tasks.

Source code in hostess/serverpool.py
339
340
341
342
343
344
345
346
def terminate(self):
    """
    Terminate the `ServerPool`. This will cancel all pending tasks, kill
    all running tasks, and prevent submission of new tasks.
    """
    # drop queued tasks first so the polling loop has nothing left to assign
    self.pending = {}
    # the polling loop kills unfinished tasks and exits when it sees
    # `terminated`; `closed` makes further submissions fail
    self.closed, self.terminated = True, True
    # NOTE(review): shutdown() is called with its default arguments, which
    # for a concurrent.futures executor waits for submitted work (here the
    # polling loop) to return -- confirm blocking is intended.
    self.exc.shutdown()

ServerTask

Simple future-like object. It is primarily intended to be instantiated by ServerPool as an abstraction for a process running on a remote host.

Source code in hostess/serverpool.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
class ServerTask:
    """
    Simple future-like object wrapping one call of a host method that returns
    a `Viewer`. It is primarily intended to be instantiated by `ServerPool`
    as an abstraction for a process running on a remote host.
    """

    def __init__(
        self,
        # TODO: some kind of type variable for this
        host: Union["Instance", RunCommand],
        method: str,
        args: Sequence,
        kwargs: Mapping,
        defer: bool = False,
        poll: float = 0.03,
    ):
        """
        Args:
            host: An object whose `method` attribute is a callable that returns
                a Viewer, most likely a `hostess.aws.ec2.Instance` or
                `hostess.ssh.SSH`.
            method: Name of a method of `host` that returns a `Viewer`.
            args: Args to pass to `host.method()`.
            kwargs: Kwargs to pass to `host.method()`.
            defer: If `True`, do not call `host.method()` on initialization;
                call `self.run()` later instead.
            poll: Polling rate, in seconds. Stored as `self.poll`; no method
                of this class reads it.
        """
        # what to call, and with what
        self.instance = host
        self.method = method
        self.args = args
        self.kwargs = kwargs
        # execution state, filled in by run()
        self.viewer = None
        self.task = None
        self.start_timestamp = None
        self.executed = False
        self.poll = poll
        self.exception = None
        if defer is False:
            self.run()

    def get(self) -> Union[Viewer, Exception]:
        """
        Block until `self.viewer` is finished.

        Returns:
            A Viewer to a finished process.

        Raises:
            ValueError: If this object has not yet executed its task.
            TypeError: If task execution failed (the cause is kept in
                `self.exception`).
        """
        if self.viewer is not None:
            self.viewer.wait()
            return self.viewer
        if self.executed is False:
            raise ValueError("Task not yet executed.")
        raise TypeError("Execution failed. Check self.exception for details.")

    def run(self):
        """
        Execute the task: call `host.method(*args, **kwargs)` and keep the
        resulting `Viewer` in `self.viewer`. Any exception raised along the
        way is recorded in `self.exception` and then re-raised.

        Raises:
            TypeError: if the call does not return a `Viewer`.
        """
        self.executed = True
        self.start_timestamp = time.time()
        try:
            call = getattr(self.instance, self.method)
            viewer = call(*self.args, **self.kwargs)
        # KeyboardInterrupt is not an Exception subclass, so it passes
        # straight through without being recorded
        except Exception as ex:
            self.exception = ex
            raise ex
        if isinstance(viewer, Viewer):
            self.viewer = viewer
            return
        self.exception = TypeError(
            f"host.method must return a Viewer; got {type(viewer)}."
        )
        raise self.exception

    def kill(self):
        """Aliases self.viewer.kill(). Does nothing if self.viewer is None."""
        if self.viewer is None:
            return
        self.viewer.kill()

    @property
    def done(self) -> bool:
        """Aliases self.viewer.done. Always False if self.viewer is None."""
        if self.viewer is None:
            return False
        return self.viewer.done

    @property
    def running(self) -> bool:
        """Aliases self.viewer.running. Always False if self.viewer is None."""
        if self.viewer is None:
            return False
        return self.viewer.running

    @property
    def out(self) -> list:
        """Aliases self.viewer.out. Always [] if self.viewer is None."""
        if self.viewer is None:
            return []
        return self.viewer.out

    @property
    def err(self) -> list:
        """Aliases self.viewer.err. Always [] if self.viewer is None."""
        if self.viewer is None:
            return []
        return self.viewer.err

done property

Aliases self.viewer.done. Always False if self.viewer is None.

err property

Aliases self.viewer.err. Always [] if self.viewer is None.

out property

Aliases self.viewer.out. Always [] if self.viewer is None.

running property

Aliases self.viewer.running. Always False if self.viewer is None.

__init__(host, method, args, kwargs, defer=False, poll=0.03)

Parameters:

Name Type Description Default
host Union[Instance, RunCommand]

An object whose method attribute is a callable that returns a Viewer, most likely a hostess.aws.ec2.Instance or hostess.ssh.SSH.

required
method str

Name of a method of host that returns a Viewer.

required
args Sequence

Args to pass to host.method().

required
kwargs Mapping

Kwargs to pass to host.method().

required
defer bool

If True, do not call host.method() on initialization.

False
poll float

Polling rate, in seconds, for self.get().

0.03
Source code in hostess/serverpool.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def __init__(
    self,
    # TODO: some kind of type variable for this
    host: Union["Instance", RunCommand],
    method: str,
    args: Sequence,
    kwargs: Mapping,
    defer: bool = False,
    poll: float = 0.03,
):
    """
    Args:
        host: An object whose `method` attribute is a callable that returns
            a Viewer, most likely a `hostess.aws.ec2.Instance` or
            `hostess.ssh.SSH`.
        method: Name of a method of `host` that returns a `Viewer`.
        args: Args to pass to `host.method()`.
        kwargs: Kwargs to pass to `host.method()`.
        defer: If `True`, do not call `host.method()` on initialization;
            otherwise `self.run()` is called immediately.
        poll: Polling rate, in seconds; stored as `self.poll`.
    """
    # what to call, and with what
    self.instance = host
    self.method = method
    self.args = args
    self.kwargs = kwargs
    # execution state, not populated until the task is run
    self.viewer = None
    self.task = None
    self.start_timestamp = None
    self.executed = False
    self.poll = poll
    self.exception = None
    if defer is False:
        self.run()

get()

Block until self.viewer is finished.

Returns:

Type Description
Union[Viewer, Exception]

A Viewer to a finished process, or an Exception if host.method() raised one.

Raises:

Type Description
ValueError

If this object has not yet executed its task.

TypeError

If task execution failed.

Source code in hostess/serverpool.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
def get(self) -> Union[Viewer, Exception]:
    """
    Block until `self.viewer` is finished.

    Returns:
        A Viewer to a finished process.

    Raises:
        ValueError: If this object has not yet executed its task.
        TypeError: If task execution failed, i.e. the task was executed
            but produced no Viewer.
    """
    if self.viewer is not None:
        self.viewer.wait()
        return self.viewer
    # no viewer: either the task never ran, or running it failed
    if self.executed is False:
        raise ValueError("Task not yet executed.")
    raise TypeError("Execution failed. Check self.exception for details.")

kill()

Aliases self.viewer.kill(). Does nothing if self.viewer is None.

Source code in hostess/serverpool.py
86
87
88
89
def kill(self):
    """Aliases self.viewer.kill(). Does nothing if self.viewer is None."""
    if self.viewer is None:
        # nothing has been launched, so there is nothing to kill
        return
    self.viewer.kill()

run()

Execute the task.

Source code in hostess/serverpool.py
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def run(self):
    """
    Execute the task: look up `self.method` on `self.instance` and call it
    with `self.args` / `self.kwargs`.

    Marks the task as executed and records the start time before the call.
    On success the returned `Viewer` is stored in `self.viewer`.

    Raises:
        Exception: Whatever the lookup or call raised; the same exception is
            also stored in `self.exception`.
        TypeError: If the call returned something other than a `Viewer`
            (also stored in `self.exception`).
    """
    self.executed = True
    self.start_timestamp = time.time()
    try:
        bound_method = getattr(self.instance, self.method)
        outcome = bound_method(*self.args, **self.kwargs)
    # KeyboardInterrupt is not an Exception subclass, so it passes through
    # this handler untouched and unrecorded.
    except Exception as ex:
        self.exception = ex
        raise ex
    if isinstance(outcome, Viewer):
        self.viewer = outcome
        return
    self.exception = TypeError(
        f"host.method must return a Viewer; got {type(outcome)}."
    )
    raise self.exception

shortcuts

shortcuts for composing shell commands. relies on some bournelike idioms, so output will likely function best in bash.

chain(cmds, op='then')

create a multi-part shell command.

Parameters:

Name Type Description Default
cmds Sequence[str]

commands to chain together.

required
op Literal['and', 'xor', 'then']

logical operator to chain them with.

'then'

Returns:

Type Description
str

multi-part shell command as a string.

Source code in hostess/shortcuts.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
def chain(
    cmds: Sequence[str],
    op: Literal["and", "xor", "then"] = "then",
) -> str:
    """
    join several shell commands into a single command string.

    Args:
        cmds: commands to chain together.
        op: how to connect them: "and" joins with `&&`, "xor" joins with
            `||`, and "then" joins with `;`.

    Returns:
        the commands joined by the chosen connector, with one space on each
            side of it.

    Raises:
        KeyError: if `op` is not one of the supported operator names.
    """
    connectors = {"then": ";", "and": "&&", "xor": "||"}
    separator = f" {connectors[op]} "
    return separator.join(cmds)

ssh_agent(key)

construct a shell command that starts an ssh-agent session and adds a keyfile.

Parameters:

Name Type Description Default
key str

path to SSH keyfile

required

Returns:

Type Description
str

string form of shell command.

Source code in hostess/shortcuts.py
74
75
76
77
78
79
80
81
82
83
84
85
def ssh_agent(key: str) -> str:
    """
    build a shell command that launches an ssh-agent session and then
    registers a keyfile with it.

    Args:
        key: path to SSH keyfile; interpolated into the command as-is.

    Returns:
        string form of shell command.
    """
    start_agent = "eval `ssh-agent`"
    add_key = f"ssh-add {key}"
    return chain((start_agent, add_key))

sub(cmd)

shorthand for f"$({cmd})", i.e., instruction to run cmd in a subshell.

Parameters:

Name Type Description Default
cmd str

string form of shell command to wrap in subshell instruction

required

Returns:

Type Description
str

instruction to run cmd in a subshell

Source code in hostess/shortcuts.py
27
28
29
30
31
32
33
34
35
36
37
def sub(cmd: str) -> str:
    """
    wrap a command in `$(...)`, i.e., an instruction to run it in a
    subshell.

    Args:
        cmd: string form of the shell command to wrap

    Returns:
        `cmd` enclosed in `$(` and `)`
    """
    wrapped = f"$({cmd})"
    return wrapped

ternary(if_, then_, else_=None)

construct a bash command that serves as a ternary operator.

Parameters:

Name Type Description Default
if_ str

argument to the shell command "test" to use as predicate (see the manpage for test for details)

required
then_ str

shell command to run if predicate is truthy

required
else_ Optional[str]

shell command to run if predicate is falsy

None

Returns:

Type Description
str

string form of ternary shell command

Source code in hostess/shortcuts.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
def ternary(if_: str, then_: str, else_: Optional[str] = None) -> str:
    """
    build a bash `if test ... ; then ... ; else ... ; fi` one-liner.

    Args:
        if_: argument to the shell command "test" to use as predicate
            (see the manpage for test for details)
        then_: shell command to run if predicate is truthy
        else_: shell command to run if predicate is falsy. When omitted,
            the shell no-op `:` is used for the else branch.

    Returns:
        string form of ternary shell command
    """
    if else_ is None:
        else_ = ":"
    return f"if test {if_}  ; then {then_} ; else {else_} ; fi"

truthy(cmd)

construct a shell command that tests a predicate and echoes "True" if it's truthy and "False" if it's falsy. This is intended as an easy way to do tests in bash that output the string representation of a Python bool literal.

Parameters:

Name Type Description Default
cmd str

predicate to test. will be used as an argument to the shell command "test" (see manpage for test for details)

required

Returns:

Type Description
str

string form of truthiness-printing shell command

Source code in hostess/shortcuts.py
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def truthy(cmd: str) -> str:
    """
    build a shell command that evaluates a predicate with `test` and echoes
    "True" when it holds and "False" when it does not, so the output is the
    string form of a Python bool literal.

    Args:
        cmd: predicate to test. will be used as an argument to the shell
            command "test" (see manpage for test for details)

    Returns:
        string form of truthiness-printing shell command
    """
    return ternary(cmd, then_="echo True", else_="echo False")

ssh

NotebookConnection = tuple[str, Callable, dict, Processlike, Callable[[], None]] module-attribute

structure containing results of a tunneled Jupyter Notebook execution.

  1. URL for Jupyter server
  2. function that shuts down tunnel
  3. SSH tunnel metadata
  4. Jupyter execution process
  5. Callable for gracefully shutting down Notebook

SSH

Bases: RunCommand

callable interface to an SSH connection to a remote host. basically a wrapper for a fabric.connection.Connection object with additional functionality for managed command execution. NOTE: supports only keyfile authentication.

Examples:

>>> ssh = SSH.connect(
...    "1.11.11.111", 'remote_user', '/home/user/.ssh/keyfile.pem'
... )
>>> ssh("echo hi > a.txt")
>>> tail = ssh("tail -f a.txt")
>>> for n in range(5):
    ... ssh(f"echo {n} >> a.txt")
>>> print(','.join([s.strip() for s in tail.out]))
>>> ssh.con('ls -l / | grep dev')

expected output:

hi, 0, 1, 2, 3
drwxr-xr-x  15 root   root     3320 Nov 12 01:50 dev
Source code in hostess/ssh.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
class SSH(RunCommand):
    """
    callable interface to an SSH connection to a remote host. basically a
    wrapper for a fabric.connection.Connection object with additional
    functionality for managed command execution. NOTE: supports only keyfile
    authentication.

    Examples:
        >>> ssh = SSH.connect(
        ...    "1.11.11.111", 'remote_user', '/home/user/.ssh/keyfile.pem'
        ... )
        >>> ssh("echo hi > a.txt")
        >>> tail = ssh("tail -f a.txt")
        >>> for n in range(5):
        ...     ssh(f"echo {n} >> a.txt")
        >>> print(','.join([s.strip() for s in tail.out]))
        >>> ssh.con('ls -l / | grep dev')

        expected output:

            hi,0,1,2,3
            drwxr-xr-x  15 root   root     3320 Nov 12 01:50 dev
    """

    # class-level default so that close() / __del__ can still run on an
    # instance whose __init__ raised before assigning self.conn
    conn = None

    def __init__(
        self,
        command: Optional[str] = None,
        conn: Optional[Connection] = None,
        key: Optional[str] = None,
        **kwargs: Union[int, float, str, bool],
    ):
        """
        Args:
            command: optional shell command to 'curry'  into this object. may
                be omitted if commands will be provided later, or if this
                particular object is not intended to execute commands.
            conn: Fabric `Connection` object
            key: path to keyfile; may be provided after instantiation, but
                must be provided before command is actually executed.
            **kwargs: RunCommand init kwargs (see RunCommand documentation)

        Raises:
            TypeError: if `conn` is None.
        """
        if conn is None:
            raise TypeError("a Connection must be provided")
        super().__init__(command, conn, conn["runners"]["remote"], **kwargs)
        self.host, self.uname, self.key = conn.host, conn.user, key
        self.conn = conn  # effectively an alias for self.ctx
        # (kill-function, metadata) pairs, one per tunnel opened by tunnel()
        self.tunnels: list[tuple[Callable, dict]] = []

    @classmethod
    def connect(
        cls,
        host: str,
        uname: str = GENERAL_DEFAULTS["uname"],
        key: Optional[str] = None,
    ) -> "SSH":
        """
        constructor that creates a connection to the remote host and uses it
        to instantiate the SSH object. convenient in cases when an appropriate
        `Connection` object does not already exist or should not be reused.

        Args:
            host: ip of remote host
            uname: user name on remote host
            key: path to keyfile. if None, no `key_filename` is passed to
                the `Connection`.

        Returns:
            an SSH object with a newly-generated `Connection`.
        """
        connect_kwargs = {"key_filename": key} if key is not None else {}
        conn = Connection(user=uname, host=host, connect_kwargs=connect_kwargs)
        # NOTE(review): the instance is allocated with object.__new__ and
        # then initialized by hand instead of calling cls(...); presumably
        # this sidesteps a custom __new__ higher in the hierarchy -- confirm
        # before simplifying.
        ssh = object().__new__(cls)
        ssh.__init__(conn=conn, key=key)
        return ssh

    def put(
        self,
        source: Union[str, Path, IO, bytes],
        target: Union[str, Path],
        *args: Any,
        literal_str: bool = False,
        **kwargs: Any,
    ) -> dict:
        """
        write local file or object to target file on remote host.

        Args:
            source: path to local file, `io.StringIO` / `io.BytesIO` buffer,
                or `bytes` (which are wrapped in a `BytesIO`)
            target: write path on remote host
            args: additional arguments to pass to underlying put method
            literal_str: if True and `source` is a `str`, write `source`
                into `target` as text rather than interpreting `source` as a
                path
            kwargs: additional kwargs to pass to underlying put command

        Returns:
            dict giving transfer metadata: local, remote, host, and port

        Raises:
            TypeError: if `source` is not one of the accepted types.
        """
        if isinstance(source, str) and (literal_str is True):
            source = io.StringIO(source)
        if isinstance(source, bytes):
            source = io.BytesIO(source)
        elif not isinstance(source, (str, Path, io.StringIO, io.BytesIO)):
            raise TypeError("Source must be a string, Path, or IO.")
        return unpack_transfer_result(
            self.conn.put(source, target, *args, **kwargs)
        )

    def get(
        self,
        source: Union[str, Path],
        target: Union[str, Path, IO],
        *args: Any,
        **kwargs: Any,
    ) -> dict:
        """
        copy file from remote to local.

        Args:
            source: path to file on remote host
            target: path to local file, or a filelike object (such as
                io.BytesIO)
            *args: args to pass to underlying get method
            **kwargs: kwargs to pass to underlying get method

        Returns:
            dict giving transfer metadata: local, remote, host, and port
        """
        # TODO, maybe: try to improve speed, possibly by increasing chunksize.
        #  this may require backing transfers with something other than
        #  paramiko, which by default uses SFTP and includes dire warnings
        #  about packet sizes > 32kb. Always caching in-memory before writes
        #  to disk might also improve speed; requires testing.
        #  more radical options like actually creating an in-memory sshfs
        #  filesystem of some kind are also possibilities.
        return unpack_transfer_result(
            self.conn.get(str(source), target, *args, **kwargs)
        )

    def read(
        self,
        source: str,
        mode: Literal["r", "rb"] = "r",
        encoding: str = "utf-8",
        as_buffer: bool = False,
    ) -> Union[io.BytesIO, io.StringIO, bytes, str]:
        """
        read a file from the remote host directly into memory.

        Args:
            source: path to file on remote host.
            mode: 'r' to read file as text; 'rb' to read file as bytes
            encoding: encoding for text, used only if `mode` is 'r'
            as_buffer: if True, return BytesIO/StringIO; if False, return
                bytes/str

        Returns:
            contents of remote file as str, bytes, or IO

        Raises:
            TypeError: if `mode` is neither 'r' nor 'rb'.
        """
        if mode not in ("r", "rb"):
            raise TypeError("mode must be 'r' or 'rb'")
        buffer = io.BytesIO()
        self.get(source, buffer)
        buffer.seek(0)
        if mode == "r":
            # swap the bytes buffer for a text buffer with decoded content
            stringbuf = io.StringIO()
            stringbuf.write(buffer.read().decode(encoding))
            stringbuf.seek(0)
            buffer = stringbuf
        if as_buffer is True:
            return buffer
        return buffer.read()

    def read_csv(
        self,
        source: Union[str, Path],
        encoding: str = "utf-8",
        **csv_kwargs: Any,
    ) -> pd.DataFrame:
        """
        read a CSV-like file from the remote host into a pandas DataFrame.

        Args:
            source: path to CSV-like file on remote host
            encoding: encoding for text
            csv_kwargs: kwargs to pass to pd.read_csv

        Returns:
            DataFrame created from contents of remote CSV file
        """
        return pd.read_csv(
            self.read(str(source), "r", encoding, True), **csv_kwargs
        )

    def tunnel(self, local_port: int, remote_port: int):
        """
        create an SSH tunnel between a local port and a remote port; store an
        abstraction for the tunnel process, along with metadata about the
        tunnel, in self.tunnels.

        Args:
            local_port: port number for local end of tunnel.
            remote_port: port number for remote end of tunnel.
        """
        signaler, meta = open_tunnel(
            self.host, self.uname, self.key, local_port, remote_port
        )
        self.tunnels.append((signaler, meta))

    def __call__(
        self,
        *args: Union[int, float, str],
        _quiet: bool = True,
        _viewer: bool = True,
        _wait: bool = False,
        **kwargs: Union[int, float, str, bool],
    ) -> Processlike:
        """
        run a shell command in the remote host's default interpreter. See
        `RunCommand.__call__()` for details on calling conventions and options.

        Args:
            *args: args to use to construct the command.
            _viewer: if `True`, return a hostess `Viewer` object. otherwise
                return unwrapped Fabric `Result`.
            _wait: if `True`, block until command terminates (or connection
                fails). _w is an alias.
            _quiet: if `False`, print stdout and stderr, should the process
                return any before this function terminates. Generally best
                used with _wait=True.
            **kwargs: kwargs to pass to command execution. kwarg
                names beginning with '_' specify execution meta-parameters;
                others will be inserted directly into the command as `--`-type
                shell parameters.

        Returns:
            object representing executed process.
        """
        # `_w`, when given and not None, overrides `_wait`
        if (_w := kwargs.pop("_w", None)) is not None:
            _wait = _w
        result = super().__call__(*args, _viewer=_viewer, **kwargs)
        if _wait is True:
            result.wait()
        if _quiet is False:
            # imported lazily: rich is only needed for the printing path
            from rich import print as rp

            if len(result.stdout) > 0:
                rp(*result.stdout)
            if len(result.stderr) > 0:
                rp(*map(lambda t: f"[red]{t}[/red]", result.stderr))
        return result

    def con(
        self,
        *args: Union[int, float, str],
        _poll: float = 0.05,
        _timeout: Optional[float] = None,
        _return_viewer: bool = False,
        **kwargs: Union[int, float, str, bool],
    ) -> Optional[Viewer]:
        """
        pretend you are running a command on the remote host while looking at a
        terminal emulator. pauses for output and pretty-prints it to stdout.

        Does not return a process abstraction by default (pass
        _return_viewer=True if you want one). Fun in interactive environments.

        Only arguments unique to con() are described here; others are as
        SSH.__call__().

        Args:
            *args: additional args to pass to self.__call__.
            _poll: polling rate for process output, in seconds
            _timeout: if not None, raise a TimeoutError if this many seconds
                pass before receiving additional output from process (or
                process exit).
            _return_viewer: if True, return a Viewer for the process once it
                exits. Otherwise, return None.
            **kwargs: additional kwargs to pass to self.__call__. `_viewer`
                may be omitted or truthy, but not False.

        Returns:
            A Viewer if _return_viewer is True; otherwise None.

        Raises:
            TypeError: if called with `_viewer=False`.
        """
        # pop (rather than get) so that an explicit `_viewer=True` does not
        # collide with the `_viewer=True` passed to self() below
        if kwargs.pop("_viewer", True) is False:
            raise TypeError("Cannot call con() with _viewer=False")
        process = self(*args, _viewer=True, **kwargs)
        if _timeout is not None:
            waiting, unwait = timeout_factory(True, _timeout)
        else:
            waiting, unwait = zero, zero
        out_head, err_head = 0, 0
        try:
            while process.running:
                # NOTE(review): heads are passed as (err, out) but unpacked
                # as (out, err) -- confirm against _move_print_heads.
                has_new_output, out_head, err_head = _move_print_heads(
                    err_head, out_head, process
                )
                if has_new_output is True:
                    unwait()
                else:
                    waiting()
                time.sleep(_poll)
            # one final pass after the process stops running
            _move_print_heads(err_head, out_head, process)
        except KeyboardInterrupt:
            process.kill()
            print("^C")
        if _return_viewer is True:
            return process

    def close(self):
        """
        call the kill function of every tunnel in `self.tunnels`, then close
        the underlying connection if there is one. Safe to call on an
        instance whose `__init__` did not complete.
        """
        # getattr: __del__ may call this before self.tunnels was ever set
        for kill_signal, _meta in getattr(self, "tunnels", ()):
            kill_signal()
        if self.conn is not None:
            self.conn.close()

    def __str__(self):
        """parent's string form plus a `uname@host` line."""
        return f"{super().__str__()}\n{self.uname}@{self.host}"

    def __del__(self):
        """close tunnels and connection when the object is collected."""
        self.close()

__call__(*args, _quiet=True, _viewer=True, _wait=False, **kwargs)

run a shell command in the remote host's default interpreter. See RunCommand.__call__() for details on calling conventions and options.

Parameters:

Name Type Description Default
*args Union[int, float, str]

args to use to construct the command.

()
_viewer bool

if True, return a hostess Viewer object. otherwise return unwrapped Fabric Result.

True
_wait bool

if True, block until command terminates (or connection fails). _w is an alias.

False
_quiet bool

if False, print stdout and stderr, should the process return any before this function terminates. Generally best used with _wait=True.

True
**kwargs Union[int, float, str, bool]

kwargs to pass to command execution. kwarg names beginning with '_' specify execution meta-parameters; others will be inserted directly into the command as `--`-type shell parameters.

{}

Returns:

Type Description
Processlike

object representing executed process.

Source code in hostess/ssh.py
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
def __call__(
    self,
    *args: Union[int, float, str],
    _quiet: bool = True,
    _viewer: bool = True,
    _wait: bool = False,
    **kwargs: Union[int, float, str, bool],
) -> Processlike:
    """
    execute a shell command in the default interpreter of the remote host.
    Calling conventions and options are those of `RunCommand.__call__()`.

    Args:
        *args: args to use to construct the command.
        _viewer: if `True`, return a hostess `Viewer` object. otherwise
            return unwrapped Fabric `Result`.
        _wait: if `True`, block until command terminates (or connection
            fails). `_w` is an alias and, when given and not None, takes
            precedence.
        _quiet: if `False`, print whatever stdout and stderr the process
            has produced by the time this function is about to return
            (stderr in red). Generally best used with _wait=True.
        **kwargs: kwargs to pass to command execution. kwarg
            names beginning with '_' specify execution meta-parameters;
            others will be inserted directly into the command as `--`-type
            shell parameters.

    Returns:
        object representing executed process.
    """
    wait_alias = kwargs.pop("_w", None)
    if wait_alias is not None:
        _wait = wait_alias
    result = super().__call__(*args, _viewer=_viewer, **kwargs)
    if _wait is True:
        result.wait()
    if _quiet is not False:
        return result
    # printing path only: rich is imported lazily
    from rich import print as rp

    if len(result.stdout) > 0:
        rp(*result.stdout)
    if len(result.stderr) > 0:
        rp(*(f"[red]{t}[/red]" for t in result.stderr))
    return result

__init__(command=None, conn=None, key=None, **kwargs)

Parameters:

Name Type Description Default
command Optional[str]

optional shell command to 'curry' into this object. may be omitted if commands will be provided later, or if this particular object is not intended to execute commands.

None
conn Optional[Connection]

Fabric Connection object

None
key Optional[str]

path to keyfile; may be provided after instantiation, but must be provided before command is actually executed.

None
**kwargs Union[int, float, str, bool]

RunCommand init kwargs (see RunCommand documentation)

{}
Source code in hostess/ssh.py
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
def __init__(
    self,
    command: Optional[str] = None,
    conn: Optional[Connection] = None,
    key: Optional[str] = None,
    **kwargs: Union[int, float, str, bool],
):
    """
    Args:
        command: optional shell command to 'curry' into this object. may
            be omitted if commands will be provided later, or if this
            particular object is not intended to execute commands.
        conn: Fabric `Connection` object. Required despite the None
            default.
        key: path to keyfile; may be provided after instantiation, but
            must be provided before command is actually executed.
        **kwargs: RunCommand init kwargs (see RunCommand documentation)

    Raises:
        TypeError: if `conn` is None.
    """
    if conn is None:
        raise TypeError("a Connection must be provided")
    remote_runner = conn["runners"]["remote"]
    super().__init__(command, conn, remote_runner, **kwargs)
    self.host = conn.host
    self.uname = conn.user
    self.key = key
    # effectively an alias for self.ctx
    self.conn = conn
    # (kill-function, metadata) pairs for open tunnels
    self.tunnels: list[tuple[Callable, dict]] = []

con(*args, _poll=0.05, _timeout=None, _return_viewer=False, **kwargs)

pretend you are running a command on the remote host while looking at a terminal emulator. pauses for output and pretty-prints it to stdout.

Does not return a process abstraction by default (pass _return_viewer=True if you want one). Fun in interactive environments.

Only arguments unique to con() are described here; others are as SSH.__call__().

Parameters:

Name Type Description Default
*args Union[int, float, str]

additional args to pass to self.__call__.

()
_poll float

polling rate for process output, in seconds

0.05
_timeout Optional[float]

if not None, raise a TimeoutError if this many seconds pass before receiving additional output from process (or process exit).

None
_return_viewer bool

if True, return a Viewer for the process once it exits. Otherwise, return None.

False
**kwargs Union[int, float, str, bool]

additional kwargs to pass to self.__call__.

{}

Returns:

Type Description
Optional[Viewer]

A Viewer if _return_viewer is True; otherwise None.

Source code in hostess/ssh.py
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
def con(
    self,
    *args: Union[int, float, str],
    _poll: float = 0.05,
    _timeout: Optional[float] = None,
    _return_viewer: bool = False,
    **kwargs: Union[int, float, str, bool],
) -> Optional[Viewer]:
    """
    pretend you are running a command on the remote host while looking at a
    terminal emulator. pauses for output and pretty-prints it to stdout.

    Does not return a process abstraction by default (pass
    _return_viewer=True if you want one). Fun in interactive environments.

    Only arguments unique to con() are described here; others are as
    SSH.__call__().

    Args:
        *args: additional args to pass to self.__call__.
        _poll: polling rate for process output, in seconds
        _timeout: if not None, raise a TimeoutError if this many seconds
            pass before receiving additional output from process (or
            process exit).
        _return_viewer: if True, return a Viewer for the process once it
            exits. Otherwise, return None.
        **kwargs: additional kwargs to pass to self.__call__. `_viewer`
            may be omitted or truthy, but not False.

    Returns:
        A Viewer if _return_viewer is True; otherwise None.

    Raises:
        TypeError: if called with `_viewer=False`.
    """
    # pop (rather than get) so that an explicit `_viewer=True` does not
    # collide with the `_viewer=True` passed to self() below, which would
    # raise "got multiple values for keyword argument '_viewer'"
    if kwargs.pop("_viewer", True) is False:
        raise TypeError("Cannot call con() with _viewer=False")
    process = self(*args, _viewer=True, **kwargs)
    if _timeout is not None:
        waiting, unwait = timeout_factory(True, _timeout)
    else:
        waiting, unwait = zero, zero
    out_head, err_head = 0, 0
    try:
        while process.running:
            # NOTE(review): heads are passed as (err, out) but unpacked as
            # (out, err) -- confirm against _move_print_heads' signature.
            has_new_output, out_head, err_head = _move_print_heads(
                err_head, out_head, process
            )
            if has_new_output is True:
                unwait()
            else:
                waiting()
            time.sleep(_poll)
        # one final pass after the process stops running
        _move_print_heads(err_head, out_head, process)
    except KeyboardInterrupt:
        process.kill()
        print("^C")
    if _return_viewer is True:
        return process

connect(host, uname=GENERAL_DEFAULTS['uname'], key=None) classmethod

constructor that creates a connection to the remote host and uses it to instantiate the SSH object. convenient in cases when an appropriate Connection object does not already exist or should not be reused.

Parameters:

Name Type Description Default
host str

ip of remote host

required
uname str

user name on remote host

GENERAL_DEFAULTS['uname']
key str

path to keyfile

None

Returns:

Type Description
SSH

an SSH object with a newly-generated Connection.

Source code in hostess/ssh.py
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
@classmethod
def connect(
    cls, host: str, uname: str = GENERAL_DEFAULTS["uname"], key: str = None
) -> "SSH":
    """
    alternate constructor: build a fresh `Connection` to the remote host and
    wrap it in a new SSH object. useful when no suitable `Connection` exists
    yet, or when an existing one should not be reused.

    Args:
        host: ip of remote host
        uname: user name on remote host
        key: path to keyfile. when None, no `key_filename` is handed to
            the `Connection`.

    Returns:
        an SSH object with a newly-generated `Connection`.
    """
    connect_kwargs = {} if key is None else {"key_filename": key}
    conn = Connection(user=uname, host=host, connect_kwargs=connect_kwargs)
    # allocate the bare instance first, then initialize it explicitly
    instance = object.__new__(cls)
    instance.__init__(conn=conn, key=key)
    return instance

get(source, target, *args, **kwargs)

copy file from remote to local.

Parameters:

Name Type Description Default
source Union[str, Path]

path to file on remote host

required
target Union[str, Path, IO]

path to local file, or a filelike object (such as io.BytesIO)

required
*args Any

args to pass to underlying get method

()
**kwargs Any

kwargs to pass to underlying get method

{}

Returns:

Type Description
dict

dict giving transfer metadata: local, remote, host, and port

Source code in hostess/ssh.py
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
def get(
    self,
    source: Union[str, Path],
    target: Union[str, Path, IO],
    *args: Any,
    **kwargs: Any,
) -> dict:
    """
    fetch a file from the remote host.

    Args:
        source: path to file on remote host; converted to `str` before
            being handed to the connection
        target: path to local file, or a filelike object (such as
            io.BytesIO)
        *args: args to pass to underlying get method
        **kwargs: kwargs to pass to underlying get method

    Returns:
        dict giving transfer metadata: local, remote, host, and port
    """
    # TODO, maybe: try to improve speed, possibly by increasing chunksize.
    #  this may require backing transfers with something other than
    #  paramiko, which by default uses SFTP and includes dire warnings
    #  about packet sizes > 32kb. Always caching in-memory before writes
    #  to disk might also improve speed; requires testing.
    #  more radical options like actually creating an in-memory sshfs
    #  filesystem of some kind are also possibilities.
    remote_path = str(source)
    transfer = self.conn.get(remote_path, target, *args, **kwargs)
    return unpack_transfer_result(transfer)

put(source, target, *args, literal_str=False, **kwargs)

write local file or object to target file on remote host.

Parameters:

Name Type Description Default
source Union[str, Path, IO, bytes]

filelike object or path to local file

required
target Union[str, Path]

write path on remote host

required
args Any

additional arguments to pass to underlying put method

()
literal_str bool

if True and source is a str, write source into target as text rather than interpreting source as a path

False
kwargs Any

additional kwargs to pass to underlying put command

{}

Returns:

Type Description
dict

dict giving transfer metadata: local, remote, host, and port

Source code in hostess/ssh.py
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
def put(
    self,
    source: Union[str, Path, IO, bytes],
    target: Union[str, Path],
    *args: Any,
    literal_str: bool = False,
    **kwargs: Any,
) -> dict:
    """
    upload a local file or in-memory object to a file on the remote host.

    Args:
        source: path to local file, `io.StringIO` / `io.BytesIO` buffer,
            or `bytes` (which are wrapped in a `BytesIO`)
        target: write path on remote host
        args: additional arguments to pass to underlying put method
        literal_str: if True and `source` is a `str`, write `source`
            into `target` as text rather than interpreting `source` as a
            path
        kwargs: additional kwargs to pass to underlying put command

    Returns:
        dict giving transfer metadata: local, remote, host, and port

    Raises:
        TypeError: if `source` is not one of the accepted types.
    """
    if isinstance(source, bytes):
        source = io.BytesIO(source)
    elif isinstance(source, str):
        # a plain str is a local path unless the caller asked for it to be
        # written out as the file's text content
        if literal_str is True:
            source = io.StringIO(source)
    elif not isinstance(source, (Path, io.StringIO, io.BytesIO)):
        raise TypeError("Source must be a string, Path, or IO.")
    transfer = self.conn.put(source, target, *args, **kwargs)
    return unpack_transfer_result(transfer)

read(source, mode='r', encoding='utf-8', as_buffer=False)

read a file from the remote host directly into memory.

Parameters:

Name Type Description Default
source str

path to file on remote host.

required
mode Literal['r', 'rb']

'r' to read file as text; 'rb' to read file as bytes

'r'
encoding str

encoding for text, used only if mode is 'r'

'utf-8'
as_buffer bool

if True, return BytesIO/StringIO; if False, return bytes/str

False

Returns:

Type Description
Union[BytesIO, StringIO, bytes, str]

contents of remote file as str, bytes, or IO

Source code in hostess/ssh.py
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
def read(
    self,
    source: str,
    mode: Literal["r", "rb"] = "r",
    encoding: str = "utf-8",
    as_buffer: bool = False,
) -> Union[io.BytesIO, io.StringIO, bytes, str]:
    """
    fetch a remote file into memory without writing it to local disk.

    Args:
        source: path to file on remote host.
        mode: 'r' to decode the file as text; 'rb' to keep raw bytes
        encoding: text encoding; only consulted when `mode` is 'r'
        as_buffer: if True, return a StringIO/BytesIO positioned at its
            start; if False, return the str/bytes content itself

    Returns:
        contents of remote file as str, bytes, or IO

    Raises:
        TypeError: if `mode` is neither 'r' nor 'rb'.
    """
    if mode != "r" and mode != "rb":
        raise TypeError("mode must be 'r' or 'rb'")
    # the transfer always lands in a bytes buffer; text mode decodes it after
    fetched = io.BytesIO()
    self.get(source, fetched)
    fetched.seek(0)
    if mode == "rb":
        stream = fetched
    else:
        stream = io.StringIO(fetched.read().decode(encoding))
    if as_buffer is not True:
        return stream.read()
    return stream

read_csv(source, encoding='utf-8', **csv_kwargs)

read a CSV-like file from the remote host into a pandas DataFrame.

Parameters:

Name Type Description Default
source Union[str, Path]

path to CSV-like file on remote host

required
encoding str

encoding for text

'utf-8'
csv_kwargs Any

kwargs to pass to pd.read_csv

{}

Returns:

Type Description
DataFrame

DataFrame created from contents of remote CSV file

Source code in hostess/ssh.py
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
def read_csv(
    self,
    source: Union[str, Path],
    encoding: str = "utf-8",
    **csv_kwargs: Any,
) -> pd.DataFrame:
    """
    load a CSV-like file from the remote host as a pandas DataFrame.

    Args:
        source: path to CSV-like file on remote host
        encoding: encoding used to decode the remote file's text
        csv_kwargs: kwargs to pass to pd.read_csv

    Returns:
        DataFrame created from contents of remote CSV file
    """
    # pull the file into an in-memory text buffer, then let pandas parse it
    text_buffer = self.read(str(source), "r", encoding, True)
    return pd.read_csv(text_buffer, **csv_kwargs)

tunnel(local_port, remote_port)

create an SSH tunnel between a local port and a remote port; store an abstraction for the tunnel process, along with metadata about the tunnel, in self.tunnels.

Parameters:

Name Type Description Default
local_port int

port number for local end of tunnel.

required
remote_port int

port number for remote end of tunnel.

required
Source code in hostess/ssh.py
329
330
331
332
333
334
335
336
337
338
339
340
341
342
def tunnel(self, local_port: int, remote_port: int):
    """
    open an SSH tunnel from a local port to a remote port using this
    object's host, username, and key, and record it in self.tunnels as a
    (signaler, metadata) tuple.

    Args:
        local_port: port number for local end of tunnel.
        remote_port: port number for remote end of tunnel.
    """
    tunnel_record = open_tunnel(
        self.host, self.uname, self.key, local_port, remote_port
    )
    signaler, meta = tunnel_record
    self.tunnels.append((signaler, meta))

find_conda_env(cmd, env=None)

find location of a named conda environment. intended primarily for use on remote hosts.

Parameters:

Name Type Description Default
cmd RunCommand

instance of RunCommand or one of its subclasses; most likely an SSH instance.

required
env str

name of conda environment.

None

Returns:

Type Description
str

absolute path to root directory of conda environment.

Raises:

Type Description
FileNotFoundError

if environment cannot be found.

Source code in hostess/ssh.py
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
def find_conda_env(cmd: RunCommand, env: Optional[str] = None) -> str:
    """
    find location of a named conda environment. intended primarily for use
    on remote hosts.

    first consults ~/.conda/environments.txt; if that file cannot be read
    or does not list the environment, probes each of CONDA_SEARCH_PATHS.

    Args:
        cmd: instance of RunCommand or one of its subclasses; most likely an
            SSH instance.
        env: name of conda environment. None means "base".

    Returns:
        absolute path to root directory of conda environment.

    Raises:
        FileNotFoundError: if environment cannot be found.
    """
    env = "base" if env is None else env
    suffix = f"/envs/{env}" if env != "base" else ""
    try:
        cat = cmd("cat ~/.conda/environments.txt", _viewer=True)
        cat.wait()
        envs = [line.strip() for line in "".join(cat.out).split("\n")]
        if env == "base":
            # base is the first listed prefix that is not under an envs/ dir
            return next(
                line for line in envs if "envs" not in line and len(line) > 0
            )
        # match on the full trailing component so that e.g. 'foo' does not
        # pick up '/envs/foobar'
        return next(line for line in envs if line.endswith(suffix))
    except (UnexpectedExit, StopIteration):
        # file missing/unreadable or env not listed: fall through to probing
        pass
    getlines = cmd(
        short.chain(
            [short.truthy(f"-e {path}{suffix}") for path in CONDA_SEARCH_PATHS]
        ),
        _viewer=True,
    )
    getlines.wait()
    # NOTE(review): assumes one output line per probed path, in order
    for line, path in zip(getlines.out, CONDA_SEARCH_PATHS):
        if "True" in line:
            # return exactly the path that was tested (suffix already
            # carries its leading slash, or is empty for base)
            return f"{path}{suffix}"
    raise FileNotFoundError("conda environment not found.")

find_ssh_key(keyname, paths=None)

look for private SSH keyfile.

Parameters:

Name Type Description Default
keyname str

full or partial name of keyfile

required
paths Optional[Collection[Union[str, Path]]]

paths in which to search for key file. if not specified, look in hostess.config.GENERAL_DEFAULTS['secrets_folders']

None

Returns:

Type Description
Union[Path, None]

path to keyfile

Raises:

Type Description
FileNotFoundError

if no key found

Source code in hostess/ssh.py
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
def find_ssh_key(
    keyname: str, paths: Optional[Collection[Union[str, Path]]] = None
) -> Union[Path, None]:
    """
    look for private SSH keyfile.

    Args:
        keyname: full or partial name of keyfile
        paths: paths in which to search for key file. if not specified, look
            in hostess.config.GENERAL_DEFAULTS['secrets_folders'] and then
            in the current working directory. paths that do not exist are
            skipped.

    Returns:
        path to the first matching keyfile

    Raises:
        FileNotFoundError: if no key found. the message lists the
            directories that were searched.
    """
    checked = []
    if paths is None:
        paths = list(GENERAL_DEFAULTS["secrets_folders"]) + [os.getcwd()]
    # one detector is enough for the whole search
    detector = Magic()
    for directory in filter(lambda p: p.exists(), map(Path, listify(paths))):
        # TODO: public key option
        try:
            for candidate in directory.iterdir():
                if keyname not in candidate.name:
                    continue
                # only regular files can be keys; skip subdirectories and
                # dangling symlinks instead of handing them to libmagic
                if not candidate.is_file():
                    continue
                if "private key" in detector.from_file(candidate):
                    return candidate
            checked.append(f"{directory}")
        except PermissionError:
            checked.append(f"(permission denied) {directory}")
    raise FileNotFoundError(f"Looked in: {'; '.join(checked)}")

get_jupyter_token(command, jupyter_executable, port)

Get the access token of a Jupyter server running on the specified port.

Parameters:

Name Type Description Default
command RunCommand

an instance of RunCommand or one of its subclasses, likely an SSH object.

required
jupyter_executable str

path to Jupyter executable.

required
port int

port on which Jupyter server is running.

required

Returns:

Type Description
str

the Jupyter server's access token.

Source code in hostess/ssh.py
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
def get_jupyter_token(
    command: RunCommand, jupyter_executable: str, port: int
) -> str:
    """
    Look up the access token of the Jupyter server listening on `port`.

    Runs `<jupyter_executable> list` up to five times, pausing 0.1 s after
    each failed attempt, and extracts the token from the first output line
    that mentions the port.

    Args:
        command: an instance of RunCommand or one of its subclasses, likely
            an SSH object.
        jupyter_executable: path to Jupyter executable.
        port: port on which Jupyter server is running.

    Returns:
        the Jupyter server's access token.

    Raises:
        ValueError: if no token was found in any attempt.
    """
    list_command = f"{jupyter_executable} list"
    attempts_left = 5
    while attempts_left > 0:
        attempts_left -= 1
        try:
            jlister = command(list_command, _viewer=True)
            jlister.wait()
            line = filtern(lambda l: str(port) in l, jlister.out)
            # a failed regex search returns None, so .group() raises
            # AttributeError and triggers a retry
            return re.search(TOKEN_PATTERN, line).group()
        except (StopIteration, AttributeError):
            time.sleep(0.1)
    raise ValueError(
        "Token not found. Notebook may not have started on correct port. "
    )

jupyter_connect(ssh, local_port=22222, remote_port=8888, env=None, get_token=True, kill_on_exit=False, working_directory=None, lab=False, **command_kwargs)

Launch a Jupyter server on a remote host over an SSH tunnel.

Parameters:

Name Type Description Default
ssh SSH

SSH object connected to remote host

required
local_port int

port number for local end of tunnel

22222
remote_port int

port number for remote Jupyter server / remote end of tunnel

8888
env Optional[str]

conda env from which to launch Jupyter server; if none is specified, use the remote host's default jupyter

None
get_token bool

get the access token from the server?

True
kill_on_exit bool

attempt to kill the Jupyter server when the jupyter_launch process terminates?

False
working_directory Optional[str]

working directory for jupyter server

None
lab bool

launch JupyterLab instead of Jupyter Notebook

False
**command_kwargs Union[int, str, bool]

additional kwargs to pass to jupyter notebook

{}

Returns:

Type Description
NotebookConnection

structure containing results of tunneled notebook execution, including a callable to terminate the Notebook and another to close the tunnel

Source code in hostess/ssh.py
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
def jupyter_connect(
    ssh: SSH,
    local_port: int = 22222,
    remote_port: int = 8888,
    env: Optional[str] = None,
    get_token: bool = True,
    kill_on_exit: bool = False,
    working_directory: Optional[str] = None,
    lab: bool = False,
    **command_kwargs: Union[int, str, bool],
) -> NotebookConnection:
    """
    Start a Jupyter server on a remote host and tunnel to it over SSH.

    Args:
        ssh: `SSH` object connected to remote host
        local_port: port number for local end of tunnel
        remote_port: port number for remote Jupyter server / remote end of
            tunnel
        env: conda env from which to launch Jupyter server; if none is
            specified, use the remote host's default `jupyter`
        get_token: get the access token from the server?
        kill_on_exit: attempt to kill the Jupyter server when the
            `jupyter_launch` process terminates?
        working_directory: working directory for jupyter server
        lab: launch JupyterLab instead of Jupyter Notebook
        **command_kwargs: additional kwargs to pass to `jupyter notebook`

    Returns:
        tuple of (url, tunnel signaler, tunnel metadata, launch process,
            server-stopping callable). if the token was requested but could
            not be retrieved, a warning is issued, no tunnel is opened, and
            the first three elements are None.
    """
    booktype = "lab" if lab is not False else "notebook"
    if env is None:
        jupyter = f"jupyter {booktype}"
    else:
        jupyter = f"{find_conda_env(ssh, env)}/bin/jupyter {booktype}"
    stopper = stop_jupyter_factory(ssh, jupyter, remote_port)
    launch_command = f"{jupyter} --port {remote_port} --no-browser"
    if working_directory is not None:
        launch_command = f"cd {working_directory} && {launch_command}"
    launch_process = ssh(
        launch_command,
        # only hook the stopper to process completion when asked to
        _done=stopper if kill_on_exit is True else zero,
        _bg=True,
        **command_kwargs,
    )
    jupyter_url = f"http://localhost:{local_port}"
    if get_token:
        try:
            token = get_jupyter_token(ssh, jupyter, remote_port)
        except ValueError as ve:
            warnings.warn(str(ve))
            jupyter_url = None
        else:
            jupyter_url = f"{jupyter_url}/?token={token}"
    if jupyter_url is None:
        # no usable URL, so there is nothing to tunnel to
        return None, None, None, launch_process, stopper
    ssh.tunnel(local_port, remote_port)
    tunnel, tunnel_meta = ssh.tunnels[-1]
    return jupyter_url, tunnel, tunnel_meta, launch_process, stopper

launch_tunnel_thread(host, uname, keyfile, local_port, remote_port, signalbuf=None)

launch an SSH tunnel. primarily intended as a helper function for open_tunnel(), but can be used on its own. blocks until it hits an exception or it receives a signal, so should generally be run in a thread.

Parameters:

Name Type Description Default
host str

hostname of tunnel target

required
uname str

username on remote host

required
keyfile str

path to local SSH key file

required
local_port int

port for proximal end of tunnel

required
remote_port int

port for distal end of tunnel

required
signalbuf Optional[list]

list to receive close-tunnel signal

None

Returns:

Type Description
Union[tuple[Connection, Exception], Any]

signal received if closed gracefully; tuple of the Connection

Union[tuple[Connection, Exception], Any]

object and the Exception if it hits an exception.

Source code in hostess/ssh.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def launch_tunnel_thread(
    host: str,
    uname: str,
    keyfile: str,
    local_port: int,
    remote_port: int,
    signalbuf: Optional[list] = None,
) -> Union[tuple[Connection, Exception], Any]:
    """
    open an SSH tunnel and hold it open. mainly a helper for
    `open_tunnel()`, though it can be called directly. it polls `signalbuf`
    once per second and blocks until something is placed in it or an
    exception occurs, so it should generally be run in a thread. if
    `signalbuf` is None, only an exception ends the loop.

    Args:
        host: hostname of tunnel target
        uname: username on remote host
        keyfile: path to local SSH key file
        local_port: port for proximal end of tunnel
        remote_port: port for distal end of tunnel
        signalbuf: list to receive close-tunnel signal

    Returns:
        first element of `signalbuf` if closed gracefully; tuple of the
        Connection object and the Exception if it hits an exception.
    """
    conn = SSH.connect(host, uname, keyfile).conn
    try:
        with conn.forward_local(local_port, remote_port):
            # idle until a signal shows up in the shared buffer
            while signalbuf is None or len(signalbuf) == 0:
                time.sleep(1)
            conn.close()
            return signalbuf[0]
    except Exception as ex:
        return conn, ex

merge_csv(ssh_dict, fn, **csv_kwargs)

merges data from CSV files on multiple remote hosts into a single pandas DataFrame.

Parameters:

Name Type Description Default
ssh_dict Mapping[Hashable, SSH]

mapping whose keys are identifiers for remote hosts and whose values are SSH objects connected to those hosts.

required
fn str

path to file (must be the same on all remote hosts)

required
csv_kwargs Any

kwargs to pass to pd.read_csv()

{}

Returns:

Type Description
DataFrame

a DataFrame containing merged data from all remote CSV files,

DataFrame

including a "server" column that labels the source hosts using the

DataFrame

keys of ssh_dict.

Source code in hostess/ssh.py
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
def merge_csv(
    ssh_dict: Mapping[Hashable, SSH], fn: str, **csv_kwargs: Any
) -> pd.DataFrame:
    """
    read the same CSV path from several remote hosts and stack the results
    into one pandas DataFrame.

    Args:
        ssh_dict: mapping whose keys are identifiers for remote hosts and
            whose values are SSH objects connected to those hosts.
        fn: path to file (must be the same on all remote hosts)
        csv_kwargs: kwargs to pass to pd.read_csv()

    Returns:
         a DataFrame containing merged data from all remote CSV files,
         including a "server" column that labels the source hosts using the
         keys of `ssh_dict`. the index is reset to a fresh range.
    """

    def _labeled_frame(label, connection):
        # fetch one host's CSV and tag every row with that host's key
        frame = connection.read_csv(fn, **csv_kwargs)
        frame["server"] = label
        return frame

    merged = pd.concat(
        [_labeled_frame(label, conn) for label, conn in ssh_dict.items()]
    )
    return merged.reset_index(drop=True)

open_tunnel(host, uname, keyfile, local_port, remote_port)

launch a thread that maintains an SSH tunnel. NOTE: supports only keyfile authentication.

Parameters:

Name Type Description Default
host str

remote host ip

required
uname str

user name on remote host

required
keyfile Union[str, Path]

path to keyfile

required
local_port int

port on local end of tunnel

required
remote_port int

port on remote end of tunnel

required

Returns:

Name Type Description
signaler Callable[[Any], None]

function that shuts down tunnel

tunnel_metadata dict[str, Union[int, str, Path]]

dict of metadata about the tunnel

Source code in hostess/ssh.py
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def open_tunnel(
    host: str,
    uname: str,
    keyfile: Union[str, Path],
    local_port: int,
    remote_port: int,
) -> tuple[Callable[[Any], None], dict[str, Union[int, str, Path]]]:
    """
    start a background thread that keeps an SSH tunnel open. NOTE: supports
    only keyfile authentication.

    the thread runs `launch_tunnel_thread()` in a single-worker
    ThreadPoolExecutor. neither the executor nor the submitted future is
    returned or shut down here.

    Args:
        host: remote host ip
        uname: user name on remote host
        keyfile: path to keyfile
        local_port: port on local end of tunnel
        remote_port: port on remote end of tunnel

    Returns:
        signaler: function that shuts down tunnel by appending a signal
            (default 0) to the buffer the tunnel thread polls
        tunnel_metadata: dict of metadata about the tunnel
    """
    # shared with the tunnel thread, which exits once this is non-empty
    signalbuf = []

    def signaler(sig=0):
        signalbuf.append(sig)

    tunnel_metadata = dict(
        host=host,
        uname=uname,
        keyfile=keyfile,
        local_port=local_port,
        remote_port=remote_port,
    )
    executor = ThreadPoolExecutor(1)
    executor.submit(
        launch_tunnel_thread,
        host,
        uname,
        keyfile,
        local_port,
        remote_port,
        signalbuf,
    )
    return signaler, tunnel_metadata

stop_jupyter_factory(command, jupyter, port)

Create a function that shuts down a Jupyter server when some other task or process completes.

Parameters:

Name Type Description Default
command RunCommand

an instance of RunCommand or one of its subclasses, likely an SSH object.

required
jupyter str

absolute path to jupyter executable

required
port int

port on which jupyter server is running

required

Returns:

Type Description
Callable

function that, when passed anything with a wait method, calls that method, and once it finishes, attempts to stop the Jupyter server running on the specified port. If called with no arguments or with an object with no wait method, attempts to stop the server immediately.

Source code in hostess/ssh.py
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
def stop_jupyter_factory(
    command: RunCommand, jupyter: str, port: int
) -> Callable:
    """
    Create a function that shuts down a Jupyter server when some other task
    or process completes.

    Args:
        command: an instance of RunCommand or one of its subclasses, likely
            an SSH object.
        jupyter: absolute path to jupyter executable
        port: port on which jupyter server is running

    Returns:
        function that, when passed anything with a `wait` method, calls that
            method, and once it finishes, attempts to stop the Jupyter server
            running on the specified port. If called with no arguments or with
            an object with no `wait` method, attempts to stop the server
            immediately.
    """

    def stop_it(waitable: Any = None):
        # only block when the argument actually offers a callable `wait`;
        # None and wait-less objects fall straight through to the stop
        wait = getattr(waitable, "wait", None)
        if callable(wait):
            wait()
        command(f"{jupyter} stop {port}")

    return stop_it

unpack_transfer_result(result)

summarize a fabric transfer Result.

Parameters:

Name Type Description Default
result Result

Result of a get, put, or similar SSH operation.

required

Returns:

Type Description
dict

dict giving local and remote transfer targets, hostname, and port.

Source code in hostess/ssh.py
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
def unpack_transfer_result(result: fabric.transfer.Result) -> dict:
    """
    condense a fabric transfer Result into a plain dict.

    Args:
        result: Result of a get, put, or similar SSH operation.

    Returns:
        dict with keys "local" and "remote" (the transfer targets) and
        "host" and "port" (taken from the Result's connection).
    """
    summary = {"local": result.local, "remote": result.remote}
    summary["host"] = result.connection.host
    summary["port"] = result.connection.port
    return summary

station

station.actors

concrete implementations of Actor and Sensor base classes, along with some helper functions.

NODE_ACTION_FIELDS = frozenset({'id', 'start', 'stop', 'status', 'result'}) module-attribute

keys a dict must have to count as a valid "actiondict" in a Node's actions list

DirWatch

Bases: FileSystemWatch

like FileSystemWatch, but its target property should be a folder, not a file, and its patterns property matches newly-appearing filenames.

Source code in hostess/station/actors.py
436
437
438
439
440
441
442
443
444
445
class DirWatch(FileSystemWatch):
    """
    variant of FileSystemWatch whose `target` property should point at a
    folder rather than a file, and whose `patterns` property is matched
    against newly-appearing filenames.
    """

    name = "dirwatch"

    def __init__(self):
        # same setup as the parent, but checking with watch_dir instead of
        # the parent's default checker
        super().__init__(checker=watch_dir)

FileSystemWatch

Bases: Sensor

simple Sensor for watching contents of a filesystem. offers an interface for changing target path and regex match patterns. this base class tails a file. see DirWatch for a subclass that diffs a directory.

Source code in hostess/station/actors.py
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
class FileSystemWatch(Sensor):
    """
    basic Sensor that watches filesystem contents. exposes an interface
    for changing its target path, its log file, and its regex match
    patterns. this base class tails a file; see DirWatch for a subclass
    that diffs a directory.
    """

    _watched = None
    _logfile = None
    _patterns = ()

    def __init__(self, checker=tail_file):
        super().__init__()
        self.checker = checker

    def _get_target(self):
        return self._watched

    def _set_target(self, path):
        # the check step and the grepreport action both need the new path
        watched = Path(path)
        self._watched = watched
        self.config["check"]["path"] = watched
        self.config["grepreport"]["exec"]["path"] = watched

    def _get_logfile(self):
        return self._logfile

    def _set_logfile(self, path):
        # the attribute keeps the value as given; config gets a Path
        self._logfile = path
        self.config["linelog"]["exec"]["path"] = Path(path)

    def _get_patterns(self):
        return self._patterns

    def _set_patterns(self, patterns):
        # patterns feed both the match and exec stages of grepreport
        self._patterns = patterns
        grepreport = self.config["grepreport"]
        grepreport["match"]["patterns"] = patterns
        grepreport["exec"]["patterns"] = patterns

    name = "filewatch"
    actions = (ReportStringMatch,)
    loggers = (LineLogger,)
    interface = ("logfile", "target", "patterns")
    target = property(_get_target, _set_target)
    """what file does this Sensor watch?"""
    logfile = property(_get_logfile, _set_logfile)
    """where does this Sensor log its matches?"""
    patterns = property(_get_patterns, _set_patterns)
    """what patterns does this Sensor look for in the file?"""
logfile = property(_get_logfile, _set_logfile) class-attribute instance-attribute

where does this Sensor log its matches?

patterns = property(_get_patterns, _set_patterns) class-attribute instance-attribute

what patterns does this Sensor look for in the file?

target = property(_get_target, _set_target) class-attribute instance-attribute

what file does this Sensor watch?

FileWriter

Bases: Actor

Simple Actor that writes to a file.

Source code in hostess/station/actors.py
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
class FileWriter(Actor):
    """Simple Actor that writes to a file."""

    name = "filewrite"
    actortype = "action"
    interface = ("file", "mode")
    _file = None
    _mode = "a"

    def match(self, instruction: Any, **_) -> bool:
        """
        accept only Instructions whose action is named "filewrite" and
        whose "call" field is a localcall; raise NoMatch otherwise.
        """
        action = instruction.action
        if action.name != "filewrite":
            raise NoMatch("not a file write instruction")
        if action.WhichOneof("call") != "localcall":
            raise NoMatch("Not a properly-formatted local call")
        return True

    @reported
    def execute(
        self,
        node: "Node",
        action: Message,
        key=None,
        noid=False,
        **_,
    ):
        """
        open `self.file` in `self.mode` and write the object unpacked from
        the action's localcall to it.
        """
        with open(self.file, self.mode) as stream:
            payload = unpack_obj(action.localcall)
            stream.write(payload)

    def _get_file(self) -> Path:
        return self._file

    def _set_file(self, path: Path):
        self._file = path

    def _get_mode(self) -> str:
        return self._mode

    def _set_mode(self, mode: str):
        self._mode = mode

    file = property(_get_file, _set_file)
    """file this Actor writes to"""
    mode = property(_get_mode, _set_mode)
    """mode this Actor writes in -- one of 'w', 'wb', 'a', or 'ab'."""
file = property(_get_file, _set_file) class-attribute instance-attribute

file this Actor writes to

mode = property(_get_mode, _set_mode) class-attribute instance-attribute

mode this Actor writes in -- one of 'w', 'wb', 'a', or 'ab'.

FuncCaller

Bases: Actor

Versatile Actor that handles Instructions to call Python functions. A Station typically makes these using handlers.make_function_call().

Source code in hostess/station/actors.py
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
class FuncCaller(Actor):
    """
    General-purpose Actor for Instructions that call Python functions.
    A Station typically makes these using `handlers.make_function_call()`.
    """

    name = "funccaller"
    actortype = "action"

    def match(self, instruction: Any, **_) -> bool:
        """accept only Instructions whose "call" field is a functioncall."""
        if instruction.action.WhichOneof("call") == "functioncall":
            return True
        raise NoMatch("not a function call instruction")

    @reported
    def execute(
        self,
        node: "Node",
        action: Message,
        key=None,
        noid=False,
        **_,
    ) -> Any:
        """
        build the call described by the action, merge its caches into
        `node.actions[key]`, run it, and return the recorded "result"
        entry, or None if that entry is empty.
        """
        caches, call = make_function_call(action.functioncall)
        node.actions[key] |= caches
        call()
        result = node.actions[key]["result"]
        return result if len(result) != 0 else None

InstructionFromInfo

Bases: DispatchActor

skeleton Info-handling Actor for Stations. Checks, based on configurable criteria, whether an object unpacked from an Info message included in an Update indicates that the Station should assign a task to some handler Delegate, and, if it does, create an Instruction from that object based on an instruction-making function.

Note that this Actor is basically abstract by default; its criteria and instruction_maker properties must be assigned to make it do anything.

Source code in hostess/station/actors.py
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
class InstructionFromInfo(DispatchActor):
    """
    skeleton Info-handling Actor for Stations. Checks, based on configurable
    criteria, whether an object unpacked from an Info message included in an
    Update indicates that the Station should assign a task to some handler
    Delegate, and, if it does, create an Instruction from that object based on
    an instruction-making function.

    Note that this Actor is basically abstract by default; its `criteria` and
    `instruction_maker` properties must be assigned to make it do anything.
    """

    def match(self, note, **_) -> bool:
        """
        return True as soon as any criterion accepts `note`; raise NoMatch
        if no criteria are set or none of them accepts it.
        """
        if self.criteria is None:
            raise NoMatch("no criteria to match against")
        for criterion in self.criteria:
            if not criterion(note):
                continue
            return True
        raise NoMatch("note did not match criteria")

    def execute(
        self,
        node: "Station",
        note,
        **_,
    ):
        """
        pick a delegate for `note` and queue the Instruction built from it
        on `node`. raises TypeError if no instruction maker is assigned.
        """
        if self.instruction_maker is None:
            raise TypeError("Must have an instruction maker.")
        delegatename = self.pick(node, note)
        instruction = self.instruction_maker(note)
        node.queue_task(delegatename, instruction)

    name: str
    actortype = "info"
    interface = ("instruction_maker", "criteria")
    instruction_maker: Optional[Callable[[Any], pro.Instruction]] = None
    """function that generates an Instruction from an object"""
    criteria: Optional[Sequence[Callable[[Any], bool]]] = None
    """
    predicate functions that define what objects this Actor can handle. if
    any of these functions return True when passed an object, the Actor 
    matches that object.
    """
criteria = None class-attribute instance-attribute

predicate functions that define what objects this Actor can handle. if any of these functions return True when passed an object, the Actor matches that object.

instruction_maker = None class-attribute instance-attribute

function that generates an Instruction from an object

LineLogger

Bases: Actor

Simple Actor that logs all strings passed to it. intended to be attached to Sensors that need to generate their own logs rather than writing to their parent Delegate's primary log.

Source code in hostess/station/actors.py
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
class LineLogger(Actor):
    """
    Simple Actor that logs every string it is given. meant to be attached
    to Sensors that need to keep their own logs instead of writing to their
    parent Delegate's primary log.
    """

    name = "linelog"
    actortype = "log"

    def match(self, line, **_):
        """match only strings"""
        if not isinstance(line, str):
            raise NoMatch("not a string.")
        return True

    def execute(
        self, node: "delegates.Delegate", line: str, *, path=None, **_
    ):
        """append `line` plus a newline to `path`; do nothing if no path."""
        if path is not None:
            with path.open("a") as stream:
                stream.write(f"{line}\n")
match(line, **_)

match only strings

Source code in hostess/station/actors.py
282
283
284
285
286
def match(self, line, **_):
    """match only strings"""
    if not isinstance(line, str):
        raise NoMatch("not a string.")
    return True

PipeActorPlaceholder

Bases: Actor

pipeline execution actor. resubmits individual steps of pipelines as Instructions to the calling node. we haven't implemented this, so it doesn't do anything.

Source code in hostess/station/actors.py
136
137
138
139
140
141
142
143
144
145
146
147
148
class PipeActorPlaceholder(Actor):
    """
    placeholder for a pipeline-execution Actor, which would resubmit the
    individual steps of a pipeline to the calling node as Instructions.
    that behavior is not implemented: this class defines only `match()`
    and no `execute()` of its own.
    """

    name = "pipeline"

    def match(self, instruction: Any, **_):
        """
        accept only Instructions whose "task" oneof is set to "pipe".

        Returns:
            True if `instruction` is a pipeline instruction.

        Raises:
            NoMatch: if it is not.
        """
        if instruction.WhichOneof("task") != "pipe":
            raise NoMatch("not a pipeline instruction")
        return True

ReportStringMatch

Bases: Actor

Actor that checks whether a string matches any of a sequence of regex patterns. Intended to be used by Sensors that work by tailing a file or other data stream.

This Actor's execute() inserts the string into the parent Delegate's list of actionable events, annotated with a list of all patterns that matched the string, and, optionally, with a string denoting the string's source. the Delegate will use this to construct an Info Message it will include in an Update to its Station.

Source code in hostess/station/actors.py
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
class ReportStringMatch(Actor):
    """
    Actor that tests a string against a sequence of regex patterns. Meant
    for Sensors that watch a file or some other stream of text lines.

    When a string matches, `execute()` hands the parent Delegate an
    actionable event containing the string, the list of every pattern that
    matched it, and the stringified `path` describing where it came from.
    The Delegate will use this to construct an Info Message it will include
    in an Update to its Station.
    """

    name = "grepreport"
    actortype = "action"

    def match(self, line, *, patterns=(), **_):
        """
        accept `line` if it is a string and at least one pattern is found
        in it.

        Args:
            line: object to check.
            patterns: regex patterns, each tried with `re.search()`.

        Returns:
            True if any pattern matches.

        Raises:
            NoMatch: if `line` is not a string, or if no pattern matches
                (including when `patterns` is empty).
        """
        if not isinstance(line, str):
            raise NoMatch("is not a string")
        if any(re.search(pattern, line) for pattern in patterns):
            return True
        raise NoMatch("does not match patterns")

    def execute(
        self,
        node: "delegates.Delegate",
        line: str,
        patterns=(),
        *,
        path=None,
        **_,
    ):
        """
        register `line` with `node` as an actionable event.

        Args:
            node: parent Delegate; receives the event via its
                `add_actionable_event()` method.
            line: the matched string.
            patterns: regex patterns; those found in `line` are listed in
                the event's "match" field.
            path: source of `line`. stored as `str(path)`, so the default
                of None is recorded as the string "None".
        """
        hits = [p for p in patterns if re.search(p, line)]
        event = {"path": str(path), "content": line, "match": hits}
        node.add_actionable_event(event, self.owner)

SysCaller

Bases: Actor

Versatile Actor that handles Instructions to run OS-level shell commands.

Source code in hostess/station/actors.py
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
class SysCaller(Actor):
    """
    Versatile Actor that handles Instructions to run OS-level shell commands.
    """

    def match(self, instruction: Any, **_) -> bool:
        """
        accept only Instructions whose action's "call" oneof is set to
        "systemcall".

        Returns:
            True if `instruction` describes a system call.

        Raises:
            NoMatch: if it does not.
        """
        if instruction.action.WhichOneof("call") != "systemcall":
            raise NoMatch("not a system call instruction")
        return True

    @reported
    def execute(
        self,
        node: "Node",
        action: Message,
        key=None,
        noid=False,
        **_,
    ) -> tuple[dict[str, list[str]], str]:
        """
        run the command given in `action.systemcall.command` and record its
        output and exit code in `node.actions[key]`.

        Args:
            node: parent Node; its `actions[key]` entry is updated in place.
            action: Message with a `systemcall` field providing `command`
                and `payload`.
            key: key of this action's entry in `node.actions`.
            noid: not used in this method body.

        Returns:
            a dict with the command's "out" and "err", and a status string:
            "crash" if the return code is nonzero, otherwise "success".
        """
        # TODO:
        #  - handle environment variables. workaround is of course to just
        #    set them in the command
        #  - switch interpreter on command
        #  - handle compression
        #  - handle fork requests (in 'context' field of SystemCall message)
        kwargs = {}
        # feed a binary blob to process stdin if present
        # NOTE(review): `kwargs` is populated here but never passed to
        # RunCommand below, so the payload does not appear to reach the
        # process -- confirm whether this is intentional.
        # NOTE(review): if `payload` is a protobuf bytes field it is
        # presumably never None, making this check always true -- confirm.
        if action.systemcall.payload is not None:
            kwargs["_in_stream"] = BytesIO(action.systemcall.payload)
        viewer = RunCommand(action.systemcall.command, _viewer=True)()
        # slightly different convention than FunctionCaller because all we have
        # is out and err
        # the result is inserted before waiting on the command; presumably
        # so that output is visible while the command is still running --
        # TODO confirm
        node.actions[key] |= {"result": {"out": viewer.out, "err": viewer.err}}
        viewer.wait()
        # we don't want to report the action as failed for having stuff in
        # stderr because of how many applications randomly print to stderr.
        # the requesting object will have to handle that, or a subclass
        # could call this method from super and postfilter the results.
        # even raising the exception based on exit code is maybe questionable!
        node.actions[key]["exit_code"] = viewer.returncode()
        status = "crash" if viewer.returncode() != 0 else "success"
        return {"out": viewer.out, "err": viewer.err}, status

    name = "syscaller"
    actortype = "action"

conclude_execution(result, status=None, actiondict=None)

conclude a "regular" 'do'-type action, inserting relevant data into its associated actiondict. Intended primarily as a component function of the reported() decorator.

Note that individual Actors, even if they use reported(), often define additional cleanup steps.

Parameters:

Name Type Description Default
result Any

return value of, or Exception raised by, an Actor's execute() method.

required
status Optional[str]

optional status code. if not specified, status will always be "success" unless result is an Exception. Primarily intended to allow Actors to describe gracefully-handled failures.

None
actiondict Optional[MutableMapping[str, Any]]

element of parent Node's actions attribute. if not specified, creates an empty dict -- in this case, this function is basically a no-op.

None
Source code in hostess/station/actors.py
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
def conclude_execution(
    result: Any,
    status: Optional[str] = None,
    actiondict: Optional[MutableMapping[str, Any]] = None,
):
    """
    conclude a "regular" 'do'-type action, inserting relevant data into its
    associated actiondict. Intended primarily as a component function of the
    `reported()` decorator.

    Note that individual Actors, even if they use `reported()`, often define
    additional cleanup steps.

    Args:
        result: return value of, or Exception raised by, an Actor's execute()
            method.
        status: optional status code. if not specified, status will always be
            "success" unless `result` is an Exception. Primarily intended to
            allow Actors to describe gracefully-handled failures.
        actiondict: element of parent Node's `actions` attribute. if not
            specified, creates an empty dict -- in this case, this function
            is basically a no-op.
    """
    actiondict = {} if actiondict is None else actiondict
    if isinstance(result, Exception):
        # Actors that run commands in subprocesses may insert their own
        # 'streaming' results, which we do not want to overwrite with the
        # exception.
        actiondict["status"] = "crash"
        actiondict["exception"] = result
    else:
        # individual actors may have unique failure criteria
        actiondict["status"] = "success" if status is None else status
        actiondict["result"] = result
    # in some cases could check stderr but would have to be careful
    # due to the many processes that communicate on stderr on purpose
    actiondict["end"] = dt.datetime.now(dt.UTC)
    actiondict["duration"] = actiondict["end"] - actiondict["start"]

init_execution(node, instruction, key, noid)

perform setup for a "regular" 'do'-type Instruction. Intended primarily as a component function of the reported() decorator.

Parameters:

Name Type Description Default
node Node

Actor's parent Node (usually a Delegate)

required
instruction Message

'do'-type Instruction

required
key Optional[Hashable]

name of / identifier for Instruction's Action. if not specified, generates a random integer.

required
noid bool

don't insert the Instruction's id into the generated actiondict.

required

Returns:

Name Type Description
action Action

Action from action field of Instruction, or full Instruction if it contains only a description of the action

actiondict dict

data/metadata dict for the action, which this function generated and inserted into the parent Node's actions

key Hashable

identifier for action

Source code in hostess/station/actors.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def init_execution(
    node: Node, instruction: Message, key: Optional[Hashable], noid: bool
) -> tuple[pro.Action, dict, Hashable]:
    """
    set up bookkeeping for a "regular" 'do'-type Instruction. Mainly used as
    one of the building blocks of the `reported()` decorator.

    Args:
        node: parent Node of the Actor (usually a Delegate). a new entry is
            added to its `actions` mapping.
        instruction: 'do'-type Instruction
        key: name of / identifier for the Instruction's Action. if None, a
            random integer between 0 and 10,000,000 is used instead.
        noid: if False, store `key` in the new actiondict under
            "instruction_id"; any other value skips that step.

    Returns:
        action: the `action` field of `instruction` if that field is set;
            otherwise `instruction` itself (cases in which it contains only
            a description of the action)
        actiondict: data/metadata dict for the action, as created by
            `make_actiondict()` and stored in `node.actions[key]`
        key: identifier for action
    """
    # for brevity in execute() methods, hand back the bare action when there
    # is one; pure-description Instructions are passed through whole
    action = (
        instruction.action if instruction.HasField("action") else instruction
    )
    if key is None:
        key = random.randint(0, int(1e7))
    node.actions[key] = make_actiondict(action)
    actiondict = node.actions[key]
    if noid is False:
        actiondict["instruction_id"] = key
    return action, actiondict, key

reported(executor)

decorator for bound execute() methods of Actors that handle 'do'-type Instructions in a "normal" fashion. Provides standardized setup and conclusion behaviors.

Parameters:

Name Type Description Default
executor Callable

bound execute() method of associated Actor.

required

Returns:

Type Description
Callable

version of execute() method with added setup and conclusion steps.

Source code in hostess/station/actors.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
def reported(executor: Callable) -> Callable:
    """
    decorator for execute() methods of Actors that handle 'do'-type
    Instructions in a "normal" fashion. Wraps the method in standardized
    setup (`init_execution()`) and conclusion (`conclude_execution()`)
    steps.

    The wrapped method is called as `executor(self, node, action, key,
    **kwargs)`. Whatever it returns -- or any Exception it raises -- is
    passed through `listify()` and unpacked into `conclude_execution()`.
    The wrapper itself returns nothing; outcomes are recorded in the
    action's entry in `node.actions`.

    Args:
        executor: execute() method of associated Actor.

    Returns:
        version of execute() method with added setup and conclusion steps.
    """

    def with_reportage(
        self, node: Node, instruction: Message, key=None, noid=False, **kwargs
    ):
        # register the action with the node before running anything
        action, report, key = init_execution(node, instruction, key, noid)
        try:
            outcome = executor(self, node, action, key, **kwargs)
        except Exception as failure:
            # a raised Exception is reported as the action's sole result
            outcome = (failure,)
        conclude_execution(*listify(outcome), actiondict=report)

    return with_reportage

station.bases

base classes and helpers for Delegates, Stations, Sensors, and Actors.

Actor

Bases: ABC

abstract base class enabling conditional responses to events. Actors should generally only be instantiated from methods of a parent Matcher.

Source code in hostess/station/bases.py
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
class Actor(ABC):
    """
    abstract base class for objects that respond conditionally to events:
    `match()` decides whether an event is relevant, `execute()` acts on it.
    Actors should generally only be instantiated from methods of a parent
    Matcher.
    """

    name: str
    config: Mapping
    class_interface = ()
    interface = ()
    actortype: str
    owner = None

    def __init__(self):
        # the keyword-only parameters of match() and execute() define what
        # is configurable. they must be read before the methods are wrapped
        # below.
        match_params = inspect.getfullargspec(self.match).kwonlyargs
        exec_params = inspect.getfullargspec(self.execute).kwonlyargs
        # every configurable parameter starts out as None
        self.config = {
            "match": dict.fromkeys(match_params),
            "exec": dict.fromkeys(exec_params),
        }
        self.params = {
            "match": tuple(match_params),
            "exec": tuple(exec_params),
        }
        # shadow the methods on this instance with versions tied to the
        # matching section of self.config
        self.match = configured(self.match, self.config["match"])
        self.execute = configured(self.execute, self.config["exec"])

    def match(self, event: Any, **_: Any) -> bool:
        """
        Decide whether this Actor can / should handle `event`. Concrete
        subclasses must override this method; this base version always
        raises NotImplementedError.

        Args:
            event: event to match.
            **_: placeholder for kwargs defined in a subclass.

        Returns:
            True if this Actor can handle `event`.

        Raises:
            NoMatch: if this Actor cannot handle `event`.
        """
        raise NotImplementedError

    def execute(self, node: Node, event: Any, **kwargs: Any) -> Any:
        """
        Do whatever this Actor does with an object it has matched. Concrete
        subclasses must override this method; this base version always
        raises NotImplementedError.

        Args:
            node: parent Node of this Actor.
            event: object to do something with.
            **kwargs: placeholder for kwargs defined in a subclass.
        """
        raise NotImplementedError
execute(node, event, **kwargs)

This method defines what an Actor does with objects it matches. Must be implemented in concrete subclasses of Actor.

Parameters:

Name Type Description Default
node Node

parent Node of this Actor. In normal operation, this argument will never be explicitly passed, but instead partially evaluated into this method in Actor.__init__().

required
event Any

object to do something with.

required
**kwargs Any

placeholder for kwargs defined in a subclass.

{}
Source code in hostess/station/bases.py
424
425
426
427
428
429
430
431
432
433
434
435
436
def execute(self, node: Node, event: Any, **kwargs: Any) -> Any:
    """
    Do whatever this Actor does with an object it has matched. Concrete
    subclasses of Actor must override this method; this base version always
    raises NotImplementedError.

    Args:
        node: parent Node of this Actor.
        event: object to do something with.
        **kwargs: placeholder for kwargs defined in a subclass.
    """
    raise NotImplementedError
match(event, **_)

Determine if this Actor can / should handle a given event. Must be implemented in concrete subclasses of Actor.

Parameters:

Name Type Description Default
event Any

event to match.

required
**_ Any

placeholder for kwargs defined in a subclass.

{}

Returns:

Type Description
bool

True if this Actor can handle event.

Raises:

Type Description
NoMatch

if this Actor cannot handle event.

Source code in hostess/station/bases.py
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
def match(self, event: Any, **_: Any) -> bool:
    """
    Decide whether this Actor can / should handle `event`. Concrete
    subclasses of Actor must override this method; this base version always
    raises NotImplementedError.

    Args:
        event: event to match.
        **_: placeholder for kwargs defined in a subclass.

    Returns:
        True if this Actor can handle `event`.

    Raises:
        NoMatch: if this Actor cannot handle `event`.
    """
    raise NotImplementedError

AllBusy

Bases: Exception

A Station, possibly via one of its Actors, attempted to dispatch an Instruction to one of its Delegates, but all appropriate Delegates for the Instruction were busy.

Source code in hostess/station/bases.py
558
559
560
561
562
563
class AllBusy(Exception):
    """
    Signals that a Station (possibly acting through one of its Actors) tried
    to dispatch an Instruction to a Delegate, but every Delegate suitable
    for that Instruction was busy.
    """

AttrConsumer

Mix-in class that provides functionality for "consuming" attributes of other objects. Designed to permit Nodes and similar objects to promote interface properties of attached elements into their own interfaces as pseudo-attributes.

Source code in hostess/station/bases.py
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
class AttrConsumer:
    """
    Mix-in class that provides functionality for "consuming" attributes of
    other objects. Designed to permit Nodes and similar objects to promote
    interface properties of attached elements into their own interfaces as
    pseudo-attributes.

    Consumed attributes are recorded in `self.attrefs`, a dict mapping each
    pseudo-attribute name to an `(object, attribute_name)` pair. Reads and
    writes of a pseudo-attribute are forwarded to that object.
    """

    def __init__(self):
        self.attrefs = {}

    def consume_property(
        self, obj: Any, attr: str, newname: Optional[str] = None
    ):
        """
        consume an attribute of another object into this object's interface.

        Args:
            obj: object from which to consume attribute
            attr: name of attribute to consume
            newname: optional name of referencing attribute of `self`;
                if not specified, just use `attr`.
        """
        newname = attr if newname is None else newname
        self.attrefs[newname] = (obj, attr)

    def __getattr__(self, attr):
        """
        if normal attribute lookup fails, attempt to refer it to a property
        of an associated object.

        Raises:
            AttributeError: if `attr` is not a consumed attribute.
        """
        # `attrefs` itself must never be resolved through this hook. if it
        # is missing (instance created without running __init__, e.g. via
        # __new__), looking it up below would re-enter this method without
        # bound and end in a RecursionError.
        if attr == "attrefs":
            raise AttributeError(attr)
        ref = self.attrefs.get(attr)
        if ref is None:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {attr!r}"
            )
        return getattr(ref[0], ref[1])

    def __setattr__(self, attr, value):
        """
        refer assignments to pseudo-attributes defined in self.attrefs to the
        properties of the underlying objects. all other assignments are
        performed normally.

        Raises:
            ConsumedAttributeError: if assigning to the underlying object's
                attribute raises an AttributeError.
        """
        if attr == "attrefs":
            return super().__setattr__(attr, value)
        # tolerate assignments made before `attrefs` exists (e.g. in a
        # subclass __init__ that has not yet called this class's __init__)
        refs = getattr(self, "attrefs", None)
        ref = None if refs is None else refs.get(attr)
        if ref is None:
            return super().__setattr__(attr, value)
        try:
            return setattr(ref[0], ref[1], value)
        except AttributeError as ae:
            raise ConsumedAttributeError(str(ae)) from ae

    def __dir__(self):
        """
        add the pseudo-attributes in attrefs to this object's directory so
        that they look real.
        """
        # noinspection PyUnresolvedReferences
        return super().__dir__() + list(self.attrefs)

    def _get_interface(self) -> list[str]:
        # names of all currently-consumed pseudo-attributes
        return list(self.attrefs)

    def _set_interface(self, _):
        raise TypeError("interface does not support assignment")

    # read-only view of the consumed attribute names
    interface = property(_get_interface, _set_interface)
__dir__()

add the pseudo-attributes in attrefs to this object's directory so that they look real.

Source code in hostess/station/bases.py
100
101
102
103
104
105
106
def __dir__(self):
    """
    list this object's regular attributes followed by the names of the
    pseudo-attributes recorded in attrefs, so that the latter look real.
    """
    # noinspection PyUnresolvedReferences
    listing = super().__dir__()
    consumed = list(self.attrefs)
    return listing + consumed
__getattr__(attr)

if normal attribute lookup fails, attempt to refer it to a property of an associated object.

Source code in hostess/station/bases.py
76
77
78
79
80
81
82
83
84
def __getattr__(self, attr):
    """
    if normal attribute lookup fails, attempt to refer it to a property
    of an associated object.

    Raises:
        AttributeError: if `attr` is not a consumed attribute.
    """
    # `attrefs` itself must never be resolved through this hook. if it is
    # missing (instance created without running __init__, e.g. via
    # __new__), looking it up below would re-enter this method without
    # bound and end in a RecursionError.
    if attr == "attrefs":
        raise AttributeError(attr)
    ref = self.attrefs.get(attr)
    if ref is None:
        raise AttributeError(
            f"{type(self).__name__!r} object has no attribute {attr!r}"
        )
    return getattr(ref[0], ref[1])
__setattr__(attr, value)

refer assignments to pseudo-attributes defined in self.attrefs to the properties of the underlying objects.

Source code in hostess/station/bases.py
86
87
88
89
90
91
92
93
94
95
96
97
98
def __setattr__(self, attr, value):
    """
    forward assignments to pseudo-attributes recorded in self.attrefs to the
    underlying object's attribute; perform every other assignment
    (including to `attrefs` itself) normally.

    Raises:
        ConsumedAttributeError: if assigning to the underlying object's
            attribute raises an AttributeError.
    """
    # `attrefs` is never looked up in itself
    ref = None if attr == "attrefs" else self.attrefs.get(attr)
    if ref is None:
        return super().__setattr__(attr, value)
    try:
        return setattr(ref[0], ref[1], value)
    except AttributeError as ae:
        raise ConsumedAttributeError(str(ae))
consume_property(obj, attr, newname=None)

consume an attribute of another object into this object's interface.

Parameters:

Name Type Description Default
obj Any

object from which to consume attribute

required
attr str

name of attribute to consume

required
newname Optional[str]

optional name of referencing attribute of self; if not specified, just use attr.

None
Source code in hostess/station/bases.py
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def consume_property(
    self, obj: Any, attr: str, newname: Optional[str] = None
):
    """
    expose an attribute of another object as a pseudo-attribute of `self`
    by recording an `(obj, attr)` pair in `self.attrefs`.

    Args:
        obj: object that owns the attribute
        attr: name of the attribute on `obj`
        newname: name under which `self` should expose the attribute;
            defaults to `attr` when not given.
    """
    alias = newname if newname is not None else attr
    self.attrefs[alias] = (obj, attr)

ConsumedAttributeError

Bases: AttributeError

An AttrConsumer's attempt to set a consumed attribute failed.

Source code in hostess/station/bases.py
44
45
46
47
class ConsumedAttributeError(AttributeError):
    """Raised when an AttrConsumer fails to set a consumed attribute."""

DispatchActor

Bases: Actor, ABC

abstract class for Actors intended to dispatch Instructions from Stations to Nodes.

Source code in hostess/station/bases.py
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
class DispatchActor(Actor, ABC):
    """
    abstract base for Actors whose job is to dispatch Instructions from
    Stations to Nodes. Adds the Delegate-selection settings `target_name`,
    `target_actor`, and `target_picker` to the Actor's interface.
    """

    target_name: Optional[str] = None
    """if set, dispatch only to Delegates named exactly target_name."""
    target_actor: Optional[str] = None
    """
    if set, dispatch only to Delegates that have an Actor named target_actor.
    """
    target_picker: Optional[Callable[[dict, Message], str]] = None
    """
    function that can be used to define more complex Delegate selection 
    behaviors.
    """

    def __init__(self):
        super().__init__()
        selection_settings = ("target_name", "target_actor", "target_picker")
        self.interface = self.interface + selection_settings

    def pick(self, station: "Station", instruction: Message, **_: Any) -> str:
        """
        Choose the Delegate of `station` that should receive `instruction`.

        The Station's delegates are narrowed by each of target_name,
        target_actor, and target_picker that is set. The first remaining
        Delegate whose "busy" entry is False is chosen; if there is none,
        one of the remaining Delegates is chosen at random.

        Args:
            station: Parent Station. in normal operation, will never be
                explicitly passed.
            instruction: Instruction Message to dispatch.
            **_: placeholder for kwargs defined in concrete subclasses.

        Returns:
            Name of selected Delegate.

        Raises:
            NoMatchingDelegate: if no Delegate matches rules defined by this
                Actor's target_name, target_picker, or target_actor
                attributes, including if the Station has no Delegates.
            TypeError: if this Actor's target_name, target_picker, and
                target_actor attributes are all None.
        """
        if (
            self.target_name is None
            and self.target_actor is None
            and self.target_picker is None
        ):
            raise TypeError("Must have a delegate name, actor name, or picker")
        candidates = station.delegates
        if self.target_name is not None:
            candidates = [
                d for d in candidates if d["name"] == self.target_name
            ]
        if self.target_actor is not None:
            candidates = [
                d for d in candidates if self.target_actor in d["actors"]
            ]
        if self.target_picker is not None:
            candidates = [
                d for d in candidates if self.target_picker(d, instruction)
            ]
        if not candidates:
            raise NoMatchingDelegate
        idle = [d for d in candidates if d.get("busy") is False]
        if idle:
            return idle[0]["name"]
        # everyone is busy (or of unknown status): pick at random
        shuffle(candidates)
        return candidates[0]["name"]
target_actor = None class-attribute instance-attribute

if set, dispatch only to Delegates that have an Actor named target_actor.

target_name = None class-attribute instance-attribute

if set, dispatch only to Delegates named exactly target_name.

target_picker = None class-attribute instance-attribute

function that can be used to define more complex Delegate selection behaviors.

pick(station, instruction, **_)

Pick which of a Station's Delegates to send an Instruction to.

Parameters:

Name Type Description Default
station 'Station'

Parent Station. in normal operation, will never be explicitly passed.

required
instruction Message

Instruction Message to dispatch.

required
**_ Any

placeholder for kwargs defined in concrete subclasses.

{}

Returns:

Type Description
str

Name of selected Delegate.

Raises:

Type Description
NoMatchingDelegate

if no Delegate matches rules defined by this Actor's target_name, target_picker, or target_actor attributes, including if the Station has no Delegates.

TypeError

if this Actor's target_name, target_picker, and target_actor attributes are all None.

Source code in hostess/station/bases.py
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
def pick(self, station: "Station", instruction: Message, **_: Any) -> str:
    """
    Choose the Delegate of `station` that should receive `instruction`.

    The Station's delegates are narrowed by each of target_name,
    target_actor, and target_picker that is set. The first remaining
    Delegate whose "busy" entry is False is chosen; if there is none, one
    of the remaining Delegates is chosen at random.

    Args:
        station: Parent Station. in normal operation, will never be
            explicitly passed.
        instruction: Instruction Message to dispatch.
        **_: placeholder for kwargs defined in concrete subclasses.

    Returns:
        Name of selected Delegate.

    Raises:
        NoMatchingDelegate: if no Delegate matches rules defined by this
            Actor's target_name, target_picker, or target_actor
            attributes, including if the Station has no Delegates.
        TypeError: if this Actor's target_name, target_picker, and
            target_actor attributes are all None.
    """
    if (
        self.target_name is None
        and self.target_actor is None
        and self.target_picker is None
    ):
        raise TypeError("Must have a delegate name, actor name, or picker")
    candidates = station.delegates
    if self.target_name is not None:
        candidates = [d for d in candidates if d["name"] == self.target_name]
    if self.target_actor is not None:
        candidates = [
            d for d in candidates if self.target_actor in d["actors"]
        ]
    if self.target_picker is not None:
        candidates = [
            d for d in candidates if self.target_picker(d, instruction)
        ]
    if not candidates:
        raise NoMatchingDelegate
    idle = [d for d in candidates if d.get("busy") is False]
    if idle:
        return idle[0]["name"]
    # everyone is busy (or of unknown status): pick at random
    shuffle(candidates)
    return candidates[0]["name"]

DoNotUnderstand

Bases: ValueError

This Delegate does not know how to interpret this Instruction.

Source code in hostess/station/bases.py
515
516
class DoNotUnderstand(ValueError):
    """Raised when a Delegate cannot interpret an Instruction it received."""

Matcher

Bases: AttrConsumer, ABC

Abstract mix-in class for Node and Sensor. Provides functionality for matching objects against Actors.

Source code in hostess/station/bases.py
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
class Matcher(AttrConsumer, ABC):
    """
    Abstract mix-in for Node and Sensor that supplies the machinery for
    matching objects against a collection of Actors, and for attaching
    Actors and Sensors to the object.
    """

    actors: dict[str, Actor]
    params: dict[str, Any]
    sensors: dict[str, "Sensor"]

    def match(
        self, event: Any, category: Optional[str] = None, **kwargs: Any
    ) -> list[Actor]:
        """
        Find out which of this Matcher's Actors, if any, can handle an event.

        An Actor counts as matching if its `match()` method returns without
        raising; the value it returns is not inspected.

        Args:
            event: object to match Actors against
            category: optional category of Actor to check; if specified,
                attempt to match `event` only against Actors whose
                `actortype` attribute is equal to (or, if a tuple, contains)
                `category`
            **kwargs: kwargs to pass to Actor.match

        Returns:
            list of all Actors that matched `event`.

        Raises:
            NoActorForEvent: if no Actors matched `event`.
        """
        matched = []
        for actor in self.filter_actors_by_category(category):
            try:
                actor.match(event, **kwargs)
            except (NoMatch, AttributeError, KeyError, ValueError, TypeError):
                # any of these means "this Actor does not apply"
                continue
            matched.append(actor)
        if not matched:
            raise NoActorForEvent
        return matched

    def explain_match(
        self, event: Any, category: Optional[str] = None, **kwargs: Any
    ) -> dict[str, Union[str, bool]]:
        """
        Introspection helper for the matching process: runs every relevant
        Actor's `match()` and reports what happened.

        Args:
            event: object to match Actors against
            category: optional category of Actors to match `event` against
            **kwargs: kwargs for `Actor.match()`

        Returns:
            dict whose keys are the names of Actors and whose values are
                the output of each actor's `match()` method, or a
                stringified version of the Exception it raised.
        """
        reasons = {}
        for actor in self.filter_actors_by_category(category):
            try:
                outcome = actor.match(event, **kwargs)
            except KeyboardInterrupt:
                raise
            except Exception as err:
                outcome = f"{type(err)}: {err}"
            reasons[actor.name] = outcome
        return reasons

    def filter_actors_by_category(
        self, actortype: Optional[str]
    ) -> list[Actor]:
        """
        Args:
            actortype: optional string denoting a category of Actor

        Returns:
            A list containing all of this object's Actors whose `actortype`
                attribute is equal to `actortype` (or is a tuple that
                contains it), or, if `actortype` is None, simply all of
                this object's Actors.
        """
        everyone = list(self.actors.values())
        if actortype is None:
            return everyone
        return [
            actor
            for actor in everyone
            if actor.actortype == actortype
            or (
                isinstance(actor.actortype, tuple)
                and actortype in actor.actortype
            )
        ]

    def add_element(
        self, cls: Union[type[Actor], type[Sensor]], name: Optional[str] = None
    ):
        """
        create an instance of an Actor or Sensor class and attach it to this
        object, consuming its interface properties and making it available
        for matching or sensor looping.

        Args:
            cls: Actor or Sensor type to instantiate and associate
            name: optional custom name for element that will be used to
                identify it in this object's configuration dictionary
                and properties interface. If not specified, uses the name of
                the element's class, suffixing incrementing numbers if that
                name would collide with an already-associated element.

        Returns:
            the name under which the element was registered.

        Raises:
            TypeError: if `cls` is neither an Actor nor a Sensor subclass.
        """
        requested = cls.name if name is None else name
        name = inc_name(requested, self.cdict)
        element = cls()
        element.name = name
        element.owner = self
        if issubclass(cls, Actor):
            self.actors[name] = element
        elif issubclass(cls, Sensor):
            self.sensors[name] = element
            element.set_poll_nonsticky(self.poll)
        else:
            raise TypeError(f"{cls} is not a valid subelement for this class.")
        self.cdict[name], self.params[name] = element.config, element.params
        # expose each interface property as `<element name>_<property>`
        for prop in element.interface:
            self.consume_property(element, prop, f"{name}_{prop}")
        return name
add_element(cls, name=None)

instantiate an Actor or Sensor and associate it with this object, consuming its interface properties and making it available for matching or sensor looping.

Parameters:

Name Type Description Default
cls Union[type[Actor], type[Sensor]]

Actor or Sensor type to instantiate and associate

required
name Optional[str]

optional custom name for element that will be used to identify it in this object's configuration dictionary and properties interface. If not specified, uses the name of the element's class, suffixing incrementing numbers if that name would collide with an already-associated element.

None
Source code in hostess/station/bases.py
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
def add_element(
    self, cls: Union[type[Actor], type[Sensor]], name: Optional[str] = None
):
    """
    Create an instance of an Actor or Sensor class and attach it to this
    object: register it under a unique name, expose its config and params,
    and consume each property listed in its interface.

    Args:
        cls: Actor or Sensor type to instantiate and attach
        name: optional custom name for the element. It is used as the key
            in this object's configuration dictionary and as the prefix of
            the consumed properties. If omitted, the class's `name`
            attribute is used. Either way the name is passed through
            `inc_name()` together with this object's `cdict`; per the
            original documentation, this suffixes incrementing numbers when
            the name would collide with an already-attached element.

    Returns:
        the name under which the element was actually registered.

    Raises:
        TypeError: if `cls` is neither an Actor nor a Sensor subclass.
    """
    requested = name if name is not None else cls.name
    name = inc_name(requested, self.cdict)
    element = cls()
    element.name = name
    element.owner = self
    if issubclass(cls, Actor):
        self.actors[name] = element
    elif issubclass(cls, Sensor):
        self.sensors[name] = element
        element.set_poll_nonsticky(self.poll)
    else:
        raise TypeError(f"{cls} is not a valid subelement for this class.")
    # read both attributes before storing either, so a failure leaves
    # neither mapping updated
    config, params = element.config, element.params
    self.cdict[name] = config
    self.params[name] = params
    for prop in element.interface:
        self.consume_property(element, prop, f"{name}_{prop}")
    return name
explain_match(event, category=None, **kwargs)

Introspection function for matching process.

Parameters:

Name Type Description Default
event Any

object to match Actors against

required
category Optional[str]

optional category of Actors to match event against

None
**kwargs Any

kwargs for Actor.match()

{}

Returns:

Type Description
dict[str, Union[str, bool]]

dict whose keys are the names of Actors and whose values are the output of each actor's match() method, or a stringified version of the Exception it raised.

Source code in hostess/station/bases.py
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
def explain_match(
    self, event: Any, category: Optional[str] = None, **kwargs: Any
) -> dict[str, Union[str, bool]]:
    """
    Report, Actor by Actor, what happens when `event` is matched.

    Args:
        event: object to match Actors against
        category: optional category of Actors to match `event` against;
            passed to `filter_actors_by_category()`
        **kwargs: kwargs for `Actor.match()`

    Returns:
        dict keyed by Actor name. Each value is either the return value of
            that Actor's `match()` call or, if the call raised an
            Exception, the string `"<exception type>: <message>"`.
    """
    outcomes = {}
    for candidate in self.filter_actors_by_category(category):
        # KeyboardInterrupt is not an Exception subclass, so it still
        # propagates to the caller.
        try:
            outcome = candidate.match(event, **kwargs)
        except Exception as err:
            outcome = f"{type(err)}: {err}"
        outcomes[candidate.name] = outcome
    return outcomes
filter_actors_by_category(actortype)

Parameters:

Name Type Description Default
actortype Optional[str]

optional string denoting a category of Actor

required

Returns:

Type Description
list[Actor]

A list containing all of this object's Actors whose actortype attribute is equal to actortype (or is a tuple containing it), or, if actortype is None, simply all of this object's Actors.

Source code in hostess/station/bases.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
def filter_actors_by_category(
    self, actortype: Optional[str]
) -> list[Actor]:
    """
    Select this object's Actors by category.

    Args:
        actortype: optional string denoting a category of Actor

    Returns:
        A list of all of this object's Actors if `actortype` is None.
            Otherwise, a list of the Actors whose `actortype` attribute
            either equals `actortype` or is a tuple that contains it.
    """
    if actortype is None:
        return list(self.actors.values())
    return [
        actor
        for actor in self.actors.values()
        if actor.actortype == actortype
        or (isinstance(actor.actortype, tuple) and actortype in actor.actortype)
    ]
match(event, category=None, **kwargs)

Check the Matcher's Actors to see which, if any, can handle an event.

Parameters:

Name Type Description Default
event Any

object to match Actors against

required
category Optional[str]

optional category of Actor to check; if specified, attempt to match event only against Actors whose category attribute is equal to category

None
**kwargs Any

kwargs to pass to Actor.match

{}

Returns:

Type Description
list[Actor]

list of all Actors that matched event.

Raises:

Type Description
NoActorForEvent

if no Actors matched event.

Source code in hostess/station/bases.py
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
def match(
    self, event: Any, category: Optional[str] = None, **kwargs: Any
) -> list[Actor]:
    """
    Find which of the Matcher's Actors, if any, can handle an event.

    An Actor counts as matching when its `match()` call returns without
    raising. NoMatch, AttributeError, KeyError, ValueError, and TypeError
    are treated as "no match" for that Actor; any other exception
    propagates.

    Args:
        event: object to match Actors against
        category: optional category of Actor to check; if specified,
            only the Actors selected by `filter_actors_by_category()` for
            that category are tried
        **kwargs: kwargs to pass to Actor.match

    Returns:
        list of all Actors that matched `event`.

    Raises:
        NoActorForEvent: if no Actors matched `event`.
    """
    matched = []
    for candidate in self.filter_actors_by_category(category):
        try:
            candidate.match(event, **kwargs)
        except (NoMatch, AttributeError, KeyError, ValueError, TypeError):
            continue
        matched.append(candidate)
    if not matched:
        raise NoActorForEvent
    return matched

NoActorForEvent

Bases: DoNotUnderstand

This Matcher has no Actor that matches this Event.

Source code in hostess/station/bases.py
519
520
class NoActorForEvent(DoNotUnderstand):
    """
    This Matcher has no Actor that matches this Event. Raised by
    `Matcher.match()` when none of the candidate Actors matched.
    """

NoConfigError

Bases: DoNotUnderstand

This Delegate received a 'configure' Instruction, but the Instruction did not specify what to configure.

Source code in hostess/station/bases.py
530
531
532
533
534
class NoConfigError(DoNotUnderstand):
    """
    This Delegate received a 'configure' Instruction that did not specify
    what to configure.
    """

NoInstructionType

Bases: DoNotUnderstand

This Delegate received an Instruction that did not specify what type of Instruction it was.

Source code in hostess/station/bases.py
537
538
539
540
541
class NoInstructionType(DoNotUnderstand):
    """
    This Delegate received an Instruction that did not specify its own
    Instruction type.
    """

NoMatch

Bases: Exception

This Actor does not match an event passed to its check() method. This Exception is used primarily for control flow.

Source code in hostess/station/bases.py
544
545
546
547
548
class NoMatch(Exception):
    """
    This Actor does not match an event passed to its `check()` method. This
    Exception is used primarily for control flow: `Matcher.match()` catches
    it and moves on to the next Actor.
    """

NoMatchingDelegate

Bases: Exception

A Station, possibly via one of its Actors, attempted to dispatch an Instruction to one of its Delegates, but found no appropriate Delegate.

Source code in hostess/station/bases.py
551
552
553
554
555
class NoMatchingDelegate(Exception):
    """
    A Station, possibly via one of its Actors, attempted to dispatch an
    Instruction to one of its Delegates but found no appropriate Delegate.
    """

NoTaskError

Bases: DoNotUnderstand

This Delegate received a 'do' Instruction, but the Instruction included neither an Action Message nor a description of a task to perform.

Source code in hostess/station/bases.py
523
524
525
526
527
class NoTaskError(DoNotUnderstand):
    """
    This Delegate received a 'do' Instruction, but the Instruction included
    neither an Action Message nor a description of a task to perform.
    """

Node

Bases: Matcher, ABC

Abstract base class for Delegates and Stations. Defines core behavior like running Sensors, spooling events to Actors, managing a TCPTalk server, constructing an interface, starting up, shutting down, and logging.

Source code in hostess/station/bases.py
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
class Node(Matcher, ABC):
    """
    Abstract base class for Delegates and Stations. Defines core behavior like
    running Sensors, spooling events to Actors, managing a TCPTalk server,
    constructing an interface, starting up, shutting down, and logging.
    """

    def __init__(
        self,
        name: str,
        n_threads: int = 6,
        elements: tuple[Union[type[Sensor], type[Actor]], ...] = (),
        start: bool = False,
        can_receive: bool = False,
        host: Optional[str] = None,
        port: Optional[int] = None,
        poll: float = 0.08,
        timeout: int = 10,
        _is_process_owner: bool = False,
        **extra_attrs,
    ):
        """
        Args:
            name: name of this Node; stored as `self.name`.
            n_threads: size of the Node's shared thread pool.
            elements: Actor and/or Sensor types to attach via
                `add_element()` during initialization.
            start: if True, call `start()` at the end of initialization.
            can_receive: whether this Node should run a TCPTalk server.
            host: hostname for the TCPTalk server; must be None for a
                non-receiving Node.
            port: port for the TCPTalk server; must be None for a
                non-receiving Node.
            poll: stored as `self.poll`.
            timeout: stored as `self.timeout`.
            _is_process_owner: if True, register an atexit hook that logs
                "shutdown complete".
            **extra_attrs: set as attributes on the Node before anything
                else happens (mostly attributes a subclass needs for
                logging).

        Note:
            `_set_logfile()` runs outside the try block, so its exceptions
            propagate. Exceptions raised by any later step are logged as
            "initialization failed" and NOT re-raised.
        """
        super().__init__()
        # attributes unique to subclass but necessary for this step of
        # initialization, mostly related to logging
        for k, v in extra_attrs.items():
            setattr(self, k, v)
        self.host, self.port = host, port
        self.params, self.name = {}, name
        self.init_time = filestamp()
        self._set_logfile()
        try:
            self.logfile.parent.mkdir(exist_ok=True, parents=True)
            self._log("initializing", category="system")
            self.cdict, self.actors, self.sensors = {}, {}, {}
            self.threads, self._lock = {}, threading.Lock()
            # NOTE(review): elements are attached before `self.poll` is
            # assigned below, while Matcher.add_element reads `self.poll`
            # for Sensors -- confirm `poll` is available by other means
            # (e.g. a class attribute or `extra_attrs`).
            for element in elements:
                self.add_element(element)
            self.n_threads = n_threads
            self.exc = ThreadPoolExecutor(n_threads)
            self.can_receive = can_receive
            self.poll, self.timeout, self.signals = poll, timeout, {}
            self.__is_process_owner = _is_process_owner
            self.is_shut_down = False
            self.exception = None
            atexit.register(self.exc.shutdown, wait=False, cancel_futures=True)
            if self.__is_process_owner is True:
                atexit.register(
                    partial(self._log, "shutdown complete", category="system")
                )
            if start is True:
                self.start()
        except Exception as ex:
            self._log("initialization failed", exception=ex, category="system")

    def restart_server(self):
        """
        (re)start the node's TCPTalk server (if it is supposed to have one).

        Raises:
            TypeError: if host/port are given for a non-receiving Node, or
                if either is missing for a receiving Node.
        """
        if self.server is not None:
            # NOTE(review): the server is subscripted here but accessed via
            # attributes below -- confirm TCPTalk supports item access.
            self.server["kill"]()
        if self.can_receive is False:
            if (self.host is not None) or (self.port is not None):
                raise TypeError(
                    "cannot provide host/port for non-receiving node."
                )
        elif (self.host is None) or (self.port is None):
            raise TypeError("must provide host and port for receiving node.")
        elif self.can_receive is True:
            self.server = TCPTalk(
                self.host,
                self.port,
                ackcheck=self._ackcheck,
                executor=self.exc,
            )
            # expose the server's threads, data, and signals on the Node
            self.threads |= self.server.threads
            self.inbox = self.server.data
            for ix, sig in self.server.signals.items():
                self.signals[f"server_{ix}"] = sig
        else:
            # can_receive is neither True nor False
            self.server, self.server_events, self.inbox = None, None, None

    def _set_logfile(self):
        """
        concrete subclasses must define rules for constructing log filenames.
        """
        raise NotImplementedError

    def start(self):
        """
        Start the Node's main loop and, if it is supposed to have one, its
        TCPTalk server.

        Raises:
            EnvironmentError: if the Node has already been started.
        """
        if self.__started is True:
            raise EnvironmentError("Node already started.")
        self._log("starting", category="system")
        self.restart_server()
        self.threads["main"] = self.exc.submit(self._start)
        self.__started = True
        self.state = "nominal"
        self._log("completed start", category="system")

    def nodeid(self) -> dict[str, Union[str, int]]:
        """
        get basic identifying information for this Node.

        Returns:
            dict whose keys are "name", "pid", and "host".
        """
        return {
            "name": self.name,
            "pid": os.getpid(),
            "host": socket.gethostname(),
        }

    def add_element(
        self, cls: Union[type[Actor], type[Sensor]], name: Optional[str] = None
    ):
        """
        Instantiate an Actor or Sensor and associate it with this Node,
        logging before and after.

        Args:
            cls: type of Actor or Sensor to instantiate and associate.
            name: optional name for Actor or Sensor used to identify it in
                this Node's interface/config. name of its class will be used
                if not specified, plus a numerical suffix if it would collide
                with the name of an already-attached element.

        Returns:
            the name under which the element was registered, as returned by
                `Matcher.add_element()`.
        """
        logname = name if name is not None else cls.name
        self._log(
            "adding element", cls=str(cls), name=logname, category="system"
        )
        # keep the name chosen by the parent class: it may differ from the
        # requested one, and callers need it to address the element
        assigned = super().add_element(cls, name)
        self._log(
            "added element", cls=str(cls), name=assigned, category="system"
        )
        return assigned

    def busy(self) -> bool:
        """are we too busy to do new stuff?"""
        # TODO: or maybe explicitly check threads? do we want a free one?
        #  idk
        # noinspection PyProtectedMember
        return self.exc._work_queue.qsize() > 0

    def _main_loop(self):
        """
        Implementations of Node must define what they actually do when they're
        running. Should only be executed in a thread, and only by
        `Node._start()`.
        """
        raise NotImplementedError

    def _shutdown(self, exception: Optional[Exception] = None):
        """Implementations of Node must define specific shutdown behavior."""
        raise NotImplementedError

    def shutdown(self, exception: Optional[Exception] = None):
        """
        Shut down the Node.

        Sets the Node's state to "shutdown" (or "crashed" if `exception` is
        given), sets the "main" signal to 1, and delegates to `_shutdown()`.
        Exceptions raised by `_shutdown()` are logged, not propagated.

        Args:
            exception: Unhandled Exception that stopped the Node's main loop,
                if any. Should be None if called explicitly or as part of
                a graceful shutdown workflow.
        """
        self.locked = True
        self.state = "shutdown" if exception is None else "crashed"
        self._log(
            "beginning shutdown",
            category="system",
            state=self.state,
            exception=exception,
        )
        self.signals["main"] = 1
        try:
            self._shutdown(exception=exception)
            self._log("completed shutdown", category="system")
        except Exception as ex:
            self._log("shutdown exception", exception=ex, category="system")
        self.is_shut_down = True

    def _start(self) -> Optional[Exception]:
        """
        private method to start the Node. should only be executed by
        `Node.start()`.

        Returns:
            Unhandled Exception that stopped the Node's main loop, if any.
                None on intentional shutdown.
        """
        for name, sensor in self.sensors.items():
            self.threads[name] = self.exc.submit(self._sensor_loop, sensor)
        exception = None
        try:
            self._main_loop()
        except Exception as ex:
            exception = ex
        # don't do a double shutdown during a graceful termination
        if self.state not in ("shutdown", "crashed"):
            self.shutdown(exception)
        return exception

    def _is_locked(self):
        """getter for locked property"""
        return self._lock.locked()

    def _set_locked(self, state: bool):
        """
        setter for locked property.

        Raises:
            TypeError: if `state` is neither True nor False.
        """
        if state is True:
            # non-blocking: locking an already-locked Node is a no-op
            self._lock.acquire(blocking=False)
        elif state is False:
            if self._lock.locked():
                self._lock.release()
        else:
            raise TypeError

    def identify_elements(
        self, element_type: Optional[Literal["actors", "sensors"]] = None
    ) -> dict[str, str]:
        """
        create a dict with names and classes of node's attached elements.
        intended as lightweight monitoring information; used to help format
        state for the `situation` app.

        Args:
            element_type: "actors" or "sensors" to describe only Actors or
                Sensors. if None, describe both.

        Returns:
            the output of `element_dict()` called on the (name, element)
                pairs of the selected elements.
        """
        if element_type is None:
            elements = chain(self.actors.items(), self.sensors.items())
        else:
            elements = getattr(self, element_type).items()
        return element_dict(elements)

    def __str__(self):
        pstring = f"{type(self).__name__} ({self.name})"
        pstring += "\nthreads:\n"
        pstring += yprint({k: str(t) for k, t in self.threads.items()}, 2)
        pstring += f"\nactors: {list(self.actors)}"
        pstring += f"\nsensors: {list(self.sensors)}"
        pstring += f"\nconfig:\n{yprint(self.config, 2)}"
        return pstring

    def __repr__(self):
        return self.__str__()

    def _log(self, event: Any, **extra_fields: Any):
        """
        construct a JSON object from an event and write it into this Node's
            log file.

        Args:
            event: object to format as JSON object and write to log. this
                function will try to unnest dicts and protobuf
                Messages, and is also very happy with simple objects like
                strings, but will try its best to make a decent JSON
                representation of anything you pass it. nevertheless, large or
                complex objects are less likely to produce good results.
            **extra_fields: extra stuff to write into JSON object. kwarg names
                specify named fields of the object. Special formatting is
                available for Exceptions. These arguments should not be large
                or nested objects.
        """
        # collect keys first: the dict is modified while replacing Exceptions
        exkeys = [
            k for k, v in extra_fields.items() if isinstance(v, Exception)
        ]
        for k in exkeys:
            extra_fields |= exc_report(extra_fields.pop(k))
        logdict = valmap(json_sanitize, {"time": logstamp(3)} | extra_fields)
        if isinstance(event, (dict, Message)):
            # TODO, maybe: still want an event key?
            logdict |= flatten_for_json(event)
        else:
            logdict["event"] = json_sanitize(event)
        with self.logfile.open("a") as stream:
            json.dump(logdict, stream, indent=2)
            stream.write(",\n")

    def _get_config(self):
        """getter for config property"""
        props, params = {}, defaultdict(dict)
        for prop in self.interface:
            try:
                props[prop] = getattr(self, prop)
            except AttributeError:
                props[prop] = "UNINITIALIZED PROPERTY"
        for name, actor_cdict in self.params.items():
            # skip parameters whose entry in `params` is an empty tuple
            for k, v in filter(lambda kv: kv[1] != (), actor_cdict.items()):
                params[name][k] = self.cdict[name].get(k)
        return {"interface": props, "cdict": dict(params)}

    def _get_n_threads(self):
        """getter for n_threads property"""
        return self._n_threads

    def _set_n_threads(self, n_threads: int):
        """setter for n_threads property"""
        self._n_threads = n_threads
        if self.exc is None:
            return
        self.exc._max_workers = n_threads

    exc: Optional[ThreadPoolExecutor] = None
    """
    shared thread pool for main loop, Sensor loops, some types of Actor
    execution, and TCP server io and select threads. Note that attached Actors
    and Sensors that launch threads are not required to launch them in this
    object. None until `__init__` creates the pool.
    """
    inbox: Optional[Mailbox] = None
    """Optional Mailbox for incoming protobuf Messages."""
    config = property(_get_config)
    """
    dict with items 'interface' and 'cdict', describing settable/gettable
    properties and configurable parameters respectively
    """
    locked = property(_is_locked, _set_locked)
    """is this Node locked?"""
    __started = False
    threads: Optional[dict[Hashable, Future]] = None
    """
    dict of currently-running or not-yet-cleaned threads running in this
    Node's `exc` pool. None until `__init__` creates it.
    """
    server: Optional[TCPTalk] = None
    """optional hostess.station.talkie.TCPTalk server."""
    server_events = None
    state = "stopped"
    """Node state description"""
    _ackcheck: Optional[Callable] = None
    """
    optional function used as `Node.server`'s `ackcheck` argument; can be used
    to implement response spooling, dispatching, etc.
    """
    logfile: Path
    """path to log file"""
_ackcheck = None class-attribute instance-attribute

optional function used as Node.server's ackcheck argument; can be used to implement response spooling, dispatching, etc.

config = property(_get_config) class-attribute instance-attribute

dict with items 'interface' and 'cdict', describing settable/gettable properties and configurable parameters respectively

exc = None class-attribute instance-attribute

shared thread pool for main loop, Sensor loops, some types of Actor execution, and TCP server io and select threads. Note that attached Actors and Sensors that launch threads are not required to launch them in this object. Always None if Node not yet started.

inbox = None class-attribute instance-attribute

Optional Mailbox for incoming protobuf Messages.

locked = property(_is_locked, _set_locked) class-attribute instance-attribute

is this Node locked?

logfile instance-attribute

path to log file

server = None class-attribute instance-attribute

optional hostess.station.talkie.TCPTalk server.

state = 'stopped' class-attribute instance-attribute

Node state description

threads = None class-attribute instance-attribute

dict of currently-running or not-yet-cleaned threads running in this Node's exc pool. Always None if Node is not yet started.

_get_config()

getter for config property

Source code in hostess/station/bases.py
898
899
900
901
902
903
904
905
906
907
908
909
def _get_config(self):
    """getter for config property"""
    props, params = {}, defaultdict(dict)
    for prop in self.interface:
        try:
            props[prop] = getattr(self, prop)
        except AttributeError:
            props[prop] = "UNINITIALIZED PROPERTY"
    for name, actor_cdict in self.params.items():
        for k, v in filter(lambda kv: kv[1] != (), actor_cdict.items()):
            params[name][k] = self.cdict[name].get(k)
    return {"interface": props, "cdict": dict(params)}
_get_n_threads()

getter for n_threads property

Source code in hostess/station/bases.py
911
912
913
def _get_n_threads(self):
    """getter for n_threads property; returns the stored `_n_threads`."""
    return self._n_threads
_log(event, **extra_fields)

construct a JSON object from an event and write it into this Node's log file.

Parameters:

Name Type Description Default
event Any

object to format as JSON object and write to log. this function will try to unnest dicts and protobuf Messages, and is also very happy with simple objects like strings, but will try its best to make a decent JSON representation of anything you pass it. nevertheless, large or complex objects are less likely to produce good results.

required
**extra_fields Any

extra stuff to write into JSON object. kwarg names specify named fields of the object. Special formatting is available for Exceptions. These arguments should not be large or nested objects.

{}
Source code in hostess/station/bases.py
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
def _log(self, event: Any, **extra_fields: Any):
    """
    Build a JSON record for an event and append it to this Node's log file.

    Each record carries a "time" field plus the sanitized `extra_fields`.
    Records are written with an indent of 2 and followed by ",\\n".

    Args:
        event: object to log. dicts and protobuf Messages are flattened
            into the record's top-level fields; anything else is sanitized
            and stored under the "event" key. Simple objects like strings
            work best; large or complex objects are less likely to produce
            good results.
        **extra_fields: additional named fields for the record. Any value
            that is an Exception is replaced by the fields `exc_report()`
            produces for it. These should not be large or nested objects.
    """
    # snapshot the keys first: the dict is modified while expanding them
    exception_keys = [
        key
        for key, value in extra_fields.items()
        if isinstance(value, Exception)
    ]
    for key in exception_keys:
        extra_fields.update(exc_report(extra_fields.pop(key)))
    record = valmap(json_sanitize, {"time": logstamp(3), **extra_fields})
    if not isinstance(event, (dict, Message)):
        record["event"] = json_sanitize(event)
    else:
        # TODO, maybe: still want an event key?
        record |= flatten_for_json(event)
    with self.logfile.open("a") as stream:
        json.dump(record, stream, indent=2)
        stream.write(",\n")
_main_loop()

Implementations of Node must define what they actually do when they're running. Should only be executed in a thread, and only by Node._start().

Source code in hostess/station/bases.py
762
763
764
765
766
767
768
def _main_loop(self):
    """
    Implementations of Node must define what they actually do when they're
    running. Should only be executed in a thread, and only by
    `Node._start()`.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError
_set_logfile()

concrete subclasses must define rules for constructing log filenames.

Source code in hostess/station/bases.py
698
699
700
701
702
def _set_logfile(self):
    """
    concrete subclasses must define rules for constructing log filenames.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError
_set_n_threads(n_threads)

setter for n_threads property

Source code in hostess/station/bases.py
915
916
917
918
919
920
def _set_n_threads(self, n_threads: int):
    """setter for n_threads property"""
    self._n_threads = n_threads
    if self.exc is None:
        return
    self.exc._max_workers = n_threads
_shutdown(exception=None)

Implementations of Node must define specific shutdown behavior.

Source code in hostess/station/bases.py
770
771
772
def _shutdown(self, exception: Optional[Exception] = None):
    """
    Implementations of Node must define specific shutdown behavior.

    Args:
        exception: the Exception passed along by `shutdown()`, if any.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError
_start()

private method to start the Node. should only be executed by Node.start().

Returns:

Type Description
Optional[Exception]

Unhandled Exception that stopped the Node's main loop, if any. None on intentional shutdown.

Source code in hostess/station/bases.py
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
def _start(self) -> Optional[Exception]:
    """
    private method to start the Node. should only be executed by
    `Node.start()`.

    Returns:
        Unhandled Exception that stopped the Node's main loop, if any.
            None on intentional shutdown.
    """
    for name, sensor in self.sensors.items():
        self.threads[name] = self.exc.submit(self._sensor_loop, sensor)
    exception = None
    try:
        self._main_loop()
    except Exception as ex:
        exception = ex
    # don't do a double shutdown during a graceful termination
    if self.state not in ("shutdown", "crashed"):
        self.shutdown(exception)
    return exception
add_element(cls, name=None)

Instantiate an Actor or Sensor and associate it with this Node.

Parameters:

Name Type Description Default
cls Union[type[Actor], type[Sensor]]

type of Actor or Sensor to instantiate and associate.

required
name Optional[str]

optional name for Actor or Sensor used to identify it in this Node's interface/config. name of its class will be used if not specified, plus a numerical suffix if it would collide with the name of an already-attached element.

None
Source code in hostess/station/bases.py
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
def add_element(
    self, cls: Union[type[Actor], type[Sensor]], name: Optional[str] = None
):
    """
    Instantiate an Actor or Sensor and associate it with this Node, logging
    before and after.

    Args:
        cls: type of Actor or Sensor to instantiate and associate.
        name: optional name for Actor or Sensor used to identify it in
            this Node's interface/config. name of its class will be used
            if not specified, plus a numerical suffix if it would collide
            with the name of an already-attached element.

    Returns:
        the name under which the element was registered, as returned by
            the parent class's `add_element()`.
    """
    logname = name if name is not None else cls.name
    self._log(
        "adding element", cls=str(cls), name=logname, category="system"
    )
    # keep the name chosen by the parent class: it may differ from the
    # requested one, and callers need it to address the element
    assigned = super().add_element(cls, name)
    self._log(
        "added element", cls=str(cls), name=assigned, category="system"
    )
    return assigned
busy()

are we too busy to do new stuff?

Source code in hostess/station/bases.py
753
754
755
756
757
758
759
760
def busy(self) -> bool:
    """
    are we too busy to do new stuff?

    Returns:
        True if the thread pool's work queue has at least one item waiting,
            False otherwise.
    """
    # TODO: or maybe explicitly check threads? do we want a free one?
    #  idk
    # noinspection PyProtectedMember
    pending = self.exc._work_queue.qsize()
    return pending > 0
identify_elements(element_type=None)

create a dict with names and classes of node's attached elements. intended as lightweight monitoring information; used to help format state for the situation app.

Parameters:

Name Type Description Default
element_type Optional[Literal['actors', 'sensors']]

"actors" or "sensors" to describe only Actors or Sensors. if None, describe both.

None

Returns:

Type Description
dict[str, str]

dict whose keys are actor and sensor names and whose values are the corresponding entries produced by element_dict() for the selected elements.

Source code in hostess/station/bases.py
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
def identify_elements(
    self, element_type: Optional[Literal["actors", "sensors"]] = None
) -> dict[str, str]:
    """
    summarize the elements attached to this node as a name-to-class dict.
    meant as lightweight monitoring information; used to help format
    state for the `situation` app.

    Args:
        element_type: "actors" or "sensors" to describe only Actors or
            only Sensors. if None, describe both (Actors first).

    Returns:
        dict whose keys are element names and whose values are the
            strings `element_dict()` produces for them
            ("<module>.<class name>").
    """
    if element_type is not None:
        # look up the requested mapping (self.actors / self.sensors) by name
        return element_dict(getattr(self, element_type).items())
    return element_dict(chain(self.actors.items(), self.sensors.items()))
nodeid()

get basic identifying information for this Node.

Returns:

Type Description
dict[str, Union[str, int]]

dict whose keys are "name", "pid", and "host".

Source code in hostess/station/bases.py
718
719
720
721
722
723
724
725
726
727
728
729
def nodeid(self) -> dict[str, Union[str, int]]:
    """
    describe this Node well enough to tell it apart from others.

    Returns:
        dict with keys "name" (this Node's name), "pid" (id of the current
            process), and "host" (hostname of the current machine).
    """
    return dict(name=self.name, pid=os.getpid(), host=socket.gethostname())
restart_server()

(re)start the node's TCPTalk server (if it is supposed to have one).

Source code in hostess/station/bases.py
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
def restart_server(self):
    """
    (re)start the node's TCPTalk server (if it is supposed to have one).

    Any existing server is killed first. What happens next depends on
    `self.can_receive`, `self.host`, and `self.port`:

    * `can_receive is False`: no server is created, and neither a host
      nor a port may have been provided.
    * otherwise, both host and port are required.
    * `can_receive is True`: a new TCPTalk server is created on
      (host, port) using this node's executor, and its threads, data, and
      signals are attached to this node.
    * any other value of `can_receive` (with host and port both set):
      `server`, `server_events`, and `inbox` are set to None.

    Raises:
        TypeError: if a host or port was provided for a non-receiving
            node, or if either is missing for any other node.
    """
    if self.server is not None:
        # NOTE(review): the server is subscripted here but used through
        #  attributes (.threads, .data, .signals) below -- confirm that
        #  the server object supports both styles of access.
        self.server["kill"]()
    if self.can_receive is False:
        # non-receiving node: nothing to start, just validate the settings
        if (self.host is not None) or (self.port is not None):
            raise TypeError(
                "cannot provide host/port for non-receiving node."
            )
    elif (self.host is None) or (self.port is None):
        raise TypeError("must provide host and port for receiving node.")
    elif self.can_receive is True:
        self.server = TCPTalk(
            self.host,
            self.port,
            ackcheck=self._ackcheck,
            executor=self.exc,
        )
        # merge the server's threads into this node's thread registry
        self.threads |= self.server.threads
        # the node's inbox is the server's data object itself, not a copy
        self.inbox = self.server.data
        # expose each server signal on the node under a "server_" prefix
        for ix, sig in self.server.signals.items():
            self.signals[f"server_{ix}"] = sig
    else:
        # only reached when can_receive is neither True nor False
        # NOTE(review): the `can_receive is False` branch above does not
        #  reset these attributes -- confirm that is intended.
        self.server, self.server_events, self.inbox = None, None, None
shutdown(exception=None)

Shut down the Node.

Parameters:

Name Type Description Default
exception Optional[Exception]

Unhandled Exception that stopped the Node's main loop, if any. Should be None if called explicitly or as part of a graceful shutdown workflow.

None
Source code in hostess/station/bases.py
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
def shutdown(self, exception: Optional[Exception] = None):
    """
    Shut down the Node.

    Locks the Node, records its final state ("shutdown" for a clean stop,
    "crashed" if an exception is given), sets the "main" signal to 1, and
    runs `self._shutdown()`. Exceptions raised during that step are logged,
    not propagated, and the Node is always marked as shut down at the end.

    Args:
        exception: Unhandled Exception that stopped the Node's main loop,
            if any. Should be None if called explicitly or as part of
            a graceful shutdown workflow.
    """
    self.locked = True
    if exception is None:
        self.state = "shutdown"
    else:
        self.state = "crashed"
    details = {
        "category": "system",
        "state": self.state,
        "exception": exception,
    }
    self._log("beginning shutdown", **details)
    self.signals["main"] = 1
    try:
        self._shutdown(exception=exception)
        self._log("completed shutdown")
    except Exception as err:
        # a failure while shutting down is recorded but never re-raised
        self._log("shutdown exception", exception=err, category="system")
    self.is_shut_down = True
start()

Start the Node's main loop and, if it is supposed to have one, its TCPTalk server.

Source code in hostess/station/bases.py
704
705
706
707
708
709
710
711
712
713
714
715
716
def start(self):
    """
    Start the Node's main loop and, if it is supposed to have one, its
    TCPTalk server.

    Raises:
        EnvironmentError: if this Node has already been started.
    """
    # a Node may only be started once. (inside a class body, the
    # double-underscore name is name-mangled by Python.)
    if self.__started is True:
        raise EnvironmentError("Node already started.")
    self._log("starting", category="system")
    # set up the server before the main loop is submitted; note that
    # restart_server() raises TypeError on inconsistent host/port settings
    self.restart_server()
    # run the main loop in the executor, keeping whatever submit() returns
    # under the "main" key of the thread registry
    self.threads["main"] = self.exc.submit(self._start)
    self.__started = True
    self.state = "nominal"
    self._log("completed start", category="system")

Sensor

Bases: Matcher, ABC

abstract base class for Node elements that 'watch' some data source. semi-autonomous; runs asynchronously from its parent Node and uses its own Actors to watch the data source and decide what to bother the Node about.

Sensors should generally only be instantiated by methods of a parent Node.

Source code in hostess/station/bases.py
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
class Sensor(Matcher, ABC):
    """
    abstract base class for Node elements that 'watch' some data source.
    semi-autonomous; runs asynchronously from its parent Node and uses its
    own Actors to watch the data source and decide what to bother the Node
    about.

    Sensors should generally only be instantiated by methods of a parent Node.
    """

    def __init__(self):
        """
        Set up this Sensor's interface, config, and params, then attach an
        instance of every Actor type listed in `self.actions` and
        `self.loggers`.
        """
        super().__init__()
        # instance-level interface: the class's `interface` entries followed
        # by the Sensor-specific ones in `class_interface`
        self.interface = self.interface + self.class_interface
        props, self.actors = [], {}
        # the keyword-only arguments of check() become this Sensor's
        # configurable "check" parameters, all initially unset (None)
        checkp = inspect.getfullargspec(self.check).kwonlyargs
        self.config = {"check": {k: None for k in checkp}}
        self.check_params = tuple(checkp)
        self.params = {"check": self.check_params}
        # attach the class's default Actors under their default names
        for cls in chain(self.actions, self.loggers):
            self.add_element(cls, cls.name)
        # NOTE(review): the local `props` list is never appended to in this
        #  method (associate_actor() appends to `self.props` instead), so
        #  this loop does nothing as written -- confirm intent.
        for prop in props:
            setattr(self, *prop)
        # presumably wraps check() so that its kwargs are drawn from
        # config["check"] -- verify against `configured`
        self.check = configured(self.check, self.config["check"])
        # state carried between successive checker() calls; see check()
        self.memory = None

    # TODO: perhaps make this less redundant with superclass add_element
    def associate_actor(self, cls: type[Actor], name: Optional[str] = None):
        """
        instantiate an Actor and associate it with this Sensor.

        Args:
            cls: type of Actor to instantiate and associate
            name: optional name for Actor; used to identify it in this Sensor's
                interface and config. If not specified, defaults to the class
                name, suffixed with incrementing numbers if it would collide
                with the name of an already-attached Actor
        """
        name = inc_name(cls.name if name is None else name, self.actors)
        self.actors[name] = cls()
        self.actors[name].owner, self.actors[name].name = self, name
        # the Sensor's config entry is the Actor's own config object
        self.config[name] = self.actors[name].config
        self.params[name] = {
            "match": self.actors[name].params["match"],
            "exec": self.actors[name].params["exec"],
        }
        # record (attribute name, current value) for each interface
        # attribute of the new Actor
        # NOTE(review): `self.props` is not assigned anywhere in this
        #  class -- confirm that a base class provides it.
        for prop in self.actors[name].interface:
            self.props.append(
                (f"{name}_{prop}", getattr(self.actors[name], prop))
            )

    def add_element(self, cls: type[Actor], name: Optional[str] = None):
        """
        use special Sensor association behavior, and don't permit Sensor mise
        en abyme.

        Args:
            cls: type of Actor to instantiate and associate
            name: optional name for the Actor; see `associate_actor()`

        Raises:
            TypeError: if `cls` is a Sensor type.
        """
        if issubclass(cls, Sensor):
            raise TypeError("cannot add Sensors to a Sensor.")
        self.associate_actor(cls, name)

    def _log(self, *args, **kwargs):
        """
        call owning Node's log function. automatically include the fact that
        this Sensor generated the log entry.
        """
        # note: a caller-supplied `category` or `sensor` kwarg would
        # duplicate the ones set here and raise TypeError
        self.owner._log(*args, category="sensor", sensor=self.name, **kwargs)

    def check(self, node: Node, **check_kwargs: Any):
        """
        Main pointy-end function for Sensor.

        Use this Sensor's `checker()` method to look for new events and, if
        there are any, match them against this Sensor's Actors

        Any Exception raised along the way is caught and logged together
        with the step ("check", "match", or "execute <actor name>") that
        was in progress; it is not re-raised.

        Args:
            node: Node to inform about any matching events. In normal
                operation, this argument will never be explicitly passed: it
                will always be this Sensor's owning Node, and will always be
                partially evaluated into this method during `Sensor.__init__`.
            **check_kwargs: kwargs to pass to `self.checker()`. Will also
                never be explicitly passed in normal operation; if there are
                any, they will be automatically taken from the "check" item of
                this Sensor's `config` dict.
        """
        step = "check"  # for error logging
        try:
            # checker() receives the previous memory and returns the new
            # memory along with an iterable of events
            self.memory, events = self.checker(self.memory, **check_kwargs)
            for event in events:
                try:
                    step = "match"
                    actors = self.match(event)
                except NoActorForEvent:
                    # no Actor is interested in this event; skip it
                    continue
                for actor in actors:
                    step = f"execute {actor.name}"
                    # kwargs propagate to individual actors via `self.config`
                    actor.execute(node, event)
        except Exception as ex:
            self._log("check failure", step=step, exception=ex)

    def __str__(self):
        """
        multi-line summary: class and Sensor name, the value of each
        interface attribute, names of attached Actors, and a YAML dump of
        the config.
        """
        pstring = f"{type(self).__name__} ({self.name})\n"
        pstring += f"interface:\n"
        for attr in self.interface:
            pstring += f"    {attr}: {getattr(self, attr)}\n"
        pstring += f"actors: {[a for a in self.actors]}\n"
        # every occurrence of 'null' in the dump is shown as 'None'
        pstring += f"config: {yaml.dump(self.config).replace('null', 'None')}"
        return pstring

    def __repr__(self):
        """same text as `__str__()`."""
        return self.__str__()

    def _get_poll(self) -> Optional[float]:
        """getter for the `poll` property."""
        return self._poll

    def _set_poll(self, pollrate: float):
        """
        setter for the `poll` property. also flags this Sensor as having
        its own individually-set poll rate.
        """
        self._poll = pollrate
        self.has_individual_pollrate = True

    def set_poll_nonsticky(self, pollrate: float):
        """
        set the poll rate without flagging it as individually set (unlike
        assigning to `poll`, `has_individual_pollrate` is left untouched).
        """
        self._poll = pollrate

    def close(self):
        """does nothing in this base class."""
        pass

    poll = property(_get_poll, _set_poll)
    """
    when this `Sensor` is running in a Node's `sensor_loop()` function, this
    sets interval in seconds between subsequent calls to `self.check()`. If
    not set, it defaults to the poll rate of the parent Node. 
    """
    # backing value for `poll`; None until a poll rate is set
    _poll = None
    # becomes True once `poll` has been assigned through the property
    has_individual_pollrate = False
    # empty read-only mapping
    # NOTE(review): presumably default config for subclasses -- confirm use
    base_config = MPt({})
    checker: Callable
    """
    data-fetching function called by `self.check()`. Must be defined in 
    implementations of this class.
    """
    actions: tuple[type[Actor]] = ()
    """
    default Actors associated with this class. `Sensor.__init__()` 
    instantiates and attaches an Actor of each specified type.
    """
    loggers: tuple[type[Actor]] = ()
    """same, but for logging-only Actors."""
    # identifying name of this Sensor; read by _log() and __str__()
    name: str
    # interface entries every Sensor has; appended to `interface` in __init__
    class_interface = ("poll",)
    # additional interface entries; empty in this base class
    interface = ()
actions = () class-attribute instance-attribute

default Actors associated with this class. Sensor.__init__() instantiates and attaches an Actor of each specified type.

checker instance-attribute

data-fetching function called by self.check(). Must be defined in implementations of this class.

loggers = () class-attribute instance-attribute

same, but for logging-only Actors.

poll = property(_get_poll, _set_poll) class-attribute instance-attribute

when this Sensor is running in a Node's sensor_loop() function, this sets interval in seconds between subsequent calls to self.check(). If not set, it defaults to the poll rate of the parent Node.

_log(*args, **kwargs)

call owning Node's log function. automatically include the fact that this Sensor generated the log entry.

Source code in hostess/station/bases.py
299
300
301
302
303
304
def _log(self, *args, **kwargs):
    """
    forward a log entry to the owning Node's log function, tagging it with
    the "sensor" category and this Sensor's name.
    """
    node_log = self.owner._log
    node_log(*args, category="sensor", sensor=self.name, **kwargs)
add_element(cls, name=None)

use special Sensor association behavior, and don't permit Sensor mise en abyme.

Source code in hostess/station/bases.py
290
291
292
293
294
295
296
297
def add_element(self, cls: type[Actor], name: Optional[str] = None):
    """
    attach an Actor using the Sensor-specific association behavior;
    refuse to nest a Sensor inside a Sensor.

    Args:
        cls: type of Actor to instantiate and associate
        name: optional name for the Actor, passed on to `associate_actor()`

    Raises:
        TypeError: if `cls` is a Sensor type.
    """
    if not issubclass(cls, Sensor):
        self.associate_actor(cls, name)
        return
    raise TypeError("cannot add Sensors to a Sensor.")
associate_actor(cls, name=None)

instantiate an Actor and associate it with this Sensor.

Parameters:

Name Type Description Default
cls type[Actor]

type of Actor to instantiate and associate

required
name Optional[str]

optional name for Actor; used to identify it in this Sensor's interface and config. If not specified, defaults to the class name, suffixed with incrementing numbers if it would collide with the name of an already-attached Actor

None
Source code in hostess/station/bases.py
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
def associate_actor(self, cls: type[Actor], name: Optional[str] = None):
    """
    create an Actor of the given type and register it with this Sensor.

    The Actor is stored in `self.actors`, its config and its "match" /
    "exec" params are exposed under the same name in `self.config` and
    `self.params`, and one (attribute name, value) pair per entry of the
    Actor's interface is appended to `self.props`.

    Args:
        cls: type of Actor to instantiate and associate
        name: optional name for Actor; used to identify it in this Sensor's
            interface and config. If not specified, defaults to the class
            name, suffixed with incrementing numbers if it would collide
            with the name of an already-attached Actor
    """
    requested = cls.name if name is None else name
    name = inc_name(requested, self.actors)
    actor = cls()
    self.actors[name] = actor
    actor.owner, actor.name = self, name
    # the Sensor's config entry is the Actor's own config object
    self.config[name] = actor.config
    self.params[name] = {key: actor.params[key] for key in ("match", "exec")}
    for prop in actor.interface:
        self.props.append((f"{name}_{prop}", getattr(actor, prop)))
check(node, **check_kwargs)

Main pointy-end function for Sensor.

Use this Sensor's checker() method to look for new events and, if there are any, match them against this Sensor's Actors

Parameters:

Name Type Description Default
node Node

Node to inform about any matching events. In normal operation, this argument will never be explicitly passed: it will always be this Sensor's owning Node, and will always be partially evaluated into this method during Sensor.__init__.

required
**check_kwargs Any

kwargs to pass to self.checker(). Will also never be explicitly passed in normal operation; if there are any, they will be automatically taken from the "check" item of this Sensor's config dict.

{}
Source code in hostess/station/bases.py
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
def check(self, node: Node, **check_kwargs: Any):
    """
    Main pointy-end function for Sensor.

    Call this Sensor's `checker()` to look for new events, then hand each
    event to every Actor that matches it. Events no Actor matches are
    skipped. Any other Exception is caught and logged, along with the step
    that was in progress, and is not re-raised.

    Args:
        node: Node to inform about any matching events. In normal
            operation, this argument will never be explicitly passed: it
            will always be this Sensor's owning Node, and will always be
            partially evaluated into this method during `Sensor.__init__`.
        **check_kwargs: kwargs to pass to `self.checker()`. Will also
            never be explicitly passed in normal operation; if there are
            any, they will be automatically taken from the "check" item of
            this Sensor's `config` dict.
    """
    step = "check"  # names the phase in progress, for error logging
    try:
        # checker() takes the previous memory and returns the updated
        # memory plus an iterable of new events
        self.memory, events = self.checker(self.memory, **check_kwargs)
        for event in events:
            step = "match"
            try:
                matched = self.match(event)
            except NoActorForEvent:
                # nobody is interested in this event; on to the next one
                continue
            for actor in matched:
                step = f"execute {actor.name}"
                # kwargs propagate to individual actors via `self.config`
                actor.execute(node, event)
    except Exception as err:
        self._log("check failure", step=step, exception=err)

element_dict(elements)

Actor/Sensor title formatter for identify_elements() or similar introspection methods.

Source code in hostess/station/bases.py
585
586
587
588
589
590
591
592
593
def element_dict(elements: Collection[Union[Actor, Sensor]]) -> dict[str, str]:
    """
    Actor/Sensor title formatter for `identify_elements()` or similar
    introspection methods.

    Args:
        elements: iterable of (name, element) pairs, e.g. the `.items()`
            of a dict of attached Actors or Sensors.

    Returns:
        dict mapping each name to "<module>.<class name>" of its element.
    """
    titles = {}
    for name, element in elements:
        klass = element.__class__
        titles[name] = f"{klass.__module__}.{klass.__name__}"
    return titles

inc_name(name, config)

If a string would duplicate an existing key of a dictionary, add a numerical suffix to it to prevent collisions.

Parameters:

Name Type Description Default
name str

key caller would like to add to config

required
config Mapping[str]

mapping caller would like to use name as a key in

required

Returns:

Type Description
str

name if it does not duplicate any key of config; name with a unique-within-keys-of-config numerical suffix if it does.

Source code in hostess/station/bases.py
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
def inc_name(name: str, config: Mapping[str, Any]) -> str:
    """
    If a string would duplicate an existing key of a dictionary, add a
    numerical suffix to it to prevent collisions.

    The first suffix tried is one more than the number of keys that begin
    with `name`; it is then incremented until the result is not already a
    key of `config`, so the returned name never collides with an existing
    key. `name` is always treated as literal text, never as a regular
    expression.

    Args:
        name: key caller would like to add to `config`
        config: mapping caller would like to use `name` as a key in

    Returns:
        `name` if it does not duplicate any key of `config`; `name` with a
            unique-within-keys-of-config numerical suffix if it does.
    """
    if name not in config:
        return name
    # literal prefix test: names containing regex metacharacters
    # (".", "(", "+", ...) must neither over-match nor raise
    similar = sum(
        1 for key in config if isinstance(key, str) and key.startswith(name)
    )
    suffix = similar + 1
    candidate = f"{name}_{suffix}"
    # counting alone cannot guarantee uniqueness (e.g. keys "a" and "a_3"),
    # so keep going until the candidate really is unused
    while candidate in config:
        suffix += 1
        candidate = f"{name}_{suffix}"
    return candidate

validate_instruction(instruction)

First-pass Instruction validation function. Called by Delegates on Instruction receipt; can also be used as an independent validator.

Parameters:

Name Type Description Default
instruction Message

Instruction to validate.

required

Raises:

Type Description
NoInstructionType

if instruction does not have a defined type.

NoConfigError

if a 'configure' Instruction does not specify a config.

NoTaskError

if a 'do' instruction does not specify a task to perform.

Source code in hostess/station/bases.py
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
def validate_instruction(instruction: Message):
    """
    First-pass Instruction validation function. Called by Delegates on
    Instruction receipt; can also be used as an independent validator.
    Returns nothing if the Instruction passes.

    Args:
        instruction: Instruction to validate.

    Raises:
        NoInstructionType: if `instruction` does not have a defined type.
        NoConfigError: if a 'configure' Instruction does not specify a config.
        NoTaskError: if a 'do' instruction does not specify a task to perform.
    """
    # name of the Instruction's "type" enum value, looked up once
    kind = enum(instruction, "type")
    if kind == "unknowninst":
        raise NoInstructionType
    # NOTE(review): if `instruction` is a protobuf Message, reading a field
    #  may never yield None -- confirm this check can actually fire.
    if kind == "configure" and instruction.config is None:
        raise NoConfigError
    if kind == "do" and not instruction.HasField("task"):
        raise NoTaskError

station.comm

simple, robust protocol for messaging and serialized data transfer.

CODE_TO_MTYPE = MPt({0: 'none', 1: 'Update', 2: 'Instruction', 3: 'PythonObject'}) module-attribute

one-byte-wide codes for Message type of comm body. "none" means the comm body is not a serialized protobuf Message.

HEADER_STRUCT = struct.Struct('<8sBL') module-attribute

struct specification for hostess comm header.

HOSTESS_ACK = b'\x06hostess' module-attribute

hostess acknowledgement code

HOSTESS_EOM = b'\x03hostess' module-attribute

hostess end-of-message code

HOSTESS_SOH = b'\x01hostess' module-attribute

hostess start-of-header code

make_comm(body)

create a hostess comm from a buffer or a protobuf Message.

Parameters:

Name Type Description Default
body Union[bytes, Message]

byte string or hostess Message to use as comm body

required

Returns:

Type Description
bytes

hostess comm as bytes

Source code in hostess/station/comm.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def make_comm(body: Union[bytes, Message]) -> bytes:
    """
    wrap a buffer or a protobuf Message in a hostess comm: a packed header,
    then the (serialized) body, then the end-of-message code.

    Args:
        body: byte string or hostess Message to use as comm body

    Returns:
        hostess comm as `bytes`
    """
    if not hasattr(body, "SerializePartialToString"):
        # plain buffer: used as-is and labelled as "none" in the header
        payload, mtype = body, "none"
    else:
        # i.e., it's a protobuf Message; the header records its class name
        payload = body.SerializePartialToString()
        mtype = body.__class__.__name__
    header = HEADER_STRUCT.pack(
        HOSTESS_SOH, MTYPE_TO_CODE[mtype], len(payload) + WRAPPER_SIZE
    )
    return header + payload + HOSTESS_EOM

read_comm(buffer, unpack_proto=False)

read a hostess comm from a byte string. if the comm's header says its body contains a hostess Message protobuf, attempt to decode it as a Message.

Parameters:

Name Type Description Default
buffer bytes

bytes object comprising a hostess comm.

required
unpack_proto bool

if True and the comm contains a protobuf, unpack it into a dictionary rather than returning a 'raw' Message.

False

Returns:

Type Description
dict[str, Union[dict, bytes, Message, str]]

a dict containing the decoded header, the (possibly decoded) body, and any errors.

Source code in hostess/station/comm.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
def read_comm(
    buffer: bytes, unpack_proto: bool = False
) -> dict[str, Union[dict, bytes, Message, str]]:
    """
    parse a hostess comm out of a byte string. if the comm's header says its
    body contains a hostess Message protobuf, attempt to decode it as one.

    Args:
        buffer: `bytes` object comprising a hostess comm.
        unpack_proto: if True and the comm contains a protobuf, unpack it
            into a dictionary rather than returning a 'raw' Message.

    Returns:
        a dict with keys "header" (decoded header, or None if it could not
        be read), "body" (decoded Message / dict if decoding succeeded,
        otherwise raw bytes), and "err" (semicolon-joined error names; empty
        string if there were none).
    """
    try:
        header = read_header(buffer[: HEADER_STRUCT.size])
    except IOError:
        # unreadable header: hand back the whole buffer untouched
        return {"header": None, "body": buffer, "err": "header"}
    errors = []
    body = buffer[HEADER_STRUCT.size:]
    if body.endswith(HOSTESS_EOM):
        body = body[: -len(HOSTESS_EOM)]
    if len(buffer) != header["length"]:
        errors.append("length")

    def result(content):
        # errors are joined at call time, so later appends are included
        return {"header": header, "body": content, "err": ";".join(errors)}

    if header["mtype"] == "none":
        return result(body)
    try:
        # the value of the 'mtype' key should correspond to a hostess.station
        # protocol buffer class
        message_class = getattr(hostess_proto, header["mtype"])
        message: Message = message_class.FromString(body)
    except AttributeError:
        errors.append("mtype")
        return result(body)
    except DecodeError:
        errors.append("protobuf decode")
        return result(body)
    if unpack_proto is True:
        message = m2d(message)
    return result(message)

read_header(buffer)

read a hostess header from the first 13 bytes of buffer.

Parameters:

Name Type Description Default
buffer bytes

a bytes buffer containing a hostess comm

required

Returns:

Type Description
dict[str, Union[str, bool, int]]

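dict with keys: "mtype": name of body's hostess Message type as given in header; "none" if the header says the body is not a serialized Message, or "invalid message type" if the header's type code is not recognized. "length": length of the comm as given in header (read_comm() compares it against the length of the entire comm, not just the body).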

Source code in hostess/station/comm.py
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def read_header(buffer: bytes) -> dict[str, Union[str, bool, int]]:
    """
    read a hostess header from the first `HEADER_STRUCT.size` (13) bytes of
    `buffer`.

    Args:
        buffer: a `bytes` buffer containing a hostess comm

    Returns:
        dict with keys:
            "mtype": name of body's hostess Message type as given in header;
                "none" if the header says the body is not a serialized
                Message; "invalid message type" if the type code is not
                recognized
            "length": comm length as given in header (`read_comm()` checks
                it against the length of the whole comm)

    Raises:
        IOError: if `buffer` is too short to hold a header or does not
            begin with the hostess start-of-header code.
    """
    try:
        soh, code, length = HEADER_STRUCT.unpack(buffer[: HEADER_STRUCT.size])
    except struct.error as err:
        raise IOError("invalid hostess header") from err
    # explicit comparison, not `assert`: assertions are stripped when
    # Python runs with -O, which would let malformed headers through
    if soh != HOSTESS_SOH:
        raise IOError("invalid hostess header")
    try:
        mtype = CODE_TO_MTYPE[code]
    except KeyError:
        # unknown code: report it in-band and let the caller decide
        mtype = "invalid message type"
    return {"mtype": mtype, "length": length}

station.delegates

Delegate

Bases: Node

Source code in hostess/station/delegates.py
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
class Delegate(bases.Node):
    def __init__(
        self,
        station_address: tuple[str, int],
        name: str,
        elements: tuple[Union[type[bases.Sensor], type[bases.Actor]]] = (),
        n_threads: int = 4,
        poll: float = 0.08,
        timeout: int = 10,
        update_interval: float = 10,
        start: bool = False,
        loginfo: Optional[Mapping[str]] = MPt({}),
        _is_process_owner: bool = False,
    ):
        """
        configurable remote processor for hostess network. can gather data
        and/or execute actions based on the elements attached to it. should
        typically be instantiated via the launch_delegate() method of the
        supervising Station.

        Args:
            station_address: (hostname, port) of supervising Station
            name: identifying name for delegate
            n_threads: max threads in executor
            elements: Sensors or Actors to add to delegate at creation.
            poll: delay, in seconds, for polling loops
            timeout: timeout, in s, for intra-hostess communications
            update_interval: interval, in s, for check-in Updates to
                supervising Station
            start: passed through to the Node constructor (presumably
                whether to start running immediately -- confirm against
                bases.Node)
            loginfo: mapping of logging settings. its "logdir" value, if
                present, sets the log directory; otherwise
                GENERIC_LOGINFO["logdir"] is used. None is treated as an
                empty mapping.
            _is_process_owner: passed through to the Node constructor and
                recorded in init_params.
        """
        # the signature advertises Optional, so treat None like the empty
        # default instead of failing on loginfo.get() below
        if loginfo is None:
            loginfo = MPt({})
        super().__init__(
            name=name,
            n_threads=n_threads,
            elements=elements,
            start=start,
            poll=poll,
            timeout=timeout,
            _is_process_owner=_is_process_owner,
            logdir=loginfo.get("logdir", GENERIC_LOGINFO["logdir"]),
            loginfo=loginfo,
            station=station_address,
        )
        self.update_interval = update_interval
        self.actionable_events, self.infocount = [], defaultdict(int)
        self.actions = {}
        self.instruction_queue = []
        self.update_timer, self.reset_update_timer = timeout_factory(False)
        # TODO: add local hostname of delegate
        # construction parameters, sent to the Station as part of the
        # Delegate's state (see _insert_state())
        self.init_params = {
            "n_threads": n_threads,
            "poll": poll,
            "timeout": timeout,
            "logdir": self.logdir,
            "logfile": self.logfile,
            "update_interval": update_interval,
            "_is_process_owner": _is_process_owner,
        }

    def _set_logfile(self):
        """
        internal function: build the path to this Delegate's log file from
        its log directory, init time, name, and Station address, and store
        it in self.logfile.
        """
        # prefer an init time supplied via loginfo over our own
        timestamp = self.loginfo.get("init_time", self.init_time)
        host, port = self.station[0], self.station[1]
        filename = f"{timestamp}_{self.name}_{host}_{port}.log"
        self.logfile = Path(self.logdir, filename)

    def _sensor_loop(
        self, sensor: bases.Sensor
    ) -> dict[str, Union[str, Optional[int], Optional[Exception]]]:
        """
        poll a Sensor until a signal is set for it in self.signals or until
        checking it raises an Exception. this function must be launched in
        its own thread or it will block and be useless. NOTE: should only be
        called from _start().

        Args:
            sensor: Sensor to poll.

        Returns:
            dict with keys:
                name: name of sensor
                signal: signal sent to terminate this function (if any)
                exception: exception that terminated this function (if any)
        """
        exception = None
        try:
            # keep going until something sets self.signals[sensor.name]
            while self.signals.get(sensor.name) is None:
                # skip the check (but keep sleeping) while Delegate is locked
                # noinspection PyPropertyAccess
                if not self.locked:
                    sensor.check(self)
                time.sleep(sensor.poll)
        except Exception as ex:
            # hold on to the exception so it is reported in the returned
            # dict rather than propagating out of the thread
            exception = ex
        finally:
            sensor.close()
            # NOTE: returning from `finally` also discards any in-flight
            # exception that `except Exception` did not catch
            return {
                "name": sensor.name,
                "signal": self.signals.get(sensor.name),
                "exception": exception,
            }

    def check_on_action(
        self, instruction_id: int
    ) -> tuple[Optional[Exception], bool]:
        """
        look at the thread running one of this delegate's Actions without
        blocking. if the Action raised, or recorded an exception in its entry
        of this delegate's `actions` dict, set that entry's status to
        "crash". typically called as part of the main Delegate loop,
        specifically from _check_actions().

        Args:
            instruction_id: numerical identifier of Action to check.

        Returns:
            exception: the Exception the Action raised or recorded, if any;
                None if it has not yet terminated or terminated without one.
            running: True if the Action has not yet terminated; False if it
                has.
        """
        thread_name = f"Instruction_{instruction_id}"
        try:
            # zero timeout: just peek at the thread's result
            self.threads[thread_name].result(0)
        except TimeoutError:
            # no result yet, so the Action is still going
            return None, True
        except Exception as ex:
            # action crashed without setting its status as such
            record = self.actions[instruction_id]
            record["status"] = "crash"
            record["exception"] = ex
            return ex, False
        # an action wrapped in @reported will catch exceptions and record
        # them politely instead of crashing as above
        record = self.actions[instruction_id]
        if record.get("exception") is None:
            return None, False
        record["status"] = "crash"
        return record["exception"], False

    def _check_actions(self):
        """
        look over every action in self.actions (threads launched as part of
        a 'do' instruction). log each one that has crashed or completed and
        report it to the Station; once the report goes through, drop the
        action from self.actions and its thread from self.threads.
        """
        finished_ids = []
        # actions can be added from other threads while we work, so iterate
        # over a snapshot rather than the live .items() view
        for instruction_id, action in tuple(self.actions.items()):
            # TODO: multistep "pipeline" case
            exception, running = self.check_on_action(instruction_id)
            if running is True:
                continue
            # TODO: accomplish this with a wrapper
            if exception is None:
                self._log(action, status="completed", category="action")
            else:
                self._log(
                    action,
                    exception=exception,
                    status="failed",
                    category="action",
                )
            # TODO: determine if we should reset update timer here
            response = self._report_on_action(action)
            # TODO: error handling
            if response in ("connection refused", "err", "timeout"):
                # report didn't get through; keep the action and try again
                # on a later pass
                continue
            finished_ids.append(instruction_id)
        # clear all reported actions first, then their threads
        for instruction_id in finished_ids:
            self.actions.pop(instruction_id)
        for instruction_id in finished_ids:
            self.threads.pop(f"Instruction_{instruction_id}")

    def _send_info(self):
        """
        take up to five of the most recently queued items off the
        actionable_events cache, send them to the Station in a single
        Update, and put them back in the cache if the send fails.
        """
        message = self._base_message(reason="info")
        # TODO: this might want to be more sophisticated
        # TODO: arbitrary max number
        max_notes = 5
        batch_size = min(len(self.actionable_events), max_notes)
        # pop() takes from the end, so the newest events go first
        info = [self.actionable_events.pop() for _ in range(batch_size)]
        self._log(
            "sending info", info=info, category="comms", direction="send"
        )
        packed = [pack_obj(note) for note in info]
        message.MergeFrom(pro.Update(info=packed))
        response = self.talk_to_station(message)
        # TODO: perhaps there's a better way to track this 'outbox'...
        #  but I'd rather not do it with a mailbox object, I want it to be
        #  quicker/more ephemeral
        if response in ("err", "connection refused", "timeout"):
            # delivery failed: requeue so a later pass can retry
            self.actionable_events += info

    def _main_loop(self):
        """
        main polling loop of the Delegate. runs until a signal is set under
        the "main" key of self.signals. on each pass, in order: sends queued
        actionable events to the Station, checks on actions, sends a
        check-in if update_interval has elapsed, and handles one queued
        Instruction; then sleeps for self.poll seconds. every step is
        skipped while self.locked is set.
        """
        while self.signals.get("main") is None:
            # TODO: lockouts might be overly strict. we'll see
            # report actionable events (appended to actionable_events by
            # Sensors) to Station
            if (len(self.actionable_events) > 0) and (not self.locked):
                # hold the lock for the duration of the send
                self.locked = True
                self._send_info()
                self.locked = False
            # clean up and report on completed / crashed actions
            if not self.locked:
                self._check_actions()
            # periodically check in with Station
            if self.update_timer() >= self.update_interval:
                # skip if a "check_in" thread is already registered
                if ("check_in" not in self.threads) and (not self.locked):
                    self._check_in()
            # TODO: launch sensors that were dynamically added; relaunch
            #  failed sensor threads
            # act on any Instructions received from Station
            if (len(self.instruction_queue) > 0) and (not self.locked):
                self.locked = True
                # NOTE: pop() takes the most recently queued Instruction
                self._handle_instruction(self.instruction_queue.pop())
                self.locked = False
            time.sleep(self.poll)

    def _shutdown(self, exception: Optional[Exception] = None):
        """
        internal shutdown handler. clears cached actions, events, and
        instructions, signals every registered thread to stop, then submits
        an exit report to the Station and blocks until that report has
        finished.

        Args:
            exception: Exception that terminated Delegate's main loop, if any.
                should be None on a 'graceful' shutdown.
        """
        # divorce oneself from actors and acts, from events and instructions
        self.actions, self.actionable_events = {}, []
        # TODO, maybe: try to kill child processes (can't in general kill
        #  threads but sys.exit should handle it)
        # signal sensors to shut down
        for k in self.threads.keys():
            self.signals[k] = 1
        # goodbye to all that
        self.instruction_queue, self.actors, self.sensors = [], {}, {}
        try:
            self.threads["exit_report"] = self.exc.submit(
                self._send_exit_report, exception
            )
        except Exception as ex:
            self._log("exit report failed", exception=ex, category="system")
        # wait to send exit report
        if "exit_report" in self.threads:
            # poll done() rather than running(): running() is False while
            # the report is still queued in the executor, which would let
            # us fall straight through before it had been sent at all
            while not self.threads["exit_report"].done():
                time.sleep(0.1)

    def _send_exit_report(self, exception: Optional[Exception] = None):
        """
        set this Delegate's state to "crashed" or "shutdown" and tell the
        Station that it is exiting (and, on a crash, why).

        Args:
            exception: unhandled Exception that caused Delegate's main loop
                to exit. None on intentional shutdown.
        """
        self.state = "shutdown" if exception is None else "crashed"
        msg = self._base_message(reason="exiting")
        if exception is None:
            # clean exit: nothing to attach
            self.talk_to_station(msg)
            return
        try:
            report = pack_obj(exc_report(exception), "exception")
            info = pro.Update(info=[report])
        except Exception as ex:
            # couldn't describe the original exception; describe the
            # failure to do so instead
            info = pro.Update(info=[pack_obj(exc_report(ex), "exception")])
        msg.MergeFrom(info)
        self.talk_to_station(msg)

    def _report_on_action(self, action: dict):
        """
        send the Station a "completion" Update describing a completed or
        failed action. should only be called as part of the main loop,
        specifically from _check_actions().

        Args:
            action: a value of this delegate's `actions` dict.

        Returns:
            the status returned by talk_to_station() for this exchange.
        """
        task_report = task_msg(action)
        msg = self._base_message(completed=task_report, reason="completion")
        # TODO: multi-step case
        return self.talk_to_station(msg)

    def _check_in(self):
        """send a heartbeat Update to the Station and reset update timer."""
        heartbeat = self._base_message(reason="heartbeat")
        self.talk_to_station(heartbeat)
        self.reset_update_timer()

    def _match_task_instruction(
        self, instruction: pro.Instruction
    ) -> list[bases.Actor]:
        """
        find the Actors that can execute the task described in an
        Instruction. thin wrapper around self.match.

        Args:
            instruction: Instruction containing a task.

        Returns:
            list of this Delegate's Actors that match the Instruction.

        Raises:
            NoActorForEvent: if none of this Delegate's Actors match.
        """
        try:
            matches = self.match(instruction, "action")
        except StopIteration:
            # self.match signals "no match" with StopIteration; turn that
            # into an error that explains why nothing matched
            explanation = self.explain_match(instruction, "action")
            raise bases.NoActorForEvent(str(explanation))
        return matches

    def _configure_from_instruction(self, instruction: Message):
        """
        apply the configuration parameters carried by an Instruction.
        "config_property" parameters are set as attributes of this
        Delegate; "config_dict" parameters are merged into self.cdict.
        logs the applied configuration when done.

        Args:
            instruction: Instruction whose `config` field holds the
                parameters to apply.

        Raises:
            DoNotUnderstand: if a property cannot be set or a parameter has
                an unrecognized type.
        """
        props_for_log, dict_for_log = {}, {}
        for param in instruction.config:
            kind = enum(param, "paramtype")
            if kind == "config_dict":
                unpacked = unpack_obj(param.value)
                self.cdict = rmerge(self.cdict, unpacked)
                dict_for_log |= unpacked
                continue
            if kind != "config_property":
                raise bases.DoNotUnderstand("unknown ConfigParamType")
            try:
                unpacked = unpack_obj(param.value)
                setattr(self, param.value.name, unpacked)
                props_for_log[param.value.name] = unpacked
            except ConsumedAttributeError as cae:
                self._log(
                    "error setting interface property",
                    name=param.value.name,
                    category="system",
                    exception=cae,
                )
                raise bases.DoNotUnderstand(f"error setting property: {cae}")
            except AttributeError:
                self._log(
                    "missing requested interface property",
                    name=param.value.name,
                    category="system",
                )
                raise bases.DoNotUnderstand(f"no property {param.value.name}")
        self._log(
            "configured from instruction",
            category="system",
            configuration=(props_for_log | dict_for_log),
        )

    def _handle_instruction(self, instruction: pro.Instruction):
        """
        interpret, reply to, and execute (if relevant) an Instruction. should
        only be called as part of the main loop.

        "configure" and "do" Instructions are acted on and then answered
        with a "wilco" or "bad_request" reply; "stop" and "kill"
        Instructions shut the Delegate down without a reply.

        Args:
            instruction: Instruction received from Station.

        Returns:
            the return value of self.shutdown() for "stop" / "kill"
                Instructions; None otherwise.
        """
        # assume we'll comply until something says otherwise
        status, err = "wilco", None
        try:
            bases.validate_instruction(instruction)
            # TODO: this might be too verbose
            self._log(
                "received instruction",
                content=instruction,
                category="comms",
                direction="recv",
            )
            if enum(instruction, "type") == "configure":
                self._configure_from_instruction(instruction)
            # TODO, maybe: different kill behavior.
            elif enum(instruction, "type") in ("stop", "kill"):
                # this occurs synchronously so move it to the finally block
                pass
            elif enum(instruction, "type") == "do":
                self.execute_do_instruction(instruction)
            else:
                raise bases.DoNotUnderstand(
                    f"unknown instruction type {enum(instruction, 'type')}"
                )
        except bases.DoNotUnderstand as dne:
            status = "bad_request"
            # for a "do" Instruction, explain why no actor matched; for
            # anything else, pass along the exception itself
            if enum(instruction, "type") == "do":
                err = self.explain_match(instruction, "action")
            else:
                err = dne
        finally:
            # NOTE: this block runs even when the try body raised something
            # other than DoNotUnderstand; the `return` below discards such
            # an exception for stop / kill Instructions
            # don't duplicate exit report behavior
            if enum(instruction, "type") in ("stop", "kill"):
                return self.shutdown()
            # otherwise send wilco or bad_request reply
            # noinspection PyTypeChecker
            self._reply_to_instruction(instruction, status, err)

    def _execute_task_with_actors(
        self,
        actors: Sequence[bases.Actor],
        instruction: pro.Instruction,
        key: Optional[Hashable],
        noid: bool,
    ):
        """
        helper function for execute_do_instruction(): have each matching
        Actor execute the Instruction, one after another, in the order
        given.

        Args:
            actors: matching actors (output of _match_task_instruction())
            instruction: "do" Instruction
            key: instruction id or randomly-generated key
            noid: True if the instruction didn't come with an id (should never
                happen), False normally
        """
        # every actor gets the same instruction and bookkeeping arguments
        shared = {"key": key, "noid": noid}
        for matched_actor in actors:
            matched_actor.execute(self, instruction, **shared)

    def execute_do_instruction(self, instruction: pro.Instruction):
        """
        find the Actors that match a "do" Instruction (an Instruction
        specifying an action) and submit their execution to the executor,
        registering the resulting thread in self.threads. typically called
        from the _handle_instruction() workflow.

        Args:
            instruction: Instruction to match and execute.
        """
        # this will raise NoActorForEvent if none match
        actors = self._match_task_instruction(instruction)
        noid = instruction.id is None
        if noid:
            # this should really never happen, but if it does, make up a
            # key and flag the thread name accordingly
            key, noid_infix = random.randint(0, int(1e7)), "noid_"
        else:
            key, noid_infix = instruction.id, ""
        threadname = f"Instruction_{noid_infix}{key}"
        # TODO: this could get sticky for the multi-step case
        future = self.exc.submit(
            self._execute_task_with_actors, actors, instruction, key, noid
        )
        self.threads[threadname] = future

    def _trysend(self, message: Message):
        """
        try to send a message to the Station. Sleep if it doesn't work --
        or if we're shut down, just assume the Station is dead and leave.
        The Delegate is locked while this method is retrying.

        Args:
            message: Message to send to station.

        Returns:
            the first element of stsend()'s return value once it is
                something other than "timeout" or "connection refused"; or
                "timeout" if we gave up because this Delegate's state is
                "stopped".
        """
        # remember whether we were already locked so we can restore that
        response, was_locked, timeout_counter = None, self.locked, count()
        self.locked = True
        while response in (None, "timeout", "connection refused"):
            # if we couldn't get to the Station, log that fact, wait, and
            # retry. lock self while this is happening to ensure we don't do
            # this in big pulses.
            if response in ("timeout", "connection refused"):
                # only log / consider giving up on the 1st, 11th, 21st...
                # consecutive failure
                if next(timeout_counter) % 10 == 0:
                    if self.state == "stopped":
                        self._log(
                            "no response from station, completing termination",
                            category="comms",
                        )
                        self.locked = False  # TODO: sure about this?
                        return "timeout"
                    self._log(response, category="comms", direction="recv")
                # TODO, maybe: this could be a separate attribute
                time.sleep(self.update_interval)
            # refresh the state fields of the message before every attempt
            response, _ = stsend(self._insert_state(message), *self.station)
        # if we locked ourselves due to bad responses, and we weren't already
        # locked for some reason -- like we often will have been if sending
        # a task report or something -- unlock ourselves.
        if was_locked is False:
            self.locked = False
        return response

    def _interpret_response(self, response: bytes) -> str:
        """
        decode a response from the Station and classify it. If it contains
        an Instruction, append that Instruction to this Delegate's
        instruction_queue.

        Args:
            response: bytes containing a hostess comm received from the
                Station.

        Returns:
            "err" if the comm failed to decode properly, "ok" if the comm
                decoded properly but did not contain an Instruction (like a
                simple acknowledgment comm), "instruction" if the comm
                contained an Instruction.
        """
        decoded = read_comm(response)
        if isinstance(decoded, dict):
            if decoded["err"]:
                # TODO: log
                return "err"
            # successfully decoded comm: the payload is in its body
            decoded = decoded["body"]
        if not isinstance(decoded, pro.Instruction):
            return "ok"
        self.instruction_queue.append(decoded)
        return "instruction"

    def talk_to_station(self, message: pro.Message) -> Union[str, bytes]:
        """
        send a Message to the Station and queue any returned Instruction.
        failed exchanges ("err", "timeout", "connection refused") are logged.

        Args:
            message: hostess protobuf Message to send to the Station

        Returns:
             status code for exchange: "ok" if successful but no Instruction
                  received (simple acknowledgement comm received);
                  "timeout" or "connection refused" for failed connections;
                  "err" for receipt of bytes we could not decode as a comm;
                  "instruction" if successful and comm contained an Instruction
        """
        response = self._trysend(message)
        # anything other than a connection-failure code is raw bytes from
        # the Station that still need decoding
        if response not in ("timeout", "connection refused"):
            response = self._interpret_response(response)
        # the status code is spelled "connection refused" (with a space)
        # everywhere else in this class; an underscore here would mean
        # refused connections were never logged
        if response in ("err", "timeout", "connection refused"):
            self._log(
                message, status=response, category="comms", direction="recv"
            )
        return response

    def _running_actions_message(self) -> list[pro.TaskReport]:
        """
        helper function for constructing Updates: describe every action
        whose status is "running".

        Returns:
            list of TaskReport Messages, one for each currently-running action.
        """
        return [
            task_msg(action)
            for action in self.actions.values()
            if action.get("status") == "running"
        ]

    def _base_message(
        self, **fields: Union[Message, str, Sequence[Message], int]
    ) -> pro.Update:
        """
        build the Update that every message to the Station starts from.

        Args:
            **fields: dict of Update field names + values to add to the base
                Update.

        Returns:
            a pro.Update message suitable for sending to the Station. Contains
                delegate id, timestamp, delegate state, running actions, and
                anything passed in **fields.
        """
        delegate_id = self.nodeid()
        timestamp = make_timestamp()
        # report each registered thread by its internal state string
        # noinspection PyProtectedMember
        thread_states = {k: v._state for k, v in self.threads.items()}
        state = pro.DelegateState(
            status=self.state,
            # TODO: loc assignment
            loc="primary",
            can_receive=False,
            busy=self.busy(),
            threads=thread_states,
        )
        running = self._running_actions_message()
        return pro.Update(
            delegateid=delegate_id,
            time=timestamp,
            state=state,
            running=running,
            **fields,
        )

    # TODO: untangle this + _base_message() workflow
    def _insert_state(self, message: pro.Update) -> pro.Update:
        """
        merge the Delegate's current configuration, elements, info counts,
        and init parameters into the state field of an Update.

        Called immediately before every attempt to send an Update to the
        Station.

        Args:
            message: Update to update.

        Returns:
            the updated Update (the same object that was passed in).
        """
        interface = pack_obj(self.config["interface"])
        cdict = pack_obj(self.config["cdict"])
        actors = self.identify_elements("actors")
        sensors = self.identify_elements("sensors")
        current = pro.DelegateState(
            interface=interface,
            cdict=cdict,
            actors=actors,
            sensors=sensors,
            # infocount is a defaultdict; send it as a plain dict
            infocount=dict(self.infocount),
            init_params=pack_obj(self.init_params),
        )
        message.state.MergeFrom(current)
        return message

    def _reply_to_instruction(
        self,
        instruction: pro.Instruction,
        status: Literal["bad_request", "wilco"],
        err: Optional[Any] = None,
    ):
        """
        answer an Instruction with an Update telling the Station whether we
        will or won't do the thing.

        Args:
            instruction: received Instruction
            status: "wilco" if we'll do it, "bad_request" if we won't/can't
            err: Object -- a code or Exception, usually -- explaining a
                "bad_request" status. None if status is "wilco".
        """
        reply = self._base_message()
        verdict = pro.Update(reason=status, instruction_id=instruction.id)
        reply.MergeFrom(verdict)
        # TODO, maybe: kinda messy?
        if err is not None:
            # attach the explanation as the Update's info
            explanation = pro.Update(info=[pack_obj(err)])
            reply.MergeFrom(explanation)
        self.talk_to_station(reply)

    def add_actionable_event(
        self, event: Any, category: Optional[Union[str, Sensor]] = None
    ):
        """
        Queue an actionable event, usually received from a Sensor, for
        transmission to the Station, and count it under its category if one
        is given. This method is most often called by an Actor.

        Args:
            event: object we'd like Station to know about
            category: optional label for type of event or originating Sensor,
                used to update self.infocount. A Sensor is counted under
                its name.
        """
        label = category.name if isinstance(category, Sensor) else category
        if label is not None:
            self.infocount[label] += 1
        self.actionable_events.append(event)

    station: tuple[str, int]
    loginfo: Mapping[str]
__init__(station_address, name, elements=(), n_threads=4, poll=0.08, timeout=10, update_interval=10, start=False, loginfo=MPt({}), _is_process_owner=False)

configurable remote processor for hostess network. can gather data and/or execute actions based on the elements attached to it. should typically be instantiated via the launch_delegate() method of the supervising Station.

Parameters:

Name Type Description Default
station_address tuple[str, int]

(hostname, port) of supervising Station

required
name str

identifying name for delegate

required
n_threads int

max threads in executor

4
elements tuple[Union[type[Sensor], type[Actor]]]

Sensors or Actors to add to delegate at creation.

()
poll float

delay, in seconds, for polling loops

0.08
timeout int

timeout, in s, for intra-hostess communications

10
update_interval float

interval, in s, for check-in Updates to supervising Station

10
Source code in hostess/station/delegates.py
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def __init__(
    self,
    station_address: tuple[str, int],
    name: str,
    elements: tuple[Union[type[bases.Sensor], type[bases.Actor]]] = (),
    n_threads: int = 4,
    poll: float = 0.08,
    timeout: int = 10,
    update_interval: float = 10,
    start: bool = False,
    loginfo: Optional[Mapping[str]] = MPt({}),
    _is_process_owner: bool = False,
):
    """
    configurable remote processor for hostess network. can gather data
    and/or execute actions based on the elements attached to it. should
    typically be instantiated via the launch_delegate() method of the
    supervising Station.

    Args:
        station_address: (hostname, port) of supervising Station
        name: identifying name for delegate
        n_threads: max threads in executor
        elements: Sensors or Actors to add to delegate at creation.
        poll: delay, in seconds, for polling loops
        timeout: timeout, in s, for intra-hostess communications
        update_interval: interval, in s, for check-in Updates to
            supervising Station
        start: passed through to the Node constructor (presumably
            whether to start running immediately -- confirm against
            bases.Node)
        loginfo: mapping of logging settings. its "logdir" value, if
            present, sets the log directory; otherwise
            GENERIC_LOGINFO["logdir"] is used. None is treated as an
            empty mapping.
        _is_process_owner: passed through to the Node constructor and
            recorded in init_params.
    """
    # the signature advertises Optional, so treat None like the empty
    # default instead of failing on loginfo.get() below
    if loginfo is None:
        loginfo = MPt({})
    super().__init__(
        name=name,
        n_threads=n_threads,
        elements=elements,
        start=start,
        poll=poll,
        timeout=timeout,
        _is_process_owner=_is_process_owner,
        logdir=loginfo.get("logdir", GENERIC_LOGINFO["logdir"]),
        loginfo=loginfo,
        station=station_address,
    )
    self.update_interval = update_interval
    self.actionable_events, self.infocount = [], defaultdict(int)
    self.actions = {}
    self.instruction_queue = []
    self.update_timer, self.reset_update_timer = timeout_factory(False)
    # TODO: add local hostname of delegate
    # construction parameters, kept for later reporting
    self.init_params = {
        "n_threads": n_threads,
        "poll": poll,
        "timeout": timeout,
        "logdir": self.logdir,
        "logfile": self.logfile,
        "update_interval": update_interval,
        "_is_process_owner": _is_process_owner,
    }
_base_message(**fields)

construct a basic Update message.

Parameters:

Name Type Description Default
**fields Union[Message, str, Sequence[Message], int]

dict of Update field names + values to add to the base Update.

{}

Returns:

Type Description
Update

a pro.Update message suitable for sending to the Station. Contains delegate id, timestamp, delegate state, running actions, and anything passed in **fields.

Source code in hostess/station/delegates.py
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
def _base_message(
    self, **fields: Union[Message, str, Sequence[Message], int]
) -> pro.Update:
    """
    build the Update that every message to the Station starts from.

    Args:
        **fields: dict of Update field names + values to add to the base
            Update.

    Returns:
        a pro.Update message suitable for sending to the Station. Contains
            delegate id, timestamp, delegate state, running actions, and
            anything passed in **fields.
    """
    delegate_id = self.nodeid()
    timestamp = make_timestamp()
    # report each registered thread by its internal state string
    # noinspection PyProtectedMember
    thread_states = {k: v._state for k, v in self.threads.items()}
    state = pro.DelegateState(
        status=self.state,
        # TODO: loc assignment
        loc="primary",
        can_receive=False,
        busy=self.busy(),
        threads=thread_states,
    )
    running = self._running_actions_message()
    return pro.Update(
        delegateid=delegate_id,
        time=timestamp,
        state=state,
        running=running,
        **fields,
    )
_check_actions()

check running actions (threads launched as part of a 'do' instruction). if any have crashed or completed, log them and report them to the Station, then remove them from the thread cache.

Source code in hostess/station/delegates.py
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
def _check_actions(self):
    """
    look over every action in self.actions (threads launched as part of
    a 'do' instruction). log each one that has crashed or completed and
    report it to the Station; once the report goes through, drop the
    action from self.actions and its thread from self.threads.
    """
    finished_ids = []
    # actions can be added from other threads while we work, so iterate
    # over a snapshot rather than the live .items() view
    for instruction_id, action in tuple(self.actions.items()):
        # TODO: multistep "pipeline" case
        exception, running = self.check_on_action(instruction_id)
        if running is True:
            continue
        # TODO: accomplish this with a wrapper
        if exception is None:
            self._log(action, status="completed", category="action")
        else:
            self._log(
                action,
                exception=exception,
                status="failed",
                category="action",
            )
        # TODO: determine if we should reset update timer here
        response = self._report_on_action(action)
        # TODO: error handling
        if response in ("connection refused", "err", "timeout"):
            # report didn't get through; keep the action and try again
            # on a later pass
            continue
        finished_ids.append(instruction_id)
    # clear all reported actions first, then their threads
    for instruction_id in finished_ids:
        self.actions.pop(instruction_id)
    for instruction_id in finished_ids:
        self.threads.pop(f"Instruction_{instruction_id}")
_check_in()

send heartbeat Update to the Station.

Source code in hostess/station/delegates.py
318
319
320
321
def _check_in(self):
    """Send a heartbeat Update to the Station, then reset the update timer."""
    heartbeat = self._base_message(reason="heartbeat")
    self.talk_to_station(heartbeat)
    self.reset_update_timer()
_execute_task_with_actors(actors, instruction, key, noid)

helper function for execute_do_instruction(). run each matching Actor in sequence.

Parameters:

Name Type Description Default
actors Sequence[Actor]

matching actors (output of _match_task_instruction())

required
instruction Instruction

"do" Instruction

required
key Optional[Hashable]

instruction id or randomly-generated key

required
noid bool

True if the instruction didn't come with an id (should never happen), False normally

required
Source code in hostess/station/delegates.py
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
def _execute_task_with_actors(
    self,
    actors: Sequence[bases.Actor],
    instruction: pro.Instruction,
    key: Optional[Hashable],
    noid: bool,
):
    """
    Run a "do" Instruction through every matching Actor, one after another.
    Helper for execute_do_instruction().

    Args:
        actors: Actors that matched the Instruction (the output of
            _match_task_instruction()).
        instruction: the "do" Instruction to execute.
        key: the instruction's id, or a randomly generated stand-in.
        noid: True when the instruction arrived without an id (not expected
            to happen); False otherwise.
    """
    # every Actor receives the same identifying keyword arguments
    identity = {"key": key, "noid": noid}
    for matched_actor in actors:
        matched_actor.execute(self, instruction, **identity)
_handle_instruction(instruction)

interpret, reply to, and execute (if relevant) an Instruction. should only be called as part of the main loop.

Parameters:

Name Type Description Default
instruction Instruction

Instruction received from Station.

required
Source code in hostess/station/delegates.py
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
def _handle_instruction(self, instruction: pro.Instruction):
    """
    interpret, reply to, and execute (if relevant) an Instruction. should
    only be called as part of the main loop.

    "configure" instructions are applied via _configure_from_instruction(),
    "do" instructions are passed to execute_do_instruction(), and "stop" /
    "kill" instructions trigger self.shutdown(). Any other type raises
    DoNotUnderstand internally, which is turned into a "bad_request" reply.

    Args:
        instruction: Instruction received from Station.

    Returns:
        the return value of self.shutdown() for "stop"/"kill" instructions;
        None otherwise.
    """
    # optimistic default: the reply is "wilco" unless DoNotUnderstand is raised
    status, err = "wilco", None
    try:
        bases.validate_instruction(instruction)
        # TODO: this might be too verbose
        self._log(
            "received instruction",
            content=instruction,
            category="comms",
            direction="recv",
        )
        if enum(instruction, "type") == "configure":
            self._configure_from_instruction(instruction)
        # TODO, maybe: different kill behavior.
        elif enum(instruction, "type") in ("stop", "kill"):
            # this occurs synchronously so move it to the finally block
            pass
        elif enum(instruction, "type") == "do":
            self.execute_do_instruction(instruction)
        else:
            raise bases.DoNotUnderstand(
                f"unknown instruction type {enum(instruction, 'type')}"
            )
    except bases.DoNotUnderstand as dne:
        status = "bad_request"
        # for "do" instructions, send back the match explanation rather than
        # the bare exception
        if enum(instruction, "type") == "do":
            err = self.explain_match(instruction, "action")
        else:
            err = dne
    finally:
        # NOTE: this block runs on every exit path from the try/except above.
        # Because of the `return` below, any exception still propagating
        # while handling a "stop"/"kill" instruction is discarded.
        # NOTE(review): for other instruction types, an exception that is
        # not DoNotUnderstand still results in a reply being sent with the
        # default "wilco" status before it propagates -- confirm intended.
        # don't duplicate exit report behavior
        if enum(instruction, "type") in ("stop", "kill"):
            return self.shutdown()
        # otherwise send wilco or bad_request reply
        # noinspection PyTypeChecker
        self._reply_to_instruction(instruction, status, err)
_insert_state(message)

insert the Delegate's current state information into an Update.

Called immediately before every attempt to send an Update to the Station.

Parameters:

Name Type Description Default
message Update

Update to update.

required

Returns:

Type Description
Update

the updated Update.

Source code in hostess/station/delegates.py
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
def _insert_state(self, message: pro.Update) -> pro.Update:
    """
    Merge a snapshot of this Delegate's current state into an Update.

    Called immediately before every attempt to send an Update to the
    Station.

    Args:
        message: the Update to annotate; it is modified in place.

    Returns:
        the same Update object, now carrying the state snapshot.
    """
    # gather the fields first (same order as the message definition is
    # populated), then build the state message in one step
    fields = {
        "interface": pack_obj(self.config["interface"]),
        "cdict": pack_obj(self.config["cdict"]),
        "actors": self.identify_elements("actors"),
        "sensors": self.identify_elements("sensors"),
        "infocount": dict(self.infocount),
        "init_params": pack_obj(self.init_params),
    }
    snapshot = pro.DelegateState(**fields)
    message.state.MergeFrom(snapshot)
    return message
_interpret_response(response)

interpret a response from the Station. If it contains an Instruction, append it to this Delegate's instruction_queue.

Parameters:

Name Type Description Default
response bytes

bytes containing a hostess comm received from the Station.

required

Returns:

Type Description
str

"err" if the comm failed to decode properly, "ok" if the comm decoded properly but did not contain an Instruction (like a simple acknowledgment comm), "instruction" if the comm contained an Instruction.

Source code in hostess/station/delegates.py
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
def _interpret_response(self, response: bytes) -> str:
    """
    Decode a response from the Station and queue any Instruction it holds.

    Args:
        response: bytes containing a hostess comm received from the Station.

    Returns:
        "err" if the comm failed to decode properly; "instruction" if the
            comm contained an Instruction (which is appended to this
            Delegate's instruction_queue); "ok" if the comm decoded
            properly but held no Instruction (like a simple
            acknowledgment comm).
    """
    comm = read_comm(response)
    if isinstance(comm, dict):
        if comm["err"]:
            # TODO: log
            return "err"
        body = comm["body"]
    else:
        # read_comm() handed back something other than a dict; inspect it
        # directly
        body = comm
    if not isinstance(body, pro.Instruction):
        return "ok"
    self.instruction_queue.append(body)
    return "instruction"
_match_task_instruction(instruction)

wrapper for self.match that specifically checks for actors that can execute a task described in an Instruction.

Parameters:

Name Type Description Default
instruction Instruction

Instruction containing a task.

required

Returns:

Type Description
list[Actor]

list of this Delegate's Actors that match the Instruction.

Raises:

Type Description
NoActorForEvent

if none of this Delegate's Actors match.

Source code in hostess/station/delegates.py
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
def _match_task_instruction(
    self, instruction: pro.Instruction
) -> list[bases.Actor]:
    """
    Find the Actors able to execute the task described in an Instruction.
    Thin wrapper around self.match() for the "action" case.

    Args:
        instruction: Instruction containing a task.

    Returns:
        list of this Delegate's Actors that match the Instruction.

    Raises:
        NoActorForEvent: if none of this Delegate's Actors match. The
            message is the stringified output of self.explain_match().
    """
    try:
        matching_actors = self.match(instruction, "action")
    except StopIteration:
        # self.match() signals "nothing matched" with StopIteration;
        # translate that into the more descriptive domain exception
        explanation = self.explain_match(instruction, "action")
        raise bases.NoActorForEvent(str(explanation))
    return matching_actors
_reply_to_instruction(instruction, status, err=None)

send a reply Update to an Instruction informing the Station that we will or won't do the thing.

Parameters:

Name Type Description Default
instruction Instruction

received Instruction

required
status Literal['bad_request', 'wilco']

"wilco" if we'll do it, "bad_request" if we won't/can't

required
err Optional[Any]

Object -- a code or Exception, usually -- explaining a "bad_request" status. None if status is "wilco".

None
Source code in hostess/station/delegates.py
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
def _reply_to_instruction(
    self,
    instruction: pro.Instruction,
    status: Literal["bad_request", "wilco"],
    err: Optional[Any] = None,
):
    """
    Tell the Station whether we will or won't carry out an Instruction.

    Args:
        instruction: the received Instruction being answered.
        status: "wilco" if we'll do it, "bad_request" if we won't/can't.
        err: object -- usually a code or an Exception -- explaining a
            "bad_request" status. None if status is "wilco".
    """
    reply = self._base_message()
    header = pro.Update(reason=status, instruction_id=instruction.id)
    reply.MergeFrom(header)
    # TODO, maybe: kinda messy?
    if err is not None:
        # attach the explanation as a single packed info entry
        explanation = pro.Update(info=[pack_obj(err)])
        reply.MergeFrom(explanation)
    self.talk_to_station(reply)
_report_on_action(action)

report to Station on completed/failed action. should only be called as part of the main loop, specifically from _check_actions().

Parameters:

Name Type Description Default
action dict

a value of this delegate's actions dict.

required
Source code in hostess/station/delegates.py
304
305
306
307
308
309
310
311
312
313
314
315
316
def _report_on_action(self, action: dict):
    """
    Send the Station a "completion" Update describing a finished (completed
    or failed) action. Intended to be called from the main loop.

    Args:
        action: a value of this delegate's `actions` dict.

    Returns:
        whatever talk_to_station() returns for the exchange.
    """
    report = task_msg(action)
    update = self._base_message(completed=report, reason="completion")
    # TODO: multi-step case
    return self.talk_to_station(update)
_running_actions_message()

helper function for constructing Updates.

Returns:

Type Description
list[TaskReport]

list of TaskReport Messages, one for each currently-running action.

Source code in hostess/station/delegates.py
558
559
560
561
562
563
564
565
566
567
568
def _running_actions_message(self) -> list[pro.TaskReport]:
    """
    Build the "running" portion of an Update.

    Returns:
        list of TaskReport Messages, one for each entry of self.actions
        whose "status" is "running".
    """
    return [
        task_msg(action)
        for action in self.actions.values()
        if action.get("status") == "running"
    ]
_send_exit_report(exception=None)

send Update to Station informing it that Delegate is exiting (and why).

Parameters:

Name Type Description Default
exception Optional[Exception]

unhandled Exception that caused Delegate's main loop to exit. None on intentional shutdown.

None
Source code in hostess/station/delegates.py
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
def _send_exit_report(self, exception: Optional[Exception] = None):
    """
    Tell the Station that this Delegate is exiting, and why.

    Sets self.state to "crashed" when an exception is given and to
    "shutdown" otherwise, then sends an "exiting" Update.

    Args:
        exception: unhandled Exception that caused Delegate's main loop
            to exit. None on intentional shutdown.
    """
    if exception is None:
        # intentional shutdown: nothing to attach
        self.state = "shutdown"
        self.talk_to_station(self._base_message(reason="exiting"))
        return
    self.state = "crashed"
    farewell = self._base_message(reason="exiting")
    try:
        details = pro.Update(
            info=[pack_obj(exc_report(exception), "exception")]
        )
    except Exception as packing_failure:
        # could not serialize the original exception; report the failure
        # to serialize instead
        details = pro.Update(
            info=[pack_obj(exc_report(packing_failure), "exception")]
        )
    farewell.MergeFrom(details)
    self.talk_to_station(farewell)
_send_info()

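construct an Update from the most recently queued entries in the actionable_events cache (at most five per call) and send it to the Station. The entries sent are removed from actionable_events, and are put back if the exchange with the Station fails.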

Source code in hostess/station/delegates.py
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
def _send_info(self):
    """
    Send queued actionable events to the Station in an "info" Update.

    At most `max_notes` events are taken per call, popped from the end of
    self.actionable_events. If the exchange fails, the popped events are
    appended back onto the queue so they can be sent later.
    """
    outgoing = self._base_message(reason="info")
    # TODO: this might want to be more sophisticated
    # TODO: arbitrary max number
    max_notes = 5
    batch = []
    for _ in range(min(max_notes, len(self.actionable_events))):
        batch.append(self.actionable_events.pop())
    self._log(
        "sending info", info=batch, category="comms", direction="send"
    )
    packed = [pack_obj(note) for note in batch]
    outgoing.MergeFrom(pro.Update(info=packed))
    response = self.talk_to_station(outgoing)
    # TODO: perhaps there's a better way to track this 'outbox'...
    #  but I'd rather not do it with a mailbox object, I want it to be
    #  quicker/more ephemeral
    if response in ("err", "connection refused", "timeout"):
        self.actionable_events += batch
_sensor_loop(sensor)

continuously check a Sensor. this function must be launched in its own thread or it will block and be useless. NOTE: should only be called from _start().

Parameters:

Name Type Description Default
sensor Sensor

Sensor to poll.

required

Returns:

Type Description
dict[str, Union[str, Optional[int], Optional[Exception]]]

dict with keys: `name`: name of sensor; `signal`: signal sent to terminate this function (if any); `exception`: exception that terminated this function (if any)

Source code in hostess/station/delegates.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
def _sensor_loop(
    self, sensor: bases.Sensor
) -> dict[str, Union[str, Optional[int], Optional[Exception]]]:
    """
    Poll a Sensor until a signal is set for it or it raises. This function
    blocks, so it must be launched in its own thread. NOTE: should only be
    called from _start().

    Args:
        sensor: Sensor to poll.

    Returns:
        dict with keys:
            name: name of sensor
            signal: signal sent to terminate this function (if any)
            exception: exception that terminated this function (if any)
    """
    failure = None
    try:
        while True:
            # any non-None entry in self.signals under this sensor's name
            # ends the loop
            if self.signals.get(sensor.name) is not None:
                break
            # noinspection PyPropertyAccess
            if not self.locked:
                sensor.check(self)
            time.sleep(sensor.poll)
    except Exception as caught:
        failure = caught
    finally:
        # always release the sensor; returning from here means this
        # summary is produced however the loop ended
        sensor.close()
        return {
            "name": sensor.name,
            "signal": self.signals.get(sensor.name),
            "exception": failure,
        }
_set_logfile()

internal function to set path to log file.

Source code in hostess/station/delegates.py
 95
 96
 97
 98
 99
100
101
def _set_logfile(self):
    """
    Set self.logfile, the path of this Delegate's log file inside
    self.logdir. The file name combines the init time (taken from
    self.loginfo when present there, otherwise self.init_time), the
    Delegate's name, and the two elements of the Station address.
    """
    timestamp = self.loginfo.get('init_time', self.init_time)
    host, port = self.station[0], self.station[1]
    filename = f"{timestamp}_{self.name}_{host}_{port}.log"
    self.logfile = Path(self.logdir, filename)
_shutdown(exception=None)

internal shutdown handler.

Parameters:

Name Type Description Default
exception Optional[Exception]

Exception that terminated Delegate's main loop, if any. should be None on a 'graceful' shutdown.

None
Source code in hostess/station/delegates.py
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
def _shutdown(self, exception: Optional[Exception] = None):
    """
    Internal shutdown handler: drop cached work, signal every thread to
    stop, and submit an exit report to the Station.

    Args:
        exception: Exception that terminated Delegate's main loop, if any.
            should be None on a 'graceful' shutdown.
    """
    # divorce oneself from actors and acts, from events and instructions
    self.actions = {}
    self.actionable_events = []
    # TODO, maybe: try to kill child processes (can't in general kill
    #  threads but sys.exit should handle it)
    # signal sensors to shut down
    for thread_name in self.threads:
        self.signals[thread_name] = 1
    # goodbye to all that
    self.instruction_queue = []
    self.actors = {}
    self.sensors = {}
    try:
        exit_report = self.exc.submit(self._send_exit_report, exception)
        self.threads["exit_report"] = exit_report
    except Exception as ex:
        self._log("exit report failed", exception=ex, category="system")
    if "exit_report" not in self.threads:
        return
    # wait to send exit report
    # NOTE(review): if this is a concurrent.futures.Future, running() is
    # False while the call is still queued, so this loop may not wait for a
    # report that has not started yet -- confirm whether done() was meant.
    while self.threads["exit_report"].running():
        time.sleep(0.1)
_trysend(message)

try to send a message to the Station. Sleep if it doesn't work -- or if we're shut down, just assume the Station is dead and leave.

Parameters:

Name Type Description Default
message Message

Message to send to station.

required
Source code in hostess/station/delegates.py
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
def _trysend(self, message: Message):
    """
    Send a message to the Station, retrying for as long as the Station
    cannot be reached. The Delegate is locked while this runs. If the
    Delegate's state is "stopped" when a failed attempt is inspected, the
    Station is assumed dead and this gives up.

    Args:
        message: Message to send to station.

    Returns:
        the first element of the pair returned by stsend() once it is
        something other than "timeout" / "connection refused", or the
        string "timeout" if we gave up because the Delegate is stopped.
    """
    unreachable = ("timeout", "connection refused")
    was_locked = self.locked
    failed_attempts = count()
    self.locked = True
    response = None
    while response is None or response in unreachable:
        # if we couldn't get to the Station, log that fact, wait, and
        # retry. lock self while this is happening to ensure we don't do
        # this in big pulses.
        if response in unreachable:
            # only log (and only consider giving up) on every tenth failure
            if next(failed_attempts) % 10 == 0:
                if self.state == "stopped":
                    self._log(
                        "no response from station, completing termination",
                        category="comms",
                    )
                    self.locked = False  # TODO: sure about this?
                    return "timeout"
                self._log(response, category="comms", direction="recv")
            # TODO, maybe: this could be a separate attribute
            time.sleep(self.update_interval)
        response, _ = stsend(self._insert_state(message), *self.station)
    # if we locked ourselves due to bad responses, and we weren't already
    # locked for some reason -- like we often will have been if sending
    # a task report or something -- unlock ourselves.
    if was_locked is False:
        self.locked = False
    return response
add_actionable_event(event, category=None)

Queue an actionable event, usually received from a Sensor, for transmission to the Station. This method is most often called by an Actor.

Parameters:

Name Type Description Default
event Any

object we'd like Station to know about

required
category Optional[Union[str, Sensor]]

optional label for type of event or originating Sensor, used to update self.infocount

None
Source code in hostess/station/delegates.py
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
def add_actionable_event(
    self, event: Any, category: Optional[Union[str, Sensor]] = None
):
    """
    Queue an actionable event, usually received from a Sensor, for
    transmission to the Station. This method is most often called by an
    Actor.

    Args:
        event: object we'd like Station to know about
        category: optional label for type of event or originating Sensor,
            used to update self.infocount. A Sensor is replaced by its
            `name`.
    """
    label = category.name if isinstance(category, Sensor) else category
    if label is not None:
        self.infocount[label] += 1
    self.actionable_events.append(event)
check_on_action(instruction_id)

check whether one of this delegate's Actions completed. if it crashed, set its status and exception keys appropriately in this delegate's actions dict. typically called as part of the main Delegate loop, specifically from _check_actions().

Parameters:

Name Type Description Default
instruction_id int

numerical identifier of Action to check.

required

Returns:

Name Type Description
exception Optional[Exception]

None if the Action terminated successfully or hasn't yet terminated; the Exception the Action raised if it didn't terminate successfully.

done bool

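True if the Action is still running; False once it has terminated. (Despite the name shown here, the code and its caller treat this flag as "still running".)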

Source code in hostess/station/delegates.py
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
def check_on_action(
    self, instruction_id: int
) -> tuple[Optional[Exception], bool]:
    """
    check whether one of this delegate's Actions has finished. if it
    crashed, set its status and exception keys appropriately in this
    delegate's `actions` dict. typically called as part of the main
    Delegate loop, specifically from _check_actions().

    Args:
        instruction_id: numerical identifier of Action to check.

    Returns:
        exception: None if the Action terminated successfully or hasn't yet
            terminated; the Exception the Action raised if it didn't
            terminate successfully.
        running: True if the Action is still running; False once it has
            terminated (successfully or not).
    """
    try:
        future = self.threads[f"Instruction_{instruction_id}"]
        # ask the future directly whether it has finished instead of
        # inferring it from a TimeoutError raised by result(0): an action
        # that itself raised TimeoutError would otherwise look as if it
        # were still running forever and never be reported or cleaned up
        if not future.done():
            return None, True
        future.result(0)
    except Exception as ex:
        # action crashed without setting its status as such (this also
        # covers a failed lookup of the thread itself)
        self.actions[instruction_id]["status"] = "crash"
        self.actions[instruction_id]["exception"] = ex
        return ex, False
    # an action wrapped in @reported will catch exceptions and do this
    # politely instead of crashing as above
    if self.actions[instruction_id].get("exception") is not None:
        self.actions[instruction_id]["status"] = "crash"
        return self.actions[instruction_id]["exception"], False
    return None, False
execute_do_instruction(instruction)

identify matching Actors and execute a "do" Instruction (an Instruction specifying an action). typically called from the _handle_instruction() workflow.

Parameters:

Name Type Description Default
instruction Instruction

Instruction to match and execute.

required
Source code in hostess/station/delegates.py
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
def execute_do_instruction(self, instruction: pro.Instruction):
    """
    Match a "do" Instruction (an Instruction specifying an action) against
    this Delegate's Actors and submit its execution to the executor.
    Typically called from the _handle_instruction() workflow.

    The submitted job is stored in self.threads under
    "Instruction_<id>", or "Instruction_noid_<random key>" when the
    Instruction has no id.

    Args:
        instruction: Instruction to match and execute.
    """
    # this will raise NoActorForEvent if none match
    actors = self._match_task_instruction(instruction)
    if instruction.id is not None:
        key, noid, noid_infix = instruction.id, False, ""
    else:
        # this should really never happen, but...
        key = random.randint(0, int(1e7))
        noid, noid_infix = True, "noid_"
    # TODO: this could get sticky for the multi-step case
    job = self.exc.submit(
        self._execute_task_with_actors, actors, instruction, key, noid
    )
    self.threads[f"Instruction_{noid_infix}{key}"] = job
talk_to_station(message)

send a Message to the Station and queue any returned Instruction.

Parameters:

Name Type Description Default
message Message

hostess protobuf Message to send to the Station

required

Returns:

Type Description
Union[str, bytes]

status code for exchange: "ok" if successful but no Instruction received (simple acknowledgement comm received); "timeout" or "connection refused" for failed connections; "err" for receipt of bytes we could not decode as a comm; "instruction" if successful and comm contained an Instruction

Source code in hostess/station/delegates.py
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
def talk_to_station(self, message: pro.Message) -> Union[str, bytes]:
    """
    send a Message to the Station and queue any returned Instruction.

    Failed exchanges ("err", "timeout", "connection refused") are logged.

    Args:
        message: hostess protobuf Message to send to the Station

    Returns:
         status code for exchange: "ok" if successful but no Instruction
              received (simple acknowledgement comm received);
              "timeout" or "connection refused" for failed connections;
              "err" for receipt of bytes we could not decode as a comm;
              "instruction" if successful and comm contained an Instruction
    """
    # status strings that mean we never got bytes back from the Station.
    # these are spelled with a space ("connection refused") everywhere they
    # are produced and checked, so the logging check below must match
    failed_connection = ("timeout", "connection refused")
    response = self._trysend(message)
    if response not in failed_connection:
        # we received something; decode it and queue any Instruction
        response = self._interpret_response(response)
    if response == "err" or response in failed_connection:
        self._log(
            message, status=response, category="comms", direction="recv"
        )
    return response

HeadlessDelegate

Bases: Delegate

simple Delegate implementation that just does stuff on its own. mostly for testing/prototyping but could easily be useful.

Source code in hostess/station/delegates.py
672
673
674
675
676
677
678
679
680
681
682
683
684
class HeadlessDelegate(Delegate):
    """
    Delegate that has no Station to talk to: it is initialized with the
    placeholder address ("", -1), and every message it would have sent is
    recorded in `message_log` instead. mostly for testing/prototyping but
    could easily be useful.
    """

    def __init__(self, *args, **kwargs):
        # placeholder Station address; nothing is ever sent to it
        super().__init__(("", -1), *args, **kwargs)
        self.message_log = []

    def talk_to_station(self, message):
        """Record `message` in self.message_log rather than sending it."""
        self.message_log.append(message)

launch_delegate(station_address, name, delegate_module='hostess.station.delegates', delegate_class='Delegate', elements=None, is_local=False, **init_kwargs)

hook for launching a delegate, designed to be easily called either locally or from an interpreter running in a separate process. Designed to be called as part of the Station.launch_delegate() workflow, but may be used in other ways.

Parameters:

Name Type Description Default
station_address tuple[str, int]

address of supervising Station

required
name str

name to assign to Delegate instance

required
delegate_module str

name of, or path to, module in which the desired Delegate subclass is defined

'hostess.station.delegates'
delegate_class str

name of Delegate subclass

'Delegate'
elements tuple[tuple[str, str]]

specifications for Actors and Sensors to attach to Delegate instance

None
is_local bool

is this Delegate being instantiated in the process of the calling Station (or other launcher) or not? if this is False, consider this process "owned by" the instantiated Delegate; terminate it when the Delegate shuts down.

False

Returns:

Type Description
Optional[Delegate]

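the instantiated Delegate if is_local is True. Otherwise the function does not return normally: it blocks until the Delegate has shut down and then exits the interpreter.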

Source code in hostess/station/delegates.py
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
def launch_delegate(
    station_address: tuple[str, int],
    name: str,
    delegate_module: str = "hostess.station.delegates",
    delegate_class: str = "Delegate",
    elements: Optional[tuple[tuple[str, str], ...]] = None,
    is_local: bool = False,
    **init_kwargs,
) -> Optional[Delegate]:
    """
    hook for launching a delegate, designed to be easily called either locally
    or from an interpreter running in a separate process. Designed to be
    called as part of the Station.launch_delegate() workflow, but may be used
    in other ways.

    Args:
        station_address: address of supervising Station
        name: name to assign to Delegate instance
        delegate_module: name of, or path to, module in which the desired
            Delegate subclass is defined
        delegate_class: name of Delegate subclass
        elements: (module name, class name) pairs specifying Actors and
            Sensors to attach to the Delegate instance. None (the default)
            attaches nothing.
        is_local: is this Delegate being instantiated in the process of the
            calling Station (or other launcher) or not? if this is False,
            consider this process "owned by" the instantiated Delegate;
            terminate it when the Delegate shuts down.
        **init_kwargs: passed through to the Delegate class's constructor.

    Returns:
        the instantiated Delegate if is_local is True. Otherwise this
        function does not return normally: it blocks until the Delegate has
        shut down and then calls sys.exit().
    """

    from hostess.utilities import import_module

    module: ModuleType = import_module(delegate_module)
    cls: Type[Delegate] = getattr(module, delegate_class)
    if is_local is False:
        init_kwargs["_is_process_owner"] = True
    delegate: Delegate = cls(station_address, name, **init_kwargs)
    # `elements` defaults to None, which is not iterable; treat it as
    # "no elements" rather than raising TypeError
    for emod_name, ecls_name in elements or ():
        emodule: ModuleType = import_module(emod_name)
        ecls: Type[Union[Actor, Sensor]] = getattr(emodule, ecls_name)
        delegate.add_element(ecls)
    # TODO: config-on-launch
    delegate.start()
    if is_local is True:
        return delegate
    # need to prevent the interpreter from exiting in order to not mess up
    # threading if running in an unmanaged process
    while delegate.is_shut_down is False:
        time.sleep(1)
    sys.exit()

station.handlers

shared helper functions for Station objects

SKIPKEYS = frozenset({'delegateid', 'state', 'running', 'arguments', 'localcall', 'data', 'result', 'config'}) module-attribute

keys of Node-internal data structures that we don't generally want to write into logs, either because they often have huge values or because they're generally redundant.

flatten_for_json(event, maxlen=128, maxdepth=3, skipkeys=SKIPKEYS)

simple log-formatting function.

Parameters:

Name Type Description Default
event Union[Message, dict]

protobuf Message or dict to flatten

required
maxlen int

maximum length for stringified values of flattened dict

128
maxdepth int

maximum depth to dig into event before truncating

3
skipkeys Collection[str]

keys / Message fields of event to ignore in logging

SKIPKEYS

Returns:

Type Description
dict[str, str]

flattened version of event w/stringified values, possibly truncated, ready to be passed to json.dump().

Source code in hostess/station/handlers.py
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
def flatten_for_json(
    event: Union[Message, dict],
    maxlen: int = 128,
    maxdepth: int = 3,
    skipkeys: Collection[str] = SKIPKEYS,
) -> dict[str, str]:
    """
    Simple log-formatting function: convert a protobuf Message to a dict
    if necessary, then pass it through `json_sanitize()`.

    Args:
        event: protobuf Message or dict to flatten
        maxlen: maximum length for stringified values of flattened dict
        maxdepth: maximum depth to dig into `event` before truncating
        skipkeys: keys / Message fields of `event` to ignore in logging

    Returns:
         flattened version of `event` w/stringified values, possibly truncated,
              ready to be passed to `json.dump()`.
    """
    # TODO: if this ends up being unperformant with huge messages, do something
    payload = m2d(event) if isinstance(event, Message) else event
    return json_sanitize(payload, maxlen, maxdepth, skipkeys)

json_sanitize(value, maxlen=128, maxdepth=1, skipkeys=SKIPKEYS, depth=0, skip=False)

Attempt to make an object representable in JSON, with standardized formatting conventions that include automated skipping, truncation, etc. Primarily intended as a helper function for flatten_for_json().

Parameters:

Name Type Description Default
value Any

object to make representable

required
maxlen int

maximum length of string representations of elements of sanitized object

128
maxdepth int

maximum depth to dig into nested objects.

1
skipkeys Collection[str]

keys or fields to omit from output.

SKIPKEYS
depth int

current dig depth. automatically incremented in recursive calls to this function.

0
skip bool

if True, just return the literal string '<skipped>'

False

Returns:

Type Description
Union[str, int, float, list[str], dict[str, Union[str, dict[str, str]]]]

JSON-sanitized version of value.

Source code in hostess/station/handlers.py
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
def json_sanitize(
    value: Any,
    maxlen: int = 128,
    maxdepth: int = 1,
    skipkeys: Collection[str] = SKIPKEYS,
    depth: int = 0,
    skip: bool = False,
) -> Union[str, int, float, list[str], dict[str, Union[str, dict[str, str]]]]:
    """
    Attempt to make an object representable in JSON, with standardized
    formatting conventions that include automated skipping, truncation, etc.
    Primarily intended as a helper function for `flatten_for_json()`.

    Conversion rules, applied in this order:

    * protobuf Messages are converted with `m2d()` and then treated like
      any other value;
    * ints and floats are returned unchanged;
    * bytes become the placeholder string '<binary>';
    * strings are truncated to `maxlen` characters;
    * mappings are sanitized key by key, unless they sit deeper than
      `maxdepth`, in which case they become a placeholder string;
    * any other collection becomes a list of its sanitized elements;
    * everything else becomes its (truncated) `repr()`.

    Args:
        value: object to make representable
        maxlen: maximum length of string representations of elements of
            sanitized object
        maxdepth: maximum depth to dig into nested objects.
        skipkeys: keys or fields to omit from output.
        depth: current dig depth. automatically incremented in recursive calls
            to this function.
        skip: if True, just return the literal string '<skipped>'

    Returns:
        JSON-sanitized version of `value`.
    """
    if skip is True:
        return "<skipped>"
    if isinstance(value, Message):
        value = m2d(value)
    if isinstance(value, (int, float)):
        return value
    if isinstance(value, bytes):
        return "<binary>"[:maxlen]
    if isinstance(value, str):
        # str() coerces str subclasses to a plain str
        return str(value[:maxlen])
    if isinstance(value, Mapping):
        if depth > maxdepth:
            return "<skipped mapping at max log depth>"[:maxlen]
        sanitized = {}
        for k, v in value.items():
            key = json_sanitize(k, maxlen)
            if isinstance(key, (list, dict)):
                # a collection-valued key (e.g. a tuple) sanitizes to an
                # unhashable object; fall back to its repr so it can still
                # serve as a dict key
                key = repr(k)[:maxlen]
            sanitized[key] = json_sanitize(
                v, maxlen, maxdepth, skipkeys, depth + 1, k in skipkeys
            )
        return sanitized
    if isinstance(value, Collection):
        # pass the caller's settings along: elements of a collection are
        # subject to the same length, depth, and skip rules as everything
        # else, and a collection counts as one level of nesting
        return [
            json_sanitize(e, maxlen, maxdepth, skipkeys, depth + 1)
            for e in value
        ]
    return repr(value)[:maxlen]

make_actiondict(action)

construct a standardized dict for recording the results of an action described by action.

Parameters:

Name Type Description Default
action Action

a pro.Action message

required

Returns:

Type Description
dict[str, Any]

a dict initialized from basic identifying information in action, intended to be used as a value of a Node.actions dict.

Source code in hostess/station/handlers.py
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def make_actiondict(action: pro.Action) -> dict[str, Any]:
    """
    build the standard bookkeeping record for a newly-started action.

    Args:
        action: a pro.Action message

    Returns:
        a dict holding the name, id, and description copied from `action`,
            a timezone-aware UTC start time taken at call time, an unset
            stop time, and the status "running". Intended to be used as a
            value of a `Node.actions` dict.
    """
    record = {
        key: getattr(action, key) for key in ("name", "id", "description")
    }
    record["start"] = dt.datetime.now(dt.UTC)
    record["stop"] = None
    record["status"] = "running"
    return record

make_function_call(action)

parse an Action Message containing specifications for a function call and create a "deferred" version of a call that matches those specifications.

Parameters:

Name Type Description Default
action Action

hostess Action Message that specifies a function call.

required

Returns:

Name Type Description
caches dict[str, list]

dict of lists the deferred call will write its stdout, stderr, and return values into

deferred Callable

partially-evaluated and wrapped function constructed from the function call specification in action. call it to actually perform the action.

Source code in hostess/station/handlers.py
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
def make_function_call(action: pro.Action) -> tuple[dict[str, list], Callable]:
    """
    parse an Action Message containing specifications for a function call and
    create a "deferred" version of a call that matches those specifications.

    Args:
        action: hostess Action Message that specifies a function call.

    Returns:
        caches: dict of lists the deferred call will write its output into.
            For the 'thread' context (also used when the context is unknown
            or unset) this holds a "result" list and a "pid" list containing
            the current process id; for 'process' and 'detached' contexts it
            is whatever `make_watch_caches()` returns.
        deferred: partially-evaluated and wrapped function constructed from
            the function call specification in `action`. call it to actually
            perform the action.

    Raises:
        TypeError: if `action` names no function.
        FileNotFoundError: if the requested module cannot be loaded.
        ImportError: if the function is not found in the requested module
            or, when no module is given, among the Python builtins.
        ValueError: if `action` specifies an unrecognized context.
    """
    # imported here rather than at module level so that this function does
    # not depend on the module's import block
    import builtins

    if action.func is None:
        raise TypeError("Can't actually do this without a function.")
    if action.module is not None:
        try:
            module = get_module(action.module)
        except (AttributeError, ImportError) as err:
            raise FileNotFoundError("module not found") from err
        try:
            func = getattr(module, action.func)
        except AttributeError as err:
            raise ImportError("function not found in module") from err
    else:
        try:
            # look the name up on the builtins module itself; getattr() on
            # the string "__builtins__" would only ever find str methods
            func = getattr(builtins, action.func)
        except AttributeError as err:
            raise ImportError("function not found in builtins") from err
    kwargs = unpack_callargs(action.arguments)
    if (ctx := enum(action, "context")) in ("thread", "unknowncontext", None):
        caches = {"result": [], "pid": [os.getpid()]}
        return caches, deferinto(func, _target=caches["result"], **kwargs)
    elif ctx in ("detached", "process"):
        # 'detached' differs from 'process' only in asking for a fork
        fork, caches = ctx == "detached", make_watch_caches()
        call = defer(watched_process(func, caches=caches, fork=fork), **kwargs)
        return caches, call
    else:
        raise ValueError(f"unknown context {ctx}")

tail_file(position, *, path=None, **_)

simple file-tail function for use in Sensors that watch a file.

Parameters:

Name Type Description Default
position Optional[int]

byte offset from start of file at which to begin reading. if None, start at the current end of the file, so that only content appended afterwards is reported.

required
path Optional[Path]

path to file. Typically partially evaluated into the function by the Sensor, not explicitly passed.

None

Returns:

Name Type Description
end Optional[int]

position of last read byte of file, or None if the file doesn't exist.

lines list[str]

all lines of file between position and end.

'

Source code in hostess/station/handlers.py
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
def tail_file(
    position: Optional[int], *, path: Optional[Path] = None, **_
) -> tuple[Optional[int], list[str]]:
    """
    simple file-tail function for use in Sensors that watch a file.

    Args:
        position: offset into the file at which to begin reading; normally
            the `end` value returned by the previous call. if None, start at
            the current end of the file, so that only content appended after
            this call is ever reported.
        path: path to file. Typically partially evaluated into the function by
            the Sensor, not explicitly passed.
        **_: ignored; accepted so callers may pass extra keyword arguments.

    Returns:
        end: offset at which the next call should resume reading (just past
            the last byte read), or None if the file doesn't exist. if
            `path` is None, `position` is returned unchanged.
        lines: all lines of file between `position` and `end`.
    """
    if path is None:
        return position, []
    if not path.exists():
        return None, []
    # stat once so every comparison below sees the same file size
    size = os.stat(path).st_size
    if position is None or position > size:
        # first look at the file, or the file has shrunk since the last
        # call (e.g. it was truncated): resume from its current end
        return size, []
    if position == size:
        # nothing new has been written
        return position, []
    with path.open() as stream:
        stream.seek(position)
        lines = stream.readlines()
        return stream.tell(), lines

unpack_callargs(arguments)

unpack PythonObject Messages into a dict of kwargs.

Parameters:

Name Type Description Default
arguments Sequence[PythonObject]

sequence of PythonObject Messages

required

Returns:

Type Description
dict[str, Any]

dict constructed from deserialized content of arguments, suitable for being splatted into a function

Source code in hostess/station/handlers.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
def unpack_callargs(arguments: Sequence[pro.PythonObject]) -> dict[str, Any]:
    """
    turn a sequence of PythonObject Messages into keyword arguments.

    Args:
        arguments: sequence of PythonObject Messages

    Returns:
        dict mapping each Message's name to its value as deserialized by
            `unpack_obj()`, suitable for being splatted into a function

    Raises:
        ValueError: if any Message's value or name is None.
    """
    unpacked = {}
    for argument in arguments:
        payload, label = argument.value, argument.name
        if payload is None or label is None:
            raise ValueError("need both value and argument name")
        unpacked[label] = unpack_obj(argument)
    return unpacked

watch_dir(contents, *, path=None, **_)

simple ls-like diff intended for use by Sensors that watch a directory.

Source code in hostess/station/handlers.py
151
152
153
154
155
156
157
158
159
160
161
162
163
164
def watch_dir(
    contents: list[str], *, path: Optional[Path] = None, **_
) -> tuple[Optional[list[str]], list[str]]:
    """
    simple ls-like diff intended for use by Sensors that watch a directory.

    Args:
        contents: directory listing from the previous call, or None if no
            listing has been taken yet.
        path: directory to list. if it is None or does not exist, `contents`
            is handed back unchanged and nothing is reported as new.
        **_: ignored; accepted so callers may pass extra keyword arguments.

    Returns:
        current: stringified paths of the entries now in `path`.
        new: entries of `current` that are absent from `contents`, in no
            particular order. always empty when `contents` is None.
    """
    if path is None or not path.exists():
        return contents, []
    listing = [str(entry) for entry in path.iterdir()]
    if contents is None:
        # first listing: nothing to compare against
        return listing, []
    fresh = set(listing).difference(contents)
    return listing, list(fresh)

station.messages

utilities for interpreting and constructing hostess protobuf Messages.

Mailbox

manager class for lists of messages

Source code in hostess/station/messages.py
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
class Mailbox:
    """
    manager class for lists of messages.

    Messages are kept in `self.messages`, a mutable mapping from integer
    position to `Msg`. Items added with `append()` or item assignment are
    wrapped in `Msg` as needed.
    """

    # TODO: improve efficiency with caching or something

    def __init__(self, messages: Optional[MutableMapping[int, Msg]] = None):
        """
        Args:
            messages: existing mapping of messages to manage. if None, the
                Mailbox starts out with an empty dict.

        Raises:
            TypeError: if `messages` is not a MutableMapping.
        """
        messages = {} if messages is None else messages
        if not isinstance(messages, MutableMapping):
            raise TypeError("messages must be a MutableMapping")
        self.messages = messages

    def _sizer(self):
        """cumulative sums of the `size` attributes of held messages."""
        # iterate over a snapshot so that the mapping may change meanwhile
        return accumulate(
            map(attrgetter("size"), tuple(self.messages.values())), add
        )

    def prune(self, max_mb: float = 256):
        """
        drop messages once the cumulative size limit is exceeded.

        Messages are walked in mapping order; the first message that pushes
        the running total past `max_mb` is discarded along with every
        message after it.

        Args:
            max_mb: size limit, compared against `mb()` of the running total
                (presumably megabytes; see `mb()`).
        """
        # NOTE(review): this keeps the messages at the front of the mapping,
        # as the original slice `[:i]` expressed. confirm that dropping the
        # later messages (rather than the earlier ones) is what is wanted.
        for i, size in enumerate(self._sizer()):
            if mb(size) > max_mb:
                # a mapping cannot be sliced, so remove the surplus keys one
                # by one. pop() with a default tolerates keys that vanished
                # in the meantime.
                for key in tuple(self.messages)[i:]:
                    self.messages.pop(key, None)
                break

    @staticmethod
    def maybe_construct_msg(thing: Union[dict, Message]):
        """
        wrap `thing` in a `Msg` unless it already is one.

        Args:
            thing: a protobuf Message, a `Msg`, or a dict-like object whose
                ["content"]["body"] entry holds the Message.

        Returns:
            `thing` itself if it is a `Msg`, otherwise a new `Msg`.
        """
        # 'outbox' case
        if isinstance(thing, Message):
            return Msg(thing)
        # 'edited Msg' case
        elif isinstance(thing, Msg):
            return thing
        # 'inbox' case
        return Msg(thing["content"]["body"])

    def __getitem__(self, key):
        return self.messages[key]

    def __setitem__(self, key, value):
        self.messages[key] = self.maybe_construct_msg(value)

    def append(self, item):
        """store `item` under the next integer key after the current max."""
        if len(self.messages) == 0:
            nextplace = 0
        else:
            nextplace = max(self.messages.keys()) + 1
        self.messages[nextplace] = self.maybe_construct_msg(item)

    def __len__(self):
        return len(self.messages)

    def __iter__(self):
        return iter(self.messages.values())

    # TODO: we should probably partly cache this or something
    def sort(self) -> dict:
        """
        group held messages by their `reason` attribute.

        Raises:
            TypeError: if looking up `reason` fails with an AttributeError.
        """
        try:
            # noinspection PyTypeChecker
            return groupby(lambda m: m.reason, tuple(self.messages.values()))
        except AttributeError as err:
            raise TypeError(
                "This method is only used for Station inboxes."
            ) from err

    def _get_completed(self):
        return self.sort().get("completion", [])

    def _get_heartbeats(self):
        return self.sort().get("heartbeat", [])

    def _get_wilco(self):
        return self.sort().get("wilco", [])

    def _get_info(self):
        return self.sort().get("info", [])

    # each property re-sorts the mailbox and picks out one 'reason' group
    info = property(_get_info)
    completed = property(_get_completed)
    heartbeats = property(_get_heartbeats)
    wilco = property(_get_wilco)

Msg

Helper class for hostess protobuf Messages. Allows hostess classes to (usually) abstract away protobuf-specific qualities of Messages. Also improves efficiency of internal Node operations by caching encode/decode operations.

Although Msg is not actually immutable, it should be treated as if it were immutable due to its aggressive caches. If the 'content' of a Msg needs to change, you should always construct a new one rather than modifying it inplace.

Source code in hostess/station/messages.py
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
class Msg:
    """
    Helper class for hostess protobuf Messages. Allows hostess classes to
    (usually) abstract away protobuf-specific qualities of Messages. Also
    improves efficiency of internal Node operations by caching encode/decode
    operations.

    Although Msg is not _actually_ immutable, it should be treated as if it
    were immutable due to its aggressive caches. If the 'content' of a Msg
    needs to change, you should always construct a new one rather than
    modifying it inplace.

    Cached results are stored on the instance itself, so they are released
    together with the Msg.
    """

    def __init__(self, message: Message):
        """
        Args:
            message: protobuf Message, preferably a hostess-specific protobuf
                Message.
        """
        self.message, self.sent = message, False
        self.size = self.message.ByteSize()

    def _memoized(self, key, compute):
        """
        return the value cached under `key`, calling `compute()` to produce
        and store it on first use. exceptions raised by `compute()` are not
        cached.
        """
        # go through __dict__ directly: ordinary attribute access for a name
        # that is not set yet would be routed into __getattr__
        memo = self.__dict__.setdefault("_memo", {})
        if key not in memo:
            memo[key] = compute()
        return memo[key]

    @cached_property
    def comm(self):
        """self.message serialized into a hostess comm."""
        return make_comm(self.message)

    def unpack(self, field=None):
        """
        unpack the wrapped Message, or one Message nested inside it.

        Args:
            field: name of the nested element to unpack. if None, unpack
                the whole Message.

        Returns:
            result of `unpack_message()` for the selected Message (cached).

        Raises:
            AttributeError: if `field` cannot be found in the Message, or
                what is found there is not itself a Message.
        """
        return self._memoized(("unpack", field), lambda: self._unpack(field))

    def _unpack(self, field):
        """uncached implementation of `unpack()`."""
        if field is None:
            return unpack_message(self.message)
        not_found = f"{field} not found in message"
        try:
            element = dig_for_values(self.message, field)
            # explicit check instead of `assert`, which is stripped when
            # Python runs with -O
            found = isinstance(element, Message)
            unpacked = unpack_message(element) if found else None
        except AttributeError as err:
            raise AttributeError(not_found) from err
        if not found:
            raise AttributeError(not_found)
        return unpacked

    @cached_property
    def body(self):
        """the whole Message, unpacked."""
        return self.unpack()

    # TODO, maybe: too expensive?
    def __getattr__(self, attr):
        """
        fallback attribute lookup: resolve `attr` from the Message's content.

        Tries `unpack(attr)` first and, if that fails, takes the first
        value `dig_for_values()` finds for `attr` in the unpacked body.

        Raises:
            AttributeError: if neither lookup produces a value.
        """
        if isinstance(attr, str) and attr.startswith("__") and attr.endswith(
            "__"
        ):
            # never dig for special-method names: they are probed (e.g. by
            # copy / pickle) on instances whose `message` may not be set
            # yet, and digging would then recurse back into __getattr__
            raise AttributeError(f"Msg has no attribute '{attr}'")
        return self._memoized(("getattr", attr), lambda: self._dig(attr))

    def _dig(self, attr):
        """uncached implementation of `__getattr__()`."""
        try:
            try:
                return self.unpack(attr)
            except AttributeError:
                return dig_for_values(self.body, attr)[0]
        except TypeError:
            raise AttributeError(f"Msg has no attribute '{attr}'")

    def pprint(self, field=None):
        """`format_message()` of the body, or of attribute `field` (cached)."""

        def render():
            if field is None:
                return format_message(self.body)
            return format_message(getattr(self, field))

        return self._memoized(("pprint", field), render)

    def display(self, field=None):
        """`yprint()` of the body, or of attribute `field` (cached)."""

        def render():
            if field is None:
                return yprint(self.body, maxlen=256)
            return yprint(getattr(self, field), maxlen=256)

        return self._memoized(("display", field), render)

    def __str__(self):
        def render():
            try:
                return self.pprint()
            except NotImplementedError:
                # format_message() has no layout for this kind of message
                return self.display()

        return self._memoized(("str",), render)

    def __getitem__(self, key):
        return self.__getattr__(key)

    def __repr__(self):
        return self.__str__()

    sent = False
comm cached property

self.message serialized into a hostess comm.

__init__(message)

Parameters:

Name Type Description Default
message Message

protobuf Message, preferably a hostess-specific protobuf Message.

required
Source code in hostess/station/messages.py
395
396
397
398
399
400
401
402
def __init__(self, message: Message):
    """
    wrap a protobuf Message, marking it as not yet sent and recording the
    size reported by its `ByteSize()` method.

    Args:
        message: protobuf Message, preferably a hostess-specific protobuf
            Message.
    """
    self.message = message
    self.sent = False
    self.size = self.message.ByteSize()
unpack(field=None) cached
Source code in hostess/station/messages.py
409
410
411
412
413
414
415
416
417
418
419
420
def unpack(self, field=None):
    """
    unpack the wrapped Message, or one Message nested inside it.

    Results are cached per instance and per `field`.

    Args:
        field: name of the nested element to unpack. if None, unpack the
            whole Message.

    Returns:
        result of `unpack_message()` for the selected Message.

    Raises:
        AttributeError: if `field` cannot be found in the Message, or what
            is found there is not itself a Message.
    """
    # the cache lives on the instance, so it is released together with it
    # (functools.cache on a method holds a reference to every `self` it
    # has seen). __dict__ is used directly so that the lookup never falls
    # through to __getattr__.
    memo = self.__dict__.setdefault("_unpack_memo", {})
    if field in memo:
        return memo[field]
    if field is None:
        unpacked = unpack_message(self.message)
    else:
        not_found = f"{field} not found in message"
        try:
            element = dig_for_values(self.message, field)
            # explicit check instead of `assert`, which is stripped when
            # Python runs with -O
            found = isinstance(element, Message)
            unpacked = unpack_message(element) if found else None
        except AttributeError as err:
            raise AttributeError(not_found) from err
        if not found:
            raise AttributeError(not_found)
    memo[field] = unpacked
    return unpacked

byteorder()

format system byteorder for inclusion in a struct.Struct format string.

Returns:

Type Description
str

"<" on little-endian platforms, ">" on big-endian platforms.

Source code in hostess/station/messages.py
45
46
47
48
49
50
51
52
def byteorder() -> str:
    """
    give the byte-order prefix for a struct.Struct format string that
    matches the system's native byte order.

    Returns:
        "<" on little-endian platforms, ">" on big-endian platforms.
    """
    if sys.byteorder == "little":
        return "<"
    return ">"

default_arg_packing(kwargs)

convert a dict that represents kwargs for a function call into a list of pro.PythonObjects.

Parameters:

Name Type Description Default
kwargs dict[str, Any]

dict containing keyword arguments for a function call, in the same format you would use if you were to locally execute target_function(**kwargs)

required

Returns:

Type Description
list[PythonObject]

list of pro.PythonObject Messages giving names and serialized values of kwargs

Source code in hostess/station/messages.py
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
def default_arg_packing(kwargs: dict[str, Any]) -> list[pro.PythonObject]:
    """
    serialize the keyword arguments of a function call as a list of
    pro.PythonObjects, one per argument.

    Args:
        kwargs: `dict` containing keyword arguments for a function call, in the
            same format you would use if you were to locally execute
            `target_function(**kwargs)`

    Returns:
        list with the result of `pack_obj(value, name)` for each item of
            `kwargs`, in the order of `kwargs`
    """
    return [pack_obj(value, name) for name, value in kwargs.items()]

dict2msg(mapping, proto_class, mtypes=(dict, MPt), proto_module=pro, pack_objects=True)

construct a protobuf from a dict, filtering any keys that are not fields of proto_class and recursively diving into nested dicts.

Source code in hostess/station/messages.py
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
def dict2msg(
    mapping,
    proto_class,
    mtypes=(dict, MPt),
    proto_module=pro,
    pack_objects=True,
) -> Message:
    """
    construct a protobuf from a dict, filtering any keys that are not fields
    of `proto_class` and recursively diving into nested dicts.

    Args:
        mapping: dict-like object holding field names and values.
        proto_class: protobuf Message class to instantiate.
        mtypes: types of value treated as nested messages and converted
            recursively.
        proto_module: module in which the classes of nested messages are
            looked up.
        pack_objects: if True, values destined for a bytes-valued
            PythonObject-style field that are not already `bytes` are
            serialized with `pack_obj()`.

    Returns:
        an instance of `proto_class` populated from `mapping`.
    """
    fdict, fields = proto_formatdict(proto_class), {}
    for k, v in mapping.items():
        if k not in fdict:
            continue
        if isinstance(v, mtypes):
            # NOTE(review): the nested class is looked up under the field's
            # own name -- confirm that field names match class names.
            # pass our settings down so nested messages are built by the
            # same rules as this one
            fields[k] = dict2msg(
                v, getattr(proto_module, k), mtypes, proto_module, pack_objects
            )
        elif isinstance(v, dt.datetime):
            fields[k] = make_timestamp(v)
        elif isinstance(v, dt.timedelta):
            fields[k] = make_duration(v)
        # special behavior for PythonObject
        elif (
            isinstance(fdict[k], dict)
            and fdict[k].get("value") == "BYTES"
            and not isinstance(v, bytes)
            and pack_objects is True
        ):
            fields[k] = pack_obj(v)
        else:
            fields[k] = v
    return proto_class(**fields)

format_message(unpacked, maxlen=256)

default string formatter for unpacked message. TODO: more sophisticated behavior.

Source code in hostess/station/messages.py
621
622
623
624
625
626
627
628
629
630
631
632
def format_message(unpacked, maxlen=256):
    """
    default string formatter for unpacked message: picks a line printer
    based on which key is present and joins its output with newlines.
    TODO: more sophisticated behavior.

    Args:
        unpacked: unpacked message; a mapping containing either a "nodeid"
            or a "loc" key.
        maxlen: passed through to the line printer.

    Returns:
        the formatted lines joined into a single string.

    Raises:
        NotImplementedError: if `unpacked` has neither key.
    """
    present = unpacked.keys()
    if "nodeid" in present:
        printer = _print_update
    elif "loc" in present:
        printer = _print_state
    else:
        raise NotImplementedError
    return "\n".join(printer(unpacked, maxlen))

make_action(description=None, **fields)

construct a default pro.Action message.

Parameters:

Name Type Description Default
description Optional[dict[str, str]]

optional dict to use as "description" field of Action. For terse task descriptions to agents that may only need to hear a single name or number to know what to do.

None
**fields Union[bool, Mapping[str, str], Message, int, str]

kwargs to interpret as fields of Action Message. Must include "call" if description is None.

{}

Returns:

Type Description
Action

a pro.Action Message.

Raises:

Type Description
TypeError

if you specified neither a description nor a call.

Source code in hostess/station/messages.py
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
def make_action(
    description: Optional[dict[str, str]] = None,
    **fields: Union[bool, Mapping[str, str], Message, int, str],
) -> pro.Action:
    """
    build a pro.Action message, assigning it a random id if none is given.

    Args:
        description: optional dict to use as "description" field of Action.
            For terse task descriptions to agents that may only need to hear
            a single name or number to know what to do.
        **fields: kwargs to interpret as fields of Action Message. Must
            include "call" if description is None.

    Returns:
        a pro.Action Message.

    Raises:
        TypeError: if you specified neither a description nor a call.
    """
    if fields.get("id") is None:
        lowest, highest = int(1e7), int(1e8)
        fields["id"] = random.randint(lowest, highest)
    action = pro.Action(description=description, **fields)
    has_call = action.WhichOneof("call") is not None
    if has_call or description is not None:
        return action
    raise TypeError("must pass a description or command message.")

make_function_call_action(func, module=None, kwargs=MPt({}), context='thread', **action_fields)

make a pro.Action Message specifying a Python function call.

Source code in hostess/station/messages.py
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
def make_function_call_action(
    func: str,
    module: Optional[str] = None,
    kwargs: Union[list[pro.PythonObject], Mapping[str, Any]] = MPt({}),
    context: Literal["thread", "process", "detached"] = "thread",
    **action_fields,
) -> pro.Action:
    """
    make a pro.Action Message specifying a Python function call.

    Args:
        func: name of the function to call.
        module: name of the module containing the function, if any.
        kwargs: arguments for the call: either a mapping of argument names
            to values, which is serialized with `default_arg_packing()`, or
            a list of already-packed pro.PythonObject Messages, which is
            used as it is.
        context: execution context to request for the call.
        **action_fields: further fields for the Action. "name" defaults to
            `func`.

    Returns:
        a pro.Action Message wrapping the function call.
    """
    action_fields.setdefault("name", func)
    if isinstance(kwargs, Mapping):
        objects = default_arg_packing(kwargs)
    else:
        # if kwargs is already a list of PythonObjects, don't try to repack.
        # this is tested explicitly rather than with `assert`, which is
        # stripped when Python runs with -O.
        try:
            already_packed = isinstance(kwargs[0], pro.PythonObject)
        except IndexError:
            # an empty sequence contains nothing to pack
            already_packed = True
        except (KeyError, TypeError):
            already_packed = False
        objects = kwargs if already_packed else default_arg_packing(kwargs)
    call = pro.FunctionCall(
        func=func, module=module, context=context, arguments=objects
    )
    return make_action(**action_fields, functioncall=call)

make_instruction(instructiontype, **kwargs)

Standardized factory function for Instruction Messages. This is generally the most convenient and reliable way to create an Instruction for a Station to send to a Delegate. Station uses it by default to create 'configure' and 'shutdown'-type Instructions, and it is an essential component of most InstructionFromInfo.instruction_maker functions.

Automatically adds a timestamp and a random id to the Instruction.

Parameters:

Name Type Description Default
instructiontype str

type of instruction to make, typically 'do', 'configure', or 'stop'.

required
kwargs Union[int, Message, Mapping[str, str], Sequence[Message]]

Message fields and values to include in Instruction.

{}

Returns:

Type Description
Instruction

a hostess Instruction protobuf Message.

Source code in hostess/station/messages.py
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
def make_instruction(
    instructiontype: str,
    **kwargs: Union[int, Message, Mapping[str, str], Sequence[Message]],
) -> pro.Instruction:
    """
    Standardized factory function for Instruction Messages. This is generally
    the most convenient and reliable way to create an Instruction for a
    Station to send to a Delegate. `Station` uses it by default to create
    'configure' and 'shutdown'-type Instructions, and it is an essential
    component of most `InstructionFromInfo.instruction_maker` functions.

    The Instruction is stamped with the result of `make_timestamp()` and,
    unless an "id" is passed, given a random id.

    Args:
        instructiontype: type of instruction to make, typically 'do',
            'configure', or 'stop'.
        kwargs: Message fields and values to include in Instruction.

    Returns:
        a hostess Instruction protobuf Message.

    Raises:
        ValueError: if the Instruction's type is "do" and its task is None.
    """
    needs_id = kwargs.get("id") is None
    if needs_id:
        kwargs["id"] = random.randint(int(1e7), int(1e8))
    instruction = pro.Instruction(
        time=make_timestamp(), type=instructiontype, **kwargs
    )
    # NOTE(review): this relies on the built Message reporting its type as
    # the string "do" and an unset task as None -- confirm against the
    # Instruction definition.
    taskless_do = instruction.type == "do" and instruction.task is None
    if taskless_do:
        raise ValueError("must assign a task for a 'do' action.")
    return instruction

obj2scanf(obj)

construct a struct / scanf format string for obj, along with a code for the 'string' type if it is str, bytes, or NoneType (struct strings represent all these types with 's', so an additional code is required for recipients to reconstruct them as the correct Python type).

This function accepts most primitive Python types, as well as lists or tuples of primitive types. Does not accept sequences of mixed 'string' types.

Note that this function is generally not useful for lists or tuples containing mixed data types or many distinct strings/bytestrings. It will in general be more efficient to serialize them some other way, because the struct string itself will often be long enough to negate the benefits of terse binary packing.

Parameters:

Name Type Description Default
obj Union[int, float, None, str, bytes, tuple[Union[int, float], tuple[None], tuple[str], tuple[bytes], list[Union[int, float]], list[None], list[str], list[bytes]]]

object for which to construct format string

required

Returns:

Name Type Description
format_string str

format string for obj

string_code Optional[str]

"none" if obj is None or a list / tuple of None; "str" if obj is a str or a list '/ tuple of str; "bytes" if scalar is a bytes or a list / tuple of bytes; None otherwise.

Raises:

Type Description
TypeError

if obj2scanf() does not know how to handle obj's type.

Source code in hostess/station/messages.py
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
def obj2scanf(
    obj: Union[
        int,
        float,
        None,
        str,
        bytes,
        tuple[
            Union[int, float],
            tuple[None],
            tuple[str],
            tuple[bytes],
            list[Union[int, float]],
            list[None],
            list[str],
            list[bytes],
        ],
    ]
) -> tuple[str, Optional[str]]:
    """
    construct a struct / scanf format string for `obj`, along with a code for
    the 'string' type if it is `str`, `bytes`, or `NoneType` (struct strings
    represent all these types with 's', so an additional code is required for
    recipients to reconstruct them as the correct Python type).

    This function accepts most primitive Python types, as well as lists or
    tuples of primitive types. Does not accept sequences of mixed 'string'
    types.

    Note that this function is generally not useful for lists or tuples
    containing mixed data types or many distinct strings/bytestrings. It will
    in general be more efficient to serialize them some other way, because the
    struct string itself will often be long enough to negate the benefits of
    terse binary packing.

    Args:
        obj: object for which to construct format string

    Returns:
        format_string: format string for `obj`
        string_code: "none" if `obj` is `None` or a `list` / `tuple` of `None`;
            "str" if `obj` is a `str` or a `list` '/ `tuple` of `str`;
            "bytes" if scalar is a `bytes` or a `list` / `tuple` of `bytes`;
            `None` otherwise.

    Raises:
        TypeError: if `obj2scanf()` does not know how to handle `obj`'s type.
    """
    if not isinstance(
        obj, (str, bytes, int, float, list, tuple, bool, NoneType)
    ):
        raise TypeError(f"{type(obj)} is not supported.")
    if not isinstance(obj, (list, tuple)):
        return scalarchar(obj)
    chars = gmap(scalarchar, obj)
    if all(c[1] is None for c in chars):
        chars = split_when(chars, lambda x, y: x[0] != y[0])
        return "".join([f"{len(char)}{char[0][0]}" for char in chars]), None
    if not all_equal(c[1] for c in chars):
        raise TypeError("arrays of mixed string types are not supported.")
    return "".join(f"{char[0]}" for char in chars), chars[0][1]

pack_obj(obj, name='')

default function for serializing an in-memory object as a pro.PythonObject Message. If obj is "scalar", serialize it using simple struct formatting; if it is a np.ndarray of non-object type, use its ndarray.tobytes() representation; otherwise, serialize it using dill. This is a good function for general-purpose object passing, and is used extensively in internal Node behaviors. However, it may in some cases be more efficient to implement serialization functions optimized for specific data formats.

Parameters:

Name Type Description Default
obj Any

object to serialize as a pro.PythonObject Message.

required
name str

optional name for obj in Message; useful if you intend the recipient to pass obj as a keyword argument to a function.

''

Returns:

Type Description
PythonObject

obj serialized as a pro.PythonObject.

Source code in hostess/station/messages.py
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
def pack_obj(obj: Any, name: str = "") -> pro.PythonObject:
    """
    default function for serializing an in-memory object as a pro.PythonObject
    Message. If `obj` is "scalar" (`None`, `str`, `bytes`, `int`, `float`),
    serialize it using simple struct formatting; if it is a `np.ndarray` of
    non-object type, use its `ndarray.tobytes()` representation; otherwise,
    serialize it using `dill`. This is a good function for general-purpose
    object passing, and is used extensively in internal Node behaviors.
    However, it may in some cases be more efficient to implement
    serialization functions optimized for specific data formats.

    Args:
        obj: object to serialize as a `pro.PythonObject` Message. If it is
            already a `pro.PythonObject`, it is returned unchanged.
        name: optional name for `obj` in Message; useful if you intend the
            recipient to pass `obj` as a keyword argument to a function.

    Returns:
        `obj` serialized as a `pro.PythonObject`.
    """
    if isinstance(obj, pro.PythonObject):
        return obj

    def dilled():
        # generic fallback: let dill serialize whatever `obj` is
        return pro.PythonObject(
            name=name, serialization="dill", value=dill.dumps(obj)
        )

    if isinstance(obj, NoneType):
        # None carries no data; a single null byte is a placeholder payload
        (scanf, chartype), payload = obj2scanf(obj), b"\x00"
    elif isinstance(obj, (str, bytes, int, float)):
        scanf, chartype = obj2scanf(obj)
        payload = obj
        if isinstance(obj, str):
            payload = obj.encode("utf-8")
            # the repeat count of the 's' code counts *bytes*. Non-ASCII
            # text encodes to more bytes than it has characters, and
            # struct.pack() silently truncates anything past the count, so
            # always size the field from the encoded payload.
            scanf = f"{len(payload)}s"
    elif isinstance(obj, np.ndarray):
        dtype = str(obj.dtype)
        if dtype == "object" or ", 'O', " in dtype:
            # object dtype does not have stable byte-level representation
            return dilled()
        return pro.PythonObject(
            name=name,
            arrspec=str({'dtype': dtype, 'shape': obj.shape}),
            value=obj.tobytes()
        )
    else:
        return dilled()
    try:
        value = struct.pack(scanf, payload)
    except struct.error:
        # `scanf` was not a usable struct format for this value (e.g. an
        # int too large for any fixed-width code); fall back to dill rather
        # than failing to serialize a plain Python scalar.
        return dilled()
    return pro.PythonObject(
        name=name,
        scanf=scanf,
        chartype=chartype,
        value=value,
    )

scalarchar(scalar)

Determine an appropriate format code and, if necessary, byte string type identifier for a 'scalar' object, to be used as a component of a struct.Struct format string.

Parameters:

Name Type Description Default
scalar Union[str, bytes, int, float, list, tuple, bool, None]

object for which to determine code / identifier

required

Returns:

Name Type Description
code str

format code for scalar suitable for inclusion in struct string

string_identifier Optional[str]

"nonetype" if scalar is None; "str" if scalar is a str; "bytes" if scalar is a bytes; None otherwise.

Raises:

Type Description
TypeError

if scalar is not actually a scalar (as defined here).

Source code in hostess/station/messages.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
def scalarchar(
    scalar: Union[str, bytes, int, float, list, tuple, bool, None]
) -> tuple[str, Optional[str]]:
    """
    Determine an appropriate format code and, if necessary, byte string type
    identifier for a 'scalar' object, to be used as a component of a
    struct.Struct format string.

    Args:
        scalar: object for which to determine code / identifier

    Returns:
        code: format code for `scalar` suitable for inclusion in struct string
        string_identifier: "none" if scalar is `None`; "str" if scalar is a
            `str`; "bytes" if scalar is a `bytes`; `None` otherwise.

    Raises:
        TypeError: if `scalar` is not actually a scalar (as defined here).
    """
    if not isinstance(scalar, (str, bytes, int, float, bool, NoneType)):
        raise TypeError(f"{type(scalar)} is not supported by scalarchar.")
    if isinstance(scalar, (str, bytes, NoneType)):
        repeat = len(scalar) if scalar is not None else 1
        return f"{repeat}s", type(scalar).__name__.lower()
    # noinspection PyUnresolvedReferences
    return np.min_scalar_type(scalar).char, None

task_msg(actiondict, steps=None)

construct a hostess TaskReport Message from an actiondict (a dict of the format produced by make_actiondict() and expected by Delegates as values of their actions attribute). Delegates call this function to help construct Updates to a Station describing the results of a completed task (whether successful or failed).

Parameters:

Name Type Description Default
actiondict dict

dict containing data from and metadata about a completed task (see handlers.make_actiondict() for format).

required
steps Sequence[dict]

Placeholder for 'pipeline' behavior. Not currently implemented; must always be None.

None

Returns:

Type Description
TaskReport

A hostess TaskReport that can be used as the "completion" field of a hostess Update.

Source code in hostess/station/messages.py
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
def task_msg(actiondict: dict, steps: Sequence[dict] = None) -> pro.TaskReport:
    """
    build a hostess TaskReport Message describing a task from its actiondict
    (a `dict` in the format produced by `make_actiondict()`, as stored in a
    Delegate's `actions` attribute). Delegates use this to report the outcome
    of a task -- successful or not -- to a Station.

    Args:
        actiondict: dict of data from and metadata about a task (see
            `handlers.make_actiondict()` for format). Must contain "status",
            "id", and "instruction_id" keys.
        steps: Placeholder for 'pipeline' behavior. Not currently implemented;
            must always be None.

    Returns:
        A hostess TaskReport that can be used as the "completion"
            field of a hostess Update.

    Raises:
        NotImplementedError: if `steps` is given or `actiondict` contains a
            "steps" key.
    """
    # pipelines are not supported, however they are requested
    if steps is not None:
        raise NotImplementedError
    if "steps" in actiondict:
        raise NotImplementedError
    extras = {}
    # only tasks that are no longer running carry a (packed) result
    if actiondict["status"] != "running":
        extras["result"] = pack_obj(actiondict.get("result"))
    extras["time"] = dict2msg(actiondict, pro.ActionTime)
    extras["id"] = actiondict["id"]
    # fields dict2msg can copy directly come first; the hand-built ones are
    # then merged on top
    report = dict2msg(actiondict, pro.ActionReport)
    report.MergeFrom(pro.ActionReport(**extras))
    instruction_id = actiondict["instruction_id"]
    return pro.TaskReport(instruction_id=instruction_id, action=report)

unpack_obj(obj)

Default deserialization function for pro.PythonObject Messages. Used extensively in internal Node behaviors and by stock Actors. Good for general-purpose object passing, although it may in some cases be more efficient to implement fancier deserialization optimized for a specific application's data models or formats.

Parameters:

Name Type Description Default
obj PythonObject

hostess PythonObject Message.

required

Returns:

Type Description
Any

object deserialized from obj.

Source code in hostess/station/messages.py
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
def unpack_obj(obj: pro.PythonObject) -> Any:
    """
    Default deserializer for pro.PythonObject Messages. Decodes the Message's
    `value` according to whichever of its `serialization`, `arrspec`, or
    `scanf` fields is set; if none is, returns `value` as-is.

    Args:
        obj: hostess PythonObject Message.

    Returns:
        object deserialized from `obj`.

    Raises:
        NotImplementedError: if `obj` is marked as compressed.
    """
    if enum(obj, "compression") != "uncompressed":
        # TODO: handle inline compression
        raise NotImplementedError
    serialization = enum(obj, "serialization")
    if serialization == "json":
        return json.loads(obj.value)
    if serialization == "dill":
        # NOTE: like pickle, dill can execute arbitrary code while loading;
        # only unpack Messages from senders you trust.
        return dill.loads(obj.value)
    if obj.arrspec:
        # ndarray: arrspec is the repr of a dict giving dtype and shape
        spec = literal_eval(obj.arrspec)
        dtype = spec['dtype']
        if dtype.startswith("["):
            # structured dtypes are stored as the repr of a list of fields
            dtype = literal_eval(dtype)
        flat = np.frombuffer(obj.value, dtype=dtype)
        return flat.reshape(spec['shape'])
    if not obj.scanf:
        # no decoding instructions at all: hand back the raw value
        return obj.value
    items = struct.unpack(obj.scanf, obj.value)
    if any(isinstance(item, bytes) for item in items):
        # byte strings may stand in for str or None; chartype says which
        chartype = enum(obj, "chartype")
        if chartype == "str":
            items = tuple(item.decode("utf-8") for item in items)
        elif chartype == "nonetype":
            items = [None for _ in items]
    # a lone element is returned bare rather than in a 1-element sequence
    if len(items) > 1:
        return items
    return items[0]

station.proto_utils

utilities for dealing with the protobuf format. Not intended for hostess-specific messages -- these are more generic utilities.

PROTO_TYPES = MPt({(getattr(FieldDescriptor, k)): (k.replace('TYPE_', '')) for k in (dir(FieldDescriptor)) if k.startswith('TYPE')}) module-attribute

mapping from protobuf type codes to types

m2d = google.protobuf.json_format.MessageToDict module-attribute

alias for google.protobuf.json_format.MessageToDict

enum(message, field)

get the string or int value of an enum field in a protobuf Message. (If you directly access the field with the Python API, you will get the enum key instead of its value, which is generally less useful.)

Parameters:

Name Type Description Default
message Message

protobuf Message containing an enum field

required
field str

name of enum field

required

Returns:

Type Description
Union[str, int]

name of the enum value currently set in field. Raises KeyError if field is not a field of message, and TypeError if field is not an enum.

Source code in hostess/station/proto_utils.py
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
def enum(message: Message, field: str) -> Union[str, int]:
    """
    get the name of the value currently set in an enum field of a protobuf
    Message. (If you directly access the field with the Python API, you will
    get the enum's number instead of its name, which is generally less
    useful.)

    Args:
        message: protobuf Message containing an enum field
        field: name of enum field

    Returns:
        name of the enum value `field` is set to in `message`.

    Raises:
        TypeError: if `field` exists in `message` but is not an enum.
        KeyError: if `message` has no field named `field`.
    """
    for desc in message.DESCRIPTOR.fields:
        if desc.name != field:
            continue
        enum_type = getattr(desc, "enum_type", None)
        if enum_type is None:
            # field descriptors for non-enum fields carry no enum type
            raise TypeError(f"{field} is not an enum")
        # translate the number stored on the message into its symbolic name
        return enum_type.values_by_number[getattr(message, field)].name
    raise KeyError(f"{field} is not a field of message")

make_duration(delta)

create a Duration Message from a float or a dt.timedelta object.

Parameters:

Name Type Description Default
delta Union[timedelta, float]

total duration -- if a float, always represents seconds.

required

Returns:

Type Description
Duration

a protobuf Duration Message specifying the same timespan as delta

Source code in hostess/station/proto_utils.py
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
def make_duration(delta: Union[dt.timedelta, float]) -> Duration:
    """
    create a Duration Message from a number of seconds or a dt.timedelta
    object.

    Args:
        delta: total duration -- if a float (or int), always represents
            seconds. Fractional seconds are kept to microsecond resolution.

    Returns:
         a protobuf Duration Message specifying the same timespan as `delta`
    """
    duration = Duration()
    if isinstance(delta, (int, float)):
        # Duration.FromSeconds() stores its argument in the integer
        # `seconds` field and zeroes `nanos`, so it cannot represent
        # fractional seconds. Going through timedelta keeps them and
        # lets plain ints work too.
        delta = dt.timedelta(seconds=delta)
    duration.FromTimedelta(delta)
    return duration

make_timestamp(datetime=None)

create a Timestamp Message from either the current time or a dt.datetime object.

Parameters:

Name Type Description Default
datetime Optional[datetime]

if None, make a Timestamp from the current time. if a dt.datetime, make a Timestamp from it.

None

Returns:

Type Description
Timestamp

protobuf Timestamp Message.

Source code in hostess/station/proto_utils.py
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
def make_timestamp(datetime: Optional[dt.datetime] = None) -> Timestamp:
    """
    build a protobuf Timestamp Message, either for "now" or for a specific
    dt.datetime.

    Args:
        datetime: moment the Timestamp should represent. If None (the
            default), the Timestamp is set to the current time.

    Returns:
        protobuf Timestamp Message.
    """
    stamp = Timestamp()
    if datetime is not None:
        stamp.FromDatetime(datetime)
        return stamp
    stamp.GetCurrentTime()
    return stamp

proto_formatdict(proto)

return a (possibly nested) dict showing the legal fields of a protobuf message or message type.

Parameters:

Name Type Description Default
proto Union[Message, Descriptor]

protobuf Message or Descriptor whose format to describe

required

Returns:

Type Description
dict[str, Union[dict, str]]

dict whose keys are field names and whose values are protobuf data types or nested dicts (representing child Messages) of the same format.

Source code in hostess/station/proto_utils.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def proto_formatdict(
    proto: Union[Message, Descriptor]
) -> dict[str, Union[dict, str]]:
    """
    describe the legal fields of a protobuf message or message type as a
    (possibly nested) dict.

    Args:
        proto: protobuf Message or Descriptor whose format to describe

    Returns:
        dict mapping each field name to the name of its protobuf data type
            (as given by `PROTO_TYPES`), or, for fields that are themselves
            Messages, to a nested dict of the same format.
    """
    # only descriptors have `fields_by_name`; Messages point to theirs
    is_descriptor = hasattr(proto, "fields_by_name")
    descriptor = proto if is_descriptor else proto.DESCRIPTOR
    formatdict = {}
    for name, field in descriptor.fields_by_name.items():
        ptype = PROTO_TYPES[field.type]
        if ptype == "MESSAGE":
            # child Message: recurse into its own descriptor
            # TODO: get enumeration values
            formatdict[name] = proto_formatdict(field.message_type)
        else:
            formatdict[name] = ptype
    return formatdict

station.station

DelegateContext module-attribute

code denoting the relationship between a Delegate's execution context and its supervising Station's interpreter process. "local" means that the Delegate is (or will be) running in separate threads of the same process; "subprocess" means a child process of the Station's interpreter process; "daemon" means a double-forked, fully-disowned and separate process.

Station

Bases: Node

central control node for hostess network. can receive Updates from and send Instructions to Delegates.

Source code in hostess/station/station.py
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
class Station(bases.Node):
    """
    central control node for hostess network. can receive Updates from and
    send Instructions to Delegates.
    """

    def __init__(
        self,
        host: str,
        port: int,
        name: str = "station",
        n_threads: int = 8,
        max_inbox_mb: float = 250,
        logdir: Path = Path(__file__).parent / ".nodelogs",
        _is_process_owner: bool = False,
        **kwargs: Union[
            bool,
            tuple[Union[type[bases.Sensor], type[bases.Actor]]],
            float,
            int,
        ],
    ):
        """
        Args:
            host: hostname or address for Station. usually should be your
                external IP address for remote connections and 'localhost'
                for local use.
            port: port this Station's TCP server will listen on.
            name: name for Station.
            n_threads: how many threads should the Station use?
            max_inbox_mb: how large can the Station's inbox get, in MB, before
                it dumps older Messages?
            logdir: where should the Station write logs?
            _is_process_owner: if True, the Station will attempt to stop the
                Python interpreter when it shuts down.
            **kwargs: additional kwargs for the Node constructor (see Node
                documentation for valid options).
        """
        # a Station must always be able to receive Messages
        super().__init__(
            host=host,
            port=port,
            name=name,
            n_threads=n_threads,
            can_receive=True,
            logdir=logdir,
            **kwargs,
        )
        self.max_inbox_mb = max_inbox_mb
        # bookkeeping containers, all empty at startup
        self.events = []
        self.delegates = []
        self.relaunched = []
        self.tasks = {}
        # one outgoing Mailbox per delegate name, created on first access
        self.outboxes = defaultdict(Mailbox)
        self.tendtime, self.reset_tend = timeout_factory(False)
        self.last_handler = None
        self.__is_process_owner = _is_process_owner
        self._log("completed initialization", category="system")

    def _set_logfile(self):
        """
        set `self.logfile` to a standardized file name inside `self.logdir`,
        built from this Station's init time, host, and port.
        """
        logdir = self.logdir
        filename = f"{self.init_time}_station_{self.host}_{self.port}.log"
        self.logfile = Path(logdir, filename)

    def set_delegate_properties(self, delegate: str, **propvals: Any):
        """
        Queue a 'configure' Instruction telling a Delegate to assign specific
            values to named properties of itself. The Instruction is placed
            in this Station's outbox for that Delegate.

        Args:
            delegate: name of delegate to configure.
            propvals: argument names correspond to property names of target
                Delegate; argument values will be serialized as PythonObject
                Messages and then bundled into ConfigParam Messages.

        Raises:
            TypeError: if no properties are given.
        """
        # TODO: update delegate info record if relevant
        if not propvals:
            raise TypeError("can't send a no-op config instruction")
        params = []
        for prop, val in propvals.items():
            # the property name travels as the packed object's name
            packed = pack_obj(val, prop)
            params.append(
                pro.ConfigParam(paramtype="config_property", value=packed)
            )
        outbox = self.outboxes[delegate]
        outbox.append(make_instruction("configure", config=params))

    def set_delegate_config(self, delegate: str, config: Mapping):
        """
        Queue a 'configure' Instruction telling a Delegate to merge a config
            dict into its existing config dict. The Instruction is placed in
            this Station's outbox for that Delegate.

        Args:
            delegate: name of Delegate to configure
            config: configuration to add to the Delegate's config dict.
        """
        # the whole mapping is shipped as a single packed object
        param = pro.ConfigParam(
            paramtype="config_dict", value=pack_obj(config)
        )
        outbox = self.outboxes[delegate]
        outbox.append(make_instruction("configure", config=param))

    def queue_task(self, delegate: str, instruction: pro.Instruction):
        """
        put an Instruction in a Delegate's outbox and start a tracking record
        for it in `self.tasks`, keyed by the Instruction's id. Meant for "do"
        Instructions that contain Actions, not for Instructions we do not
        want to track in the same way (like config). The default
        InstructionFromInfo Actor uses this method to queue the Instructions
        it makes.

        Args:
            delegate: name of Delegate for which to queue task
            instruction: Instruction Message

        Raises:
            NotImplementedError: if `instruction` has a "pipe" field set.
            ValueError: if `instruction` is not of type "do".
        """
        if instruction.HasField("pipe"):
            raise NotImplementedError("multipart pipelines not implemented")
        if enum(instruction, "type") != "do":
            raise ValueError("task instructions must have type 'do'")
        action = instruction.action
        # times and exception are filled in later as Updates arrive
        record = {
            "init_time": instruction.time.ToDatetime(dt.timezone.utc),
            "sent_time": None,
            "ack_time": None,
            "status": "queued",
            "delegate": delegate,
            "name": action.name,
            "action_id": action.id,
            "description": dict(action.description),
            "exception": None,
        }
        self.tasks[instruction.id] = record
        self.outboxes[delegate].append(instruction)

    def shutdown_delegate(
        self, delegate: str, how: Literal["stop", "kill"] = "stop"
    ):
        """
        queue a shutdown Instruction in the outbox for the specified
        Delegate.

        Args:
            delegate: name of Delegate we would like to shut down
            how: "stop" if we would like the Delegate to shut down gracefully;
                "kill" if we would like the Delegate to stop immediately no
                matter what. Used directly as the Instruction type.
        """
        outbox = self.outboxes[delegate]
        outbox.append(make_instruction(how))

    # TODO, maybe: signatures on these match-and-execute things are getting
    #  a little weird and specialized. maybe that's ok, but maybe we should
    #  make a more unified interface.
    def match_and_execute(self, obj: Any, category: str):
        """
        Ask `self.match()` which of our Actors in `category` can handle `obj`,
        then pass `obj` to the `execute()` method of each one that can.
        Failures in matching or in an individual Actor are logged rather than
        raised, so one bad Actor does not prevent the others from running.

        In a typical Station application, `obj` will be something a Delegate
        packed into a "completion" or "info" Update. Its type will be entirely
        application-dependent.

        Args:
            obj: object one of our Actors might be able to work with
            category: what category of Actor might be able to work with `obj`?
        """
        try:
            matched = self.match(obj, category)
        except bases.NoActorForEvent:
            # nothing wants this object
            # TODO: _plausibly_ log this?
            return
        except (AttributeError, KeyError) as crash:
            self._log("match crash", **exc_report(crash), category=category)
            return
        matched_names = [actor.name for actor in matched]
        self._log(obj, category=category, matches=matched_names)
        for actor in matched:
            try:
                actor.execute(self, obj)
            except NoMatchingDelegate:
                self._log(
                    "no delegate for action", actor=actor, category=category
                )
            except Exception as failure:
                # any other Actor failure is logged; remaining Actors still
                # get their turn
                self._log(
                    "execution failure",
                    actor=actor,
                    category=category,
                    exception=failure,
                )

    def _handle_info(self, message: pro.Update):
        """
        Handler for 'info' Updates. Unpack every "note" (serialized Python
        object + metadata) bundled in the Update's info field.

        If `message` is an exit report (its reason is "exiting"), log the
        notes along with the Delegate's name and reported status, and do
        nothing else. Otherwise, match each note against our "info" Actors
        and execute every Actor that matches it.

        Args:
            message: Update Message from a Delegate.
        """
        notes = gmap(unpack_obj, message.info)
        if enum(message, "reason") != "exiting":
            for note in notes:
                self.match_and_execute(note, "info")
            return
        # exit reports are logged only, never dispatched to Actors
        self._log(
            "received exit report",
            delname=message.delegateid.name,
            reason=enum(message.state, "status"),
            exception=notes,
        )
    def _handle_report(self, update: pro.Update):
        """
        Handler for "completion" Updates received from Delegates. Perform
        action tracking and cleanup, and execute any appropriate follow-on
        actions.

        Updates without a "completed" field are ignored. Otherwise the Update
        is logged, the matching record in `self.tasks` (if any) is updated
        with the reported status and timing, and the unpacked action result
        is passed to our "completion" Actors.

        Args:
            update: 'completion' Update from Delegate.

        Raises:
            NotImplementedError: if the completed report contains steps.
        """
        if not update.HasField("completed"):
            return
        if len(update.completed.steps) > 0:
            raise NotImplementedError
        self._log(update, category="report")
        if not update.completed.HasField("action"):
            # TODO: an unusual case. maybe log
            return
        completed = unpack_message(update.completed)
        try:
            task = self.tasks[completed["instruction_id"]]
            task["status"] = completed["action"]["status"]
            task["start_time"] = completed["action"]["time"]["start"]
            task["end_time"] = completed["action"]["time"]["end"]
            task["duration"] = completed["action"]["time"]["duration"]
            # NOTE(review): within the code visible here, task["exception"]
            # is only ever initialized to None (in queue_task), so this
            # branch may never run; possibly the exception was meant to be
            # read from the completed report -- confirm.
            if task["exception"] is not None:
                task["exception"] = exc_report(task["exception"])
        except KeyError:
            # reached for an untracked instruction id, but also for any key
            # missing from the report, in which case the task record may be
            # left partially updated.
            # TODO: an undesirable case, log
            pass
        # Actors get the result whether or not the task was being tracked
        obj = unpack_obj(update.completed.action.result)
        self.match_and_execute(obj, "completion")

    def get_delegate(self, name: str) -> dict[str, Any]:
        """
        look up the info structure for the first Delegate in `self.delegates`
        whose "name" is `name` (there should never be more than one unless
        someone has seriously messed with the Station).

        Args:
            name: name of Delegate

        Returns:
            info `dict` for Delegate. If Delegate is running locally,
                this `dict` will include a reference to the Delegate itself.

        Raises:
            IndexError: if no Delegate named `name` is known.
        """
        matching = list(
            filter(lambda info: info["name"] == name, self.delegates)
        )
        return matching[0]

    def _handle_state(self, update: pro.Update):
        """
        update the info dict for one of our Delegates in response to state
        and id elements of an Update Message. If no Delegate with the
        Update's name is known, a blank info dict is created and added to
        `self.delegates` first. Also refreshes the status of any tracked
        tasks whose threads the Delegate reports on.

        Args:
            update: Update Message from Delegate.
        """
        try:
            delegate = self.get_delegate(update.delegateid.name)
        # TODO: how do we handle mystery-appearing delegates with dupe names
        except IndexError:
            # NOTE(review): nothing below assigns delegate["name"], so unless
            # blank_delegateinfo() or other code sets it, later Updates from
            # this Delegate would not find this record by name -- confirm.
            delegate = blank_delegateinfo()
            self.delegates.append(delegate)
        # from here on, `update` is the unpacked (dict) form of the Message
        update = unpack_message(update)
        if delegate["reported_status"] == "no_report":
            self._log(
                "first message from delegate",
                delname=delegate["name"],
                category="comms",
                direction="recv",
            )
        # overwrite the stored record in place with the freshly reported state
        delegate |= {
            "last_seen": dt.datetime.fromisoformat(update["time"]),
            "wait_time": 0,
            "interface": update["state"]["interface"],
            "cdict": update["state"]["cdict"],
            "reported_status": update["state"]["status"],
            "pid": update["delegateid"]["pid"],
            "actors": update["state"].get("actors", []),
            "sensors": update["state"].get("sensors", []),
            "busy": update["state"]["busy"],
            "host": update["delegateid"]["host"],
            "running": update.get("running", []),
            "infocount": update["state"].get("infocount", {}),
            "init_params": update["state"]["init_params"],
        }
        # threads named "Instruction_<id>" correspond to tracked tasks
        for name, state in update["state"]["threads"].items():
            try:
                instruction_id = int(name.replace("Instruction_", ""))
                if self.tasks[instruction_id]["status"] not in (
                    "success",
                    "failure",
                    "crash",
                    "timeout",
                ):
                    # don't override formally reported status
                    self.tasks[instruction_id]["status"] = state.lower()
            except (ValueError, KeyError):  # main thread, sensors, etc.
                continue

    def _handle_wilco(self, update: pro.Update):
        """
        handle 'wilco' Update from a Delegate. Delegates send these on receipt
        of Instructions prior to actually performing them. For Updates whose
        reason is "wilco" or "bad_request" and that refer to a tracked task,
        record the acknowledgement time and set the task's status to that
        reason. All other Updates are ignored.

        Args:
            update: 'wilco' Update Message from Delegate.
        """
        # TODO: handle do not understand messages
        reason = enum(update, "reason")
        if reason not in ("wilco", "bad_request"):
            return
        # TODO: handle config acks
        if update.instruction_id not in self.tasks:
            return
        task = self.tasks[update.instruction_id]
        task["ack_time"] = update.time.ToDatetime(dt.timezone.utc)
        task["status"] = reason
        # TODO: behavior in response to bad_request notifications

    def _handle_incoming_message(self, update: pro.Update):
        """
        Top-level dispatcher for Updates from Delegates. Passes the Update,
        in order, to the wilco, state, info, and report handlers; each
        handler decides for itself whether the Update concerns it. An
        exception in one handler is logged and does not stop the others.

        Args:
            update: Update Message from Delegate.
        """
        for op in ("wilco", "state", "info", "report"):
            try:
                handler = getattr(self, f"_handle_{op}")
                handler(update)
            except Exception as failure:
                self._log(
                    "bad message handling",
                    category="comms",
                    direction="recv",
                    exception=failure,
                    op=op,
                )

    @property
    def running_delegates(self) -> list[dict[str, Any]]:
        """
        metadata dicts for all Delegates whose inferred status is not
        "missing", "shutdown", or "crashed".
        """
        not_running = ("missing", "shutdown", "crashed")
        return [
            info
            for info in self.delegates
            if info["inferred_status"] not in not_running
        ]

    @property
    def unfinished_delegates(self) -> list[dict[str, Any]]:
        """
        metadata dicts for Delegates running in local context (this Station's
        process) that still have at least one running thread. A Delegate is
        treated as local if its metadata dict holds the Delegate object
        itself under 'obj'.
        """

        def has_running_thread(info: dict[str, Any]) -> bool:
            return any(t.running() for t in info["obj"].threads.values())

        return [
            info
            for info in self.delegates
            if "obj" in info and has_running_thread(info)
        ]

    def _shutdown(self, exception: Optional[Exception] = None):
        """
        shut down the Station. This method should normally be called only by
        `Station.shutdown()`.

        Steps, in order: record `exception`; replace every outbox with an
        empty Mailbox and drop all Actors and Sensors; call
        `shutdown_delegate(name, "stop")` for every Delegate; wait until no
        Delegate counts as running; wait until no locally-launched Delegate
        has a running thread; set every signal, kill the server, and wait for
        its threads to stop; finally, if this Station owns its process, call
        `sys.exit()`. Each wait gives up (and logs the fact) on TimeoutError
        from the shared `waiting()` timer.

        Args:
            exception: unhandled Exception that caused shutdown, if any.
        """
        # TODO: add some internal logging here when nodes fail to
        #  shut down or respond in a timely fashion
        self.exception = exception
        # clear outbox etc.
        for k in self.outboxes.keys():
            self.outboxes[k] = Mailbox()
        self.actors, self.sensors = {}, {}
        # the outboxes were emptied above, so the 'stop' Instructions queued
        # here are presumably the only things left to send -- verify against
        # shutdown_delegate()
        for delegate in self.delegates:
            self.shutdown_delegate(delegate["name"], "stop")
        waiting, unwait = timeout_factory(timeout=30)
        # make sure every delegate is shut down, timing out at 30s --
        # this will also ensure we get all exit reports from newly-shutdown
        # delegates
        self._check_delegates()
        while len(self.running_delegates) > 0:
            try:
                waiting()
            except TimeoutError:
                self._log(
                    "delegate_shutdown_timeout",
                    category="system",
                    running=[n["name"] for n in self.running_delegates],
                )
                break
            time.sleep(0.1)
            # refresh inferred statuses so running_delegates can shrink
            self._check_delegates()
        # presumably resets the timer for the next wait -- TODO confirm
        # against timeout_factory()
        unwait()
        # ensure local delegate threads are totally shut down
        while len(self.unfinished_delegates) > 0:
            time.sleep(0.1)
            try:
                waiting()
            except TimeoutError:
                self._log(
                    "local_delegate_thread_shutdown_timeout",
                    category="system",
                    running=[n["name"] for n in self.unfinished_delegates],
                )
                break
        # NOTE(review): unwait() is not called between the loop above and the
        # server-thread loop below, so the two appear to share one timer; if
        # the loop above timed out, the loop below may time out on its first
        # pass. confirm this is intended.
        # shut down the server etc.
        # TODO: this is a little messy because of the discrepancy in thread
        #  and signal names. maybe unify this somehow.
        for k in self.signals:
            self.signals[k] = 1
        self.server.kill()
        while any(t.running() for t in self.server.threads.values()):
            try:
                waiting()
            except TimeoutError:
                self._log("self_server_shutdown_timeout", category="system")
                break
            time.sleep(0.1)
        # only a Station constructed with _is_process_owner=True exits the
        # interpreter on shutdown
        if self.__is_process_owner:
            self._log("shutdown complete, exiting process", category="system")
            sys.exit()

    def _check_delegates(self):
        """
        Update time-based elements of metadata dicts for our Delegates.
        Crucial step of Station's main loop. Should only be called by
        `Station._main_loop()`.
        """
        now = dt.datetime.now(tz=dt.timezone.utc)
        for n in self.delegates:
            # shared kwargs for those status changes we want to log
            lkwargs = {
                "event": "delegate_status",
                "category": "system",
                "name": n["name"],
            }
            if n["reported_status"] in ("shutdown", "crashed"):
                if n["reported_status"] != n["inferred_status"]:
                    self._log(status=n["reported_status"], **lkwargs)
                n["inferred_status"] = n["reported_status"]
                continue
            if n["reported_status"] == "no_report":
                n["wait_time"] = (now - n["init_time"]).total_seconds()
            else:
                n["wait_time"] = (now - n["last_seen"]).total_seconds()
            # adding 5 seconds here as grace for network lag spikes
            if n["wait_time"] > 10 * n["update_interval"] + 5:
                if n["inferred_status"] != "missing":
                    self._log(status="missing", **lkwargs)
                n["inferred_status"] = "missing"
            elif n["wait_time"] > 3 * n["update_interval"]:
                # don't care about logging delays
                n["inferred_status"] = "delayed"
            else:
                n["inferred_status"] = n["reported_status"]
            # TODO: trigger some behavior

    def _main_loop(self):
        """
        launch main loop for Station. This should only be called by
        `Station._start()`, which should only be called by `Station.start()`.
        """
        while self.signals.get("main") is None:
            self._check_delegates()
            if self.tendtime() > self.poll * 8:
                crashed_threads = self.server.tend()
                if len(crashed_threads) > 0:
                    self._log(crashed_threads, category="server_errors")
                    self.threads |= self.server.threads
                self.inbox.prune(self.max_inbox_mb)
                self.reset_tend()
            time.sleep(self.poll)

    def _record_message(self, box: Mailbox, msg: Msg, pos: int):
        """
        helper function for `Station._ackcheck()`. write log entry for sent
        Message and record it as sent.

        Updates the timestamp of the wrapped Message, replaces the entry at
        `box[pos]` with a new Msg built from that Message, logs the Message
        as an outgoing comm, and flags the new entry as sent.

        Args:
            box: outbox for Delegate we sent Message in msg to
            msg: wrapper for Message we sent to delegate
            pos: index of msg within box
        """
        # make new Msg object w/updated timestamp.
        # this is weird-looking, but, by intent, Msg object cached
        # properties are essentially immutable wrt the underlying message
        update_instruction_timestamp(msg.message)
        box[pos] = Msg(msg.message)
        # everything from here on goes through box[pos], i.e. the replacement
        # entry, not the `msg` object we were passed
        self._log(box[pos].message, category="comms", direction="send")
        box[pos].sent = True

    def _select_outgoing_message(
        self, delegate: str
    ) -> tuple[Optional[Mailbox], Optional[pro.Instruction], Optional[int]]:
        """
        Helper function for `Station._ackcheck()`. Choose which queued
        Instruction, if any, to send in reply to an Update from a Delegate.

        Only unsent entries of the Delegate's outbox are considered. Shutdown
        Instructions ('kill', then 'stop') take precedence over 'configure'
        Instructions, which take precedence over everything else (a 'do'
        Instruction might need correct config to work). If no unsent entry
        has one of those types, the first unsent entry is chosen. The chosen
        entry is flagged as sent before it is returned.

        Args:
            delegate: name of Delegate we're talking to.

        Returns:
            * outbox for `delegate`, or None if it holds nothing unsent
            * chosen outbox entry, or None if there is nothing to send
            * index of the chosen entry within the outbox, or None if there
                is nothing to send
        """
        # TODO, probably: send more than one Instruction when available.
        #  we might want a special control code for that.
        box = self.outboxes[delegate]
        # TODO, maybe: this search will be expensive if outboxes get really big
        #  -- might want some kind of hashing
        indexed = tuple(enumerate(box))
        unsent = tuple(entry for entry in indexed if entry[1].sent is False)
        if len(unsent) == 0:
            return None, None, None  # this will trigger an empty ack message
        pos, msg = None, None
        # try each high-priority type in turn; stop at the first one for
        # which the outbox holds an unsent entry
        for wanted in ("kill", "stop", "configure"):
            try:
                pos, msg = filtern(lambda pm: pm[1].type == wanted, unsent)
            except StopIteration:
                continue
            break
        if msg is None:
            # nothing high-priority is waiting: fall back to queue order
            pos, msg = unsent[0]
        msg.sent = True
        return box, msg, pos

    # TODO: is this always a Msg or sometimes a Message?
    def _update_task_record(self, msg: Msg):
        """
        helper function for _ackcheck(). Mark the task record matching a 'do'
        Instruction as sent, stamping it with the current UTC time.

        Args:
            msg: wrapper for 'do' Instruction we just sent to a Delegate.

        Raises:
            KeyError: if we hold no task record under `msg.id`.
        """
        record = self.tasks[msg.id]
        record.update(
            sent_time=dt.datetime.now(tz=dt.timezone.utc), status="sent"
        )

    def _ackcheck(self, _conn: socket.socket, comm: dict) -> tuple[bytes, str]:
        """
        callback for interpreting comms and responding as appropriate.
        should only be called inline of the `ack()` method of the Station's
        `server` attribute (a `talkie.TCPTalk` object).

        Sets `self.locked` for the duration of the call and always clears it
        on the way out. Handles, in order: 'situation' requests; comms that
        failed to decode; comms whose body has no readable delegate name; and
        Updates from Delegates, which are passed to
        `_handle_incoming_message()` and then answered with either a queued
        Instruction or an empty ack. Any Exception raised along the way is
        logged and answered with a failure response rather than propagated.

        Args:
            _conn: open `socket.socket` object
            comm: decoded hostess comm as produced by `read_comm()`

        Returns:
            response: response to comm
            description: loggable description of response type / status
        """
        # TODO: lockout might be too strict
        msg, self.locked = None, True
        try:
            # 'situation' requests are answered before the decode-error
            # check and never reach the Update-handling code
            if comm["body"] == b"situation":
                if self.state in ("shutdown", "crashed"):
                    return make_comm(b"shutting down"), "sent shutdown notice"
                return self._situation_comm(), "sent situation"
            if comm["err"]:
                self._log(
                    "failed to decode",
                    category="comms",
                    direction="recv",
                    conn=_conn,
                )
                return make_comm(b"bad decode"), "notified sender bad decode"
            incoming = comm["body"]
            # anything that doesn't look like an Update with a delegate id
            # is rejected here
            try:
                delegatename = incoming.delegateid.name
            except (AttributeError, ValueError):
                self._log("bad request", error="bad request", category="comms")
                return make_comm(b"bad request"), "notified sender bad request"
            # interpret the comm here in case we want to immediately send a
            # response based on its contents (e.g., in a gPhoton 2-like
            # pipeline that's mostly coordinating execution of a big list of
            # non-serial processes, we would want to immediately send
            # another task to any Delegate that tells us it's finished one)
            self._handle_incoming_message(comm["body"])
            # a Delegate that is on its way out gets an empty ack and no
            # further Instructions
            if enum(incoming.state, "status") in ("shutdown", "crashed"):
                return make_comm(b""), "send ack to terminating delegate"
            # if we have any Instructions for the Delegate -- including ones
            # that might have been added to the outbox in the
            # _handle_incoming_message() workflow -- pick one to send
            box, msg, pos = self._select_outgoing_message(delegatename)
            # ...and if we don't have any, send empty ack comm
            if msg is None:
                return make_comm(b""), "sent ack"
            # log message and, if relevant, update task queue
            # TODO, maybe: logging and updating task queue here is much less
            #  complicated, but has the downside that it will occur _before_
            #  we confirm receipt.
            # this type should have been validated earlier in queue_task
            # NOTE(review): _select_outgoing_message() reads this value as
            # `.type`; confirm the object also supports item access
            if msg["type"] == "do":
                self._update_task_record(msg)
            # _record_message() replaces box[pos], so the response is built
            # from box[pos] rather than from `msg`
            self._record_message(box, msg, pos)
            return box[pos].comm, f"sent instruction {box[pos].id}"
        except Exception as ex:
            self._log(exc_report(ex), category="comms")
            # NOTE(review): unlike every other response above, these bytes
            # are not passed through make_comm() -- confirm that is intended
            return b"response failure", "failed to respond"
        finally:
            self.locked = False

    @staticmethod
    def _launch_delegate_in_subprocess(
        context: DelegateContext, kwargs: dict[str, Any]
    ):
        """
        Start a Delegate outside this interpreter, either as a child process
        or as a daemonized one. Should only be called by
        `Station.launch_delegate()`.

        Args:
            context: how to launch the delegate. Should only be 'daemon' or
                'subprocess'; if 'local', we should never have gotten here.
            kwargs: kwargs for launch, passed to `launch_delegate()` via
                `generic_python_endpoint()`

        Raises:
            ValueError: if `context` is neither 'daemon' nor 'subprocess'.
        """
        endpoint = generic_python_endpoint(
            "hostess.station.delegates",
            "launch_delegate",
            payload=kwargs,
            splat="**",
            return_result=True,
        )
        # RunCommand options that distinguish the supported contexts
        run_options = {
            "daemon": {"_disown": True},
            "subprocess": {"_asynchronous": True},
        }
        if context not in run_options:
            raise ValueError(
                f"unsupported context {context}. Supported contexts for this "
                f"function are 'daemon' and 'subprocess'"
            )
        RunCommand(endpoint, **run_options[context])()

    def launch_delegate(
        self,
        name: str,
        elements: Sequence[tuple[str, str]] = (),
        host: str = "localhost",
        update_interval: float = 0.25,
        context: DelegateContext = "daemon",
        **kwargs: Union[int, float],
    ) -> Optional[bases.Node]:
        """
        Launch a Delegate (daemonized unless told otherwise), create a
        metadata dict for it, and start tracking it so that we are ready to
        receive Messages from it.

        If the launch itself raises, the failure is logged, the Delegate is
        not added to our list of Delegates, and None is returned.

        Args:
            name: name to assign to Delegate.
            elements: sequence of (module_name, class_name) describing Actors
                and Sensors Delegate should construct and attach to itself at
                launch.
            host: hostname or ip on which Delegate should launch. remote
                launch is not yet implemented.
            update_interval: how frequently the Delegate should send
                unprompted 'heartbeat' Updates to this Station.
            context: where to launch the Delegate in relation to this Station's
                interpreter process: "local" to run threaded in the same
                process; "subprocess" to run in a child process; "daemon" to
                run in a fully-detached process.
            kwargs: additional kwargs to pass to
                `hostess.station.delegates.launch_delegate()`. These take
                precedence over the values this method prepares itself.

        Returns:
            None if `context` == "subprocess" or "daemon"; the launched
                Delegate if `context` == "local".

        Raises:
            ValueError: if this Station has not been started, or if we
                already have a Delegate named `name`.
            NotImplementedError: if `host` is anything but "localhost".
        """
        if not self._Node__started:
            raise ValueError(
                "cannot launch delegates from an unstarted Station"
            )
        # TODO: option to specify remote host and run this using SSH (update
        #  relaunch_delegate as well when adding this feature)
        if host != "localhost":
            raise NotImplementedError
        if any(known["name"] == name for known in self.delegates):
            raise ValueError("can't launch a delegate with a duplicate name")
        loginfo = {
            # must pass logdir as a string -- delegate is not
            # initialized yet, so this is inserted directly into
            # generated source code
            "logdir": str(self.logdir),
            "init_time": self.init_time,
        }
        launch_kwargs = {
            "station_address": (self.host, self.port),
            "name": name,
            "elements": elements,
            "update_interval": update_interval,
            "loginfo": loginfo,
        } | kwargs
        delegateinfo = blank_delegateinfo() | {
            "name": name,
            "inferred_status": "initializing",
            "update_interval": update_interval,
        }
        # kwargs for logging launch
        log_kwargs = {
            "delname": name,
            "elements": elements,
            "category": "system",
        }
        self._log("init delegate launch", **log_kwargs)
        try:
            if context == "local":
                # mostly for debugging / dev purposes
                from hostess.station.delegates import launch_delegate

                launched = launch_delegate(is_local=True, **launch_kwargs)
                # keep the Delegate object itself so we can watch its threads
                delegateinfo["obj"] = launched
            else:
                launched = self._launch_delegate_in_subprocess(
                    context, launch_kwargs
                )
            self.delegates.append(delegateinfo)
            self._log("launched delegate", **log_kwargs)
            return launched
        except Exception as ex:
            self._log("delegate launch fail", **log_kwargs, **exc_report(ex))

    def relaunch_delegate(self, name: str):
        """
        Relaunch one of our Delegates with the initialization settings
        recorded in its metadata dict (its full runtime configuration is not
        restored). A Delegate that is not already shut down, crashed, or
        missing is first told to stop, and we wait up to 20 seconds for it to
        do so before relaunching regardless.

        The old metadata dict is moved from `self.delegates` to
        `self.relaunched`.

        Args:
            name: name of Delegate to relaunch.
        """
        delegate = self.get_delegate(name)
        finished = ("shutdown", "crashed")
        status = delegate["inferred_status"]
        if status == "missing":
            pass  # TODO: os level kill
        elif status not in finished:
            self.shutdown_delegate(name, "stop")
            waiting, unwait = timeout_factory(timeout=20)
            self._check_delegates()
            while self.get_delegate(name)["inferred_status"] not in finished:
                try:
                    waiting()
                except TimeoutError:
                    break
                time.sleep(0.1)
                self._check_delegates()
            unwait()
        # each recorded element is a dotted path whose last component is the
        # class name; split it back into the (module, class) pair that
        # launch_delegate() expects
        class_paths = dict(delegate["actors"]) | dict(delegate["sensors"])
        elements = tuple(
            (parts[0], parts[-1])
            for parts in (path.rsplit(".", 1) for path in class_paths.values())
        )
        host = "localhost"
        # TODO: add remote host detection/relaunch capability
        owns_process = delegate["init_params"]["_is_process_owner"]
        # TODO: how do you know if it's "daemon" vs "subprocess"?
        context = "daemon" if owns_process else "local"
        self.delegates.remove(delegate)
        self.relaunched.append(delegate)
        self.launch_delegate(
            name,
            elements,
            host=host,
            context=context,
            **delegate["init_params"],
        )

    def save_port_to_shared_memory(self, address: Optional[str] = None):
        """
        Publish this Station's port number at a shared memory address so that
        other applications can query or monitor it. In particular, this lets
        the hostess `situation` app find this Station automatically.

        The port is written under the name "<address>-port-report".

        Args:
            address: shared memory address to write port to. Exactly what this
                means depends on the operating environment. In CPython on
                Linux, it denotes a filename in /dev/shm. if not specified,
                defaults to the name of this Station.
        """
        from dustgoggles.codex.implements import Sticky
        from dustgoggles.codex.memutilz import (
            deactivate_shared_memory_resource_tracker,
        )

        deactivate_shared_memory_resource_tracker()
        if address is None:
            address = self.name
        report_address = f"{address}-port-report"
        Sticky.note(self.port, address=report_address, cleanup_on_exit=True)

    def _situation_comm(self) -> bytes:
        """
        Build a hostess comm describing this Station's overall situation:
        the output of `situation_of()` for this Station, packed with
        `pack_obj()` and wrapped with `make_comm()`. The hostess `situation`
        app works by constructing human-readable representations of these
        comms.
        """
        from hostess.station.situation.response_organizers import situation_of

        situation = situation_of(self)
        packed = pack_obj(situation)
        return make_comm(packed)
running_delegates property

get metadata dicts for all still-running Delegates.

unfinished_delegates property

get metadata dicts for all still-running Delegates executing in local context (this Station's process).

__init__(host, port, name='station', n_threads=8, max_inbox_mb=250, logdir=Path(__file__).parent / '.nodelogs', _is_process_owner=False, **kwargs)

Parameters:

Name Type Description Default
host str

hostname or address for Station. usually should be your external IP address for remote connections and 'localhost' for local use.

required
port int

port this Station's TCP server will listen on.

required
name str

name for Station.

'station'
n_threads int

how many threads should the Station use?

8
max_inbox_mb float

how large can the Station's inbox get, in MB, before it dumps older Messages?

250
logdir Path

where should the Station write logs?

parent / '.nodelogs'
_is_process_owner bool

if True, the Station will attempt to stop the Python interpreter when it shuts down.

False
**kwargs Union[bool, tuple[Union[type[Sensor], type[Actor]]], float, int]

additional kwargs for the Node constructor (see Node documentation for valid options).

{}
Source code in hostess/station/station.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
def __init__(
    self,
    host: str,
    port: int,
    name: str = "station",
    n_threads: int = 8,
    max_inbox_mb: float = 250,
    logdir: Path = Path(__file__).parent / ".nodelogs",
    _is_process_owner: bool = False,
    **kwargs: Union[
        bool,
        tuple[Union[type[bases.Sensor], type[bases.Actor]]],
        float,
        int,
    ],
):
    """
    Set up a Station: initialize it as a Node that can receive comms, then
    create its empty bookkeeping structures (events, Delegate metadata,
    relaunch history, task records, per-Delegate outboxes) and its tend
    timer.

    Args:
        host: hostname or address for Station. usually should be your
            external IP address for remote connections and 'localhost'
            for local use.
        port: port this Station's TCP server will listen on.
        name: name for Station.
        n_threads: how many threads should the Station use?
        max_inbox_mb: how large can the Station's inbox get, in MB, before
            it dumps older Messages?
        logdir: where should the Station write logs?
        _is_process_owner: if True, the Station will attempt to stop the
            Python interpreter when it shuts down.
        **kwargs: additional kwargs for the Node constructor (see Node
            documentation for valid options).
    """
    super().__init__(
        host=host,
        port=port,
        name=name,
        n_threads=n_threads,
        can_receive=True,
        logdir=logdir,
        **kwargs,
    )
    self.max_inbox_mb = max_inbox_mb
    self.events = []
    self.delegates = []
    self.relaunched = []
    self.tasks = {}
    # looking up a Delegate name we have not seen yet creates an empty
    # Mailbox for it
    self.outboxes = defaultdict(Mailbox)
    self.tendtime, self.reset_tend = timeout_factory(False)
    self.last_handler = None
    self.__is_process_owner = _is_process_owner
    self._log("completed initialization", category="system")
_ackcheck(_conn, comm)

callback for interpreting comms and responding as appropriate. should only be called inline of the ack() method of the Station's server attribute (a talkie.TCPTalk object).

Parameters:

Name Type Description Default
_conn socket

open socket.socket object

required
comm dict

decoded hostess comm as produced by read_comm()

required

Returns:

Name Type Description
response bytes

response to comm

description str

loggable description of response type / status

Source code in hostess/station/station.py
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
def _ackcheck(self, _conn: socket.socket, comm: dict) -> tuple[bytes, str]:
    """
    callback for interpreting comms and responding as appropriate.
    should only be called inline of the `ack()` method of the Station's
    `server` attribute (a `talkie.TCPTalk` object).

    Sets `self.locked` for the duration of the call and always clears it
    on the way out. Handles, in order: 'situation' requests; comms that
    failed to decode; comms whose body has no readable delegate name; and
    Updates from Delegates, which are passed to
    `_handle_incoming_message()` and then answered with either a queued
    Instruction or an empty ack. Any Exception raised along the way is
    logged and answered with a failure response rather than propagated.

    Args:
        _conn: open `socket.socket` object
        comm: decoded hostess comm as produced by `read_comm()`

    Returns:
        response: response to comm
        description: loggable description of response type / status
    """
    # TODO: lockout might be too strict
    msg, self.locked = None, True
    try:
        # 'situation' requests are answered before the decode-error check
        # and never reach the Update-handling code
        if comm["body"] == b"situation":
            if self.state in ("shutdown", "crashed"):
                return make_comm(b"shutting down"), "sent shutdown notice"
            return self._situation_comm(), "sent situation"
        if comm["err"]:
            self._log(
                "failed to decode",
                category="comms",
                direction="recv",
                conn=_conn,
            )
            return make_comm(b"bad decode"), "notified sender bad decode"
        incoming = comm["body"]
        # anything that doesn't look like an Update with a delegate id is
        # rejected here
        try:
            delegatename = incoming.delegateid.name
        except (AttributeError, ValueError):
            self._log("bad request", error="bad request", category="comms")
            return make_comm(b"bad request"), "notified sender bad request"
        # interpret the comm here in case we want to immediately send a
        # response based on its contents (e.g., in a gPhoton 2-like
        # pipeline that's mostly coordinating execution of a big list of
        # non-serial processes, we would want to immediately send
        # another task to any Delegate that tells us it's finished one)
        self._handle_incoming_message(comm["body"])
        # a Delegate that is on its way out gets an empty ack and no
        # further Instructions
        if enum(incoming.state, "status") in ("shutdown", "crashed"):
            return make_comm(b""), "send ack to terminating delegate"
        # if we have any Instructions for the Delegate -- including ones
        # that might have been added to the outbox in the
        # _handle_incoming_message() workflow -- pick one to send
        box, msg, pos = self._select_outgoing_message(delegatename)
        # ...and if we don't have any, send empty ack comm
        if msg is None:
            return make_comm(b""), "sent ack"
        # log message and, if relevant, update task queue
        # TODO, maybe: logging and updating task queue here is much less
        #  complicated, but has the downside that it will occur _before_
        #  we confirm receipt.
        # this type should have been validated earlier in queue_task
        # NOTE(review): _select_outgoing_message() reads this value as
        # `.type`; confirm the object also supports item access
        if msg["type"] == "do":
            self._update_task_record(msg)
        # _record_message() replaces box[pos], so the response is built
        # from box[pos] rather than from `msg`
        self._record_message(box, msg, pos)
        return box[pos].comm, f"sent instruction {box[pos].id}"
    except Exception as ex:
        self._log(exc_report(ex), category="comms")
        # NOTE(review): unlike every other response above, these bytes are
        # not passed through make_comm() -- confirm that is intended
        return b"response failure", "failed to respond"
    finally:
        self.locked = False
_check_delegates()

Update time-based elements of metadata dicts for our Delegates. Crucial step of Station's main loop. Should only be called by Station._main_loop().

Source code in hostess/station/station.py
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
def _check_delegates(self):
    """
    Update time-based elements of metadata dicts for our Delegates.
    Crucial step of Station's main loop. Should only be called by
    `Station._main_loop()`.
    """
    now = dt.datetime.now(tz=dt.timezone.utc)
    for n in self.delegates:
        # shared kwargs for those status changes we want to log
        lkwargs = {
            "event": "delegate_status",
            "category": "system",
            "name": n["name"],
        }
        if n["reported_status"] in ("shutdown", "crashed"):
            if n["reported_status"] != n["inferred_status"]:
                self._log(status=n["reported_status"], **lkwargs)
            n["inferred_status"] = n["reported_status"]
            continue
        if n["reported_status"] == "no_report":
            n["wait_time"] = (now - n["init_time"]).total_seconds()
        else:
            n["wait_time"] = (now - n["last_seen"]).total_seconds()
        # adding 5 seconds here as grace for network lag spikes
        if n["wait_time"] > 10 * n["update_interval"] + 5:
            if n["inferred_status"] != "missing":
                self._log(status="missing", **lkwargs)
            n["inferred_status"] = "missing"
        elif n["wait_time"] > 3 * n["update_interval"]:
            # don't care about logging delays
            n["inferred_status"] = "delayed"
        else:
            n["inferred_status"] = n["reported_status"]
_handle_incoming_message(update)

Top-level dispatcher function for handling Updates from Delegates. Route wilco, state, info, and report components of Update -- if any -- to the appropriate methods.

Parameters:

Name Type Description Default
update Update

Update Message from Delegate.

required
Source code in hostess/station/station.py
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
def _handle_incoming_message(self, update: pro.Update):
    """
    Dispatch an Update from a Delegate to each of our component handlers.

    Calls `_handle_wilco()`, `_handle_state()`, `_handle_info()`, and
    `_handle_report()`, in that order, each with the full Update. An
    Exception raised while looking up or running one handler is logged
    and does not prevent the remaining handlers from running.

    Args:
        update: Update Message from Delegate.
    """
    for op in ("wilco", "state", "info", "report"):
        try:
            handler = getattr(self, f"_handle_{op}")
            handler(update)
        except Exception as ex:
            failure = {
                "category": "comms",
                "direction": "recv",
                "exception": ex,
                "op": op,
            }
            self._log("bad message handling", **failure)
_handle_info(message)

Handler for 'info' Updates. Unpack all the "notes" (serialized Python objects + metadata) bundled in an info Update and match each one against our Actors, executing each Actor that matches a note with that note.

If message is an exit report, also always log its info.

Parameters:

Name Type Description Default
message Update

Update Message from a Delegate.

required
Source code in hostess/station/station.py
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
def _handle_info(self, message: pro.Update):
    """
    Handler for 'info' Updates. Unpack the "notes" (serialized Python
    objects + metadata) bundled in the Update's info field.

    If `message` is an exit report (its reason is 'exiting'), log the
    notes along with the Delegate's name and status, and do nothing else.
    Otherwise, pass each note to `match_and_execute()` as an 'info' event
    so that every Actor matching it runs with it.

    Args:
        message: Update Message from a Delegate.
    """
    notes = gmap(unpack_obj, message.info)
    if enum(message, "reason") != "exiting":
        for note in notes:
            self.match_and_execute(note, "info")
        return
    exit_details = {
        "delname": message.delegateid.name,
        "reason": enum(message.state, "status"),
        "exception": notes,
    }
    self._log("received exit report", **exit_details)
_handle_report(update)

Handler for "completion" Updates received from Delegates. Perform action tracking and cleanup, and execute any appropriate follow-on actions.

Parameters:

Name Type Description Default
update Update

'completion' Update from Delegate.

required
Source code in hostess/station/station.py
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
def _handle_report(self, update: pro.Update):
    """
    Process a "completion" Update received from a Delegate: log it, copy
    the reported status and timing into the matching task record (if we
    have one), and pass the action's result to any matching Actors.

    Args:
        update: 'completion' Update from Delegate.

    Raises:
        NotImplementedError: if the completion report contains pipeline
            steps.
    """
    if not update.HasField("completed"):
        return
    report = update.completed
    if len(report.steps) > 0:
        raise NotImplementedError
    self._log(update, category="report")
    if not report.HasField("action"):
        # TODO: an unusual case. maybe log
        return
    completed = unpack_message(report)
    # (task record key, key within the reported action's "time" mapping)
    time_fields = (
        ("start_time", "start"),
        ("end_time", "end"),
        ("duration", "duration"),
    )
    try:
        record = self.tasks[completed["instruction_id"]]
        action = completed["action"]
        record["status"] = action["status"]
        for record_key, time_key in time_fields:
            record[record_key] = action["time"][time_key]
        if record["exception"] is not None:
            record["exception"] = exc_report(record["exception"])
    except KeyError:
        # TODO: an undesirable case, log
        pass
    result = unpack_obj(report.action.result)
    self.match_and_execute(result, "completion")
_handle_state(update)

update the info dict for one of our Delegates in response to state and id elements of an Update Message.

Parameters:

Name Type Description Default
update Update

Update Message from Delegate.

required
Source code in hostess/station/station.py
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
def _handle_state(self, update: pro.Update):
    """
    Update the info dict for one of our Delegates in response to the state
    and id elements of an Update Message.

    If no Delegate with the reported name is registered, a blank info dict
    is created for it, labelled with the reported name, and appended to
    `self.delegates`, so that later Updates from the same Delegate find
    that record instead of registering a new one each time.

    Statuses of tasks whose Instruction threads appear in the Update are
    also refreshed, unless the task already has a formally reported
    terminal status.

    Args:
        update: Update Message from Delegate.
    """
    reported_name = update.delegateid.name
    try:
        delegate = self.get_delegate(reported_name)
    # TODO: how do we handle mystery-appearing delegates with dupe names
    except IndexError:
        # record the name immediately: get_delegate() looks records up by
        # name, so a nameless record could never be found again
        delegate = blank_delegateinfo() | {"name": reported_name}
        self.delegates.append(delegate)
    update = unpack_message(update)
    if delegate["reported_status"] == "no_report":
        self._log(
            "first message from delegate",
            delname=delegate["name"],
            category="comms",
            direction="recv",
        )
    state = update["state"]
    delegate |= {
        "last_seen": dt.datetime.fromisoformat(update["time"]),
        "wait_time": 0,
        "interface": state["interface"],
        "cdict": state["cdict"],
        "reported_status": state["status"],
        "pid": update["delegateid"]["pid"],
        "actors": state.get("actors", []),
        "sensors": state.get("sensors", []),
        "busy": state["busy"],
        "host": update["delegateid"]["host"],
        "running": update.get("running", []),
        "infocount": state.get("infocount", {}),
        "init_params": state["init_params"],
    }
    # statuses set by a completion report; never overwritten from here
    final_statuses = ("success", "failure", "crash", "timeout")
    for thread_name, thread_state in state["threads"].items():
        try:
            instruction_id = int(thread_name.replace("Instruction_", ""))
            if self.tasks[instruction_id]["status"] not in final_statuses:
                # don't override formally reported status
                self.tasks[instruction_id]["status"] = thread_state.lower()
        except (ValueError, KeyError):  # main thread, sensors, etc.
            continue
_handle_wilco(update)

handle 'wilco' Update from a Delegate. Delegates send these on receipt of Instructions prior to actually performing them. Record time and whether or not the Delegate acknowledged that it understood and would follow the Instruction.

Parameters:

Name Type Description Default
update Update

'wilco' Update Message from Delegate.

required
Source code in hostess/station/station.py
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
def _handle_wilco(self, update: pro.Update):
    """
    Process a 'wilco' Update from a Delegate. Delegates send these when
    they receive an Instruction, before actually performing it. For an
    Instruction we are tracking as a task, record the acknowledgement time
    and set the task's status to the Update's reason.

    Updates whose reason is neither "wilco" nor "bad_request", and Updates
    that refer to an Instruction with no task record, are ignored.

    Args:
        update: 'wilco' Update Message from Delegate.
    """
    # TODO: handle do not understand messages
    reason = enum(update, "reason")
    if reason not in ("wilco", "bad_request"):
        return
    # TODO: handle config acks
    instruction_id = update.instruction_id
    if instruction_id not in self.tasks:
        return
    record = self.tasks[instruction_id]
    record["ack_time"] = update.time.ToDatetime(dt.timezone.utc)
    record["status"] = reason
_launch_delegate_in_subprocess(context, kwargs) staticmethod

Launch a delegate in a child or daemonized process. Should only be called by Station.launch_delegate().

Parameters:

Name Type Description Default
context DelegateContext

how to launch the delegate. Should only be 'daemon' or 'subprocess'; if 'local', we should never have gotten here.

required
kwargs dict[str, Any]

kwargs for launch, passed to launch_delegate() via generic_python_endpoint()

required
Source code in hostess/station/station.py
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
@staticmethod
def _launch_delegate_in_subprocess(
    context: DelegateContext, kwargs: dict[str, Any]
):
    """
    Start a delegate in a child process or a daemonized process. Intended
    to be called only by `Station.launch_delegate()`.

    Args:
        context: how to launch the delegate. Should only be 'daemon' or
            'subprocess'; if 'local', we should never have gotten here.
        kwargs: kwargs for launch, passed to `launch_delegate()` via
            `generic_python_endpoint()`

    Raises:
        ValueError: if `context` is neither 'daemon' nor 'subprocess'.
    """
    endpoint = generic_python_endpoint(
        "hostess.station.delegates",
        "launch_delegate",
        payload=kwargs,
        splat="**",
        return_result=True,
    )
    # pick the RunCommand option that corresponds to the launch context
    if context == "daemon":
        run_options = {"_disown": True}
    elif context == "subprocess":
        run_options = {"_asynchronous": True}
    else:
        raise ValueError(
            f"unsupported context {context}. Supported contexts for this "
            f"function are 'daemon' and 'subprocess'"
        )
    RunCommand(endpoint, **run_options)()
_main_loop()

launch main loop for Station. This should only be called by Station._start(), which should only be called by Station.start().

Source code in hostess/station/station.py
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
def _main_loop(self):
    """
    Run the Station's main loop until a value is set for the "main" signal.
    This should only be called by `Station._start()`, which should only be
    called by `Station.start()`.

    Each pass checks on our Delegates and then sleeps for `self.poll`.
    Whenever `self.tendtime()` exceeds eight poll intervals, the loop also
    tends the server (logging any crashed threads it reports), prunes the
    inbox to `self.max_inbox_mb`, and resets the tend timer.
    """
    while True:
        if self.signals.get("main") is not None:
            break
        self._check_delegates()
        tend_due = self.tendtime() > self.poll * 8
        if tend_due:
            crashed = self.server.tend()
            if len(crashed) > 0:
                self._log(crashed, category="server_errors")
                self.threads |= self.server.threads
            self.inbox.prune(self.max_inbox_mb)
            self.reset_tend()
        time.sleep(self.poll)
_record_message(box, msg, pos)

helper function for Station._ackcheck(). write log entry for sent Message and record it as sent.

Parameters:

Name Type Description Default
box Mailbox

outbox for Delegate we sent Message in msg to

required
msg Msg

wrapper for Message we sent to delegate

required
pos int

index of msg within box

required
Source code in hostess/station/station.py
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
def _record_message(self, box: Mailbox, msg: Msg, pos: int):
    """
    Helper for `Station._ackcheck()`: refresh the timestamp on a Message
    we are sending, log it, and flag it as sent.

    Args:
        box: outbox for the Delegate we sent the Message in `msg` to
        msg: wrapper for the Message we sent to the Delegate
        pos: index of `msg` within `box`
    """
    update_instruction_timestamp(msg.message)
    # Msg cached properties are, by intent, essentially immutable with
    # respect to the underlying message, so rather than editing `msg` we
    # replace it with a new Msg built from the re-timestamped message.
    rewrapped = Msg(msg.message)
    box[pos] = rewrapped
    self._log(box[pos].message, category="comms", direction="send")
    box[pos].sent = True
_select_outgoing_message(delegate)

Helper function for Station._ackcheck(). When we receive an Update from one of our Delegates and we've got one or more Instructions prepared for them, we reply with one. This function checks if we have any Instructions for that Delegate, and, if so, picks which we should send.

Parameters:

Name Type Description Default
delegate str

name of Delegate we're talking to.

required

Returns:

Type Description
Optional[Mailbox]
  • outbox for delegate, or None if we don't have any Instructions for it
Optional[Instruction]
  • selected Instruction for Delegate, or None if we don't have any to send
Optional[int]
  • index of Instruction within outbox, or None if there isn't one
Source code in hostess/station/station.py
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
def _select_outgoing_message(
    self, delegate: str
) -> tuple[Optional[Mailbox], Optional[pro.Instruction], Optional[int]]:
    """
    Helper for `Station._ackcheck()`. When a Delegate sends us an Update
    and we have Instructions waiting for it, we reply with one of them.
    This function looks through the Delegate's outbox for unsent messages
    and, if there are any, chooses the one to send and marks it as sent.

    Args:
        delegate: name of Delegate we're talking to.

    Returns:
        * outbox for `delegate`, or None if we don't have any
            Instructions for it
        * selected Instruction for Delegate, or None if we don't have any
            to send
        * index of Instruction within outbox, or None if there isn't one
    """
    # TODO, probably: send more than one Instruction when available.
    #  we might want a special control code for that.
    outbox = self.outboxes[delegate]
    # TODO, maybe: this search will be expensive if outboxes get really big
    #  -- might want some kind of hashing
    unsent = tuple(
        entry
        for entry in tuple(enumerate(outbox))
        if entry[1].sent is False
    )
    if len(unsent) == 0:
        return None, None, None  # this will trigger an empty ack message
    # ensure that we send shutdown Instructions before config Instructions,
    # and config Instructions before do Instructions
    # (the do Instructions might need correct config to work!)
    selected = None
    for wanted_type in ("kill", "stop", "configure"):
        try:
            selected = filtern(
                lambda entry: entry[1].type == wanted_type, unsent
            )
        except StopIteration:
            continue
        break
    if selected is None:
        # nothing high-priority is waiting: take the first unsent message
        selected = unsent[0]
    pos, msg = selected
    msg.sent = True
    return outbox, msg, pos
_set_logfile()

create standardized log file name.

Source code in hostess/station/station.py
93
94
95
96
97
98
def _set_logfile(self):
    """
    Set `self.logfile` to a standardized log file path inside
    `self.logdir`, named from this Station's init time, host, and port.
    """
    filename = f"{self.init_time}_station_{self.host}_{self.port}.log"
    self.logfile = Path(self.logdir, filename)
_shutdown(exception=None)

shut down the Station. This method should normally be called only by Station.shutdown().

Parameters:

Name Type Description Default
exception Optional[Exception]

unhandled Exception that caused shutdown, if any.

None
Source code in hostess/station/station.py
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
def _shutdown(self, exception: Optional[Exception] = None):
    """
    shut down the Station. This method should normally be called only by
    `Station.shutdown()`.

    In order: records `exception`, replaces every outbox with an empty
    Mailbox, discards our Actors and Sensors, queues a 'stop' Instruction
    for every Delegate, waits for Delegates to finish, sets every signal
    to 1, kills the server and waits for its threads to stop, and finally
    exits the interpreter process if this Station owns it.

    Each waiting phase gives up and logs a timeout entry rather than
    raising if `waiting()` raises TimeoutError.

    Args:
        exception: unhandled Exception that caused shutdown, if any.
    """
    # TODO: add some internal logging here when nodes fail to
    #  shut down or respond in a timely fashion
    self.exception = exception
    # clear outbox etc.
    for k in self.outboxes.keys():
        self.outboxes[k] = Mailbox()
    self.actors, self.sensors = {}, {}
    # queued after the outboxes were cleared, so the stop Instruction is
    # the only thing waiting in each outbox
    for delegate in self.delegates:
        self.shutdown_delegate(delegate["name"], "stop")
    waiting, unwait = timeout_factory(timeout=30)
    # make sure every delegate is shut down, timing out at 30s --
    # this will also ensure we get all exit reports from newly-shutdown
    # delegates
    self._check_delegates()
    while len(self.running_delegates) > 0:
        try:
            waiting()
        except TimeoutError:
            self._log(
                "delegate_shutdown_timeout",
                category="system",
                running=[n["name"] for n in self.running_delegates],
            )
            break
        time.sleep(0.1)
        self._check_delegates()
    # NOTE(review): presumably resets the timer so the loops below get a
    # fresh timeout -- confirm against timeout_factory()
    unwait()
    # ensure local delegate threads are totally shut down
    while len(self.unfinished_delegates) > 0:
        time.sleep(0.1)
        try:
            waiting()
        except TimeoutError:
            self._log(
                "local_delegate_thread_shutdown_timeout",
                category="system",
                running=[n["name"] for n in self.unfinished_delegates],
            )
            break
    # shut down the server etc.
    # TODO: this is a little messy because of the discrepancy in thread
    #  and signal names. maybe unify this somehow.
    for k in self.signals:
        self.signals[k] = 1
    self.server.kill()
    # NOTE(review): unwait() is not called between the previous loop and
    # this one, so this wait appears to share the same timer -- confirm
    while any(t.running() for t in self.server.threads.values()):
        try:
            waiting()
        except TimeoutError:
            self._log("self_server_shutdown_timeout", category="system")
            break
        time.sleep(0.1)
    # double-underscore attribute: name-mangled by the enclosing class
    if self.__is_process_owner:
        self._log("shutdown complete, exiting process", category="system")
        sys.exit()
_situation_comm()

Construct a hostess comm describing this Station's overall situation. The hostess situation app works by constructing human-readable representations of these comms.

Source code in hostess/station/station.py
855
856
857
858
859
860
861
862
863
864
def _situation_comm(self) -> bytes:
    """
    Build a hostess comm that describes this Station's overall situation.
    The hostess `situation` app works by constructing human-readable
    representations of these comms.

    Returns:
        the comm produced by `make_comm()` from the packed result of
            `situation_of(self)`.
    """
    # imported at call time rather than at module level
    from hostess.station.situation.response_organizers import situation_of

    situation = situation_of(self)
    packed = pack_obj(situation)
    return make_comm(packed)
_update_task_record(msg)

helper function for _ackcheck(). update task record associated with a 'do' Instruction so that we know that we sent it and when.

Parameters:

Name Type Description Default
msg Msg

wrapper for 'do' Instruction we just sent to a Delegate.

required
Source code in hostess/station/station.py
593
594
595
596
597
598
599
600
601
602
603
def _update_task_record(self, msg: Msg):
    """
    Helper for `_ackcheck()`. Stamp the task record that belongs to a 'do'
    Instruction with the current UTC time and mark it as sent.

    Args:
        msg: wrapper for 'do' Instruction we just sent to a Delegate.
    """
    record = self.tasks[msg.id]
    sent_at = dt.datetime.now(tz=dt.timezone.utc)
    record["sent_time"] = sent_at
    record["status"] = "sent"
get_delegate(name)

return delegate info structure for first Delegate named "name" (there should never be more than one unless someone has seriously messed with the Station).

Parameters:

Name Type Description Default
name str

name of Delegate

required

Returns:

Type Description
dict[str, Any]

info dict for Delegate. If Delegate is running locally, this dict will include a reference to the Delegate itself.

Source code in hostess/station/station.py
285
286
287
288
289
290
291
292
293
294
295
296
297
298
def get_delegate(self, name: str) -> dict[str, Any]:
    """
    Look up the delegate info structure for the first Delegate called
    `name` (there should never be more than one unless someone has
    seriously messed with the Station).

    Args:
        name: name of Delegate

    Returns:
        info `dict` for Delegate. If Delegate is running locally,
            this `dict` will include a reference to the Delegate itself.

    Raises:
        IndexError: if no Delegate called `name` is registered.
    """
    matching = [info for info in self.delegates if info["name"] == name]
    # indexing (not next()) so that a missing Delegate raises IndexError
    return matching[0]
launch_delegate(name, elements=(), host='localhost', update_interval=0.25, context='daemon', **kwargs)

launch a Delegate, by default daemonized. prepare a metadata dict for it, and prepare ourselves to receive Messages from it.

Parameters:

Name Type Description Default
name str

name to assign to Delegate.

required
elements Sequence[tuple[str, str]]

sequence of (module_name, class_name) describing Actors and Sensors Delegate should construct and attach to itself at launch.

()
host str

hostname or ip on which Delegate should launch. remote launch is not yet implemented.

'localhost'
update_interval float

how frequently the Delegate should send unprompted 'heartbeat' Updates to this Station.

0.25
context DelegateContext

where to launch the Delegate in relation to this Station's interpreter process: "local" to run threaded in the same process; "subprocess" to run in a child process; "daemon" to run in a fully-detached process.

'daemon'
kwargs Union[int, float]

additional kwargs to pass to hostess.station.delegates.launch_delegate().

{}

Returns:

Type Description
Optional[Node]

None if context == "subprocess" or "daemon"; the launched Delegate if context == "local".

Source code in hostess/station/station.py
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
def launch_delegate(
    self,
    name: str,
    elements: Sequence[tuple[str, str]] = (),
    host: str = "localhost",
    update_interval: float = 0.25,
    context: DelegateContext = "daemon",
    **kwargs: Union[int, float],
) -> Optional[bases.Node]:
    """
    Launch a Delegate (daemonized by default), create a metadata dict for
    it, and register it so that we are ready to receive its Messages.

    A failure during the launch itself is logged, not raised; in that case
    the Delegate is not registered and this method returns None.

    Args:
        name: name to assign to Delegate.
        elements: sequence of (module_name, class_name) describing Actors
            and Sensors Delegate should construct and attach to itself at
            launch.
        host: hostname or ip on which Delegate should launch. remote
            launch is not yet implemented.
        update_interval: how frequently the Delegate should send
            unprompted 'heartbeat' Updates to this Station.
        context: where to launch the Delegate in relation to this Station's
            interpreter process: "local" to run threaded in the same
            process; "subprocess" to run in a child process; "daemon" to
            run in a fully-detached process.
        kwargs: additional kwargs to pass to
            `hostess.station.delegates.launch_delegate()`. These take
            precedence over the values this method prepares itself.

    Returns:
        None if `context` == "subprocess" or "daemon"; the launched
            Delegate if `context` == "local".

    Raises:
        ValueError: if the Station has not been started, or a Delegate
            called `name` is already registered.
        NotImplementedError: if `host` is not "localhost".
    """
    if not self._Node__started:
        raise ValueError(
            "cannot launch delegates from an unstarted Station"
        )
    # TODO: option to specify remote host and run this using SSH (update
    #  relaunch_delegate as well when adding this feature)
    if host != "localhost":
        raise NotImplementedError
    if any(known["name"] == name for known in self.delegates):
        raise ValueError("can't launch a delegate with a duplicate name")
    loginfo = {
        # must pass logdir as a string -- delegate is not
        # initialized yet, so this is inserted directly into
        # generated source code
        "logdir": str(self.logdir),
        "init_time": self.init_time,
    }
    launch_kwargs = {
        "station_address": (self.host, self.port),
        "name": name,
        "elements": elements,
        "update_interval": update_interval,
        "loginfo": loginfo,
    } | kwargs
    delegateinfo = blank_delegateinfo() | {
        "name": name,
        "inferred_status": "initializing",
        "update_interval": update_interval,
    }
    # shared kwargs for the log entries about this launch
    log_kwargs = {"delname": name, "elements": elements, "category": "system"}
    self._log("init delegate launch", **log_kwargs)
    try:
        if context != "local":
            launched = self._launch_delegate_in_subprocess(
                context, launch_kwargs
            )
        else:
            # mostly for debugging / dev purposes
            from hostess.station.delegates import launch_delegate

            launched = launch_delegate(is_local=True, **launch_kwargs)
            delegateinfo["obj"] = launched
        self.delegates.append(delegateinfo)
        self._log("launched delegate", **log_kwargs)
        return launched
    except Exception as ex:
        self._log("delegate launch fail", **log_kwargs, **exc_report(ex))
match_and_execute(obj, category)

Check to see if we have an Actor or Actors intended to handle obj by calling their match() methods with obj. If any of them say they can deal with obj, pass obj to their execute() methods.

In a typical Station application, obj will be something a Delegate packed into a "completion" or "info" Update. Its type will be entirely application-dependent.

Parameters:

Name Type Description Default
obj Any

object one of our Actors might be able to work with

required
category str

what category of Actor might be able to work with obj?

required
Source code in hostess/station/station.py
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
def match_and_execute(self, obj: Any, category: str):
    """
    Ask `self.match()` which of our Actors are able to handle `obj`, log
    the outcome, and pass `obj` to the `execute()` method of each Actor
    that matched.

    In a typical Station application, `obj` will be something a Delegate
    packed into a "completion" or "info" Update. Its type will be entirely
    application-dependent.

    Nothing is raised when matching fails with NoActorForEvent,
    AttributeError, or KeyError, or when an Actor's `execute()` fails; the
    latter two cases are logged.

    Args:
        obj: object one of our Actors might be able to work with
        category: what category of Actor might be able to work with `obj`?
    """
    try:
        matched = self.match(obj, category)
    except bases.NoActorForEvent:
        # TODO: _plausibly_ log this?
        return
    except (AttributeError, KeyError) as ex:
        self._log("match crash", **exc_report(ex), category=category)
        return
    matched_names = [actor.name for actor in matched]
    self._log(obj, category=category, matches=matched_names)
    for actor in matched:
        # one Actor failing must not stop the others from running
        try:
            actor.execute(self, obj)
        except NoMatchingDelegate:
            self._log(
                "no delegate for action", actor=actor, category=category
            )
        except Exception as ex:
            self._log(
                "execution failure",
                actor=actor,
                category=category,
                exception=ex,
            )
queue_task(delegate, instruction)

queue an Instruction for a Delegate and set up tracking for its state. this method is intended for "do" Instructions that contain Actions, not Instructions we do not want to track in the same way (like config). The default InstructionFromInfo Actor uses this method to queue the Instructions it makes.

Parameters:

Name Type Description Default
delegate str

name of Delegate for which to queue task

required
instruction Instruction

Instruction Message

required
Source code in hostess/station/station.py
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
def queue_task(self, delegate: str, instruction: pro.Instruction):
    """
    Create a tracking record for an Instruction and place the Instruction
    in a Delegate's outbox. This method is intended for "do" Instructions
    that contain Actions, not Instructions we do not want to track in the
    same way (like config). The default InstructionFromInfo Actor uses
    this method to queue the Instructions it makes.

    Args:
        delegate: name of Delegate for which to queue task
        instruction: Instruction Message

    Raises:
        NotImplementedError: if `instruction` has a "pipe" field.
        ValueError: if the type of `instruction` is not 'do'.
    """
    if instruction.HasField("pipe"):
        raise NotImplementedError("multipart pipelines not implemented")
    if enum(instruction, "type") != "do":
        raise ValueError("task instructions must have type 'do'")
    action = instruction.action
    record = {
        "init_time": instruction.time.ToDatetime(dt.timezone.utc),
        # the timestamps below are filled in when the Instruction is sent
        # and when the Delegate acknowledges it
        "sent_time": None,
        "ack_time": None,
        "status": "queued",
        "delegate": delegate,
        "name": action.name,
        "action_id": action.id,
        "description": dict(action.description),
        "exception": None,
    }
    self.tasks[instruction.id] = record
    self.outboxes[delegate].append(instruction)
relaunch_delegate(name)

Relaunch an existing Delegate with the same initialization settings, although not full runtime configuration. If it's still running, shut it down first.

Parameters:

Name Type Description Default
name str

name of Delegate to relaunch.

required
Source code in hostess/station/station.py
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
def relaunch_delegate(self, name: str):
    """
    Relaunch an existing Delegate with the same initialization settings,
    although not full runtime configuration. If it is neither missing nor
    already shut down / crashed, ask it to stop first and wait (up to the
    20-second timeout) for it to do so.

    The old info dict is moved from `self.delegates` to `self.relaunched`
    before the new Delegate is launched.

    Args:
        name: name of Delegate to relaunch.
    """
    delegate = self.get_delegate(name)
    stopped_states = ("shutdown", "crashed")
    status = delegate["inferred_status"]
    # TODO: os level kill for delegates whose status is "missing"
    if status != "missing" and status not in stopped_states:
        self.shutdown_delegate(name, "stop")
        waiting, unwait = timeout_factory(timeout=20)
        self._check_delegates()
        while self.get_delegate(name)["inferred_status"] not in stopped_states:
            try:
                waiting()
            except TimeoutError:
                # give up waiting and relaunch anyway
                break
            time.sleep(0.1)
            self._check_delegates()
        unwait()
    # rebuild (module, class) pairs from the dotted paths the Delegate
    # reported for its Actors and Sensors
    element_paths = dict(delegate["actors"]) | dict(delegate["sensors"])
    pairs = []
    for dotted in element_paths.values():
        cls = dotted.split(".")[-1]
        mod = dotted.removesuffix("." + cls)
        pairs.append((mod, cls))
    elements = tuple(pairs)
    # TODO: add remote host detection/relaunch capability
    # TODO: how do you know if it's "daemon" vs "subprocess"?
    owns_process = delegate["init_params"]["_is_process_owner"]
    context = "daemon" if owns_process else "local"
    self.delegates.remove(delegate)
    self.relaunched.append(delegate)
    self.launch_delegate(
        name,
        elements,
        host="localhost",
        context=context,
        **delegate["init_params"],
    )
save_port_to_shared_memory(address=None)

write this Station's port number to a shared memory address, allowing other applications to query or monitor it. Specifically, calling this function will allow the hostess situation app to automatically find this Station.

Parameters:

Name Type Description Default
address Optional[str]

shared memory address to write port to. Exactly what this means depends on the operating environment. In CPython on Linux, it denotes a filename in /dev/shm. if not specified, defaults to the name of this Station.

None
Source code in hostess/station/station.py
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
def save_port_to_shared_memory(self, address: Optional[str] = None):
    """
    Publish this Station's port number at a shared memory address so that
    other applications can query or monitor it. Specifically, calling this
    function will allow the hostess `situation` app to automatically find
    this Station.

    Args:
        address: shared memory address to write port to. Exactly what this
            means depends on the operating environment. In CPython on
            Linux, it denotes a filename in /dev/shm. if not specified,
            defaults to the name of this Station. The suffix
            "-port-report" is always appended to it.
    """
    from dustgoggles.codex.implements import Sticky
    from dustgoggles.codex.memutilz import (
        deactivate_shared_memory_resource_tracker,
    )

    deactivate_shared_memory_resource_tracker()
    if address is None:
        address = self.name
    report_address = f"{address}-port-report"
    Sticky.note(self.port, address=report_address, cleanup_on_exit=True)
set_delegate_config(delegate, config)

Construct a 'configure' Instruction for a Delegate that instructs it to merge a config dict into its existing config dict; put that Instruction in the outbox for that Delegate.

Parameters:

Name Type Description Default
delegate str

name of Delegate to configure

required
config Mapping

configuration to add to the Delegate's config dict.

required
Source code in hostess/station/station.py
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
def set_delegate_config(self, delegate: str, config: Mapping):
    """
    Queue a 'configure' Instruction for a Delegate that instructs it to
    merge a config dict into its existing config dict. The Instruction is
    placed in the outbox for that Delegate.

    Args:
        delegate: name of Delegate to configure
        config: configuration to add to the Delegate's config dict.
    """
    param = pro.ConfigParam(paramtype="config_dict", value=pack_obj(config))
    outbox = self.outboxes[delegate]
    outbox.append(make_instruction("configure", config=param))
set_delegate_properties(delegate, **propvals)

Construct a 'configure' Instruction for a Delegate that instructs it to assign specific values to named properties of itself; put that Instruction in the outbox for that Delegate.

Parameters:

Name Type Description Default
delegate str

name of delegate to configure.

required
propvals Any

argument names correspond to property names of target Delegate; argument values will be serialized as PythonObject Messages and then bundled into ConfigParam Messages.

{}
Source code in hostess/station/station.py
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
def set_delegate_properties(self, delegate: str, **propvals: Any):
    """
    Queue a 'configure' Instruction that tells a Delegate to assign specific
    values to named properties of itself.

    Args:
        delegate: name of the Delegate to configure.
        propvals: keyword names correspond to property names of the target
            Delegate; each value is serialized as a PythonObject Message
            and bundled into its own ConfigParam Message.

    Raises:
        TypeError: if no property values are given.
    """
    # TODO: update delegate info record if relevant
    if not propvals:
        raise TypeError("can't send a no-op config instruction")
    params = []
    for propname, propval in propvals.items():
        # one "config_property" parameter per keyword argument
        params.append(
            pro.ConfigParam(
                paramtype="config_property", value=pack_obj(propval, propname)
            )
        )
    outbox = self.outboxes[delegate]
    outbox.append(make_instruction("configure", config=params))
shutdown_delegate(delegate, how='stop')

make a shutdown Instruction and queue it in the outbox for the specified Delegate.

Parameters:

Name Type Description Default
delegate str

name of Delegate we would like to shut down

required
how Literal['stop', 'kill']

"stop" if we would like the Delegate to shut down gracefully; "kill" if we would like the Delegate to stop immediately no matter what.

'stop'
Source code in hostess/station/station.py
169
170
171
172
173
174
175
176
177
178
179
180
181
182
def shutdown_delegate(
    self, delegate: str, how: Literal["stop", "kill"] = "stop"
):
    """
    Queue a shutdown Instruction in the outbox of the named Delegate.

    Args:
        delegate: name of the Delegate to shut down
        how: "stop" to ask the Delegate to shut down gracefully; "kill" to
            ask it to stop immediately no matter what.
    """
    outbox = self.outboxes[delegate]
    # `how` doubles as the instruction type passed to make_instruction
    instruction = make_instruction(how)
    outbox.append(instruction)

blank_delegateinfo()

utility function for Station. creates an empty Delegate metadata dict.

Returns: "blank" Delegate metadata dict suitable for use as an element of Station.delegates.

Source code in hostess/station/station.py
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
def blank_delegateinfo() -> dict[
    str, Union[None, str, dt.datetime, list, int, dict]
]:
    """
    Utility function for Station: build an empty Delegate metadata dict.

    Returns:
        "blank" Delegate metadata dict suitable for use as an element of
            Station.delegates. Each call builds new list/dict values and
            stamps "init_time" with the current UTC time.
    """
    created_at = dt.datetime.now(dt.timezone.utc)
    info = dict(
        last_seen=None,
        reported_status="no_report",
        inferred_status="initializing",
        init_time=created_at,
        wait_time=0,
        running=[],
        interface={},
        actors={},
    )
    return info

get_port_from_shared_memory(memory_address='station')

fetch a named Station's port number from a shared memory address. Used by the hostess situation app; can also be used by other 'plugins' or for ad-hoc inspection of a Station.

Parameters:

Name Type Description Default
memory_address str

shared memory address a Station's port number might be stored in. Exactly what this means depends on the environment. In CPython on Linux, it denotes a filename in /dev/shm.

'station'

Returns:

Type Description
int

port number of Station that saved its port number to memory_address.

Source code in hostess/station/station.py
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
def get_port_from_shared_memory(memory_address: str = "station") -> int:
    """
    Look up a named Station's port number at a shared memory address. Used
    by the hostess `situation` app; can also be used by other 'plugins' or
    for ad-hoc inspection of a Station.

    Args:
        memory_address: shared memory address a Station's port number might be
            stored in. Exactly what this means depends on the environment. In
            CPython on Linux, it denotes a filename in /dev/shm.

    Returns:
        port number of Station that saved its port number to `memory_address`.

    Raises:
        FileNotFoundError: if reading the address yields None.
    """
    from dustgoggles.codex.implements import Sticky
    from dustgoggles.codex.memutilz import (
        deactivate_shared_memory_resource_tracker,
    )

    deactivate_shared_memory_resource_tracker()
    # the port is looked up under "<memory_address>-port-report"
    note = Sticky(f"{memory_address}-port-report")
    port = note.read()
    if port is not None:
        return port
    raise FileNotFoundError("no port at address")

station.talkie

TCPTalk

lightweight multithreaded tcp server.

Source code in hostess/station/talkie.py
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
class TCPTalk:
    """lightweight multithreaded tcp server."""

    def __init__(
        self,
        host: str,
        port: int,
        n_threads: int = 4,
        poll: float = 0.01,
        decoder: Optional[Callable] = read_comm,
        ackcheck: Optional[Callable] = None,
        executor: Optional[ThreadPoolExecutor] = None,
        lock: Optional[threading.Lock] = None,
        chunksize: int = 16384,
        delay: float = 0.01,
        timeout: int = 10,
    ):
        """
        Note: `TCPTalk` immediately starts running when initialized.

        Args:
            host: hostname for server's socket, either an ip address or a
                resolvable name like "localhost"
            port: port number for server's socket
            n_threads: number of i/o threads. server will always launch
                n_threads + 1 threads; the +1 is its selector thread.
            poll: poll/spool delay for threads
            decoder: optional callable used to decode received messages
            ackcheck: callable for inserting message responses -- this can be
                used to attach a Station's responder rules to the server
            executor: optional ThreadPoolExecutor, if the server should run in
                existing thread pool
            lock: optional lock, if the tcp server should be subject to an
                external lockout
            chunksize: chunk size for reading responses from socket
            delay: time to wait before checking socket again after failed read
            timeout: timeout on socket
        """
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        atexit.register(self.sock.close)
        self.status = "initializing"
        self.timeout, self.delay, self.chunksize = timeout, delay, chunksize
        self.poll, self.decoder, self.ackcheck = poll, decoder, ackcheck
        self.sel = selectors.DefaultSelector()
        try:
            self.sock.bind((host, port))
            self.sock.listen()
            self.sock.setblocking(False)
            if executor is None:
                executor = ThreadPoolExecutor(n_threads + 1)
            self.exec, self._lock = executor, lock
            self.threads, self.events = {}, []
            self.data, self.peers = Mailbox(), {}
            # one work queue and one signal slot per i/o thread; the
            # selector thread gets a signal slot but no queue
            self.queues = {i: [] for i in range(n_threads)}
            self.signals = {i: None for i in range(n_threads)} | {
                "select": None
            }
            self.sig = signal_factory(self.signals)
            self.threads["select"] = executor.submit(self.launch_selector)
            for ix in range(n_threads):
                self.threads[ix] = executor.submit(self.launch_io, ix)
            self.status = "running"
        except Exception as _ex:
            self.sock.close()
            self.status = "crashed"
            # NOTE: kill() sets self.status to "terminated" before we re-raise
            self.kill()
            raise

    def kill(self, signal: int = 0):
        """
        immediately shut down, closing the server's socket and attempting to
        terminate all its threads.

        Args:
            signal: termination signal to send to threads. changing this
                number does nothing special by default and is intended for
                subclasses or application-specific purposes.
        """
        self.sock.close()
        if hasattr(self, "sig"):
            # won't be present if we encountered an error on init
            self.sig("all", signal)
        self.status = "terminated"

    def tend(self) -> Optional[list]:
        """
        check on all threads we believe to be running. If any of them aren't,
        relaunch them. Never called automatically.

        Returns:
            None, if server is still initializing. otherwise, list of
                Exceptions raised by crashed threads (empty if none crashed).
        """
        if self.status == "initializing":
            return None
        # snapshot so we can replace entries in self.threads while iterating
        threads = tuple(self.threads.items())
        crashed_threads = []
        for k, v in threads:
            # will be dict if it is a crashed thread running in trywrap
            if not isinstance(v, dict) and v.running():
                continue
            # signal the thread's slot, give its loop time to notice, then
            # clear the slot so the relaunched thread can run
            self.sig(k, 0)
            time.sleep(self.poll * 2)
            self.sig(k, None)
            thread = self.threads.pop(k, None)
            if isinstance(thread, dict):
                exception = thread["exception"]
            elif thread is not None:
                exception = thread.exception()
            else:
                exception = None
            crashed_threads.append(exception)
            if k == "select":
                self.threads["select"] = self.exec.submit(self.launch_selector)
            else:
                self.threads[k] = self.exec.submit(self.launch_io, k)
        self.status = "running"
        return crashed_threads

    def _get_locked(self) -> bool:
        """getter for self.locked"""
        if self._lock is None:
            return False
        return self._lock.locked()

    def _set_locked(self, _val: bool):
        """
        intentionally nonfunctional setter for self.locked. Always raises
        AttributeError.
        """
        raise AttributeError("server is not directly lockable")

    locked = property(_get_locked, _set_locked)
    """
    is the server locked, preventing it from communicating with peers?
    note that TCPTalk never locks itself. Its optional lockout behavior is
    intended to be handled by some sort of lock object shared with a handler
    application, for cases in which something needs locks for synchronization.
    """

    def _handle_callback(
        self,
        callback: Callable,
        peername: Optional[tuple[str, int]],
        peersock: socket.socket,
    ) -> tuple[Any, str, Optional[tuple[str, int]], str]:
        """
        inner callback-handler tree for i/o threads. should only ever be
        called from an io thread loop (`TCPTalk.launch_io()`).

        Args:
            callback: one of self._read, self._ack, or self._accept. attached
                to peersock by self.sel, queued by a call to self.sel.register
                in an io or selector thread.
            peername: name of peer, if known (tuple of (ip, port)).
            peersock: open socket to peer

        Returns:
            stream: decoded or raw bytes read from socket, if any
            event: description of event, primarily for logging
            peername: existing or newly-discovered peername (ip, port), or
                None if we still don't know it
            status: code for event, primarily for control flow
        """
        if callback.__name__ == "_read":
            # mark the peer as being handled before we start reading
            self.peers[peername] = True
            try:
                # callback is self._read
                stream, event, status = callback(peersock)
            except KeyError:
                # attempting to unregister an already-unregistered conn
                return None, "guard", peername, "already unregistered"
        elif callback.__name__ == "_ack":
            # callback is self._ack (usually a partially-evaluated version of
            # it constructed in self._read())
            stream, event, status = callback(peersock)
            # remove peer from peering-lock dict
            self.peers.pop(peername, None)
        elif callback.__name__ == "_accept":
            # callback is self._accept
            stream, event, peername, status = callback(peersock)
        else:
            # who attached some weirdo function?
            stream, event, status = None, "skipped", "invalid callback"
        return stream, event, peername, status

    def queued_descriptors(self) -> set[int]:
        """
        Returns:
             set of all file descriptors for currently-queued sockets.
                  Primarily for selector thread loop but can also be used
                  diagnostically.
        """
        return {s[0].fd for s in chain.from_iterable(self.queues.values())}

    # TODO: should this be running in @trywrap?
    def launch_selector(self):
        """launch the server's selector thread."""
        # id_ numbers queued events; cycler hands them to i/o queues
        # round-robin
        id_, cycler = 0, cycle(self.queues.keys())
        try:
            self.sel.register(self.sock, selectors.EVENT_READ, self._accept)
        except KeyError:  # will occur on relaunch
            pass
        while self.signals.get("select") is None:
            try:
                events = self.sel.select(1)
            except TimeoutError:
                continue
            queued = self.queued_descriptors()
            for key, _mask in events:
                # try to ensure we don't have a million pending events
                if key.fd in queued:
                    continue
                target = next(cycler)
                self.queues[target].append((key, id_))
                id_ += 1
            time.sleep(self.poll)

    # TODO: should this be running in @trywrap?
    def launch_io(self, name: Union[str, int]):
        """
        launch a read thread in this server's executor.
        must be run in a thread or it will block and be useless.

        Args:
            name: identifier for thread
        """
        while self.signals.get(name) is None:
            time.sleep(self.poll)
            try:
                key, id_ = self.queues[name].pop()
            except IndexError:
                continue
            # noinspection PyProtectedMember
            peername, peerage = self._check_peerage(key)
            callback, peersock = key.data, key.fileobj  # explanatory variables
            if (peerage is True) and (callback.__name__ != "_ack"):
                # connection / read already handled
                continue
            if self.locked and callback.__name__ != "_ack":
                continue
            try:
                stream, event, peername, status = self._handle_callback(
                    callback, peername, peersock
                )
                # task was already handled (or unhandleable)
                if event == "guard":
                    continue
            except OSError as err:
                stream, event, status = None, "oserror", str(err)
            event = {
                "event": event,
                "peer": peername,
                "status": status,
                "time": logstamp(),
                "thread": name,
                "id": id_,
                "callback": callback.__name__,
            }
            self.events.append(event)
            if (stream is None) or (len(stream) == 0):
                continue
            if isinstance(stream, bytes):
                # _read hands back raw bytes when the read or decode failed.
                # the failure is already recorded in self.events; indexing
                # bytes with "body" would raise TypeError and end this
                # thread, so skip instead.
                continue
            if not isinstance(stream["body"], bytes):  # control codes, etc.
                self.data.append(event | {"content": stream})

    def _accept(
        self, sock: socket.socket
    ) -> tuple[None, str, Optional[tuple[str, int]], str]:
        """
        accept-connection callback for i/o threads.

        Args:
            sock: TCP socket we've received a connection request on. in normal
                operation, this will always be self.sock.

        Returns:
            stream: always None. (present for signature compatibility)
            event: "blocking" if accepting connection would block; "accept"
                on successful accept
            peername: name of peer on successful accept (tuple of ip address,
                port). None if blocking (because peer name is not known).
            status: "blocking on self" if blocking, "ok" on successful accept
        """
        try:
            conn, addr = sock.accept()
        except BlockingIOError:
            return None, "blocking", None, "blocking on self"
        conn.setblocking(False)
        # tell the selector the socket is ready for a `read` callback
        self.sel.register(conn, selectors.EVENT_READ, self._read)
        return None, "accept", conn.getpeername(), "ok"

    def _trydecode(self, stream: bytes) -> tuple[Any, str, str]:
        """
        inner stream-decode handler function for self.read().

        Args:
            stream: bytes received from peer.

        Returns:
            stream: output of self.decoder, if successful; undecoded stream
                if not.
            event: "decoded {nbytes}" on successful decode; "read {nbytes}"
                on failed decode
            status: "ok" if successful; description of decode error if not
        """
        nbytes = len(stream)
        try:
            stream = self.decoder(stream)
            event, status = f"decoded {nbytes}", "ok"
        except KeyboardInterrupt:
            raise
        except Exception as ex:
            event, status = f"read {nbytes}", f"decode error;{type(ex)};{ex}"
        return stream, event, status

    def _tryread(self, peersock: socket.socket) -> tuple[Optional[bytes], str]:
        """
        inner read-individual-chunk-from-socket handler for `read`

        Args:
            peersock: open socket to read chunk from

        Returns:
            data: bytes read from peersock, if any
            status: "streaming" if receive operation was successful;
                "unavailable" if peer is temporarily unavailable

        Raises:
            OSError: for any OSError other than temporary unavailability
        """
        try:
            data = peersock.recv(self.chunksize)
        except OSError as ose:
            # BlockingIOError is the portable "would block" signal; the
            # message check is kept so errors the original code treated as
            # temporary are still treated that way.
            if isinstance(ose, BlockingIOError) or "temporarily" in str(ose):
                return None, "unavailable"
            raise
        return data, "streaming"

    def _read(self, peersock: socket.socket) -> tuple[Any, str, str]:
        """
        read-from-socket callback for i/o threads.

        Args:
            peersock: open socket to peer

        Returns:
            stream: output of self.decoder on successful read and decode;
                bytes read from peersock on successful read and failed decode;
                whatever bytes were read so far (possibly empty) on failed
                read
            event: description of read/decode length or read/decode error
            status: "ok" if everything went well, error code if not
        """
        event, status, stream = None, "unk", b""
        try:
            self.sel.unregister(peersock)
            waiting, unwait = timeout_factory(timeout=self.timeout)
            stream, length = peersock.recv(self.chunksize), None
            length = read_header(stream)["length"]
            while waiting() >= 0:  # syntactic handwaving. breaks w/exception.
                if (length is not None) and (len(stream) >= length):
                    break
                data, status = self._tryread(peersock)
                if status == "unavailable" or not data:
                    # nothing arrived. a zero-length read makes no progress
                    # (the peer has most likely closed its end), so treat it
                    # like "unavailable": wait, and don't call unwait().
                    # presumably unwait() resets the timeout -- this way a
                    # dead peer ends in "timed out" instead of spinning.
                    time.sleep(self.delay)
                    continue
                stream += data
                unwait()
            stream, event, status = self._trydecode(stream)
            # tell the selector the socket is ready for an `ack` callback
            self.sel.register(
                peersock, selectors.EVENT_WRITE, curry(self._ack)(stream)
            )
        except BrokenPipeError:
            self.peers.pop(peersock.getpeername(), None)
            status = "broken pipe"
        except TimeoutError:
            self.peers.pop(peersock.getpeername(), None)
            status = "timed out"
        except KeyError as ke:
            if "is not registered" in str(ke):
                status = f"{peersock} already unregistered"
            else:
                raise
        except BlockingIOError:
            self.peers.pop(peersock.getpeername(), None)
            status = f"cleared blocking socket {peersock.getpeername()}"
        except (IOError, OSError) as err:
            status = f"{type(err)}: {str(err)}"
        event = f"read {len(stream)}" if event is None else event
        return stream, event, status

    def _ack(
        self, data: Any, peersock: socket.socket
    ) -> tuple[None, str, str]:
        """
        acknowledgement callback for read threads. calls self.ackcheck if we
        have one; if we don't, just sends an empty comm to the peer.

        Args:
            data: decoded object or raw bytes read from peersock.
                this is typically not passed directly but rather curried into
                a copy of _ack() constructed in _read().
            peersock: open socket to peer.

        Returns:
            stream: always None. for signature compatibility
            event: "ack attempt" if attempt failed; for successful ack, if
                we have an ackcheck function, whatever status code it returned,
                or "sent_ack" if we don't have an ackcheck
            status: empty string if we have no response; "ok" on successful
                ack; description of error on failed ack
        """
        try:
            self.sel.unregister(peersock)
            response, event = make_comm(b""), "sent_ack"
            if self.ackcheck is not None:
                response, event = self.ackcheck(peersock, data)
            if response is None:
                return None, event, ""
            waiting, unwait = timeout_factory(timeout=self.timeout)
            while len(response) > 0:
                try:
                    # attempt to send chunk of designated size...
                    payload = response[: self.chunksize]
                    sent = peersock.send(payload)
                    unwait()
                    # ...but only truncate by amount we successfully sent
                    response = response[sent:]
                except BrokenPipeError:
                    # don't need to release peerlock here because we always
                    # release it after _ack
                    return None, "ack attempt", "broken pipe"
                except OSError:
                    waiting()
                    time.sleep(self.delay)
            time.sleep(0.1)
            return None, event, "ok"
        except (KeyError, ValueError) as kve:
            if "is not registered" in str(kve):
                # someone else got here first
                return None, "ack attempt", f"{kve}"
            raise
        except TimeoutError as te:
            return None, "ack attempt", f"{te}"

    def _check_peerage(
        self, key: Union[selectors.SelectorKey, socket.socket]
    ) -> tuple[Optional[tuple[str, int]], bool]:
        """
        check to see if another thread is already handling a peer. essentially
        a synchronization lock function.

        Args:
            key: socket to peer, or selector key wrapping socket

        Returns:
            peername: tuple of (ip, port), or None if connection on socket is
                not yet accepted or socket is already closed
            peered: True if another thread is handling the peer, False if not
                (including if the socket is pending/unreadable)
        """
        try:
            if hasattr(key, "fileobj"):
                # noinspection PyUnresolvedReferences
                peer = key.fileobj.getpeername()
            else:
                peer = key.getpeername()
            return peer, peer in self.peers
        except OSError:
            # most likely indicates that socket is already closed, or that
            # this is an incoming, not-yet-accepted connection
            return None, False
locked = property(_get_locked, _set_locked) class-attribute instance-attribute

is the server locked, preventing it from communicating with peers? note that TCPTalk never locks itself. Its optional lockout behavior is intended to be handled by some sort of lock object shared with a handler application, for cases in which something needs locks for synchronization.

__init__(host, port, n_threads=4, poll=0.01, decoder=read_comm, ackcheck=None, executor=None, lock=None, chunksize=16384, delay=0.01, timeout=10)

Note: TCPTalk immediately starts running when initialized.

Parameters:

Name Type Description Default
host str

hostname for server's socket, either an ip address or a resolvable name like "localhost"

required
port int

port number for server's socket

required
n_threads int

number of i/o threads. server will always launch n_threads + 1 threads; the +1 is its selector thread.

4
poll float

poll/spool delay for threads

0.01
decoder Optional[Callable]

optional callable used to decode received messages

read_comm
ackcheck Optional[Callable]

callable for inserting message responses -- this can be used to attach a Station's responder rules to the server

None
executor Optional[ThreadPoolExecutor]

optional ThreadPoolExecutor, if the server should run in existing thread pool

None
lock Optional[Lock]

optional lock, if the tcp server should be subject to an external lockout

None
chunksize int

chunk size for reading responses from socket

16384
delay float

time to wait before checking socket again after failed read

0.01
timeout int

timeout on socket

10
Source code in hostess/station/talkie.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
def __init__(
    self,
    host: str,
    port: int,
    n_threads: int = 4,
    poll: float = 0.01,
    decoder: Optional[Callable] = read_comm,
    ackcheck: Optional[Callable] = None,
    executor: Optional[ThreadPoolExecutor] = None,
    lock: Optional[threading.Lock] = None,
    chunksize: int = 16384,
    delay: float = 0.01,
    timeout: int = 10,
):
    """
    Set up the listening socket and launch the server's threads. A
    `TCPTalk` starts running as soon as it is initialized.

    Args:
        host: hostname for the server's socket: an ip address or a
            resolvable name like "localhost"
        port: port number for the server's socket
        n_threads: number of i/o threads. the server always launches
            n_threads + 1 threads; the extra one is its selector thread.
        poll: poll/spool delay for threads
        decoder: optional callable used to decode received messages
        ackcheck: callable for inserting message responses -- this can be
            used to attach a Station's responder rules to the server
        executor: optional ThreadPoolExecutor, if the server should run in
            an existing thread pool
        lock: optional lock, if the tcp server should be subject to an
            external lockout
        chunksize: chunk size for reading responses from socket
        delay: time to wait before checking socket again after failed read
        timeout: timeout on socket
    """
    self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    atexit.register(self.sock.close)
    self.status = "initializing"
    self.timeout = timeout
    self.delay = delay
    self.chunksize = chunksize
    self.poll = poll
    self.decoder = decoder
    self.ackcheck = ackcheck
    self.sel = selectors.DefaultSelector()
    try:
        self.sock.bind((host, port))
        self.sock.listen()
        self.sock.setblocking(False)
        if executor is None:
            executor = ThreadPoolExecutor(n_threads + 1)
        self.exec = executor
        self._lock = lock
        self.threads = {}
        self.events = []
        self.data = Mailbox()
        self.peers = {}
        io_names = range(n_threads)
        # one work queue per i/o thread; a signal slot for each i/o thread
        # plus one for the selector thread
        self.queues = {io_name: [] for io_name in io_names}
        self.signals = dict.fromkeys((*io_names, "select"))
        self.sig = signal_factory(self.signals)
        self.threads["select"] = executor.submit(self.launch_selector)
        for io_name in io_names:
            self.threads[io_name] = executor.submit(self.launch_io, io_name)
        self.status = "running"
    except Exception:
        self.sock.close()
        self.status = "crashed"
        self.kill()
        raise
_accept(sock)

accept-connection callback for i/o threads.

Parameters:

Name Type Description Default
sock socket

TCP socket we've received a connection request on. in normal operation, this will always be self.sock.

required

Returns:

Name Type Description
stream None

always None. (present for signature compatibility)

event str

"blocking" if accepting connection would block; "accept" on successful accept

peername Optional[tuple[str, int]]

name of peer on successful accept (tuple of ip address, port). None if blocking (because peer name is not known).

status str

"blocking on self" if blocking, "ok" on successful accept

Source code in hostess/station/talkie.py
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
def _accept(
    self, sock: socket.socket
) -> tuple[None, str, Optional[tuple[str, int]], str]:
    """
    callback run by an i/o thread when a listening socket has a pending
    connection: accept it and register the new connection with the
    selector so that it can subsequently be read from.

    Args:
        sock: listening TCP socket with a pending connection request. in
            normal operation, this will always be self.sock.

    Returns:
        stream: always None. (keeps the return shape parallel to the other
            callbacks)
        event: "accept" if a connection was accepted; "blocking" if
            `sock.accept()` raised BlockingIOError
        peername: result of `getpeername()` on the accepted connection;
            None if nothing was accepted
        status: "ok" on successful accept; "blocking on self" if blocking
    """
    try:
        # only the connection object is needed; the address returned
        # alongside it is not used
        connection = sock.accept()[0]
    except BlockingIOError:
        return None, "blocking", None, "blocking on self"
    connection.setblocking(False)
    # queue a `_read` callback for when the peer has sent something
    self.sel.register(connection, selectors.EVENT_READ, self._read)
    peername = connection.getpeername()
    return None, "accept", peername, "ok"
_ack(data, peersock)

acknowledgement callback for read threads. calls self.ackcheck if we have one; if we don't, just sends an empty comm to the peer.

Parameters:

Name Type Description Default
data Any

decoded object or raw bytes read from peersock. this is typically not passed directly but rather curried into a copy of _ack() constructed in _read().

required
peersock socket

open socket to peer.

required

Returns:

Name Type Description
stream None

always None. for signature compatibility

event str

"ack attempt" if attempt failed; for successful ack, if we have an ackcheck function, whatever status code it returned, or "sent_ack" if we don't have an ackcheck

status str

empty string if we have no response; "ok" on successful ack; description of error on failed ack

Source code in hostess/station/talkie.py
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
def _ack(
    self, data: Any, peersock: socket.socket
) -> tuple[None, str, str]:
    """
    acknowledgement callback for i/o threads. unregisters the peer socket
    from the selector, builds a response (via self.ackcheck if we have
    one; otherwise an empty comm from `make_comm(b"")`), and sends it to
    the peer in pieces of at most self.chunksize.

    Args:
        data: decoded object or raw bytes read from peersock.
            this is typically not passed directly but rather curried into
            a copy of _ack() constructed in _read().
        peersock: open socket to peer.

    Returns:
        stream: always None. for signature compatibility
        event: "ack attempt" if attempt failed; for successful ack, if
            we have an ackcheck function, whatever status code it returned,
            or "sent_ack" if we don't have an ackcheck
        status: empty string if we have no response (ackcheck returned
            None as its response); "ok" on successful ack; description of
            error on failed ack

    Raises:
        KeyError, ValueError: re-raised unless the message contains
            "is not registered" (i.e. unless it looks like the selector
            complaining that peersock was already unregistered).
    """
    try:
        # take the socket out of the selector so that it is not queued
        # again while we are writing to it
        self.sel.unregister(peersock)
        response, event = make_comm(b""), "sent_ack"
        if self.ackcheck is not None:
            response, event = self.ackcheck(peersock, data)
        if response is None:
            # ackcheck decided nothing should be sent
            return None, event, ""
        # NOTE(review): presumably waiting() raises TimeoutError once
        # self.timeout has elapsed since the last unwait() -- confirm
        # against timeout_factory
        waiting, unwait = timeout_factory(timeout=self.timeout)
        while len(response) > 0:
            try:
                # attempt to send chunk of designated size...
                payload = response[: self.chunksize]
                sent = peersock.send(payload)
                unwait()
                # ...but only truncate by amount we successfully sent
                response = response[sent:]
            except BrokenPipeError:
                # don't need to release peerlock here because we always
                # release it after _ack
                return None, "ack attempt", "broken pipe"
            except OSError:
                # any other socket error is treated as transient: note
                # that we are waiting, pause, and retry the same bytes.
                # an exception raised by waiting() here is not caught by
                # this inner try; it propagates to the outer handlers.
                waiting()
                time.sleep(self.delay)
        # fixed pause after the last send, before reporting success
        # NOTE(review): reason for the 0.1 s pause is not evident here
        time.sleep(0.1)
        return None, event, "ok"
    except (KeyError, ValueError) as kve:
        if "is not registered" in str(kve):
            # someone else got here first
            return None, "ack attempt", f"{kve}"
        raise
    except TimeoutError as te:
        return None, "ack attempt", f"{te}"
_check_peerage(key)

check to see if another thread is already handling a peer. essentially a synchronization lock function.

Parameters:

Name Type Description Default
key Union[SelectorKey, socket]

socket to peer, or selector key wrapping socket

required

Returns:

Name Type Description
peername Optional[tuple[str, int]]

tuple of (ip, port) as returned by the socket's getpeername(), or None if connection on socket is not yet accepted or socket is already closed

peered bool

True if another thread is handling the peer, False if not (including if the socket is pending/unreadable)

Source code in hostess/station/talkie.py
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
def _check_peerage(
    self, key: Union[selectors.SelectorKey, socket.socket]
) -> tuple[Optional[tuple[str, int]], bool]:
    """
    look up the peer on the other end of a socket and report whether it
    is currently recorded in self.peers (i.e. whether some thread has
    already claimed it). serves as a lightweight synchronization check.

    Args:
        key: socket to peer, or selector key wrapping that socket in its
            `fileobj` attribute

    Returns:
        peername: result of the socket's `getpeername()`, or None if that
            call raised OSError
        peered: True if peername is a key of self.peers; False if it is
            not, or if the peer name could not be determined
    """
    # a selector key carries its socket as `fileobj`; anything without
    # that attribute is treated as the socket itself
    peersock = getattr(key, "fileobj", key)
    try:
        peername = peersock.getpeername()
    except OSError:
        # most likely indicates that socket is already closed, or that
        # this is an incoming, not-yet-accepted connection
        return None, False
    return peername, peername in self.peers
_get_locked()

getter for self.locked

Source code in hostess/station/talkie.py
142
143
144
145
146
def _get_locked(self) -> bool:
    """
    getter for self.locked: False when no lock object is attached,
    otherwise whatever the attached lock's `locked()` reports.
    """
    lock = self._lock
    return False if lock is None else lock.locked()
_handle_callback(callback, peername, peersock)

inner callback-handler tree for i/o threads. should only ever be called from an io thread loop (TCPTalk.launch_io()).

Parameters:

Name Type Description Default
callback Callable

one of self._read, self._ack, or self._accept. attached to peersock by self.sel, queued by a call to self.sel.register in an io or selector thread.

required
peername Optional[str]

name of peer, if known (tuple of (ip, fileno)).

required
peersock socket

open socket to peer

required

Returns:

Name Type Description
stream Any

decoded or raw bytes read from socket, if any

event str

description of event, primarily for logging

peername Optional[tuple[str, int]]

existing or newly-discovered peername (ip, fileno), or None if we still don't know it

status str

code for event, primarily for control flow

Source code in hostess/station/talkie.py
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
def _handle_callback(
    self,
    callback: Callable,
    peername: Optional[str],
    peersock: socket.socket,
) -> tuple[Any, str, Optional[tuple[str, int]], str]:
    """
    dispatch a queued callback based on its `__name__` and normalize the
    result to a 4-tuple. should only ever be called from an io thread
    loop (`TCPTalk.launch_io()`).

    Args:
        callback: one of self._read, self._ack, or self._accept. attached
            to peersock by self.sel, queued by a call to self.sel.register
            in an io or selector thread.
        peername: name of peer, if known.
        peersock: open socket to peer

    Returns:
        stream: whatever the callback returned as its stream (None for
            guarded or unrecognized callbacks)
        event: description of event, primarily for logging
        peername: the peername passed in, or, for `_accept`, the one the
            callback reported
        status: code for event, primarily for control flow
    """
    kind = callback.__name__
    if kind == "_accept":
        # the accept callback reports the peer name itself
        stream, event, peername, status = callback(peersock)
        return stream, event, peername, status
    if kind == "_ack":
        # usually a partially-evaluated version of self._ack constructed
        # in self._read()
        stream, event, status = callback(peersock)
        # acknowledgement attempted: release the peering-lock entry
        self.peers.pop(peername, None)
        return stream, event, peername, status
    if kind == "_read":
        # claim the peer before reading so other threads skip it
        self.peers[peername] = True
        try:
            stream, event, status = callback(peersock)
        except KeyError:
            # attempting to unregister an already-unregistered conn
            return None, "guard", peername, "already unregistered"
        return stream, event, peername, status
    # who attached some weirdo function?
    return None, "skipped", peername, "invalid callback"
_read(peersock)

read-from-socket callback for i/o threads.

Parameters:

Name Type Description Default
peersock socket

open socket to peer

required

Returns:

Name Type Description
stream Any

output of self.decoder on successful read and decode; bytes read from peersock on successful read and failed decode; None on failed read

event str

description of read/decode length or read/decode error

status str

"ok" if everything went well, error code if not

Source code in hostess/station/talkie.py
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
def _read(self, peersock: socket.socket) -> tuple[Any, str, str]:
    """
    read-from-socket callback for i/o threads. unregisters peersock from
    the selector, reads until the length announced in the header has been
    received, attempts to decode what was read, and re-registers peersock
    for writing with a curried `_ack` callback.

    Args:
        peersock: open socket to peer

    Returns:
        stream: output of self.decoder on successful read and decode;
            bytes read from peersock on successful read and failed decode;
            whatever bytes had been received so far (possibly empty) if
            the read itself failed
        event: description of read/decode length or read/decode error
        status: "ok" if everything went well, error code if not

    Raises:
        KeyError: if raised inside the read with a message that does not
            contain "is not registered"
    """
    event, status, stream = None, "unk", b""
    try:
        self.sel.unregister(peersock)
        # NOTE(review): presumably waiting() raises TimeoutError once
        # self.timeout has elapsed since the last unwait() -- confirm
        # against timeout_factory
        waiting, unwait = timeout_factory(timeout=self.timeout)
        stream, length = peersock.recv(self.chunksize), None
        length = read_header(stream)["length"]
        while waiting() >= 0:  # syntactic handwaving. breaks w/exception.
            if (length is not None) and (len(stream) >= length):
                break
            data, status = self._tryread(peersock)
            # an empty chunk means no progress (on a TCP socket it means
            # the peer has closed). treat it like "unavailable" instead
            # of resetting the timeout, so that a peer that disconnects
            # mid-message cannot keep this thread spinning forever.
            if status == "unavailable" or not data:
                time.sleep(self.delay)
                continue
            stream += data
            unwait()
        stream, event, status = self._trydecode(stream)
        # tell the selector the socket is ready for an `ack` callback
        self.sel.register(
            peersock, selectors.EVENT_WRITE, curry(self._ack)(stream)
        )
    except BrokenPipeError:
        self.peers.pop(peersock.getpeername(), None)
        status = "broken pipe"
    except TimeoutError:
        self.peers.pop(peersock.getpeername(), None)
        status = "timed out"
    except KeyError as ke:
        if "is not registered" in str(ke):
            # another thread already took this socket out of the selector
            status = f"{peersock} already unregistered"
        else:
            raise
    except BlockingIOError:
        self.peers.pop(peersock.getpeername(), None)
        status = f"cleared blocking socket {peersock.getpeername()}"
    except (IOError, OSError) as err:
        status = f"{type(err)}: {str(err)}"
    event = f"read {len(stream)}" if event is None else event
    return stream, event, status
_set_locked(_val)

intentionally nonfunctional setter for self.locked. Always raises AttributeError.

Source code in hostess/station/talkie.py
148
149
150
151
152
153
def _set_locked(self, _val: bool):
    """
    setter for self.locked that deliberately refuses every assignment:
    whatever value is passed, AttributeError is raised.
    """
    message = "server is not directly lockable"
    raise AttributeError(message)
_trydecode(stream)

inner stream-decode handler function for self.read().

Parameters:

Name Type Description Default
stream bytes

bytes received from peer.

required

Returns:

Name Type Description
stream Any

output of self.decoder, if successful; undecoded stream if not.

event str

"decoded {nbytes}" on successful decode; "read {nbytes}" on failed decode

status str

"ok" if successful; description of decode error if not

Source code in hostess/station/talkie.py
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
def _trydecode(self, stream: bytes) -> tuple[Any, str, str]:
    """
    run self.decoder over bytes received from a peer, falling back to the
    raw bytes if the decoder raises.

    Args:
        stream: bytes received from peer.

    Returns:
        stream: output of self.decoder, if successful; undecoded stream
            if not.
        event: "decoded {nbytes}" on successful decode; "read {nbytes}"
            on failed decode
        status: "ok" if successful; description of decode error if not
    """
    size = len(stream)
    try:
        decoded = self.decoder(stream)
    except Exception as ex:
        # only Exception subclasses are absorbed; KeyboardInterrupt and
        # other BaseExceptions still propagate to the caller
        return stream, f"read {size}", f"decode error;{type(ex)};{ex}"
    return decoded, f"decoded {size}", "ok"
_tryread(peersock)

inner read-individual-chunk-from-socket handler for read

Parameters:

Name Type Description Default
peersock socket

open socket to read chunk from

required

Returns:

Name Type Description
data Optional[bytes]

bytes read from peersock, if any

status str

"streaming" if receive operation was successful; "unavailable" if peer is temporarily unavailable

Raises:

Type Description
OSError

for any OSError other than temporary unavailability

Source code in hostess/station/talkie.py
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
def _tryread(self, peersock: socket.socket) -> tuple[Optional[bytes], str]:
    """
    inner read-individual-chunk-from-socket handler for `_read`

    Args:
        peersock: open socket to read chunk from

    Returns:
        data: bytes read from peersock (at most self.chunksize), or None
            if nothing could be read right now
        status: "streaming" if receive operation was successful;
            "unavailable" if peer is temporarily unavailable

    Raises:
        OSError: for any OSError other than temporary unavailability
    """
    try:
        data = peersock.recv(self.chunksize)
    except BlockingIOError:
        # a non-blocking socket with nothing to read raises this
        # (EAGAIN / EWOULDBLOCK); detect it by type so the check does not
        # depend on the platform's or locale's error message wording
        return None, "unavailable"
    except OSError as ose:
        # fallback for other OSErrors whose message reports temporary
        # unavailability; anything else is a real error
        if "temporarily" not in str(ose):
            raise
        return None, "unavailable"
    return data, "streaming"
kill(signal=0)

immediately shut down, closing the server's socket and attempting to terminate all its threads.

Parameters:

Name Type Description Default
signal int

termination signal to send to threads. changing this number does nothing special by default and is intended for subclasses or application-specific purposes.

0
Source code in hostess/station/talkie.py
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
def kill(self, signal: int = 0):
    """
    shut down right away: close the server's socket, send `signal` to
    every thread via self.sig (if it exists), and mark the server
    "terminated".

    Args:
        signal: termination signal to send to threads. changing this
            number does nothing special by default and is intended for
            subclasses or application-specific purposes.
    """
    self.sock.close()
    # self.sig is created partway through __init__, so it is missing when
    # kill() runs as cleanup for a failed initialization
    if hasattr(self, "sig"):
        self.sig("all", signal)
    self.status = "terminated"
launch_io(name)

launch a read thread in this server's executor. must be run in a thread or it will block and be useless.

Parameters:

Name Type Description Default
name Union[str, int]

identifier for thread

required
Source code in hostess/station/talkie.py
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
def launch_io(self, name: Union[str, int]):
    """
    run an i/o worker loop for this server. must be run in a thread or it
    will block and be useless.

    each pass sleeps for self.poll, pops one (selector key, id) pair from
    this worker's queue, runs the callback attached to the key, appends a
    log record to self.events, and, if the callback produced a decoded
    message whose "body" is not raw bytes, appends that record plus the
    message to self.data. the loop ends once self.signals[name] is set to
    anything other than None.

    Args:
        name: identifier for thread; also the key of this worker's queue
            in self.queues and of its signal in self.signals
    """
    while self.signals.get(name) is None:
        time.sleep(self.poll)
        try:
            key, id_ = self.queues[name].pop()
        except IndexError:
            # nothing queued for this worker
            continue
        # noinspection PyProtectedMember
        peername, peerage = self._check_peerage(key)
        callback, peersock = key.data, key.fileobj  # explanatory variables
        if (peerage is True) and (callback.__name__ != "_ack"):
            # connection / read already handled
            continue
        if self.locked and callback.__name__ != "_ack":
            # while the server is locked only acknowledgements go through
            continue
        try:
            stream, event, peername, status = self._handle_callback(
                callback, peername, peersock
            )
            # task was already handled (or unhandleable)
            if event == "guard":
                continue
        except OSError as err:
            stream, event, status = None, "oserror", str(err)
        event = {
            "event": event,
            "peer": peername,
            "status": status,
            "time": logstamp(),
            "thread": name,
            "id": id_,
            "callback": callback.__name__,
        }
        self.events.append(event)
        if (stream is None) or (len(stream) == 0):
            continue
        try:
            body = stream["body"]
        except (TypeError, KeyError):
            # stream is not a decoded message: e.g. the raw bytes that
            # `_read` hands back when decoding failed (bytes cannot be
            # indexed by "body"). the failure is already recorded in
            # self.events, so skip it rather than crash this thread.
            continue
        if not isinstance(body, bytes):  # control codes, etc.
            self.data.append(event | {"content": stream})
launch_selector()

launch the server's selector thread.

Source code in hostess/station/talkie.py
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
def launch_selector(self):
    """
    run the server's selector loop: register the listening socket for
    accepts, then repeatedly poll the selector and deal ready sockets out
    to the i/o queues in round-robin order until self.signals["select"]
    is set to something other than None.
    """
    next_id = 0
    targets = cycle(self.queues.keys())
    try:
        self.sel.register(self.sock, selectors.EVENT_READ, self._accept)
    except KeyError:  # will occur on relaunch
        pass
    while self.signals.get("select") is None:
        try:
            ready = self.sel.select(1)
        except TimeoutError:
            continue
        # snapshot taken once per pass; sockets already sitting in some
        # queue are skipped so pending events do not pile up
        already_queued = self.queued_descriptors()
        fresh = [key for key, _mask in ready if key.fd not in already_queued]
        for key in fresh:
            self.queues[next(targets)].append((key, next_id))
            next_id += 1
        time.sleep(self.poll)
queued_descriptors()

Returns:

Type Description
set[int]

set of all file descriptors for currently-queued sockets. Primarily for selector thread loop but can also be used diagnostically.

Source code in hostess/station/talkie.py
209
210
211
212
213
214
215
216
def queued_descriptors(self) -> set[int]:
    """
    Returns:
         set of all file descriptors for currently-queued sockets, i.e.
              the `fd` of the first element of every entry in every
              queue of self.queues. Primarily for selector thread loop
              but can also be used diagnostically.
    """
    return {
        entry[0].fd for queue in self.queues.values() for entry in queue
    }
tend()

check on all threads we believe to be running. If any of them aren't, relaunch them. Never called automatically.

Returns:

Type Description
Optional[list]

None, if server is still initializing. otherwise, list of Exceptions raised by crashed threads (empty if none crashed).

Source code in hostess/station/talkie.py
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
def tend(self) -> Optional[list]:
    """
    inspect every thread we believe to be running and relaunch any that
    is not. Never called automatically.

    Returns:
        None, if server is still initializing. otherwise, a list with one
            entry per relaunched thread: the exception it raised, or None
            if none is available (empty list if nothing was relaunched).
    """
    if self.status == "initializing":
        return None
    collected = []
    # iterate over a snapshot because self.threads is modified below
    for name, worker in tuple(self.threads.items()):
        # will be dict if it is a crashed thread running in trywrap
        is_crash_record = isinstance(worker, dict)
        if (not is_crash_record) and (worker._state == "RUNNING"):
            continue
        # pulse the thread's signal: set it to 0, give the loop time to
        # notice, then clear it again before relaunching
        self.sig(name, 0)
        time.sleep(self.poll * 2)
        self.sig(name, None)
        removed = self.threads.pop(name, None)
        if isinstance(removed, dict):
            collected.append(removed["exception"])
        elif removed is None:
            collected.append(None)
        else:
            collected.append(removed.exception())
        if name == "select":
            replacement = self.exec.submit(self.launch_selector)
        else:
            replacement = self.exec.submit(self.launch_io, name)
        self.threads[name] = replacement
    self.status = "running"
    return collected

read_from_socket(headerread, sock, timeout)

one-shot read-all-data-from-socket function

Parameters:

Name Type Description Default
headerread Optional[Callable[[bytes], dict]]

optional function to read data header in order to determine total data size

required
sock socket

open socket to peer

required
timeout float

timeout duration

required

Returns:

Type Description
bytes

bytes received from peer

Raises:

Type Description
TimeoutError

if any attempt to read a chunk of data takes more than timeout seconds

Source code in hostess/station/talkie.py
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
def read_from_socket(
    headerread: Optional[Callable[[bytes], dict]],
    sock: socket.socket,
    timeout: float,
) -> bytes:

    """
    one-shot read-all-data-from-socket function. reads in pieces of up to
    16384 bytes and closes the socket once reading is complete.

    Args:
        headerread: optional function applied to the first piece received;
            the "length" entry of its result is taken as the total number
            of bytes to expect. if it is None, or it raises IOError or
            KeyError, reading simply stops at the first empty piece.
        sock: open socket to peer
        timeout: timeout duration, passed to `timeout_factory`

    Returns:
        bytes received from peer

    Raises:
        TimeoutError: if any attempt to read a chunk of data takes more than
            timeout seconds (presumably raised by the `waiting` callback
            from `timeout_factory` -- TODO confirm)
    """
    # TODO: check that timeout is working
    # TODO, maybe: move _tryread?
    waiting, unwait = timeout_factory(timeout=timeout)
    response = sock.recv(16384)
    expected = None
    if headerread is not None:
        try:
            expected = headerread(response)["length"]
        except (IOError, KeyError):
            # header unreadable: fall back to reading until an empty piece
            expected = None
    while (expected is None) or (len(response) < expected):
        piece = sock.recv(16384)
        if len(piece) > 0:
            response += piece
            unwait()
        elif expected is None:
            # no known length and nothing more arrived: assume we're done
            break
        else:
            # known length not reached yet: note the wait and retry shortly
            waiting()
            time.sleep(0.01)
    sock.close()
    return response

stsend(data, host, port, timeout=10, delay=0, chunksize=None)

wrapper for tcp_send() that autoencodes data as hostess comms. Used by Delegates to send comms to their Station. See tcp_send() for a full description of arguments and return values.

Source code in hostess/station/talkie.py
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
@wraps(tcp_send)
def stsend(
    data: bytes,
    host: str,
    port: int,
    timeout: float = 10,
    delay: float = 0,
    chunksize: Optional[int] = None,
):
    """
    thin wrapper around `tcp_send()`: encodes `data` with `make_comm()`
    before sending and has the response header parsed with
    `read_header`. Used by Delegates to send comms to their Station.
    See `tcp_send()` for a full description of arguments and return values.
    """
    comm = make_comm(data)
    return tcp_send(
        comm,
        host,
        port,
        timeout=timeout,
        delay=delay,
        chunksize=chunksize,
        headerread=read_header,
    )

tcp_send(data, host, port, timeout=10, delay=0, chunksize=None, headerread=None)

one-shot send-data-over-TCP-and-get-response utility.

Parameters:

Name Type Description Default
data bytes

data to send

required
host str

hostname (usually ip address or 'localhost') of recipient

required
port int

port number of recipient

required
timeout float

how long to wait for successful connection / send (s)

10
delay float

delay between sends to socket (s)

0
chunksize Optional[float]

chunk size for sends; None means unchunked unless a nonzero delay is specified, in which case chunk size defaults to 16384

None
headerread Optional[Callable[[bytes], dict]]

optional function to decode response header, specifically in order to determine intended length of response

None

Returns:

Name Type Description
response Union[str, bytes]

response, if successfully received (possibly an empty bytestring); "timeout" on timeout, "connection refused" on failed connection

sockname Optional[int]

local address of the socket (the value returned by its getsockname()), if a connection was ever established; None if not

Source code in hostess/station/talkie.py
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
def tcp_send(
    data: bytes,
    host: str,
    port: int,
    timeout: float = 10,
    delay: float = 0,
    chunksize: Optional[float] = None,
    headerread: Optional[Callable[[bytes], dict]] = None,
) -> tuple[Union[str, bytes], Optional[tuple[str, int]]]:
    """
    one-shot send-data-over-TCP-and-get-response utility.

    Args:
        data: data to send
        host: hostname (usually ip address or 'localhost') of recipient
        port: port number of recipient
        timeout: how long to wait for successful connection / send (s)
        delay: delay between sends to socket (s)
        chunksize: chunk size for sends; None means unchunked unless a nonzero
            `delay` is specified, in which case chunk size defaults to 16384
        headerread: optional function to decode response header, specifically
            in order to determine intended length of response

    Returns:
        response: response, if successfully received (possibly an empty
            bytestring); "timeout" on timeout, "connection refused" on any
            ConnectionError
        sockname: local address of the socket (result of `getsockname()`),
            if a connection was ever established; None if not

    Raises:
        ValueError: if chunked sending is requested with a chunk size
            smaller than 1 (which could never make progress)
    """
    chunked = (delay > 0) or (chunksize is not None)
    if chunked:
        # slicing needs an int, and a size below 1 would loop forever
        chunksize = 16384 if chunksize is None else int(chunksize)
        if chunksize < 1:
            raise ValueError("chunksize must be a positive integer")
    sockname = None
    # the context manager closes the socket on every exit path
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        try:
            sock.settimeout(timeout)
            sock.connect((host, port))
            sockname = sock.getsockname()
            if chunked:
                while len(data) > 0:
                    data, chunk = data[chunksize:], data[:chunksize]
                    # sendall, not send: send() may transmit only part of
                    # the chunk, and the remainder would be lost
                    sock.sendall(chunk)
                    time.sleep(delay)
            else:
                sock.sendall(data)
            # read the reply in both modes, so that chunked sends also
            # return (response, sockname) as documented
            response = read_from_socket(headerread, sock, timeout)
            return response, sockname
        except (TimeoutError, socket.timeout):
            # socket.timeout only became an alias of TimeoutError in
            # Python 3.10; name both so older interpreters behave the same
            return "timeout", sockname
        except ConnectionError:
            return "connection refused", sockname

subutils

utilities for executing, managing, and watching subprocesses

Processlike = Union[Viewer, invoke.runners.Runner, invoke.runners.Result] module-attribute

union of expected types for interfaces to processes.

CBuffer

wrapper class for a Dispatcher that includes the ability to yield pseudo-buffers that execute the Dispatcher on "write". streamlined alternative to objects like Invoke's Watchers.

Source code in hostess/subutils.py
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
class CBuffer:
    """
    wrapper class for a Dispatcher that includes the ability to yield
    pseudo-buffers that execute the Dispatcher on "write". streamlined
    alternative to objects like Invoke's Watchers.
    """

    def __init__(self, dispatcher: Optional[Dispatcher] = None):
        """
        Args:
            dispatcher: Dispatcher to associate with any buffers this object
                produces. the Dispatcher should have at least steps "out",
                "err", and "done". If not specified, defaults to a simple
                console handler.

        Raises:
            TypeError: if the dispatcher lacks any of the steps "out",
                "err", or "done".
        """
        if dispatcher is None:
            dispatcher = console_stream_handler()
        self.dispatcher = dispatcher
        self.caches = self.dispatcher.caches
        # holds the dispatcher's `sends` while caching is paused
        self.deferred_sends = None
        # one buffer per dispatcher step, keyed by step name
        self.buffers = {
            stream: self.make_buffer(stream)
            for stream in self.dispatcher.steps
        }
        if not {"out", "err", "done"}.issubset(self.buffers.keys()):
            raise TypeError(
                "dispatcher must have at least out, err, and done steps."
            )

    def __getattr__(self, attr):
        # only reached when normal attribute lookup fails. if `dispatcher`
        # itself is what's missing (e.g. on an instance created without
        # running __init__, as copy/pickle do), delegating would re-enter
        # this method forever; fail cleanly instead.
        if attr == "dispatcher":
            raise AttributeError(attr)
        # NOTE(review): this calls the dispatcher's __getattr__ hook
        # directly rather than getattr(); it assumes Dispatcher defines
        # __getattr__ -- confirm that is intended
        return self.dispatcher.__getattr__(attr)

    def execute(
        self, *args: Any, stream: Sequence[Union[str, int]], **kwargs: Any
    ) -> Any:
        """
        execute a specified step or steps of the underlying Dispatcher.

        Args:
            *args: args to pass to dispatcher.execute()
            stream: names of steps to execute (keyword-only)
            **kwargs: kwargs to pass to dispatcher.execute()

        Returns:
            results of dispatcher.execute()
        """
        return self.dispatcher.execute(*args, **kwargs, steps=stream)

    def __call__(self, *args, stream, **kwargs):
        """alias for `execute()`."""
        # `stream` is keyword-only in execute(); passing it positionally
        # would fold it into *args and raise TypeError for the missing
        # keyword argument
        return self.execute(*args, stream=stream, **kwargs)

    def cacheoff(self):
        """
        Pause caching: stash the dispatcher's `sends` on this object and
        replace it with an empty dict.
        """
        self.deferred_sends = self.dispatcher.sends
        self.dispatcher.sends = {}

    def cacheon(self):
        """
        Resume caching: restore the `sends` stashed by `cacheoff()`. Does
        nothing if nothing is stashed.
        """
        if self.deferred_sends is None:
            return
        self.dispatcher.sends = self.deferred_sends
        self.deferred_sends = None

    def make_buffer(self, step: Union[str, int]) -> DispatchBuffer:
        """
        Create a buffer for a named step of self.dispatcher.

        Args:
            step: name of step.

        Returns:
            DispatchBuffer for that step.
        """
        return DispatchBuffer(self.dispatcher, step)

    def make_callback(
        self,
        signal_generator: Optional[Callable[[], Any]],
        step: Union[str, int],
    ) -> Callable:
        """
        create a callback for a named step of self.dispatcher.

        Args:
            signal_generator: optional niladic function whose output will
                be used as a signal to the named step of dispatcher. otherwise
                the callback will execute that step with no arguments.
            step:
                name of step to execute when callback fires.

        Returns:
            callback function that takes a waitable object as an optional
                argument and executes a step of dispatcher when it completes.
        """
        return self.dispatcher.yield_callback(signal_generator, step)

__init__(dispatcher=None)

Parameters:

Name Type Description Default
dispatcher Optional[Dispatcher]

Dispatcher to associate with any buffers this object produces. the Dispatcher should have at least steps "out", "err", and "done". If not specified, defaults to a simple console handler.

None
Source code in hostess/subutils.py
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
def __init__(self, dispatcher: Optional[Dispatcher] = None):
    """
    Bind a Dispatcher and create one DispatchBuffer per dispatcher step.

    Args:
        dispatcher: Dispatcher to associate with any buffers this object
            produces. The Dispatcher should have at least steps "out",
            "err", and "done". If not specified, defaults to a simple
            console handler.

    Raises:
        TypeError: if the dispatcher lacks any of the "out", "err", or
            "done" steps.
    """
    self.dispatcher = (
        console_stream_handler() if dispatcher is None else dispatcher
    )
    self.caches = self.dispatcher.caches
    # holds the dispatcher's sends while caching is paused (see cacheoff)
    self.deferred_sends = None
    buffers = {}
    for step_name in self.dispatcher.steps:
        buffers[step_name] = self.make_buffer(step_name)
    self.buffers = buffers
    required_steps = {"out", "err", "done"}
    if not required_steps.issubset(self.buffers.keys()):
        raise TypeError(
            "dispatcher must have at least out, err, and done steps."
        )

cacheoff()

Pause caching.

Source code in hostess/subutils.py
362
363
364
365
def cacheoff(self):
    """
    Pause caching.

    Stashes the dispatcher's `sends` mapping in `self.deferred_sends` and
    replaces it with an empty mapping. Calling this while caching is
    already paused is a no-op, so the stashed mapping is never overwritten
    by the empty placeholder.
    """
    if self.deferred_sends is not None:
        # already paused; keep the originally stashed sends intact
        return
    self.deferred_sends = self.dispatcher.sends
    self.dispatcher.sends = {}

cacheon()

Resume caching.

Source code in hostess/subutils.py
367
368
369
370
371
372
def cacheon(self):
    """
    Resume caching.

    Puts the mapping held in `self.deferred_sends` back onto the
    dispatcher and clears the stash. Does nothing if nothing is stashed.
    """
    stashed = self.deferred_sends
    if stashed is not None:
        self.dispatcher.sends = stashed
        self.deferred_sends = None

execute(*args, stream, **kwargs)

execute a specified step or steps of the underlying Dispatcher.

Parameters:

Name Type Description Default
*args Any

args to pass to dispatcher.execute()

()
stream Sequence[Union[str, int]]

names of steps to execute

required
**kwargs Any

kwargs to pass to dispatcher.execute()

{}

Returns:

Type Description
Any

results of dispatcher.execute()

Source code in hostess/subutils.py
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
def execute(
    self, *args: Any, stream: Sequence[Union[str, int]], **kwargs: Any
) -> Any:
    """
    Run one or more named steps of the underlying Dispatcher.

    Args:
        *args: positional arguments forwarded to dispatcher.execute()
        stream: names of the steps to execute; forwarded to
            dispatcher.execute() as its `steps` keyword argument
        **kwargs: keyword arguments forwarded to dispatcher.execute()

    Returns:
        whatever dispatcher.execute() returns
    """
    dispatcher = self.dispatcher
    outcome = dispatcher.execute(*args, **kwargs, steps=stream)
    return outcome

make_buffer(step)

Create a buffer for a named step of self.dispatcher.

Parameters:

Name Type Description Default
step Union[str, int]

name of step.

required

Returns:

Type Description
DispatchBuffer

DispatchBuffer for that step.

Source code in hostess/subutils.py
374
375
376
377
378
379
380
381
382
383
384
def make_buffer(self, step: Union[str, int]) -> DispatchBuffer:
    """
    Build a DispatchBuffer bound to one step of self.dispatcher.

    Args:
        step: name of the step the buffer should read from / write to.

    Returns:
        a new DispatchBuffer wrapping self.dispatcher and `step`.
    """
    dispatcher = self.dispatcher
    return DispatchBuffer(dispatcher, step)

make_callback(signal_generator, step)

create a callback for a named step of self.dispatcher.

Parameters:

Name Type Description Default
signal_generator Optional[Callable[[], Any]]

optional niladic function whose output will be used as a signal to the named step of dispatcher. otherwise the callback will execute that step with no arguments.

required
step Union[str, int]

name of step to execute when callback fires.

required

Returns:

Type Description
Callable

callback function that takes a waitable object as an optional argument and executes a step of dispatcher when it completes.

Source code in hostess/subutils.py
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
def make_callback(
    self,
    signal_generator: Optional[Callable[[], Any]],
    step: Union[str, int],
) -> Callable:
    """
    Build a callback bound to one step of self.dispatcher.

    Delegates to the dispatcher's `yield_callback()`.

    Args:
        signal_generator: optional niladic function whose output will
            be used as a signal to the named step of dispatcher. otherwise
            the callback will execute that step with no arguments.
        step: name of step to execute when callback fires.

    Returns:
        callback function that takes a waitable object as an optional
            argument and executes a step of dispatcher when it completes.
    """
    build_callback = self.dispatcher.yield_callback
    return build_callback(signal_generator, step)

DispatchBuffer

A buffer-like interface to a Dispatcher.

Source code in hostess/subutils.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
class DispatchBuffer:
    """
    File-like facade over a single step of a Dispatcher: reads return the
    step's cache, writes execute the step.
    """

    def __init__(self, dispatcher: "Dispatcher", step: Union[int, str]):
        """
        Args:
            dispatcher: Dispatcher to associate with this buffer
            step: named step of dispatcher to associate with this buffer
        """
        self.dispatcher = dispatcher
        self.step = step

    def read(self, _=None) -> Any:
        """
        Return the dispatcher's cache for the associated step. The
        argument is accepted and ignored.
        """
        step_caches = self.dispatcher.caches
        return step_caches[self.step]

    def write(self, message: Any) -> Any:
        """
        Execute only the associated step of the dispatcher, passing it
        `message`, and return the dispatcher's result.
        """
        only_this_step = (self.step,)
        return self.dispatcher.execute(message, steps=only_this_step)

    @staticmethod
    def flush():
        """No-op; nothing is ever pending."""
        return None

    @staticmethod
    def seek(*args, **kwargs):
        """No-op; accepts and ignores any arguments."""
        return None

__init__(dispatcher, step)

Parameters:

Name Type Description Default
dispatcher Dispatcher

Dispatcher to associate with this buffer

required
step Union[int, str]

named step of dispatcher to associate with this buffer

required
Source code in hostess/subutils.py
60
61
62
63
64
65
66
67
def __init__(self, dispatcher: "Dispatcher", step: Union[int, str]):
    """
    Args:
        dispatcher: Dispatcher to associate with this buffer
        step: named step of dispatcher to associate with this buffer
    """
    self.dispatcher = dispatcher
    self.step = step

flush() staticmethod

remain flushed

Source code in hostess/subutils.py
77
78
79
80
@staticmethod
def flush():
    """No-op; nothing is ever pending."""
    return None

read(_=None)

return the dispatcher's cache for the associated step

Source code in hostess/subutils.py
69
70
71
def read(self, _=None) -> Any:
    """
    Return the dispatcher's cache for the associated step. The argument is
    accepted and ignored.
    """
    step_caches = self.dispatcher.caches
    return step_caches[self.step]

seek(*args, **kwargs) staticmethod

seek nowhere

Source code in hostess/subutils.py
82
83
84
85
@staticmethod
def seek(*args, **kwargs):
    """No-op; accepts and ignores any arguments."""
    return None

write(message)

execute the associated step of dispatcher with message

Source code in hostess/subutils.py
73
74
75
def write(self, message: Any) -> Any:
    """
    Execute only the associated step of the dispatcher, passing it
    `message`, and return the dispatcher's result.
    """
    only_this_step = (self.step,)
    return self.dispatcher.execute(message, steps=only_this_step)

Dispatcher

Bases: Composition

Composition capable of skipping steps. By default, it creates empty caches that can be used as targets for sends but that do not autocapture like the superclass's add_captures() method. Intended for applications like handling console streams.

Source code in hostess/subutils.py
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
class Dispatcher(Composition):
    """
    Composition capable of skipping steps. By default creates empty caches
    that can be used as targets for sends but do not autocapture like the
    superclass's add_captures() method.
    Intended for applications like handling console streams.
    """

    def __init__(self, *args: Any, cached: bool = True, **kwargs: Any):
        """
        See Composition's documentation for a full description of valid
        args and kwargs.

        Args:
            *args: args to pass to the Composition constructor.
            cached: if True, initialize an empty cache list for every step
                (see `reset_caches()`).
            **kwargs: kwargs to pass to the composition constructor.
        """
        super().__init__(*args, **kwargs)
        self.caches = {}
        if cached is True:
            self.reset_caches()

    def _bind_special_runtime_kwargs(self, kwargs):
        """
        Composition method modified to permit skipping: records the steps
        named by the "steps" kwarg as the active steps for this execution.

        Raises:
            ValueError: if this Dispatcher is singular and was not passed
                exactly one step.
        """
        steps = listify(kwargs.get("steps"))
        if self.singular is True:
            if (len(steps) > 1) or (steps == [None]):
                raise ValueError(
                    "singular Dispatcher, must be passed exactly 1 step"
                )
        self.active_steps = steps

    def _do_step(self, step_name, state):
        """
        Composition method modified to permit skipping: a step that is not
        among the active steps returns `state` untouched.
        """
        # active_steps == [None] means "no restriction": run every step
        if self.active_steps != [None]:
            if step_name not in self.active_steps:
                return state
        return super()._do_step(step_name, state)

    def reset_caches(self):
        """Clear sends and give every step a fresh, empty cache list."""
        self.sends = {}
        for s in self.steps:
            self.caches[s] = []

    def __getattr__(self, attr):
        """
        Expose caches as attributes: `dispatcher.out` is
        `dispatcher.caches["out"]`.

        Raises:
            AttributeError: if `attr` is not the name of a cache.
        """
        # __getattr__ only runs after normal lookup has failed. Read caches
        # straight from the instance dict: going through `self.caches` when
        # it has not been assigned yet (e.g. something probes a missing
        # attribute before __init__ sets it, or on a copied / unpickled
        # instance) would re-enter __getattr__ and recurse without bound.
        caches = self.__dict__.get("caches", {})
        try:
            return caches[attr]
        except KeyError:
            pass
        raise AttributeError(f"No attribute or cache '{attr}'")

    def yield_buffer(self, step: Union[str, int]) -> DispatchBuffer:
        """
        construct a buffer whose read and write methods access and execute a
        cache/step of this object.

        Args:
            step: name of step to associate with the DispatchBuffer.

        Returns:
            DispatchBuffer bound to this object and `step`.
        """
        return DispatchBuffer(self, step)

    def yield_callback(
        self, signaler: Optional[Callable[[], None]], step: Union[str, int]
    ) -> Callable:
        """
        construct a callback function that fires a step of this object.

        Args:
            signaler: an optional niladic function whose output acts as a
                signal to the step when the callback fires. Otherwise, simply
                execute the step with no signal.
            step: name of step to execute with this callback.

        Returns:
            a function that takes one or no arguments. If it is called
                with no argument, it immediately fires the callback. If it
                is called with an object with a .wait method, it first
                calls its .wait method. This function returns whatever
                executing the step returns.
        """
        return partial(dispatch_callback, self, signaler, step)

    # steps selected for the current execution; [None] means "all steps"
    active_steps = ()
    # if True, every execution must name exactly one step
    singular = False

__init__(*args, cached=True, **kwargs)

See Composition's documentation for a full description of valid args and kwargs.

Parameters:

Name Type Description Default
*args Any

args to pass to the Composition constructor.

()
cached bool

if True, retain results of every step in self.caches.

True
**kwargs Any

kwargs to pass to the composition constructor.

{}
Source code in hostess/subutils.py
174
175
176
177
178
179
180
181
182
183
184
185
186
187
def __init__(self, *args: Any, cached: bool = True, **kwargs: Any):
    """
    Construct the underlying Composition, then set up caches.

    See Composition's documentation for a full description of valid
    args and kwargs.

    Args:
        *args: args to pass to the Composition constructor.
        cached: if True, initialize an empty cache list for every step
            by calling reset_caches().
        **kwargs: kwargs to pass to the composition constructor.
    """
    super().__init__(*args, **kwargs)
    self.caches = {}
    if cached is not True:
        return
    self.reset_caches()

_bind_special_runtime_kwargs(kwargs)

Composition method modified to permit skipping.

Source code in hostess/subutils.py
189
190
191
192
193
194
195
196
197
def _bind_special_runtime_kwargs(self, kwargs):
    """
    Composition method modified to permit skipping: records the steps
    named by the "steps" kwarg as the active steps for this execution.

    Raises:
        ValueError: if this Dispatcher is singular and was not passed
            exactly one step.
    """
    requested = listify(kwargs.get("steps"))
    if self.singular is True and (
        (len(requested) > 1) or (requested == [None])
    ):
        raise ValueError(
            "singular Dispatcher, must be passed exactly 1 step"
        )
    self.active_steps = requested

_do_step(step_name, state)

Composition method modified to permit skipping.

Source code in hostess/subutils.py
199
200
201
202
203
204
def _do_step(self, step_name, state):
    """
    Composition method modified to permit skipping: a step that is not
    among the active steps returns `state` untouched.
    """
    # active_steps == [None] means "no restriction": run every step
    restricted = self.active_steps != [None]
    if restricted and step_name not in self.active_steps:
        return state
    return super()._do_step(step_name, state)

reset_caches()

reset and initialize caches.

Source code in hostess/subutils.py
206
207
208
209
210
def reset_caches(self):
    """Clear sends and give every step a fresh, empty cache list."""
    self.sends = {}
    self.caches.update((step_name, []) for step_name in self.steps)

yield_buffer(step)

construct a buffer whose read and write methods access and execute a cache/step of this object.

Parameters:

Name Type Description Default
step Union[str, int]

name of step to associate with the DispatchBuffer.

required
Source code in hostess/subutils.py
219
220
221
222
223
224
225
226
227
def yield_buffer(self, step: Union[str, int]) -> DispatchBuffer:
    """
    Build a DispatchBuffer whose read and write methods access the cache
    of, and execute, one step of this object.

    Args:
        step: name of step to associate with the DispatchBuffer.

    Returns:
        DispatchBuffer bound to this object and `step`.
    """
    buffer = DispatchBuffer(self, step)
    return buffer

yield_callback(signaler, step)

construct a callback function that fires a step of this object.

Parameters:

Name Type Description Default
signaler Optional[Callable[[], None]]

an optional niladic function whose output acts as a signal to the step when the callback fires. Otherwise, simply execute the step with no signal.

required
step Union[str, int]

name of step to execute with this callback.

required

Returns:

Type Description
Callable

a function that takes one or no arguments. If it is called with no argument, it immediately fires the callback. If it is called with an object with a .wait method, it first calls its .wait method. This function returns whatever executing the step returns.

Source code in hostess/subutils.py
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
def yield_callback(
    self, signaler: Optional[Callable[[], None]], step: Union[str, int]
) -> Callable:
    """
    Build a callback function that fires a step of this object. The
    callback is `dispatch_callback` with this object, `signaler`, and
    `step` pre-bound.

    Args:
        signaler: an optional niladic function whose output acts as a
            signal to the step when the callback fires. Otherwise, simply
            execute the step with no signal.
        step: name of step to execute with this callback.

    Returns:
        a function that takes one or no arguments. If it is called
            with no argument, it immediately fires the callback. If it
            is called with an object with a .wait method, it first
            calls its .wait method. This function returns whatever
            executing the step returns.
    """
    callback = partial(dispatch_callback, self, signaler, step)
    return callback

Nullify

A fake buffer.

Source code in hostess/subutils.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
class Nullify:
    """
    A fake buffer: reads yield nothing and writes are discarded. All
    methods are static, so the class itself or an instance can stand in
    wherever an object with these stream methods is expected.
    """

    @staticmethod
    def read(_size=None):
        """read nothing; always returns empty bytes"""
        return b""

    @staticmethod
    def write(_obj):
        """write nothing; `_obj` is discarded"""
        return

    @staticmethod
    def flush():
        """remain flushed"""
        return

    @staticmethod
    def seek(_hence=None, *_args, **_kwargs):
        """
        seek nowhere. Accepts and ignores any arguments, so file-style
        calls such as `seek(offset, whence)` do not raise.
        """
        return

flush() staticmethod

remain flushed

Source code in hostess/subutils.py
44
45
46
47
@staticmethod
def flush():
    """No-op; there is never anything to flush."""
    return None

read(_size=None) staticmethod

read nothing

Source code in hostess/subutils.py
34
35
36
37
@staticmethod
def read(_size=None):
    """Read nothing: always returns empty bytes, whatever `_size` is."""
    nothing = b""
    return nothing

seek(_hence) staticmethod

seek nowhere

Source code in hostess/subutils.py
49
50
51
52
@staticmethod
def seek(_hence=None, *_args, **_kwargs):
    """
    seek nowhere. Accepts and ignores any arguments, so file-style calls
    such as `seek(offset, whence)` do not raise.
    """
    return

write(_obj) staticmethod

write nothing

Source code in hostess/subutils.py
39
40
41
42
@staticmethod
def write(_obj):
    """Write nothing: `_obj` is discarded."""
    return None

RunCommand

Callable object for managed shell command execution.

Encapsulates an Invoke runner and adds additional syntax and monitoring. Often, but not always, best used as the substrate for a Viewer.

Source code in hostess/subutils.py
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
class RunCommand:
    """
    Callable object for managed shell command execution.

    Encapsulates an Invoke runner and adds additional syntax and monitoring.
    Often, but not always, best used as the substrate for a Viewer.
    """

    def __init__(
        self,
        command: Optional[str] = None,
        ctx: Optional[invoke.context.Context] = None,
        runclass: Optional[type[invoke.Runner]] = None,
        chunksize: int = 20000,
        **kwargs: Any,
    ):
        """
        Args:
            command: optional string form of a shell command to bind to
                this object. If you do not pass this, this object will be
                "generic", able to run any shell command. For instance,
                ls = RunCommand("ls") constructs a RunCommand _just_ for
                ls. ls(a=True) will run "ls -a".
            ctx: optional Context object (just makes a new one if not given)
            runclass: optional Runner subclass (if not given, uses the
                first entry of the context's "runners" config)
            chunksize: maximum number of bytes to read at once from
                a child process's stdout/stderr. higher values will generally
                increase performance but may have undesirable effects in some
                cases.
            **kwargs: optional keyword arguments to bind to this object;
                Will be added to any kwargs passed to calls to this object.
                See __call__ for a complete description of behavior.
        """
        self.command = command
        # TODO: passing this around and directly assigning the attribute is
        #  inconvenient, but Invoke doesn't seem to explicitly expose it
        #  anywhere. Check and make sure.
        self.chunksize = chunksize
        if ctx is None:
            self.ctx = invoke.context.Context()
        else:
            self.ctx = ctx
        if runclass is None:
            self.runclass = list(self.ctx["runners"].values())[0]
        else:
            self.runclass = runclass
        self.args, self.kwargs = (), kwargs

    def bind(self, *args, **kwargs):
        """
        Replace this object's bound positional and keyword arguments.

        Note that this overwrites, rather than extends, anything bound
        earlier (including kwargs passed to the constructor).

        Args:
            *args: positional arguments to prepend to the arguments of
                every subsequent call.
            **kwargs: keyword arguments to merge into the kwargs of every
                subsequent call.
        """
        self.args, self.kwargs = args, kwargs

    def cstring(
        self,
        *args: Union[int, float, str],
        args_at_end: bool = True,
        **kwargs: Union[int, float, str],
    ):
        """
        Create a shell command string from *args and **kwargs, including any
        command and kwargs curried into this object. Used as part of the
        `__call__()` workflow; can also be used to determine what shell
        command this object _would_ execute if you called it.

        See `__call__` for a complete description of arg and kwarg parsing.

        Args:
            args: args to parse into shell command
            args_at_end: place args after the first at the end of the shell
                command?
            kwargs: kwargs to parse into shell command

        Returns:
            the parsed shell command string.

        Raises:
            ValueError: if no command is bound to this object and no
                positional argument is available to act as the command.
        """
        args = self.args + args
        if self.command is None:
            if len(args) == 0:
                # if we have no bound command, always treat the first arg as
                # the command name, not as an option
                raise ValueError("No bound command; must pass an argument.")
            command, args = args[0], args[1:]
        else:
            command = self.command
        # drop meta-options: "_"-prefixed names whose remainder isn't numeric
        kwargs = keyfilter(
            lambda a: (not a.startswith("_")) or a.strip("_").isnumeric(),
            self.kwargs | kwargs,
        )
        astring = "" if len(args) == 0 else f" {' '.join(map(str, args))}"
        kstring = ""
        for k, v in kwargs.items():
            k = k.strip("_").replace("_", "-")
            if v is True:
                # switch: short or long form depending on name length
                if len(k) == 1:
                    kstring += f" -{k}"
                else:
                    kstring += f" --{k}"
            elif v is False:
                continue
            elif len(k) == 1:
                kstring += f" -{k} {v}"
            else:
                kstring += f" --{k}={v}"
        order = (kstring, astring) if args_at_end else (astring, kstring)
        return f"{command}{''.join(order)}"

    def __call__(self, *args, **kwargs) -> Optional["Processlike"]:
        """
        Execute a shell command parsed from `args` and `kwargs`.

        This method has two legal calling conventions along with a variety of
        keyword-argument meta-options that modify _how_ it executes the shell
        command.

        Calling conventions:
            These conventions are not mutually exclusive, although it is
            generally less confusing to pick one or the other.

            **1**

            Pass the shell command as a string. This can be simpler in many cases,
            and is mandatory for programs with non-standard calling conventions,
            like the `ffmpeg` command below.

            Examples:
                >>> cmd = RunCommand()
                >>> cmd('ls')
                >>> cmd('cp -r /path/to/folder /path/to/other/folder')
                >>> cmd('ffmpeg -i first.mp4 -filter:v "crop=100:10:20:200" second.mp4')

            **2**

            Construct the shell command using multiple arguments to `__call__`.
            This allows you to treat the shell command more like a Python
            function, which can be simpler and less error-prone than dynamic
            string formatting when you would like to pass variable parameters to a
            command.

            RunCommand uses the following rules to parse args and kwargs into
            shell command strings. They are compatible with most, although not
            all, shell programs:

            * RunCommand treats the first positional argument like a shell
              command name. This means that passing a positional argument is
              mandatory if you did not bind a command to the RunCommand when
              creating it. The parsed command string always starts with this
              argument.
            * Subsequent positional arguments are command parameters. By
              default, the parser places them at the end of the command string,
              after any command options. Pass `_args_at_end=False` to place
              them before command options.
            * Keyword arguments are command options. The parser transforms
              keyword argument names in order to make Python naming and calling
              conventions compatible with shell conventions.
                * It ignores `"_"` characters at the start and end of
                  names. This can be used to pass numeric options, like
                  `"_0=True"`, or options that share a name with a Python
                  reserved keyword, like `"dir_=/opt"`.
                * it treats single-character names (not counting prefixed or
                  suffixed `"_"`) as options preceded by a `"-"` and does not
                  use an `'='` to separate them from their values.
                  `cmd("ls", I="a*")` is equivalent to `"ls -I a*"`.
                * it treats longer names as options preceded by `"--"` and uses
                  an `'='` to separate them from their values.
                  `cmd("ls", width=20)` is equivalent to `"ls --width=20"`.
                * It replaces `"_"` characters within names with `"-"`.
                  `cmd("ls", time_style="iso")` is equivalent to
                  `"ls --time-style=iso"`.
                * if a keyword argument is True, RunCommand treats it as a
                  "switch". `cmd("ls", a=True)` is equivalent to `"ls -a"`.
                * if a keyword argument is False, RunCommand ignores it.
                  `cmd("ls", a=False)` is equivalent to `"ls"`.

        Meta-options:
            RunCommand understands any kwarg whose name begins with `'_'` as
            a meta-option, unless the remainder of the kwarg name is numeric
            (e.g. `'_0'`). All meta-options are optional. The meta-options are
            not defined in the signature in order to facilitate the parsing
            process. Internally-recognized meta-options are:

            * `_bg` (aliases `_asynchronous`, `_async`): if True, execute the
                process in the background. Roughly equivalent to the '&'
                control operator in bash. If False, block until process exit.
                (Default False)
            * `_out` (alias `_out_stream`): Additional target for process
                stdout.  Must have a `write()` method. (If `_viewer=True`,
                defaults to `Viewer`'s default behavior. Otherwise, defaults
                to a `Nullify` object, which simply discards the stream).
            * `_err` (alias `_err_stream`): Same as `_out`, but for stderr.
            * `_verbose`: if True, do not install the default `Nullify`
                targets for `_out` / `_err`, leaving the Runner's own default
                stream handling in place. (Default False)
            * `_viewer` (alias `_v`): if True, return a `Viewer` object
                permitting additional process inspection and management.
                `_viewer=True`  implies `_bg=True`. (default False)
            * `_args_at_end`: if True, place positional arguments other than
                the command string at the end of the parsed shell command,
                after any shell options parsed from kwargs. Otherwise, place
                them before shell options. (Default True)
            * `_done`: a niladic function to call on process exit. Valid only
                if `_viewer=True`.

            Any other meta-options are passed directly to the underlying
            `Runner` as keyword arguments with `"_"` stripped from their
            names. `_disown` is often particularly useful.

        Tip:
            In addition to these conventions, bear in mind that if you
            specified a command when you construct a RunCommand object, that
            command will always be used as the first positional argument to
            `__call__()`, and if you specified kwargs, they will be added to
            any kwargs you pass or don't pass to `__call__()`.

        Returns:
            Interface object for executed process. Of variable type: if
                `_viewer=True`, a Viewer; if `_viewer=False` and `_bg=True`,
                the Invoke `Runner` managing the process (unwrapped from the
                object the runner returned); if `_viewer=False` and
                `_bg=False`, an Invoke `Result`; if `_viewer=False` and
                `_disown=True`, `None` (`_disown=True` overrides the value
                of `_bg`.)

        Raises:
            ValueError: if no command could be parsed from the arguments.
        """
        # split meta-options (rkwargs) from shell options (kwargs)
        rkwargs = keyfilter(
            lambda k: k.startswith("_") and not k.strip("_").isnumeric(),
            self.kwargs | kwargs,
        )
        kwargs = keyfilter(lambda k: k not in rkwargs, self.kwargs | kwargs)
        replace_aliases(
            rkwargs,
            {
                "_out_stream": ("_out",),
                "_err_stream": ("_err",),
                "_asynchronous": ("_async", "_bg"),
                "_viewer": ("_v",),
            },
        )
        # _asynchronous is redundant with _disown, and it upsets Invoke
        if "_disown" in rkwargs:
            rkwargs.pop("_asynchronous", None)
        # do not print to stdout/stderr by default. every key in rkwargs
        # starts with "_", so the switch must be looked up as "_verbose";
        # a bare "verbose" key can never be present here.
        verbose = rkwargs.pop("_verbose", False)
        if verbose is not True:
            for stream in filter(
                lambda x: x not in rkwargs, ("_out_stream", "_err_stream")
            ):
                rkwargs[stream] = Nullify()
        # simple done callback handling -- simple stdout/stderr is handled
        # by Invoke, but Invoke does not offer completion handling except
        # via the more complex Watcher system.
        dcallback = rkwargs.pop("_done", None)
        cstring = self.cstring(
            *args, args_at_end=rkwargs.pop("_args_at_end", True), **kwargs
        )
        if cstring == "":
            raise ValueError("no command specified.")
        if rkwargs.pop("_viewer", False) is True:
            output = Viewer.from_command(
                self,
                *args,
                ctx=self.ctx,
                runclass=self.runclass,
                chunksize=self.chunksize,
                **(rkwargs | kwargs),
            )
        else:
            runner = self.runclass(self.ctx)
            runner.read_chunk_size = self.chunksize
            # the runner takes these options without the leading "_"
            rkwargs = {k[1:]: v for k, v in rkwargs.items()}
            output = runner.run(cstring, **rkwargs)
        # disowned case
        if output is None:
            return
        # need the runner/result to actually create a thread to watch the
        # done callback. we also never want to actually return a Promise
        # object because it tends to behave badly.
        if ("runner" in dir(output)) and (not isinstance(output, Viewer)):
            output = output.runner
        if dcallback is not None:
            _submit_callback(dcallback, output)
        output.command = cstring
        return output

    def __str__(self):
        """Class name plus the curried command string, if there is one."""
        if self.command is None:
            return f"{self.__class__.__name__} (no curried command)"
        return f"{self.__class__.__name__} ({self.cstring()})"

    def __repr__(self):
        """Same as `__str__()`."""
        return self.__str__()

__call__(*args, **kwargs)

Execute a shell command parsed from args and kwargs.

This method has two legal calling conventions along with a variety of keyword-argument meta-options that modify how it executes the shell command.

Calling conventions

These conventions are not mutually exclusive, although it is generally less confusing to pick one or the other.

1

Pass the shell command as a string. This can be simpler in many cases, and is mandatory for programs with non-standard calling conventions, like the ffmpeg command below.

Examples:

    >>> cmd = RunCommand()
    >>> cmd('ls')
    >>> cmd('cp -r /path/to/folder /path/to/other/folder')
    >>> cmd('ffmpeg -i first.mp4 -filter:v "crop=100:10:20:200" second.mp4')

2

Construct the shell command using multiple arguments to __call__. This allows you to treat the shell command more like a Python function, which can be simpler and less error-prone than dynamic string formatting when you would like to pass variable parameters to a command.

RunCommand uses the following rules to parse args and kwargs into shell command strings. They are compatible with most, although not all, shell programs:

  • RunCommand treats the first positional argument like a shell command name. This means that passing a positional argument is mandatory if you did not bind a command to the RunCommand when creating it. The parsed command string always starts with this argument.
  • Subsequent positional arguments are command parameters. By default, the parser places them at the end of the command string, after any command options. Pass _args_at_end=False to place them before command options.
  • Keyword arguments are command options. The parser transforms keyword argument names in order to make Python naming and calling conventions compatible with shell conventions.
    • It ignores "_" characters at the start and end of names. This can be used to pass numeric options, like "_0=True", or options that share a name with a Python reserved keyword, like "dir_=/opt".
    • it treats single-character names (not counting prefixed or suffixed "_") as options preceded by a "-" and does not use an '=' to separate them from their values. cmd("ls", I="a*") is equivalent to "ls -I a*".
    • it treats longer names as options preceded by "--" and uses an '=' to separate them from their values. cmd("ls", width=20) is equivalent to "ls --width=20".
    • It replaces "_" characters within names with "-". cmd("ls", time_style="iso") is equivalent to "ls --time-style=iso".
    • if a keyword argument is True, RunCommand treats it as a "switch". cmd("ls", a=True) is equivalent to "ls -a".
    • if a keyword argument is False, RunCommand ignores it. cmd("ls", a=False) is equivalent to "ls".
Meta-options

RunCommand understands any kwarg whose name begins with '_' as a meta-option, unless the remainder of the kwarg name is numeric (e.g. '_0'). All meta-options are optional. The meta-options are not defined in the signature in order to facilitate the parsing process. Internally-recognized meta-options are:

  • _bg (aliases _asynchronous, _async): if True, execute the process in the background. Roughly equivalent to the '&' control operator in bash. If False, block until process exit. (Default False)
  • _out (alias _out_stream): Additional target for process stdout. Must have a write() method. (If _viewer=True, defaults to Viewer's default behavior. Otherwise, defaults to a Nullify object, which simply discards the stream).
  • _err (alias _err_stream): Same as _out, but for stderr.
  • _viewer (alias _v): if True, return a Viewer object permitting additional process inspection and management. _viewer=True implies _bg=True. (default False)
  • _args_at_end: if True, place positional arguments other than the command string at the end of the parsed shell command, after any shell options parsed from kwargs. Otherwise, place them before shell options. (Default True)
  • _done: a niladic function to call on process exit. Valid only if _viewer=True.

Any other meta-options are passed directly to the underlying Runner as keyword arguments with "_" stripped from their names. _disown is often particularly useful.

Tip

In addition to these conventions, bear in mind that if you specified a command when you construct a RunCommand object, that command will always be used as the first positional argument to __call__(), and if you specified kwargs, they will be added to any kwargs you pass or don't pass to __call__().

Returns: Interface object for executed process. Of variable type: if _viewer=True, a Viewer; if _viewer=False and _bg=True, the Invoke Runner managing the process (the code unwraps it from the object the runner returns); if _viewer=False and _bg=False, an Invoke Result; if _viewer=False and _disown=True, None (_disown=True overrides the value of _bg.)

Source code in hostess/subutils.py
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
def __call__(self, *args, **kwargs) -> Optional["Processlike"]:
    """
    Execute a shell command parsed from `args` and `kwargs`.

    This method has two legal calling conventions along with a variety of
    keyword-argument meta-options that modify _how_ it executes the shell
    command.

    Calling conventions:
        These conventions are not mutually exclusive, although it is
        generally less confusing to pick one or the other.

        **1**

        Pass the shell command as a string. This can be simpler in many cases,
        and is mandatory for programs with non-standard calling conventions,
        like the `ffmpeg` command below.

        Examples:
            >>> cmd = RunCommand()
            >>> cmd('ls')
            >>> cmd('cp -r /path/to/folder /path/to/other/folder')
            >>> cmd('ffmpeg -i first.mp4 -filter:v "crop=100:10:20:200" second.mp4')

        **2**

        Construct the shell command using multiple arguments to `__call__`.
        This allows you to treat the shell command more like a Python
        function, which can be simpler and less error-prone than dynamic
        string formatting when you would like to pass variable parameters to a
        command.

        RunCommand uses the following rules to parse args and kwargs into
        shell command strings. They are compatible with most, although not
        all, shell programs:

        * RunCommand treats the first positional argument like a shell
          command name. This means that passing a positional argument is
          mandatory if you did not bind a command to the RunCommand when
          creating it. The parsed command string always starts with this
          argument.
        * Subsequent positional arguments are command parameters. By
          default, the parser places them at the end of the command string,
          after any command options. Pass `_args_at_end=False` to place
          them before command options.
        * Keyword arguments are command options. The parser transforms
          keyword argument names in order to make Python naming and calling
          conventions compatible with shell conventions.
            * It ignores `"_"` characters at the start and end of
              names. This can be used to pass numeric options, like
              `"_0=True"`, or options that share a name with a Python
              reserved keyword, like `"dir_=/opt"`.
            * it treats single-character names (not counting prefixed or
              suffixed `"_"`) as options preceded by a `"-"` and does not
              use an `'='` to separate them from their values.
              `cmd("ls", I="a*")` is equivalent to `"ls -I a*"`.
            * it treats longer names as options preceded by `"--"` and uses
              an `'='` to separate them from their values.
              `cmd("ls", width=20)` is equivalent to `"ls --width=20"`.
            * It replaces `"_"` characters within names with `"-"`.
              `cmd("ls", time_style="iso")` is equivalent to
              `"ls --time-style=iso"`.
            * if a keyword argument is True, RunCommand treats it as a
              "switch". `cmd("ls", a=True)` is equivalent to `"ls -a"`.
            * if a keyword argument is False, RunCommand ignores it.
              `cmd("ls", a=False)` is equivalent to `"ls"`.

    Meta-options:
        RunCommand understands any kwarg whose name begins with `'_'` as
        a meta-option, unless the remainder of the kwarg name is numeric
        (e.g. `'_0'`). All meta-options are optional. The meta-options are
        not defined in the signature in order to facilitate the parsing
        process. Internally-recognized meta-options are:

        * `_bg` (aliases `_asynchronous`, `_async`): if True, execute the
             process in the background. Roughly equivalent to the '&'
             control operator in bash. If False, block until process exit.
             (Default False)
         * `_out` (alias `_out_stream`): Additional target for process
            stdout.  Must have a `write()` method. (If `_viewer=True`,
            defaults to `Viewer`'s default behavior. Otherwise, defaults
            to a `Nullify` object, which simply discards the stream).
         * `_err` (alias `_err_stream`): Same as `_out`, but for stderr.
         * `_verbose`: if True, do not install the discarding `Nullify`
            default for `_out` / `_err`; streams without an explicit
            target are then left to the underlying `Runner`.
            (Default False)
         * `_viewer` (alias `_v`): if True, return a `Viewer` object
            permitting additional process inspection and management.
            `_viewer=True`  implies `_bg=True`. (default False)
         * `_args_at_end`: if True, place positional arguments other than
            the command string at the end of the parsed shell command,
            after any shell options parsed from kwargs. Otherwise, place
            them before shell options. (Default True)
         * `_done`: a niladic function to call on process exit. Valid only
            if `_viewer=True`.

         Any other meta-options are passed directly to the underlying
         `Runner` as keyword arguments with `"_"` stripped from their
         names. `_disown` is often particularly useful.

    Tip:
        In addition to these conventions, bear in mind that if you
        specified a command when you construct a RunCommand object, that
        command will always be used as the first positional argument to
        `__call__()`, and if you specified kwargs, they will be added to
        any kwargs you pass or don't pass to `__call__()`.

     Returns:
         Interface object for executed process. Of variable type: if
             `_viewer=True`, a Viewer; if `_viewer=False` and `_bg=True`,
             an Invoke `Runner`; if `_viewer=False` and `_bg=False`, an
             Invoke `Result`; if `_viewer=False` and `_disown=True`,
             `None` (`_disown=True` overrides the value of `_bg`.)

     Raises:
         ValueError: if the parsed command string is empty, or (from
             `cstring()`) if no command is bound and none was passed.
    """
    # meta-options: "_"-prefixed names whose remainder is not numeric
    rkwargs = keyfilter(
        lambda k: k.startswith("_") and not k.strip("_").isnumeric(),
        self.kwargs | kwargs,
    )
    # everything else is a shell option
    kwargs = keyfilter(lambda k: k not in rkwargs, self.kwargs | kwargs)
    replace_aliases(
        rkwargs,
        {
            "_out_stream": ("_out",),
            "_err_stream": ("_err",),
            "_asynchronous": ("_async", "_bg"),
            "_viewer": ("_v",),
        },
    )
    # _asynchronous is redundant with _disown, and it upsets Invoke
    if "_disown" in rkwargs:
        rkwargs.pop("_asynchronous", None)
    # do not print to stdout/stderr by default. every key in rkwargs starts
    # with "_", so the flag has to be looked up (and removed, so that it is
    # not forwarded to the Runner) under its "_verbose" spelling.
    verbose = rkwargs.pop("_verbose", False)
    if verbose is not True:
        for stream in filter(
            lambda x: x not in rkwargs, ("_out_stream", "_err_stream")
        ):
            rkwargs[stream] = Nullify()
    # simple done callback handling -- simple stdout/stderr is handled
    # by Invoke, but Invoke does not offer completion handling except
    # via the more complex Watcher system.
    dcallback = rkwargs.pop("_done", None)
    # kept in a local so the Viewer branch can forward it; otherwise the
    # command the Viewer executes could differ from `cstring` below.
    args_at_end = rkwargs.pop("_args_at_end", True)
    cstring = self.cstring(*args, args_at_end=args_at_end, **kwargs)
    if cstring == "":
        raise ValueError("no command specified.")
    if rkwargs.pop("_viewer", False) is True:
        output = Viewer.from_command(
            self,
            *args,
            ctx=self.ctx,
            runclass=self.runclass,
            chunksize=self.chunksize,
            _args_at_end=args_at_end,
            **(rkwargs | kwargs),
        )
    else:
        runner = self.runclass(self.ctx)
        runner.read_chunk_size = self.chunksize
        # remaining meta-options become Runner.run() keyword arguments
        rkwargs = {k[1:]: v for k, v in rkwargs.items()}
        output = runner.run(cstring, **rkwargs)
    # disowned case
    if output is None:
        return
    # need the runner/result to actually create a thread to watch the
    # done callback. we also never want to actually return a Promise
    # object because it tends to behave badly.
    if ("runner" in dir(output)) and (not isinstance(output, Viewer)):
        output = output.runner
    if dcallback is not None:
        _submit_callback(dcallback, output)
    output.command = cstring
    return output

__init__(command=None, ctx=None, runclass=None, chunksize=20000, **kwargs)

Parameters:

Name Type Description Default
command Optional[str]

optional string form of a shell command to bind to this object. If you do not pass this, this object will be "generic", able to run any shell command. For instance, ls = RunCommand("ls") constructs a RunCommand just for ls. ls(a=True) will run "ls -a".

None
ctx Optional[Context]

optional Context object (just makes a new one if not given)

None
runclass Optional[type(Runner)]

optional Runner subclass (uses the default of ctx if not given; if both are None, uses invoke.runners.Local)

None
chunksize int

maximum number of bytes to read at once from a child process's stdout/stderr. higher values will generally increase performance but may have undesirable effects in some cases.

20000
**kwargs Any

optional keyword arguments to bind to this object; will be added to any kwargs passed to calls to this object. See __call__ for a complete description of behavior.

{}
Source code in hostess/subutils.py
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
def __init__(
    self,
    command: Optional[str] = None,
    ctx: Optional[invoke.context.Context] = None,
    runclass: Optional[type[invoke.Runner]] = None,
    chunksize: int = 20000,
    **kwargs: Any,
) -> None:
    """
    Args:
        command: optional string form of a shell command to bind to
            this object. If you do not pass this, this object will be
            "generic", able to run any shell command. For instance,
            ls = RunCommand("ls") constructs a RunCommand _just_ for
            ls. ls(a=True) will run "ls -a".
        ctx: optional Context object (just makes a new one if not given)
        runclass: optional Runner subclass (the class itself, not an
            instance). If not given, the first entry of the "runners"
            mapping of `ctx` is used; the original documentation states
            that this is invoke.runners.Local for a default Context.
        chunksize: maximum number of bytes to read at once from
            a child process's stdout/stderr. higher values will generally
            increase performance but may have undesirable effects in some
            cases.
        **kwargs: optional keyword arguments to bind to this object;
            Will be added to any kwargs passed to calls to this object.
            See __call__ for a complete description of behavior.
    """
    self.command = command
    # TODO: passing this around and directly assigning the attribute is
    #  inconvenient, but Invoke doesn't seem to explicitly expose it
    #  anywhere. Check and make sure.
    self.chunksize = chunksize
    self.ctx = invoke.context.Context() if ctx is None else ctx
    if runclass is None:
        # fall back to the first runner class configured on the context
        runclass = list(self.ctx["runners"].values())[0]
    self.runclass = runclass
    # no positional arguments are bound at construction time
    self.args, self.kwargs = (), kwargs

cstring(*args, args_at_end=True, **kwargs)

Create a shell command string from *args and **kwargs, including any command and kwargs curried into this object. Used as part of the __call__() workflow; can also be used to determine what shell command this object would execute if you called it.

See __call__ for a complete description of arg and kwarg parsing.

Parameters:

Name Type Description Default
args Union[int, float, str]

args to parse into shell command

()
args_at_end bool

place args after the first at the end of the shell command?

True
kwargs Union[int, float, str]

kwargs to parse into shell command

{}
Source code in hostess/subutils.py
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
def cstring(
    self,
    *args: Union[int, float, str],
    args_at_end: bool = True,
    **kwargs: Union[int, float, str],
) -> str:
    """
    Create a shell command string from *args and **kwargs, including any
    command and kwargs curried into this object. Used as part of the
    `__call__()` workflow; can also be used to determine what shell
    command this object _would_ execute if you called it.

    See `__call__` for a complete description of arg and kwarg parsing.

    Args:
        args: args to parse into shell command
        args_at_end: place args after the first at the end of the shell
            command?
        kwargs: kwargs to parse into shell command

    Returns:
        The assembled command string: the command name followed by the
        option string and the positional-argument string, in the order
        selected by `args_at_end`.

    Raises:
        ValueError: if no command is bound to this object and no
            positional argument was passed.
    """
    args = self.args + args
    if self.command is None:
        if len(args) == 0:
            # if we have no bound command, always treat the first arg as
            # the command name, not as an option
            raise ValueError("No bound command; must pass an argument.")
        command, args = args[0], args[1:]
    else:
        command = self.command
    # drop meta-options: "_"-prefixed names, unless the remainder of the
    # name is numeric (e.g. "_0"), which denotes a numeric shell option
    options = {
        name: value
        for name, value in (self.kwargs | kwargs).items()
        if (not name.startswith("_")) or name.strip("_").isnumeric()
    }
    astring = "".join(f" {arg}" for arg in args)
    # collect option fragments and join once rather than growing a string
    fragments = []
    for name, value in options.items():
        flag = name.strip("_").replace("_", "-")
        short = len(flag) == 1
        if value is False:
            # False means "omit this option entirely"
            continue
        if value is True:
            # True means "bare switch"
            fragments.append(f" -{flag}" if short else f" --{flag}")
        elif short:
            fragments.append(f" -{flag} {value}")
        else:
            fragments.append(f" --{flag}={value}")
    kstring = "".join(fragments)
    order = (kstring, astring) if args_at_end else (astring, kstring)
    return f"{command}{''.join(order)}"

Viewer

encapsulates an instance of a RunCommand subclass or other process abstraction. performs a variety of automated output handling, process initialization, and metadata tracking operations, and also prevents the abstraction from throwing errors or unexpectedly blocking in REPL environments. Viewer.from_command() is the preferred constructor for most purposes.

Viewer pretends to inherit most of the attributes of its encapsulated process abstraction, so in addition to attributes explicitly defined on Viewer, you can access attributes like .done and call methods like .kill().

Source code in hostess/subutils.py
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
class Viewer:
    """
    encapsulates an instance of a `RunCommand` subclass or other process
    abstraction. performs a variety of automated output handling, process
    initialization, and metadata tracking operations, and also prevents the
    abstraction from throwing errors or unexpectedly blocking in REPL
    environments. `Viewer.from_command()` is the preferred constructor for
    most purposes.

    `Viewer` pretends to inherit most of the attributes of its encapsulated
    process abstraction, so in addition to attributes explicitly defined on
    `Viewer`, you can access attributes like `.done` and call methods like
    `.kill()`.
    """

    def __init__(
        self,
        cbuffer: CBuffer,
        runner: Optional[invoke.Runner] = None,
        metadata: Optional[Mapping] = None,
    ) -> None:
        """
        Args:
            cbuffer: buffer object whose "out" and "err" caches become this
                Viewer's `out` and `err` attributes.
            runner: process abstraction to wrap; may be assigned later (as
                `from_command()` does).
            metadata: optional mapping stored as `self.metadata`; an empty
                dict is used if not given.
        """
        self.runner, self.cbuffer = runner, cbuffer
        self.metadata = {} if metadata is None else metadata
        self.out = cbuffer.caches["out"]
        self.err = cbuffer.caches["err"]

    # TODO: untangle this a bit
    def __getattr__(self, attr: str) -> Any:
        """
        Fallback attribute lookup (Python only calls this after normal
        lookup has failed): delegate to the wrapped runner, then to the
        runner's process.

        Raises:
            AttributeError: if neither underlying object has `attr`.
        """
        if attr == "runner":
            # normal lookup for "runner" already failed; repeating it via
            # object lookup raises AttributeError instead of letting the
            # `self.runner` accesses below recurse into this method forever
            return super().__getattribute__(attr)
        elif attr == "process" and hasattr(self.runner, "process"):
            return self.runner.process
        elif attr == "process":
            # should occur on remote hosts only
            return None
        # filter(None, ...) skips falsy entries, e.g. a runner or process
        # that is None
        for underlying in filter(None, (self.runner, self.process)):
            try:
                return getattr(underlying, attr)
            except AttributeError:
                pass
        raise AttributeError(f"'Viewer' object has no attribute '{attr}'")

    def _is_done(self) -> bool:
        """Return the runner's `process_is_finished` flag."""
        return self.runner.process_is_finished

    def _is_running(self) -> bool:
        """Return the negation of the runner's `process_is_finished` flag."""
        return not self.runner.process_is_finished

    def __str__(self) -> str:
        """
        One-line summary (state, first line of the command, PID if
        available) followed by up to the last 20 cached stdout lines.
        """
        runstring = "running" if self.running else "finished"
        cmdlines = self.command.split("\n")
        # multi-line commands are abbreviated to their first line
        cmdstring = f"{cmdlines[0]}..." if len(cmdlines) > 1 else cmdlines[0]
        base = f"Viewer for {runstring} process {cmdstring}"
        try:
            base += f", PID {self.pid}"
        except AttributeError:
            # TODO: fetch remote PIDs with shell tricks
            pass
        outlist = self.out[-20:]
        if len(self.out) > 20:
            # mark that earlier output was truncated
            outlist = ["..."] + outlist
        return base + "".join([f"\n{line}" for line in outlist])

    def __repr__(self) -> str:
        """Same text as `__str__()`."""
        return self.__str__()

    @property
    def host(self):
        """`host` attribute of the wrapped runner's context."""
        return self.runner.context.host

    def wait_for_output(
        self,
        stream: Literal["out", "err", "any"] = "any",
        poll: float = 0.05,
        timeout: float = 10,
    ) -> None:
        """
        Block until the Viewer receives output on the specified stream(s) or
        its process exits. Returns immediately if the process is already
        done.

        Args:
            stream: "out" to wait for output on stdout, "err" to wait for
                output on stderr, "any" for either.
            poll: poll rate (seconds)
            timeout: how long to wait between successive outputs before
                raising a TimeoutError (seconds)

        Raises:
            KeyError: if `stream` is not one of "out", "err", "any".
        """
        if self.done:
            return
        streams = {
            "out": (self.out,),
            "err": (self.err,),
            "any": (self.out, self.err),
        }[stream]
        # NOTE(review): `waiting` presumably raises once `timeout` has
        # elapsed -- confirm against timeout_factory
        waiting, _ = timeout_factory(timeout=timeout)
        # cache lengths at entry; any change counts as new output
        starting = [len(s) for s in streams]
        while (
            all(len(s) == l for s, l in zip(streams, starting))
            and self.running
        ):
            time.sleep(poll)
            waiting()
        return

    @classmethod
    def from_command(
        cls,
        command: Union[str, RunCommand],
        *args: Any,
        ctx: Optional[invoke.context.Context] = None,
        runclass: Optional[type[invoke.Runner]] = None,
        cbuffer: Optional[CBuffer] = None,
        chunksize: int = 20000,
        **kwargs: Any,
    ) -> "Viewer":
        """
        Construct a `Viewer` from a command. This is the most convenient
        constructor for `Viewer` and should generally be preferred to
        `Viewer.__init__`.

        Args:
            command: Either a shell command as a string, or an existing
                `RunCommand` object.
            args: additional arguments for the executed shell command. See
                `RunCommand.__call__()` for a detailed description of behavior.
            ctx: optional Invoke `Context` for Viewer. Just creates a new one
                if not specified. Only used when `command` is a string.
            runclass: underlying Invoke `Runner` class for this `Viewer`. if
                not specified, defaults to the default runclass of `command`,
                if it has one, and the default `runclass` of `RunCommand` if it
                does not. Only used when `command` is a string.
            cbuffer: context buffer for `Viewer`. Creates a new `CBuffer` if
                not specified.
            chunksize: number of bytes to read at once from stdout/stderr.
                Only used when `command` is a string.
            kwargs: additional keyword arguments for the executed shell
                command. See `RunCommand.__call__()` for a detailed
                description of behavior.

        Returns:
            a `Viewer` constructed from `command` .
        """
        if cbuffer is None:
            cbuffer = CBuffer()
        if not isinstance(command, RunCommand):
            command = RunCommand(command, ctx, runclass, chunksize=chunksize)
        # note that Viewers _only_ run commands asynchronously. use the wait
        # or wait_for_output methods if you want to block.
        base_kwargs = {
            "_bg": True,
            "_out": cbuffer.buffers["out"],
            "_err": cbuffer.buffers["err"],
        }
        viewer = object.__new__(cls)
        if "_done" in kwargs:
            # route the caller's done callback through the buffer's "done"
            # channel
            kwargs["_done"] = cbuffer.make_callback(kwargs["_done"], "done")
        viewer.__init__(cbuffer)
        # base_kwargs is merged last, so it overrides caller-supplied
        # "_bg" / "_out" / "_err"; _viewer=False keeps the RunCommand from
        # building another Viewer
        viewer.runner = command(*args, **kwargs | base_kwargs, _viewer=False)
        return viewer

    # TODO: implement children property

    # read-only views of the runner's completion state
    done = property(_is_done)
    running = property(_is_running)
    # class-level defaults; not read or written anywhere else in this class
    initialized = False
    _pid_records = None

from_command(command, *args, ctx=None, runclass=None, cbuffer=None, chunksize=20000, **kwargs) classmethod

Construct a Viewer from a command. This is the most convenient constructor for Viewer and should generally be preferred to Viewer.__init__.

Parameters:

Name Type Description Default
command Union[str, RunCommand]

Either a shell command as a string, or an existing RunCommand object.

required
args Any

additional arguments for the executed shell command. See RunCommand.__call__() for a detailed description of behavior.

()
ctx Optional[Context]

optional Invoke Context for Viewer. Just creates a new one if not specified.

None
runclass Optional[Runner]

underlying Invoke Runner class for this Viewer. if not specified, defaults to the default runclass of command, if it has one, and the default runclass of RunCommand if it does not.

None
cbuffer Optional[CBuffer]

context buffer for Viewer. Creates a new CBuffer if not specified.

None
chunksize int

number of bytes to read at once from stdout/stderr.

20000
kwargs Any

additional keyword arguments for the executed shell command. See RunCommand.__call__() for a detailed description of behavior.

{}

Returns:

Type Description
Viewer

a Viewer constructed from command .

Source code in hostess/subutils.py
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
@classmethod
def from_command(
    cls,
    command: Union[str, RunCommand],
    *args: Any,
    ctx: Optional[invoke.context.Context] = None,
    runclass: Optional[type[invoke.Runner]] = None,
    cbuffer: Optional[CBuffer] = None,
    chunksize: int = 20000,
    **kwargs: Any,
) -> "Viewer":
    """
    Construct a `Viewer` from a command. This is the most convenient
    constructor for `Viewer` and should generally be preferred to
    `Viewer.__init__`.

    Args:
        command: Either a shell command as a string, or an existing
            `RunCommand` object.
        args: additional arguments for the executed shell command. See
            `RunCommand.__call__()` for a detailed description of behavior.
        ctx: optional Invoke `Context` for Viewer. Just creates a new one
            if not specified. Only used when `command` is a string.
        runclass: underlying Invoke `Runner` class (the class itself, not
            an instance) for this `Viewer`. if not specified, defaults to
            the default runclass of `command`, if it has one, and the
            default `runclass` of `RunCommand` if it does not. Only used
            when `command` is a string.
        cbuffer: context buffer for `Viewer`. Creates a new `CBuffer` if
            not specified.
        chunksize: number of bytes to read at once from stdout/stderr.
            Only used when `command` is a string.
        kwargs: additional keyword arguments for the executed shell
            command. See `RunCommand.__call__()` for a detailed
            description of behavior. `_bg`, `_out` and `_err` are always
            overridden by this constructor.

    Returns:
        a `Viewer` constructed from `command` .
    """
    if cbuffer is None:
        cbuffer = CBuffer()
    if not isinstance(command, RunCommand):
        # ctx / runclass / chunksize only matter when we build the
        # RunCommand ourselves; an existing one keeps its own settings
        command = RunCommand(command, ctx, runclass, chunksize=chunksize)
    # note that Viewers _only_ run commands asynchronously. use the wait
    # or wait_for_output methods if you want to block.
    forced = {
        "_bg": True,
        "_out": cbuffer.buffers["out"],
        "_err": cbuffer.buffers["err"],
    }
    viewer = object.__new__(cls)
    if "_done" in kwargs:
        # route the caller's done callback through the buffer's "done"
        # channel
        kwargs["_done"] = cbuffer.make_callback(kwargs["_done"], "done")
    viewer.__init__(cbuffer)
    # `forced` is merged last so it wins over caller-supplied values;
    # _viewer=False keeps the RunCommand from building another Viewer
    viewer.runner = command(*args, **kwargs | forced, _viewer=False)
    return viewer

wait_for_output(stream='any', poll=0.05, timeout=10)

Block until the Viewer receives output on the specified stream(s) or its process exits.

Parameters:

Name Type Description Default
stream Literal['out', 'err', 'any']

"out" to wait for output on stdout, "err" to wait for output on stderr, "any" for either.

'any'
poll float

poll rate (seconds)

0.05
timeout float

how long to wait between successive outputs before raising a TimeoutError (seconds)

10
Source code in hostess/subutils.py
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
def wait_for_output(
    self,
    stream: Literal["out", "err", "any"] = "any",
    poll: float = 0.05,
    timeout: float = 10,
):
    """
    Sleep-poll until one of the watched output caches changes length or the
    process stops running. Returns at once if the process is already done.

    Args:
        stream: which cache(s) to watch: "out" for stdout, "err" for
            stderr, "any" for both.
        poll: seconds to sleep between checks.
        timeout: time limit in seconds, handed to `timeout_factory`; per
            the existing documentation a TimeoutError is raised when it is
            exceeded.

    Raises:
        KeyError: if `stream` is not "out", "err" or "any".
    """
    if self.done:
        return
    selectable = {
        "out": (self.out,),
        "err": (self.err,),
        "any": (self.out, self.err),
    }
    watched = selectable[stream]
    tick, _ = timeout_factory(timeout=timeout)
    # lengths at entry; a differing length later means new output arrived
    baseline = [len(cache) for cache in watched]

    def unchanged() -> bool:
        return all(
            len(cache) == size for cache, size in zip(watched, baseline)
        )

    while unchanged() and self.running:
        time.sleep(poll)
        tick()
    return

_submit_callback(callback, waitable)

syntactic sugar for running a function in a thread.

Parameters:

Name Type Description Default
callback Callable[[Any], Any]

function to run in the thread. must take at least one argument.

required
waitable Any

object to call callback with, presumably something it's waiting on.

required

Returns:

Type Description
Future

A Future object for the threaded callback(waitable) call.

Source code in hostess/subutils.py
101
102
103
104
105
106
107
108
109
110
111
112
113
114
def _submit_callback(callback: Callable[[Any], Any], waitable: Any) -> Future:
    """
    syntactic sugar for running a function in a thread.

    Args:
        callback: function to run in the thread. must take at least one
            argument.
        waitable: object to call `callback` with, presumably something it's
            waiting on.

    Returns:
        A Future object for the threaded callback(waitable) call.
    """
    executor = ThreadPoolExecutor(1)
    try:
        return executor.submit(callback, waitable)
    finally:
        # release the single-use executor explicitly instead of relying on
        # garbage collection. wait=False does not block the caller, and the
        # already-submitted call still runs to completion.
        executor.shutdown(wait=False)

_wait_on(it)

Block until it completes.

Parameters:

Name Type Description Default
it Any

immediately call it.wait(). if it has no .wait(), do nothing.

required
Source code in hostess/subutils.py
88
89
90
91
92
93
94
95
96
97
98
def _wait_on(it: Any):
    """
    Call `it.wait()` and return once it does.

    Args:
        it: object to wait on. An AttributeError raised by the call --
            for instance because `it` has no `.wait()` -- is swallowed
            and the function simply returns.
    """
    try:
        it.wait()
    except AttributeError:
        # nothing to wait on: treat as already complete
        pass
    return None

console_stream_handler(out=None, err=None, done=None, handle_out=None, handle_err=None, handle_done=None)

produce a Dispatcher suited for capturing stdout, stderr, and process completion, optionally with inline callbacks. Used by Viewer.

Parameters:

Name Type Description Default
out Optional[MutableSequence]

optional existing list to use as cache for stdout; if none specified, creates a new list.

None
err Optional[MutableSequence]

same but for stderr.

None
done Optional[MutableSequence]

same but for results of done callback.

None
handle_out Optional[Callable[[str], Any]]

optional inline formatter/callback for stdout. by default just strips newlines.

None
handle_err Optional[Callable[[str], Any]]

same but for stderr.

None
handle_done Optional[Callable]

optional callback for process completion; defaults to simply returning the results of the Dispatcher's "done" step.

None

Returns:

Type Description
Dispatcher

a Dispatcher ready to handle console streams.

Source code in hostess/subutils.py
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
def console_stream_handler(
    out: Optional[MutableSequence] = None,
    err: Optional[MutableSequence] = None,
    done: Optional[MutableSequence] = None,
    handle_out: Optional[Callable[[str], Any]] = None,
    handle_err: Optional[Callable[[str], Any]] = None,
    handle_done: Optional[Callable] = None,
) -> Dispatcher:
    """
    Build a Dispatcher with "out", "err" and "done" steps for capturing
    stdout, stderr, and process completion, optionally with inline
    callbacks. Used by Viewer.

    Args:
        out: optional existing list to use as cache for stdout; if none
            specified, creates a new list.
        err: same but for stderr.
        done: same but for results of done callback.
        handle_out: optional inline formatter/callback for stdout. by default
            just strips newlines.
        handle_err: same but for stderr.
        handle_done: optional callback for process completion; defaults to
            simply returning the results of the Dispatcher's "done" step.

    Returns:
        a Dispatcher ready to handle console streams.
    """
    out = _nonelist(out)
    err = _nonelist(err)
    done = _nonelist(done)
    handler = Dispatcher(
        steps={"out": strip_newline, "err": strip_newline, "done": identity}
    )
    channels = (
        ("out", handle_out, out),
        ("err", handle_err, err),
        ("done", handle_done, done),
    )
    # register every cache first, then wire each step to its cache
    for step, _, cache in channels:
        handler.caches[step] = cache
    for step, pipe, cache in channels:
        handler.add_send(step, pipe=pipe, target=cache)
    handler.singular = True
    return handler

defer(func, *args, **kwargs)

Defer a function call.

Parameters:

Name Type Description Default
func Callable

function whose call to defer

required
args Any

positional arguments to deferred call

()
kwargs Any

keyword arguments to deferred call

{}

Returns:

Type Description
Callable[[], Any]

A niladic function that, when called, executes func(*args, **kwargs).

Source code in hostess/subutils.py
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
def defer(func: Callable, *args: Any, **kwargs: Any) -> Callable[[], Any]:
    """
    Package a call to `func` so that it can be made later.

    Args:
        func: the function to call later
        args: positional arguments for the later call
        kwargs: keyword arguments for the later call

    Returns:
        A function taking no arguments that calls `func(*args, **kwargs)`
        and returns its result.
    """

    def deferred():
        outcome = func(*args, **kwargs)
        return outcome

    return deferred

deferinto(func, *args, _target, **kwargs)

Defer a function call and redirect its result.

Parameters:

Name Type Description Default
func Callable

function whose call to defer

required
*args Any

positional arguments for the deferred call

()
_target MutableSequence

object to append the call's return value to (must have an .append() method; a list is suitable)

required
**kwargs Any

keyword arguments for the deferred call

{}

Returns:

Type Description
Callable[[], None]

A niladic function that, when called, executes func(*args, **kwargs), but appends its return value to _target rather than returning it.

Source code in hostess/subutils.py
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
def deferinto(
    func: Callable, *args: Any, _target: MutableSequence, **kwargs: Any
) -> Callable[[], None]:
    """
    Package a function call for later execution, routing its result into a
    container instead of returning it.

    Args:
        func: callable to invoke when the deferred call is triggered
        *args: positional arguments bound to the eventual call
        _target: container that receives the call's return value via its
            .append() method (a list works)
        **kwargs: keyword arguments bound to the eventual call

    Returns:
        A zero-argument function. Calling it runs func(*args, **kwargs),
            appends the result to _target, and itself returns None.
    """

    def deferred_into():
        # look up .append before making the call, as a direct
        # `_target.append(func(...))` expression would
        push = _target.append
        push(func(*args, **kwargs))

    return deferred_into

dispatch_callback(dispatch, callback=None, step=None, to_wait_on=None)

simple callback for a Dispatcher. See Dispatcher.yield_callback for full documentation.

Source code in hostess/subutils.py
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
def dispatch_callback(
    dispatch: "Dispatcher",
    callback: Optional[Callable] = None,
    step: Optional[Hashable] = None,
    to_wait_on: Any = None,
) -> Any:
    """
    Minimal callback for a Dispatcher; Dispatcher.yield_callback carries the
    full documentation.

    Waits on `to_wait_on` via `_wait_on`, then executes the single step
    `step` of `dispatch`. When `callback` is given, its return value is
    passed to `dispatch.execute` as the positional argument.
    """
    _wait_on(to_wait_on)
    if callback is not None:
        return dispatch.execute(callback(), steps=(step,))
    # no callback: run the step without a positional payload
    return dispatch.execute(steps=(step,))

done_callback(dispatch, runner)

simple Dispatcher callback for invoke.runner / result completion.

Parameters:

Name Type Description Default
dispatch Dispatcher

Dispatcher to trigger with callback

required
runner Union[Result, Runner]

Result or Runner object. fire callback with information about process result when it completes.

required

Returns:

Type Description
Future

result of dispatcher "done" step executed with return value of locally-defined callback() function.

Source code in hostess/subutils.py
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
def done_callback(
    dispatch: "Dispatcher",
    runner: Union[invoke.runners.Result, invoke.runners.Runner],
) -> Future:
    """
    Dispatcher callback that reports how an invoke Runner / Result finished.

    Args:
        dispatch: Dispatcher whose "done" step should be executed
        runner: Result or Runner object describing the process; it is also
            the object waited on before the callback fires.

    Returns:
         whatever `dispatch_callback` returns for the "done" step, which is
              fed the summary dict built by the local callback() function.
    """

    def callback():
        # objects that list `returncode` in dir() expose it directly;
        # otherwise read it from the underlying process object
        exit_code = (
            runner.returncode
            if "returncode" in dir(runner)
            # noinspection PyUnresolvedReferences
            else runner.process.returncode
        )
        summary = {
            "success": not runner.failed,
            "returncode": exit_code,
            "command": runner.command,
        }
        return summary

    return dispatch_callback(dispatch, callback, "done", runner)

make_call_redirect(func, fork=False)

the middle sibling of the piped() / make_call_redirect() / watched_process() family.

Modify a function so that, when called, it runs in a subprocess, directing its output into a dict of pipes. Intended for longer-term, speculative, or callback-focused functions than piped(). The calling process is always responsible for polling the pipes; watched_process() provides a more automated alternative.

This mutates the return signature of the modified function so that it always returns None. It redirects its return value to the 'result' Pipe.

Note that this function cannot be used with the @ decorator syntax because it returns a tuple.

Parameters:

Name Type Description Default
func Callable

function to be modified.

required
fork bool

if True, execute func in a double-forked, mostly-daemonized process when called.

False

Returns:

Name Type Description
redirected_func Callable

the modified function

pipes dict[str, Pipe]

a dict of Pipe objects redirected_func will redirect its output to. keys are "out" (stdout), "err" (stderr), and "result" (return value).

Source code in hostess/subutils.py
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
def make_call_redirect(
    func: Callable, fork: bool = False
) -> tuple[Callable, dict[str, Pipe]]:
    """
    the middle sibling of the piped() / make_call_redirect() /
    watched_process() family.

    Modify a function so that, when called, it directs its stdout, stderr,
    and return value into a dict of pipes. Intended for longer-term,
    speculative, or callback-focused functions than piped(). The calling
    process is always responsible for polling the pipes; watched_process()
    provides a more automated alternative.

    The returned function only creates a new process itself when `fork` is
    True (via os.fork()); otherwise it is meant to be handed to something
    that runs it elsewhere, as watched_process() does by using it as the
    target of a Process.

    This mutates the return signature of the modified function: its return
    value is sent to the 'result' Pipe instead of being returned. If `func`
    raises an Exception, the exception object itself is sent to the
    'result' Pipe in place of a return value.

    Note that this function cannot be used with the `@` decorator syntax
    because it returns a `tuple`.

    Args:
        func: function to be modified.
        fork: if True, execute `func` in a double-forked, mostly-daemonized
            process when called.

    Returns:
        redirected_func: the modified function
        pipes: a dict of `Pipe` objects `redirected_func` will redirect its
            output to. keys are "out" (stdout), "err" (stderr), and "result"
            (return value); when `fork` is True there is also a "pids" key
            on which the forked child sends its process id.
    """
    # one pipe pair per channel: the *_here ends are handed back to the
    # caller, the *_there ends are written to by run_redirected
    r_here, r_there = Pipe()
    o_here, o_there = Pipe()
    p_here, p_there = Pipe()
    e_here, e_there = Pipe()

    # noinspection PyTypeChecker
    @wraps(func)
    def run_redirected(*args, **kwargs):
        if fork is True:
            # os.fork() returns nonzero in the parent: the parent returns
            # immediately and only the child goes on to run func
            if os.fork() != 0:
                return
            # the child reports its own pid on the "pids" pipe
            p_there.send(os.getpid())
        # Aliased exposes each pipe's send() under the name write(), so the
        # pipes can stand in for sys.stdout / sys.stderr
        with (
            redirect_stdout(Aliased(o_there, ("write",), "send")),
            redirect_stderr(Aliased(e_there, ("write",), "send")),
        ):
            try:
                result = func(*args, **kwargs)
            except Exception as ex:
                # ship the exception object to the caller rather than raising
                result = ex
            return r_there.send(result)

    proximal = {"result": r_here, "out": o_here, "err": e_here}
    if fork is True:
        proximal["pids"] = p_here
    return run_redirected, proximal

make_piped_callback(func)

Turn a function into a callback. The decorated function sends its result to a Pipe rather than returning it, allowing its caller to receive output from it even if it is executed in a subprocess.

Note that this is not usable with the @ decorator syntax because it returns a tuple.

Parameters:

Name Type Description Default
func Callable

function to turn into a callback.

required

Returns:

Type Description
Pipe
  • a Pipe the decorated function will send its output to if called
Callable
  • the decorated function
Source code in hostess/subutils.py
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
def make_piped_callback(func: Callable) -> tuple[Pipe, Callable]:
    """
    Wrap a function so that its result travels through a Pipe instead of
    being returned. Because the result is sent rather than returned, the
    caller can still receive it when the wrapped function is executed in a
    subprocess.

    An Exception raised by `func` is caught and the exception object is sent
    through the Pipe in place of a result.

    Not usable with the @ decorator syntax, because a tuple is returned.

    Args:
        func: function to turn into a callback.

    Returns:
        * the Pipe end on which the wrapped function's output arrives
        * the wrapped function
    """
    receiving_end, sending_end = Pipe()

    def sendback(*args, **kwargs):
        try:
            payload = func(*args, **kwargs)
        except Exception as error:
            payload = error
        return sending_end.send(payload)

    return receiving_end, sendback

make_watch_caches()

construct an empty 'caches' structure for watched_process()

Returns:

Type Description
dict[str, list]

dictionary of lists suitable for capturing output of a watched process

Source code in hostess/subutils.py
1044
1045
1046
1047
1048
1049
1050
1051
def make_watch_caches() -> dict[str, list]:
    """
    Build a fresh, empty 'caches' structure for watched_process().

    Returns:
        dict with one new empty list under each of the keys "result", "out",
        and "err".
    """
    channels = ("result", "out", "err")
    # a separate list object per channel, so appends never alias each other
    return {channel: [] for channel in channels}

piped(func, block=True)

the youngest sibling of the piped() / make_call_redirect() / watched_process() family.

decorator that modifies a function so that, when called, it executes in a subprocess rather than the calling interpreter's process.

Parameters:

Name Type Description Default
func Callable

function to decorate

required
block bool

if True, the decorated function blocks until completion when called (like a regular function). If False, calling the decorated function returns a Process object rather than its normal return value. In this case, the caller is responsible for polling the Process if it wishes to receive a return value from the function.

True
Source code in hostess/subutils.py
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
def piped(func: Callable, block: bool = True) -> Callable:
    """
    the youngest sibling of the piped() / make_call_redirect() /
    watched_process() family.

    decorator that modifies a function so that, when called, it executes in
    a subprocess rather than the calling interpreter's process.

    Args:
        func: function to decorate
        block: if True, the decorated function blocks until completion when
            called (like a regular function). If False, calling the decorated
            function returns a Process object rather than its normal return
            value. In this case, the caller is responsible for polling the
            Process if it wishes to receive a return value from the function.

    Returns:
        the decorated function. In blocking mode it returns whatever the
            subprocess sent back through the pipe: `func`'s return value,
            or the Exception object `func` raised.
    """

    @wraps(func)
    def through_pipe(*args, **kwargs):
        here, sendback = make_piped_callback(func)
        proc = Process(target=sendback, args=args, kwargs=kwargs)
        proc.start()
        if block is not True:
            return proc
        # Receive BEFORE joining. A child that sends a result larger than
        # the pipe's buffer blocks in send() until the parent reads; joining
        # first would then wait forever on a child that cannot exit.
        result = here.recv()
        proc.join()
        proc.close()
        return result

    return through_pipe

replace_aliases(mapping, aliasdict)

replace keys of a mapping with aliases. intended primarily as a helper for aliased kwargs. impure; mutates mapping if any keys match aliasdict.

Source code in hostess/subutils.py
419
420
421
422
423
424
425
426
427
428
429
430
def replace_aliases(
    mapping: MutableMapping, aliasdict: Mapping[str, Collection[str]]
):
    """
    Rename aliased keys of `mapping` to their canonical names. Mainly a
    helper for functions that accept aliased kwargs.

    `aliasdict` maps each canonical key to the aliases that should be folded
    into it. Impure: `mapping` is modified in place whenever it contains one
    of the aliases, and any existing value under the canonical key is
    replaced.
    """
    for canonical, alternates in aliasdict.items():
        for alias in alternates:
            # membership is tested when each alias is visited, so effects of
            # earlier replacements are taken into account
            if alias not in mapping:
                continue
            trydelete(mapping, canonical)
            mapping[canonical] = mapping[alias]
            del mapping[alias]

run(*args, **kwargs)

run a command not in a Viewer

Source code in hostess/subutils.py
1135
1136
1137
1138
1139
def run(*args, **kwargs) -> Optional[str]:
    """
    Run a command outside of a Viewer.

    Builds a RunCommand from the given arguments, calls it, and returns the
    `stdout` attribute of the result, or None when the call itself yields
    None.
    """
    # return value should be None iff caller passes _disown=True
    cmd = RunCommand(*args, **kwargs)()
    if cmd is None:
        return None
    return cmd.stdout

runv(*args, **kwargs)

run a command in a Viewer

Source code in hostess/subutils.py
1130
1131
1132
def runv(*args, **kwargs):
    """
    Run a command inside a Viewer.

    All arguments are forwarded unchanged to `Viewer.from_command`, whose
    return value is handed back.
    """
    viewer = Viewer.from_command(*args, **kwargs)
    return viewer

strip_newline(text)

just strip newlines. helper function for console streams.

Parameters:

Name Type Description Default
text str

string to strip.

required

Returns:

Type Description
str

text stripped of newlines.

Source code in hostess/subutils.py
258
259
260
261
262
263
264
265
266
267
268
def strip_newline(text: str) -> str:
    """
    Remove leading and trailing newline characters. Helper for console
    streams.

    Args:
        text: string to trim.

    Returns:
        `text` without any "\\n" characters at its start or end; newlines in
        the interior, and other whitespace, are left alone.
    """
    without_leading = text.lstrip("\n")
    return without_leading.rstrip("\n")

trydelete(obj, target)

attempt to delete the key or item named "target" from obj. Do nothing if it doesn't exist.

Source code in hostess/subutils.py
408
409
410
411
412
413
414
415
416
def trydelete(
    obj: Union[MutableSequence, MutableMapping], target: Hashable
) -> None:
    """
    Attempt to delete the key or index `target` from the container `obj`.
    Do nothing if it doesn't exist.

    Args:
        obj: mapping or sequence to delete from; modified in place.
        target: key (for a mapping) or index (for a sequence) to remove.

    Only the KeyError / IndexError raised for a missing key or index is
    suppressed; any other error from `del obj[target]` propagates.
    """
    try:
        del obj[target]
    except (KeyError, IndexError):
        # absent key / out-of-range index: nothing to delete
        pass

watched_process(func, *, caches, fork=False)

the eldest sibling of the piped() / make_call_redirect() / watched_process() family.

decorate a function so that calling it will execute it in a subprocess rather than the calling interpreter's process, redirecting its stdout, stderr, and any return value to 'caches'.

This mutates the decorated function's call and return signatures. It adds the kwargs _blocking (default True) and _poll (default 0.05), which set auto-join/poll and poll interval (s) respectively.

If _blocking is True, calling the decorated function blocks until the subprocess exits, appending anything received on the pipes to caches while it waits, and then returns a tuple whose first element is the Process object and whose second element is the dict of pipes. This should generally be used in a thread, unless you have some unusual reason to execute a subprocessed function serially.

If _blocking is False, the decorated function returns immediately with a tuple whose first element is a Process object and whose second element is the caches dict. Note that in this case the pipes are not polled, so output from the subprocess is not copied into caches.

Note that caches is a mandatory argument with no default value. To use this function with the @ decorator syntax, do something like:

IMAGE_OUTPUT = make_watch_caches()

@watched_process(caches=IMAGE_OUTPUT)
def process_image( ...

Parameters:

Name Type Description Default
func Callable

function to run in subprocess

required
caches MutableMapping[str, MutableSequence]

dict of lists for func's output. 'result' will contain any return value; 'out', its stdout; 'err', its stderr. make_watch_caches() constructs a suitable set of empty caches.

required
fork bool

if True, double-fork the subprocess so it will not terminate if the calling process exits.

False

Returns:

Type Description
Callable

function with mutated signature that, when called, executes in a subprocess.

Source code in hostess/subutils.py
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
@curry
def watched_process(
    func: Callable,
    *,
    caches: MutableMapping[str, MutableSequence],
    fork: bool = False,
) -> Callable:
    """
    the eldest sibling of the piped() / make_call_redirect() /
    watched_process() family.

    decorate a function so that calling it will execute it in a subprocess
    rather than the calling interpreter's process, redirecting its stdout,
    stderr, and any return value to 'caches'.

    This mutates the decorated function's call and return signatures.
    It adds the kwargs _blocking (default True) and _poll (default 0.05),
    which set auto-join/poll and poll interval (s) respectively.

    Every call also overwrites caches["pids"] with a one-element list
    holding the pid of the started Process.

    If _blocking is True, calling the decorated function blocks until the
    subprocess is no longer alive, moving everything that arrives on the
    pipes into the matching lists of `caches` (so the function's return
    value ends up in caches["result"]), and then returns the tuple
    (Process object, dict of pipes). This should generally be used in a
    thread, unless you have some unusual reason to execute a subprocessed
    function serially.

    If _blocking is False, the decorated function returns the tuple
    (Process object, caches) right after starting the subprocess. Nothing
    polls the pipes in this case, so `caches` is not filled with output.

    NOTE(review): earlier documentation described the return values as
    (return value, caches) when blocking and (Process, pipes) when not;
    the code returns what is described above. Confirm which is intended
    before changing either.

    Note that caches is a mandatory argument with no default value. To use
    this function with the @ decorator syntax, do something like:
    ```
    IMAGE_OUTPUT = make_watch_caches()

    @watched_process(caches=IMAGE_OUTPUT)
    def process_image( ...
    ```

    Args:
        func: function to run in subprocess
        caches: dict of lists for func's output. 'result' will contain
            any return value; 'out', its stdout; 'err', its stderr.
            make_watch_caches() constructs a suitable set of empty caches.
        fork: if True, double-fork the subprocess so it will not terminate
            if the calling process exits.

    Returns:
        function with mutated signature that, when called, executes in a
            subprocess.
    """
    # caches must provide all three of the 'result', 'out' and 'err' keys
    assert len(intersection(caches.keys(), {"result", "out", "err"})) == 3
    target, proximal = make_call_redirect(func, fork)

    def drain():
        # move every message currently waiting on any pipe into caches;
        # a single poll()/recv() per pipe would leave bursts of output behind
        for k, v in proximal.items():
            while v.poll():
                caches[k].append(v.recv())

    @wraps(func)
    def run_and_watch(*args, _blocking=True, _poll=0.05, **kwargs):
        process = Process(target=target, args=args, kwargs=kwargs)
        process.start()
        caches["pids"] = [process.pid]
        if _blocking is False:
            return process, caches
        while process.is_alive():
            drain()
            time.sleep(_poll)
        # the process may have written more between the last drain and its
        # exit; collect it so no output is dropped
        drain()
        return process, proximal

    return run_and_watch

utilities

generic utility objects for hostess

HOSTESS_CONSOLE = rich.console.Console() module-attribute

convenient shared rich console

Aliased

generic wrapper for aliasing a class method.

Examples:

If you'd like a library function to append to a list, but it's only willing to write:

>>> import json
>>> my_list = []
>>> writeable_list = Aliased(my_list, ("write",), "append")
>>> json.dump([1, 2, 3], writeable_list)
>>> print(writeable_list)
Aliased: ('write',) -> append:
['[1', ', 2', ', 3', ']']
Source code in hostess/utilities.py
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
class Aliased:
    """
    generic wrapper for aliasing a class method.

    Each name in `aliases` becomes an attribute of the wrapper that calls
    the wrapped object's `referent` method. Any other attribute not found on
    the wrapper itself is looked up on the wrapped object.

    Examples:
        If you'd like a library function to `append` to a list, but it's only
        willing to `write`:

        ```
        >>> import json
        >>> my_list = []
        >>> writeable_list = Aliased(my_list, ("write",), "append")
        >>> json.dump([1, 2, 3], writeable_list)
        >>> print(writeable_list)
        Aliased: ('write',) -> append:
        ['[1', ', 2', ', 3', ']']
        ```
    """

    def __init__(self, wrapped: Any, aliases: Sequence[str], referent: str):
        """
        Args:
            wrapped: object whose method is being aliased
            aliases: attribute names under which the method is exposed
            referent: name of the method of `wrapped` that the aliases call
        """
        self.obj = wrapped
        self.method = referent
        self.aliases = aliases
        for alias in aliases:
            setattr(self, alias, self._aliased)

    def _aliased(self, *args, **kwargs):
        # resolved at call time, so later changes to self.obj / self.method
        # are honored
        return getattr(self.obj, self.method)(*args, **kwargs)

    def __getattr__(self, attr):
        # __getattr__ only runs when normal lookup fails. If `obj` itself is
        # what's missing (an instance made with __new__, e.g. while being
        # copied or unpickled), delegating to self.obj would re-enter this
        # method forever; report the attribute as absent instead.
        if attr == "obj":
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute 'obj'"
            )
        return getattr(self.obj, attr)

    def __str__(self):
        return f"Aliased: {self.aliases} -> {self.method}:\n" + str(self.obj)

    def __repr__(self):
        return f"Aliased: {self.aliases} -> {self.method}:\n" + repr(self.obj)

configured(func, config)

decorator that permits dynamic partial evaluation of a function. configured splats config into all calls to the decorated function, so that its behavior can change along with changes to the contents of config.

Parameters:

Name Type Description Default
func Callable

function to configure

required
config Mapping[str, Any]

mapping to use as extra kwargs to func

required

Returns:

Type Description
Callable

version of func that splats config into every call.

Source code in hostess/utilities.py
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
@curry
def configured(func: Callable, config: Mapping[str, Any]) -> Callable:
    """
    decorator for dynamic partial evaluation. Every call to the decorated
    function has the current contents of `config` splatted in as additional
    keyword arguments, so changing `config` later changes how the function
    behaves.

    Args:
        func: function to configure
        config: mapping supplying the extra kwargs for `func`

    Returns:
        wrapper around `func` that adds `**config` to every call.
    """

    @wraps(func)
    def with_configuration(*args, **kwargs):
        # config is read at call time, not at decoration time
        outcome = func(*args, **kwargs, **config)
        return outcome

    return with_configuration

console_and_log(message, level='info', style=None)

print a message to console and log it with this module's default logger.

Parameters:

Name Type Description Default
message Any

object to print and log. must be compatible with both default logger and rich.console.Console.print. strings or numbers are recommended.

required
level str

logging level as a string ("info", "warning", etc.)

'info'
style Optional[Union[str, Style]]

optional rich Style or string description of one, e.g. "red"

None
Source code in hostess/utilities.py
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def console_and_log(
    message: Any,
    level: str = "info",
    style: Optional[Union[str, Style]] = None,
):
    """
    Print a message on the shared console, then log it through the
    `logging` module.

    Args:
        message: object to print and log. It has to be acceptable both to
            rich.console.Console.print and to the logging call; strings or
            numbers are recommended.
        level: name of the `logging` module function to call ("info",
            "warning", etc.)
        style: optional rich Style, or a string describing one, e.g. "red"
    """
    HOSTESS_CONSOLE.print(message, style=style)
    # `level` selects the module-level logging function by name
    emit = getattr(logging, level)
    emit(message)

curry(func, *args, **kwargs)

alias for cytoolz.curry with type hinting. this is a hack to improve PyCharm's static analysis.

Source code in hostess/utilities.py
256
257
258
259
260
261
262
263
def curry(func: Callable, *args, **kwargs) -> Callable:
    """
    Type-hinted pass-through to cytoolz.curry. This exists as a hack to
    improve PyCharm's static analysis.
    """
    # imported at call time rather than at module import
    import cytoolz

    return cytoolz.curry(func, *args, **kwargs)

dcom(string, sep=';', bad=(',', '\n'))

simple string sanitization function. the default values assume that you want to jam the string into a CSV field. always assumes you don't care about distinguishing different forbidden characters from one another in the output.

Parameters:

Name Type Description Default
string str

string to sanitize

required
sep str

separator to replace 'bad' characters with

';'
bad Collection[str]

characters to replace with sep

(',', '\n')

Returns:

Type Description
str

string washed clean of bad characters.

Source code in hostess/utilities.py
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
def dcom(
    string: str, sep: str = ";", bad: Collection[str] = (",", "\n")
) -> str:
    """
    simple string sanitization function. the default values assume that you
    want to jam the string into a CSV field. always assumes you don't care
    about distinguishing different forbidden characters from one another in
    the output.

    Leading and trailing whitespace is stripped from `string` first. Every
    individual character appearing in any element of `bad` is then replaced
    by `sep`. If `bad` contributes no characters at all, the stripped string
    is returned unchanged.

    Args:
        string: string to sanitize
        sep: separator to replace 'bad' characters with. It is used as an
            `re.sub` replacement template, so backslash escapes in it are
            interpreted.
        bad: characters to replace with sep

    Returns:
        `string` washed clean of bad characters.
    """
    stripped = string.strip()
    forbidden = "".join(bad)
    if not forbidden:
        # an empty character class "[]" is not a valid regex; with nothing
        # forbidden there is nothing to replace
        return stripped
    return re.sub(rf"[{re.escape(forbidden)}]", sep, stripped)

filestamp()

shorthand for standardized event stamp that is also a legal filename

Source code in hostess/utilities.py
42
43
44
def filestamp() -> str:
    """
    Shorthand for a standardized event stamp that is also a legal filename:
    `stamp()` minus its last two characters, with every hyphen, colon, and
    space replaced by an underscore.
    """
    trimmed = stamp()[:-2]
    return re.sub(r"[-: ]", "_", trimmed)

gb(b, round_to=2)

utility function to convert B to GB.

Parameters:

Name Type Description Default
b float

how many bytes?

required
round_to Optional[int]

if not None, round output to this many digits.

2

Returns:

Type Description
float

b converted from B to GB

Source code in hostess/utilities.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def gb(b: float, round_to: Optional[int] = 2) -> float:
    """
    Convert a byte count to gigabytes (10^9 bytes).

    Args:
        b: number of bytes; truncated to an integer before conversion.
        round_to: number of digits to round the result to, or None to
            return it unrounded.

    Returns:
        `b` expressed in GB
    """
    gigabytes = int(b) / 1_000_000_000
    if round_to is None:
        return gigabytes
    return round(gigabytes, round_to)

get_module(module_name)

dynamically import a module by name. check to see if it's already in sys.modules; if not, just try to import it; if that doesn't work, try to interpret module_name as a path.

Parameters:

Name Type Description Default
module_name str

name of or path to a Python module.

required

Returns:

Type Description
ModuleType

a module, hopefully.

Source code in hostess/utilities.py
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
def get_module(module_name: str) -> ModuleType:
    """
    Import a module dynamically, by name or by file path.

    Lookup order: an existing `sys.modules` entry under `module_name`; a
    regular import of `module_name`; finally, loading `module_name` as a
    file path. A module loaded from a path is registered in `sys.modules`
    under the path's stem (file name without extension).

    Args:
        module_name: name of or path to a Python module.

    Returns:
        a module, hopefully.
    """
    try:
        return sys.modules[module_name]
    except KeyError:
        pass
    try:
        return import_module(module_name)
    except ModuleNotFoundError:
        pass
    # not importable by name: treat module_name as a path to a source file
    stem = Path(module_name).stem
    spec = spec_from_file_location(stem, module_name)
    loaded = module_from_spec(spec)
    spec.loader.exec_module(loaded)
    # registered only after the module body has executed successfully
    sys.modules[stem] = loaded
    return loaded

infer_stream_length(stream)

attempts to infer the size of a potential read from an object.

Parameters:

Name Type Description Default
stream Union[BufferedReader, BinaryIO, Path, str, Response]

may be a buffered reader (like the result of calling open()), a buffer like io.BytesIO, or a Path

required

Returns:

Type Description
Optional[int]

an estimate of its size based on best available method, or None if impossible.

Source code in hostess/utilities.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
def infer_stream_length(
    stream: Union[
        _io.BufferedReader, _io.BinaryIO, Path, str, requests.Response
    ],
) -> Optional[int]:
    """
    attempts to infer the size of a potential read from an object.

    Three strategies are tried in order, and the first one that produces a
    value wins:

    1. file size on disk, for a buffered reader (via its `name`) or a
       str / Path;
    2. length of the underlying buffer, for objects with `getbuffer()`;
    3. the "content-length" entry of the object's `headers` attribute,
       converted to an int.

    Args:
        stream: may be a buffered reader (like the result of calling open()),
            a buffer like io.BytesIO, a Path or path string, or a
            response-like object that has a `headers` mapping

    Returns:
        an estimate of its size based on best available method, or None if
        impossible.
    """

    def filesize() -> Optional[int]:
        if isinstance(stream, _io.BufferedReader):
            name = stream.name
        elif isinstance(stream, (str, Path)):
            name = stream
        else:
            return None
        try:
            return Path(name).stat().st_size
        except (OSError, TypeError, ValueError):
            # missing or inaccessible file, or a name that is not a usable
            # path (e.g. a reader opened from a raw file descriptor)
            return None

    def buffersize() -> Optional[int]:
        if "getbuffer" in dir(stream):
            try:
                return len(stream.getbuffer())
            except (TypeError, ValueError, AttributeError):
                pass
        return None

    def responsesize() -> Optional[int]:
        # `headers` is an attribute of the response object, not an item of
        # it, so it has to be read with attribute access
        headers = getattr(stream, "headers", None)
        if headers is None:
            return None
        try:
            # header values are text; int(None) (header absent) raises
            # TypeError and falls through to None
            return int(headers.get("content-length"))
        except (TypeError, ValueError, AttributeError):
            return None

    for method in (filesize, buffersize, responsesize):
        length = method()
        if length is not None:
            return length
    return None

is_any(obj, coll)

like obj in coll, for use in cases when obj and coll do not, or might not, support use of in.

Parameters:

Name Type Description Default
obj Any

an object

required
coll Iterable

a collection

required

Returns:

Type Description
bool

True if obj is in coll; False if not

Source code in hostess/utilities.py
466
467
468
469
470
471
472
473
474
475
476
477
478
def is_any(obj: Any, coll: Iterable) -> bool:
    """
    Stand-in for `obj in coll` when `obj` and `coll` do not, or might not,
    support use of `in`. Each element of `coll` is tested against `obj`
    with `is_`, stopping at the first match.

    Args:
        obj: an object
        coll: a collection

    Returns:
        True if some element of `coll` matches `obj`; False if none does
    """
    for member in coll:
        if is_(obj, member):
            return True
    return False

logstamp(extra=0)

shorthand for standardized text timestamp only (no hostname)

Source code in hostess/utilities.py
47
48
49
def logstamp(extra: int = 0) -> str:
    """shorthand for standardized text timestamp only (no hostname)"""
    return f"{dt.datetime.now(dt.UTC).isoformat()[:(-13 + extra)]}"

mb(b, round_to=2)

utility function to convert B to MB.

Parameters:

Name Type Description Default
b int

how many bytes?

required
round_to Optional[int]

if not None, round output to this many digits.

2

Returns:

Type Description
float

b converted from B to MB

Source code in hostess/utilities.py
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
def mb(b: int, round_to: Optional[int] = 2) -> float:
    """
    Convert a byte count to megabytes (10^6 bytes).

    Args:
        b: number of bytes; truncated to an integer before conversion.
        round_to: number of digits to round the result to, or None to
            return it unrounded.

    Returns:
        `b` expressed in MB
    """
    megabytes = int(b) / 1_000_000
    if round_to is None:
        return megabytes
    return round(megabytes, round_to)

notary(cache=None, be_loud=False, resolution=0)

create a function that records, timestamps, and optionally prints messages. if you pass eject=True to that function, it will return its note cache.

Parameters:

Name Type Description Default
cache Optional[MutableMapping]

cache for notes (if None, creates a dict)

None
be_loud bool

if True, makes output function verbose by default. individual calls can override this setting.

False
resolution int

time resolution in significant digits after the second. collisions can occur if entries are sent faster than the time resolution.

0

Returns:

Name Type Description
note Callable[[Any], Optional[MutableMapping]]

function for notetaking

Source code in hostess/utilities.py
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
def notary(
    cache: Optional[MutableMapping] = None,
    be_loud: bool = False,
    resolution: int = 0,
) -> Callable[[Any], Optional[MutableMapping]]:
    """
    Build a note-taking function that stores timestamped messages and can
    also print them. Calling that function with eject=True hands back the
    note cache.

    Args:
        cache: mapping to store notes in (a new dict is made if None)
        be_loud: default verbosity of the returned function; each call may
            override it.
        resolution: time resolution in significant digits after the second.
            collisions can occur if entries are sent faster than the time
            resolution.

    Returns:
        note: function for notetaking
    """
    notes = {} if cache is None else cache
    # a nonzero resolution gets one extra character, passed on as the
    # `extra` argument of record_and_yell
    stamp_extra = resolution if resolution == 0 else resolution + 1

    def note(
        message: str = "", loud: bool = be_loud, eject: bool = False
    ) -> Optional[MutableMapping]:
        """
        Args:
            message: text to store in the cache and optionally print.
            loud: whether to print the message too.
            eject: if True, return the cache; all other arguments are
                ignored and the call is not recorded.

        Returns:
            normally `None`; the cache when `eject` is True
        """
        if eject is not True:
            return record_and_yell(message, notes, loud, stamp_extra)
        return notes

    return note

record_and_yell(message, cache, loud=False, extra=0)

place message into a cache object with a timestamp; optionally print it

Source code in hostess/utilities.py
174
175
176
177
178
179
180
181
182
def record_and_yell(
    message: str, cache: MutableMapping, loud: bool = False, extra: int = 0
):
    """
    store `message` in `cache` under a timestamp key, printing it first if
    `loud` is True.

    Args:
        message: text to record
        cache: mapping that receives the message, keyed by the value of
            `logstamp(extra)`
        loud: if True, also print `message`
        extra: passed through unchanged to `logstamp`
    """
    if loud is True:
        print(message)
    timestamp = logstamp(extra)
    cache[timestamp] = message

signal_factory(threads)

creates a 'signaler' function that simply assigns values to a mapping bound in enclosing scope. this is primarily intended as a simple inter-thread communication utility.

Parameters:

Name Type Description Default
threads MutableMapping[Hashable, Optional[int]]

mapping from thread names to None or ints. In normal usage, named threads will poll the key of this mapping corresponding to their name to check for received signals.

required

Returns:

Type Description
Callable[[Hashable, Optional[int]], None]

a function that takes a thread name (or "all", to signal every thread) and an optional integer (default 0) and assigns that integer to the corresponding key(s) of the threads mapping.

Source code in hostess/utilities.py
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
def signal_factory(
    threads: MutableMapping[Hashable, Optional[int]]
) -> Callable[[Hashable, Optional[int]], None]:
    """
    creates a 'signaler' function that simply assigns values to a mapping
    bound in enclosing scope. this is primarily intended as a simple
    inter-thread communication utility.

    Args:
        threads: mapping from thread names to None or ints. In normal usage,
            named threads will poll the key of this mapping corresponding
            to their name to check for received signals.

    Returns:
        a function that takes a thread name and an optional integer (default
            0) and assigns that integer to the corresponding key of the
            `threads` mapping. The special name "all" assigns the integer
            to every key currently in the mapping.
    """

    def signaler(name, signal=0):
        """
        assign `signal` to `threads[name]`, or to every key if name is "all".

        Raises:
            KeyError: if `name` is neither "all" nor an existing key of
                `threads`. the mapping is left unmodified in that case.
        """
        if name == "all":
            # only existing keys are reassigned, so the mapping does not
            # change size while it is being iterated.
            for key in threads:
                threads[key] = signal
            return
        if name not in threads:
            # refuse to create new entries, and report the offending name
            raise KeyError(name)
        threads[name] = signal

    return signaler

stamp()

create standardized text event stamp

Source code in hostess/utilities.py
37
38
39
def stamp() -> str:
    """create standardized text event stamp"""
    return f"{gethostname()} {dt.datetime.now(dt.UTC).isoformat()[:-13]}: "

timeout_factory(raise_timeout=True, timeout=5)

returns a tuple of functions. calling the first starts a wait timer if not started, and also returns current wait time. calling the second resets the wait timer.

Parameters:

Name Type Description Default
raise_timeout bool

if True, raises TimeoutError if waiting > timeout. otherwise, this is basically just a stopwatch.

True
timeout float

timeout in seconds. Used only if raise_timeout is True.

5
Source code in hostess/utilities.py
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
def timeout_factory(
    raise_timeout: bool = True, timeout: float = 5
) -> tuple[Callable[[], float], Callable[[], None]]:
    """
    returns a tuple of functions. calling the first starts a wait timer if not
    started, and also returns current wait time. calling the second resets the
    wait timer.

    Args:
        raise_timeout: if True, raises TimeoutError if waiting > timeout.
            otherwise, this is basically just a stopwatch.
        timeout: timeout in seconds. Used only if raise_timeout is True.

    Returns:
        waiting: starts the timer and returns 0 if the timer is not running;
            otherwise returns the seconds elapsed since the timer started,
            or raises TimeoutError if `raise_timeout` is True and that
            time exceeds `timeout`.
        unwait: resets the timer. does nothing if the timer is not running.
    """
    # holds at most one element: the start time of the running timer.
    starts = []

    def waiting():
        """call me to start and check/raise timeout."""
        if not starts:
            # monotonic clock: elapsed time must not be affected by
            # adjustments to the system clock.
            starts.append(time.monotonic())
            return 0
        delay = time.monotonic() - starts[-1]
        if (raise_timeout is True) and (delay > timeout):
            raise TimeoutError(
                f"waited {delay:.3f} s, exceeding timeout of {timeout} s"
            )
        return delay

    def unwait():
        """call me to reset timeout."""
        starts.clear()

    return waiting, unwait

unix2dt(epoch)

alias for dt.datetime.fromtimestamp().

Source code in hostess/utilities.py
250
251
252
def unix2dt(epoch: float) -> dt.datetime:
    """
    convert a Unix timestamp to a datetime by delegating to
    `dt.datetime.fromtimestamp()`. no timezone is passed to that call.
    """
    converted = dt.datetime.fromtimestamp(epoch)
    return converted

yprint(obj, indent=0, replace_null=True, maxlen=256)

lazy way to pretty-print many objects by using pyyaml's excellent YAML formatter. Doesn't work well for everything.

Parameters:

Name Type Description Default
obj Any

object to pretty-print

required
indent int

indentation in spaces

0
replace_null bool

if True, replace the YAML value 'null' with 'None'

True
maxlen int

maximum length of each output line, not counting indentation

256

Returns:

Type Description
str

mildly stylized YAML representation of obj

Source code in hostess/utilities.py
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
def yprint(
    obj: Any, indent: int = 0, replace_null: bool = True, maxlen: int = 256
) -> str:
    """
    quick pretty-printer that leans on `pyyaml`'s YAML formatter. it is not
    suitable for every object: if `yaml.dump` raises TypeError, the result
    is a failure marker followed by the object's plain string form.

    Args:
        obj: object to pretty-print
        indent: number of spaces prepended to every output line
        replace_null: if True, replace 'null' with 'None'. this is a plain
            text substitution applied to every occurrence of 'null' in the
            dumped text.
        maxlen: maximum length of each output line, not counting indentation

    Returns:
        mildly stylized YAML representation of `obj`
    """
    try:
        dumped = yaml.dump(obj)
    except TypeError:
        dumped = f"***pretty-print failed*** {obj}"
    if replace_null is True:
        dumped = dumped.replace("null", "None")
    pad = " " * indent
    # truncate each line first, then indent it
    clipped = [pad + row[:maxlen] for row in dumped.splitlines()]
    return "\n".join(clipped)