#!/usr/bin/env python3
"""
Run integration-e2e inside the shadow network simulator. This can be helpful
vs running it directly for several reasons.

* shadow simulates time, and can collapse idle time. This speeds up the
  network bootstrapping step in particular.
* shadow tries to be deterministic. There are some gaps, but in general
  there *should* be less nondeterministic flakiness under shadow
  than when running natively.
"""

import argparse
import os
import pathlib
import shutil
import subprocess
import yaml

from typing import Any

# Shadow's data directory, passed to it via `--data-directory`. Any existing
# directory with this name is removed before each run.
SHADOW_DATA_DIR = "shadow.chutney.data"
# File that the generated shadow config is written to.
SHADOW_CONFIG_FILE = "shadow.chutney.yaml"
# File that receives shadow's stdout.
SHADOW_LOG_FILE = "shadow.log"
# Name of the simulated host that runs the test web server.
# Must be kept in sync with `TEST_DOMAIN` in `tests/chutney/test`.
TEST_DOMAIN = "example.com"


def gen_shadow_config(seed: int) -> dict[str, Any]:
    """
    Build the shadow simulation config as a plain dict.

    The result is meant to be serialized to YAML and handed to shadow. It
    describes two simulated hosts on a single switch: one that runs
    `./tests/chutney/integration-e2e`, and one (named `TEST_DOMAIN`) that
    runs a small python web server.

    Args:
        seed: Seed for the simulation's PRNG.

    Returns:
        The shadow configuration, as a dict of YAML-serializable values.
    """

    env = {
        "RUNNING_IN_SHADOW": "yes",
        # AF_UNIX sockets aren't supported in shadow
        "CHUTNEY_ENABLE_CONTROLSOCKET": "no",
        # ipv6 isn't supported in shadow
        "CHUTNEY_DISABLE_IPV6": "yes",
        # sandboxing isn't supported in shadow
        "CHUTNEY_TOR_SANDBOX": "no",
        # re-export PATH. The test scripts assume that
        # usual shell utilities are on it.
        #
        # Fall back to the platform default search path if PATH is unset, so
        # that we never emit a null value into the process environment.
        "PATH": os.environ.get("PATH", os.defpath),
    }

    return {
        "general": {
            "stop_time": "10m",
            "model_unblocked_syscall_latency": True,
            "seed": seed,
        },
        "network": {
            "graph": {"type": "1_gbit_switch"},
        },
        "experimental": {
            # shadow only actually increments simulated time (and potentially
            # switches threads) if this much time would have been consumed by an
            # unbroken sequence of unblocked syscalls. Using a relatively large
            # value here (vs the default 1us) makes the simulation scheduling
            # more stable and predictable; e.g. adding additional logging to
            # debug an issue is less likely to make the issue disappear.
            #
            # The primary tradeoffs are:
            # * Larger values can result in managed processes measuring elapsed
            #   time where not much happens as *zero*, which may not be handled
            #   gracefully. e.g. in c-tor, using values of 1 ms or more here can
            #   result in a flood of warnings "compute_drain_rate(): Bug:
            #   Computing stream drain rate with zero time delta".
            # * Time will move forward at a larger granularity when unblocked
            #   syscall latency is applied. The value used here is still small
            #   enough though that this shouldn't be terribly strange; e.g.
            #   larger time jumps are likely to be observed on over-loaded
            #   systems with normal preemptive scheduling.
            # * When the simulation does hit a busy loop, it may spend a bit
            #   longer "spinning" before moving time forward, potentially
            #   causing the simulation to take a bit longer to run. (if the
            #   loop would have otherwise timed out earlier than this value)
            "max_unapplied_cpu_latency": "100us",
        },
        "hosts": {
            "host": {
                "network_node_id": 0,
                "processes": [
                    {
                        "path": "./tests/chutney/integration-e2e",
                        "environment": env,
                        # Give the web server below a little time to start.
                        "start_time": "5s",
                    }
                ],
            },
            TEST_DOMAIN: {
                "network_node_id": 0,
                "processes": [
                    {
                        "path": "python3",
                        "args": "-m http.server 80",
                        "start_time": "0",
                        # The server never exits on its own; tell shadow it
                        # should still be running when the simulation ends.
                        "expected_final_state": "running",
                    }
                ],
            },
        },
    }


def main() -> None:
    """
    Parse the command line, write the shadow config, and run the simulation.

    Changes the working directory to the top level of the enclosing git
    repository, writes the generated config to `SHADOW_CONFIG_FILE`, removes
    any previous `SHADOW_DATA_DIR`, and then runs `shadow` with its stdout
    redirected to `SHADOW_LOG_FILE`.

    Raises:
        subprocess.CalledProcessError: if `git` or `shadow` exits with a
            non-zero status.
    """
    parser = argparse.ArgumentParser(
        prog="integration-e2e-shadow",
        description="Runs integration-e2e inside a shadow simulation",
    )
    parser.add_argument(
        "-s", "--seed", type=int, default=1, help="Simulation PRNG seed"
    )
    args = parser.parse_args()

    # Paths in the config and the constants above are relative to the
    # repository top level, so run from there. The command is passed as an
    # argument list so that no shell is involved.
    toplevel = pathlib.Path(
        os.fsdecode(
            subprocess.check_output(["git", "rev-parse", "--show-toplevel"])
        ).strip()
    )
    os.chdir(toplevel)

    # Write out shadow config. We could just pipe it directly to the shadow
    # process below, but writing it out is useful for debugging.
    shadow_config = gen_shadow_config(args.seed)
    with open(SHADOW_CONFIG_FILE, "w", encoding="utf-8") as f:
        yaml.safe_dump(shadow_config, f)

    # Remove shadow's data dir. (It will bail if the directory already exists)
    if os.path.isdir(SHADOW_DATA_DIR):
        shutil.rmtree(SHADOW_DATA_DIR)

    shadow_args = [
        "shadow",
        "--data-directory=" + SHADOW_DATA_DIR,
        "--progress=true",
        SHADOW_CONFIG_FILE,
    ]
    # Only stdout is captured to the log file; stderr is left attached to
    # ours. `check=True` makes a failed simulation fail this script.
    with open(SHADOW_LOG_FILE, "w") as shadow_log_file:
        subprocess.run(shadow_args, check=True, stdout=shadow_log_file)


# Only run the simulation when executed as a script, not when imported.
if __name__ == "__main__":
    main()
