#!/usr/bin/env python3
#
# Copyright (c) 2024-2025 The NetBSD Foundation, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#

"""require_user_login is a hook to verify that new changeset authorship
matches the login name, as determined by the LOGNAME environment variable
(or the USER environment variable if LOGNAME is unset or empty).

Usage:
  [netbsd_hooks]
  domain = pkgsrc.org

  [hooks]
  pretxnclose.require_user_login = python:....require_user_login.hook

"""

import os

from typing import Set
from typing import Optional

from mercurial.i18n import _
from mercurial import (
    error,
    pycompat,
    registrar,
)
from mercurial.utils import stringutil


# Table of configuration items declared by this module, together with
# the registrar-provided function used to add entries to it.
configtable = {}
configitem = registrar.configitem(configtable)

# Declare the setting below; it has no default value.
#
#   [netbsd_hooks]
#   domain = pkgsrc.org
configitem(b'netbsd_hooks', b'domain', default=None)


# Upper bound on how many offending entries are listed per error
# category; once it is reached, a single b'...' marker stands in for
# the rest.
MAX_ERRORS = 3


def emaildomain(email: bytes) -> Optional[bytes]:
    """Return the part of an email address after its first at-sign.

    For b'foo@example.com' this is b'example.com'.  An address with no
    at-sign at all yields None; an address ending in the at-sign
    yields b''.
    """
    _local, separator, domain = email.partition(b'@')
    # partition() reports an empty separator when no b'@' was found.
    return domain if separator else None


def valid_graft(repo, rev, okparents, newrevset) -> bool:
    """Decide whether rev may legitimately carry another user's name.

    rev is accepted only if it is a member of okparents -- the caller
    collects there the parents of the new changesets it has already
    walked, so rev must have a child among them -- and, in addition,
    one of the following holds:

    - rev has a predecessor outside newrevset, and the first such
      predecessor encountered has the same author as rev; or
    - rev has no predecessor outside newrevset, has exactly one
      origin (it is a graft of a single original), and that origin
      has the same author as rev.

    The point is to let releng push rebased grafts, provided they are
    headed by a changeset that identifies the releng member doing the
    push.  Suppose developer Alice commits A1 and A2 and they are
    grafted onto the release branch as R1 and R2.  Bob may then rebase
    them as R1' and R2', as long as he tops them with his own
    changeset P recording the pullup (say, a note added to
    doc/CHANGES-X.Y):

    [trunk]     [release-X]     [release-X after rebase]
    o A2             o R2       o P (author: Bob, adds note to doc/CHANGES-X.Y)
    |                |          |
    o A1        C2 o o R1       o R2' (author: Alice, grafted from A2)
    |              |/           |
    .           C1 o            o R1' (author: Alice, grafted from A1)
    .              .            |
    .              .            o C2
                   .            |
                                o C1
                                .
                                .
                                .

    The push goes through when R1' and R2' have the same author as
    A1 and A2 (Alice), and the author of P (Bob) matches the login
    name of whoever is pushing.

    The caller walks revisions newest first, i.e., descendants before
    ancestors, adding parents to `okparents' along the way.

    repo is the repository, rev the changeset context under test,
    okparents the set of contexts described above, and newrevset the
    set of revision numbers that are new in this transaction.
    """
    # Without an already-examined child there is nothing vouching
    # for this changeset.
    if rev not in okparents:
        return False

    # A predecessor that was in the tree before this transaction
    # settles the question by itself: fine if the author carried
    # over unchanged, rejected if the author was altered.
    for old in repo.unfiltered().revs('predecessors(%s)' % (rev,)):
        if old in newrevset:
            continue
        return rev.user() == repo.unfiltered()[old].user()

    # Otherwise fall back to graft metadata, which must name exactly
    # one origin.
    sources = repo.revs('origin(%s)' % (rev,))
    if len(sources) != 1:
        return False
    source = repo.unfiltered()[next(iter(sources))]

    # Valid precisely when the graft kept the original's author.
    return rev.user() == source.user()


def _record_error(errors, item):
    """Append item to errors, capping the list at MAX_ERRORS entries.

    When the list already holds MAX_ERRORS items, a single b'...'
    marker is appended in place of item; after that, further items
    are dropped silently.
    """
    if len(errors) < MAX_ERRORS:
        errors.append(item)
    elif len(errors) == MAX_ERRORS:
        errors.append(b'...')


def hook(ui, repo, hooktype, node=None, **kwargs):
    """Reject transactions whose new changesets impersonate other users.

    Every changeset from `node' onward in the changelog is examined,
    newest first.  A changeset is rejected if its author has no email
    domain, if that domain is not one of the configured
    netbsd_hooks.domain values, or if the local part of the author's
    email differs from the login name (LOGNAME, falling back to USER)
    and the changeset does not pass valid_graft().

    ui is used to read configuration and to print the error summary;
    repo is the repository being modified; hooktype must be
    b'pretxnclose'; node identifies the first new changeset.  Any
    other keyword arguments are ignored.

    Raises error.Abort if the hook type is unsupported, if the domain
    configuration or the login name is missing, or if any changeset
    was rejected.  In the last case, up to MAX_ERRORS offenders per
    category are printed through ui.error() first.
    """
    # pretxnchangegroup doesn't work -- we don't have access to
    # predecessors.
    if hooktype != b'pretxnclose':
        raise error.Abort(
            _(b'Unsupported hook type %r') % pycompat.bytestr(hooktype)
        )
    accepted_domains = ui.configlist(b'netbsd_hooks', b'domain')
    if not accepted_domains:
        raise error.Abort(_(b'netbsd_hooks.domain not configured'))
    accepted_user = os.getenvb(b'LOGNAME') or os.getenvb(b'USER')
    if not accepted_user:
        raise error.Abort(_(b'LOGNAME and USER unset or empty'))
    # NOTE(review): node defaults to None, and this lookup assumes the
    # transaction supplies it -- confirm what should happen for
    # transactions that add no changesets.
    ctx = repo.unfiltered()[node]
    email_errors = []
    domain_errors = []
    seen_domains = set()
    user_errors = []
    seen_users = set()
    # Parents of the new changesets walked so far; consulted by
    # valid_graft().
    okparents = set()
    newrevs = list(repo.changelog.revs(start=ctx.rev()))
    newrevset = set(newrevs)
    # Newest first, so that descendants are seen before ancestors.
    for revid in reversed(newrevs):
        rev = repo[revid]
        email = stringutil.email(rev.user())
        user = stringutil.emailuser(email)
        domain = emaildomain(email)
        if not domain:
            _record_error(email_errors, rev.user())
            # Skip the remaining checks unconditionally, even once
            # the error list is full: they need a usable domain, and
            # b'@' + None would raise TypeError.
            continue
        if domain not in accepted_domains:
            # Report each offending domain only once.
            if domain not in seen_domains:
                seen_domains.add(domain)
                _record_error(domain_errors, b'@' + domain)
        elif user != accepted_user and not valid_graft(
            repo, rev, okparents, newrevset
        ):
            # Report each impersonated user only once.
            if user not in seen_users:
                seen_users.add(user)
                _record_error(user_errors, rev.user())
        okparents.update(rev.parents())
    # Interpolate only after _() has looked up the message template;
    # formatting first would hand gettext a string that can never
    # match a catalogue entry.
    errors = []
    if email_errors:
        errors.append(
            _(b'Changeset authors missing email domain: %s') % (
                b', '.join(email_errors),
            )
        )
    if domain_errors:
        errors.append(
            _(b'Changesets must have author address %s, not %s') % (
                b' or '.join(b'@' + d for d in accepted_domains),
                b' or '.join(domain_errors),
            )
        )
    if user_errors:
        errors.append(
            _(b'User %s must not impersonate others: %s') % (
                accepted_user,
                b', '.join(user_errors),
            )
        )
    if errors:
        for errorstr in errors:
            ui.error(errorstr + b'\n')
        raise error.Abort(_(b'user login authorship mismatches'))
