//
// Syd: rock-solid application kernel
// src/kernel/exec.rs: exec(3) handlers
//
// Copyright (c) 2023, 2024, 2025 Ali Polatel <alip@chesswob.org>
//
// SPDX-License-Identifier: GPL-3.0

use std::{fs::File, io::Seek, os::fd::AsRawFd};

use libseccomp::ScmpNotifResp;
use nix::{
    errno::Errno,
    fcntl::{AtFlags, OFlag},
    sys::signal::{kill, Signal},
    unistd::Pid,
    NixPath,
};

use crate::{
    config::{PROC_FILE, PTRACE_DATA_EXECVE, PTRACE_DATA_EXECVEAT},
    elf::{ElfError, ElfFileType, ElfType, ExecutableFile, LinkingType},
    err2no, error,
    fs::{is_executable, safe_open_magicsym, FileType, FsFlags, MaybeFd, AT_EXECVE_CHECK},
    hook::{RemoteProcess, SysArg, SysFlags, UNotifyEventRequest},
    kernel::{sandbox_path, to_atflags},
    path::XPathBuf,
    ptrace::ptrace_syscall_info,
    sandbox::{Action, Capability, IntegrityError, SandboxGuard},
    warn,
};

// Note, sysenter_exec is a ptrace(2) hook, not a seccomp hook!
// The seccomp hooks are only used with trace/allow_unsafe_ptrace:1.
#[allow(clippy::cognitive_complexity)]
pub(crate) fn sysenter_exec(
    pid: Pid,
    sandbox: &SandboxGuard,
    info: ptrace_syscall_info,
) -> Result<(File, ExecutableFile), Errno> {
    let data = if let Some(data) = info.seccomp() {
        data
    } else {
        unreachable!("BUG: Invalid system call information returned by kernel!");
    };

    #[allow(clippy::cast_possible_truncation)]
    let (syscall_name, arg, chk) = match data.ret_data as u16 {
        PTRACE_DATA_EXECVE => (
            "execve",
            SysArg {
                path: Some(0),
                fsflags: FsFlags::MUST_PATH,
                ..Default::default()
            },
            false,
        ),
        PTRACE_DATA_EXECVEAT => {
            // SAFETY: Reject undefined/invalid flags.
            let flags = to_atflags(
                data.args[4],
                AtFlags::AT_SYMLINK_NOFOLLOW | AtFlags::AT_EMPTY_PATH | AT_EXECVE_CHECK,
            )?;

            let mut fsflags = FsFlags::MUST_PATH;
            if flags.contains(AtFlags::AT_SYMLINK_NOFOLLOW) {
                fsflags.insert(FsFlags::NO_FOLLOW_LAST);
            }

            let empty_path = flags.contains(AtFlags::AT_EMPTY_PATH);
            (
                "execveat",
                SysArg {
                    dirfd: Some(0),
                    path: Some(1),
                    flags: if empty_path {
                        SysFlags::EMPTY_PATH
                    } else {
                        SysFlags::empty()
                    },
                    fsflags,
                    ..Default::default()
                },
                flags.contains(AT_EXECVE_CHECK),
            )
        }
        data => unreachable!("BUG: invalid syscall data {data}!"),
    };

    // Read remote path.
    let process = RemoteProcess::new(pid);

    // SAFETY: This is a ptrace hook, the PID cannot be validated.
    let (mut path, _, _, empty_path) =
        unsafe { process.read_path(sandbox, data.args, arg, false, None) }?;

    // Call sandbox access checker if Exec sandboxing is on.
    let caps = sandbox.getcaps(Capability::CAP_EXEC | Capability::CAP_TPE);
    if caps.contains(Capability::CAP_EXEC) {
        let hide = !(empty_path && arg.flags.contains(SysFlags::EMPTY_PATH))
            && sandbox.enabled(Capability::CAP_STAT);
        sandbox_path(
            None,
            sandbox,
            pid,
            path.abs(),
            Capability::CAP_EXEC,
            hide,
            syscall_name,
        )?;
    }

    if !arg.fsflags.follow_last()
        && path
            .typ
            .as_ref()
            .map(|typ| typ.is_symlink() || typ.is_magic_link())
            .unwrap_or(false)
    {
        // SAFETY: AT_SYMLINK_NOFOLLOW: If the file identified by dirfd
        // and a non-NULL pathname is a symbolic link, then the call
        // fails with the error ELOOP.
        return Err(Errno::ELOOP);
    }

    // SAFETY: Return EACCES without any more processing
    // if the file is not a regular file or a memory fd.
    // Mfd check depends on trace/allow_unsafe_memfd option.
    //
    // Note, attempting to execute directories on Linux
    // return EACCES, not EISDIR like the manual page
    // claims. GNU make has a test checking this errno.
    match path.typ.as_ref() {
        Some(FileType::Reg) => {}
        Some(FileType::Mfd) if sandbox.flags.allow_unsafe_memfd() => {}
        //Some(FileType::Dir) => return Err(Errno::EISDIR),
        _ => return Err(Errno::EACCES),
    };

    // SAFETY:
    // 1. Return EACCES without any more processing
    //    if the file is not executable.
    // 2. We set MUST_PATH in FsFlags, path.dir is always Some.
    // This uses AT_EXECVE_CHECK on Linux>=6.14.
    #[allow(clippy::disallowed_methods)]
    if !is_executable(path.dir.as_ref().unwrap()) {
        return Err(Errno::EACCES);
    }

    // Check SegvGuard.
    if let Some(action) = sandbox.check_segvguard(path.abs()) {
        if action != Action::Filter {
            let (_, bin) = path.abs().split();
            error!("ctx": "segvguard",
                "err": format!("max crashes {} exceeded, execution of `{bin}' denied",
                    sandbox.segvguard_maxcrashes),
                "tip": "increase `segvguard/maxcrashes'",
                "pid": pid.as_raw(), "path": path.abs());
        }

        match action {
            Action::Allow | Action::Warn => {}
            Action::Deny | Action::Filter => return Err(Errno::EACCES),
            Action::Panic => panic!(),
            Action::Exit => std::process::exit(libc::EACCES),
            Action::Stop => {
                let _ = kill(process.pid, Some(Signal::SIGSTOP));
                return Err(Errno::EACCES);
            }
            Action::Abort => {
                let _ = kill(process.pid, Some(Signal::SIGABRT));
                return Err(Errno::EACCES);
            }
            Action::Kill => {
                let _ = kill(process.pid, Some(Signal::SIGKILL));
                return Err(Errno::EACCES);
            }
        }
    }

    // Trusted Path Execution.
    if caps.contains(Capability::CAP_TPE) {
        // MUST_PATH ensures path.dir is Some.
        #[allow(clippy::disallowed_methods)]
        let file = path.dir.as_ref().unwrap();
        let (action, msg) = sandbox.check_tpe(file, path.abs());
        if !matches!(action, Action::Allow | Action::Filter) {
            // TODO: Fix proc_mmap to work in ptrace hooks.
            let msg = msg.as_deref().unwrap_or("?");
            error!("ctx": "trusted_path_execution",
                "err": format!("exec from untrusted path blocked: {msg}"),
                "pid": pid.as_raw(), "path": path.abs(),
                "sys": syscall_name, "arch": info.arch, "args": data.args,
                "tip": "move the binary to a safe location or use `sandbox/tpe:off'");
        }
        match action {
            Action::Allow | Action::Warn => {}
            Action::Deny | Action::Filter => return Err(Errno::EACCES),
            Action::Panic => panic!(),
            Action::Exit => std::process::exit(libc::EACCES),
            Action::Stop => {
                let _ = kill(pid, Some(Signal::SIGSTOP));
                return Err(Errno::EACCES);
            }
            Action::Abort => {
                let _ = kill(pid, Some(Signal::SIGABRT));
                return Err(Errno::EACCES);
            }
            Action::Kill => {
                let _ = kill(pid, Some(Signal::SIGKILL));
                return Err(Errno::EACCES);
            }
        }
    }

    // Open a new instance of the file for read.
    // We prefer this over WANT_READ to ensure
    // we do not share OFD with the sandbox process.
    assert!(
        path.base.is_empty(),
        "BUG: MUST_PATH returned a directory for exec, report a bug!"
    );
    let mut file = match path.dir.take() {
        Some(MaybeFd::Owned(fd)) => {
            let pfd = XPathBuf::from_self_fd(fd.as_raw_fd());
            safe_open_magicsym(PROC_FILE(), &pfd, OFlag::O_RDONLY).map(File::from)?
        }
        _ => return Err(Errno::ENOEXEC),
    };

    // Parse ELF as necessary for restrictions.
    let deny_script = sandbox.flags.deny_script();
    let restrict_32 = sandbox.flags.deny_elf32();
    let restrict_dyn = sandbox.flags.deny_elf_dynamic();
    let restrict_sta = sandbox.flags.deny_elf_static();
    let restrict_pie = !sandbox.flags.allow_unsafe_nopie();
    let restrict_xs = !sandbox.flags.allow_unsafe_stack();

    // Shared library execution depends on trace/allow_unsafe_exec:1.
    let restrict_ldd = !sandbox.flags.allow_unsafe_exec();
    let check_linking = restrict_ldd || restrict_dyn || restrict_sta || restrict_pie || restrict_xs;

    let exe = match ExecutableFile::parse(&file, check_linking) {
        Ok(exe) => exe,
        Err(ElfError::IoError(err)) => {
            let errno = err2no(&err);
            if !sandbox.filter_path(Capability::CAP_EXEC, path.abs()) {
                error!("ctx": "parse_elf",
                    "err": format!("io error: {errno}"),
                    "pid": pid.as_raw(), "path": path.abs());
            }
            return Err(errno);
        }
        Err(ElfError::BadMagic) => {
            if !sandbox.filter_path(Capability::CAP_EXEC, path.abs()) {
                error!("ctx": "parse_elf",
                    "err": "invalid ELF file",
                    "pid": pid.as_raw(), "path": path.abs());
            }
            return Err(Errno::ENOEXEC);
        }
        Err(ElfError::Malformed) => {
            if !sandbox.filter_path(Capability::CAP_EXEC, path.abs()) {
                error!("ctx": "parse_elf",
                    "err": "malformed ELF file",
                    "pid": pid.as_raw(), "path": path.abs());
            }
            return Err(Errno::ENOEXEC);
        }
    };

    let is_script = exe == ExecutableFile::Script;
    if is_script && deny_script {
        if !sandbox.filter_path(Capability::CAP_EXEC, path.abs()) {
            error!("ctx": "deny_script",
                "err": "script execution denied",
                "pid": pid.as_raw(), "path": path.abs(),
                "exe": format!("{exe}"));
        }
        return Err(Errno::EACCES);
    }

    if !is_script
        && restrict_ldd
        && !matches!(
            exe,
            ExecutableFile::Elf {
                file_type: ElfFileType::Executable,
                ..
            }
        )
    {
        if !sandbox.filter_path(Capability::CAP_EXEC, path.abs()) {
            error!("ctx": "check_elf",
                "err": "ld.so exec-indirection",
                "pid": pid.as_raw(), "path": path.abs(),
                "exe": format!("{exe}"));
        }
        return Err(Errno::EACCES);
    }

    if !is_script && restrict_pie && matches!(exe, ExecutableFile::Elf { pie: false, .. }) {
        if !sandbox.filter_path(Capability::CAP_EXEC, path.abs()) {
            error!("ctx": "check_elf", "err": "not PIE",
                "pid": pid.as_raw(), "path": path.abs(),
                "tip": "configure `trace/allow_unsafe_nopie:1'",
                "exe": format!("{exe}"));
        }
        return Err(Errno::EACCES);
    }

    if !is_script && restrict_xs && matches!(exe, ExecutableFile::Elf { xs: true, .. }) {
        if !sandbox.filter_path(Capability::CAP_EXEC, path.abs()) {
            error!("ctx": "check_elf", "err": "execstack",
                "pid": pid.as_raw(), "path": path.abs(),
                "tip": "configure `trace/allow_unsafe_stack:1'",
                "exe": format!("{exe}"));
        }
        return Err(Errno::EACCES);
    }

    if !is_script
        && restrict_32
        && matches!(
            exe,
            ExecutableFile::Elf {
                elf_type: ElfType::Elf32,
                ..
            }
        )
    {
        if !sandbox.filter_path(Capability::CAP_EXEC, path.abs()) {
            error!("ctx": "check_elf", "err": "32-bit",
                "pid": pid.as_raw(), "path": path.abs(),
                "tip": "configure `trace/deny_elf32:0'",
                "exe": format!("{exe}"));
        }
        return Err(Errno::EACCES);
    }

    if !is_script
        && restrict_dyn
        && matches!(
            exe,
            ExecutableFile::Elf {
                linking_type: Some(LinkingType::Dynamic),
                ..
            }
        )
    {
        if !sandbox.filter_path(Capability::CAP_EXEC, path.abs()) {
            error!("ctx": "check_elf", "err": "dynamic-link",
                "pid": pid.as_raw(), "path": path.abs(),
                "tip": "configure `trace/deny_elf_dynamic:0'",
                "exe": format!("{exe}"));
        }
        return Err(Errno::EACCES);
    }

    if !is_script
        && restrict_sta
        && matches!(
            exe,
            ExecutableFile::Elf {
                linking_type: Some(LinkingType::Static),
                ..
            }
        )
    {
        if !sandbox.filter_path(Capability::CAP_EXEC, path.abs()) {
            error!("ctx": "check_elf", "err": "static-link",
                "pid": pid.as_raw(), "path": path,
                "tip": "configure `trace/deny_elf_static:0'",
                "exe": format!("{exe}"));
        }
        return Err(Errno::EACCES);
    }

    // Check for Force sandboxing.
    if sandbox.enabled(Capability::CAP_FORCE) {
        // Reset the file offset and calculate checksum.
        file.rewind().or(Err(Errno::EACCES))?;

        match sandbox.check_force2(path.abs(), &mut file) {
            Ok(Action::Allow) => {}
            Ok(Action::Warn) => {
                warn!("ctx": "verify_elf", "act": Action::Warn,
                    "pid": pid.as_raw(), "path": path.abs(),
                    "tip": format!("configure `force+{path}:<checksum>'"));
            }
            Ok(Action::Filter) => return Err(Errno::EACCES),
            Ok(Action::Deny) => {
                warn!("ctx": "verify_elf", "act": Action::Deny,
                    "pid": pid.as_raw(), "path": path.abs(),
                    "tip": format!("configure `force+{path}:<checksum>'"));
                return Err(Errno::EACCES);
            }
            Ok(Action::Stop) => {
                warn!("ctx": "verify_elf", "act": Action::Stop,
                    "pid": pid.as_raw(), "path": path.abs(),
                    "tip": format!("configure `force+{path}:<checksum>'"));
                let _ = kill(pid, Some(Signal::SIGSTOP));
                return Err(Errno::EACCES);
            }
            Ok(Action::Abort) => {
                warn!("ctx": "verify_elf", "act": Action::Abort,
                    "pid": pid.as_raw(), "path": path.abs(),
                    "tip": format!("configure `force+{path}:<checksum>'"));
                let _ = kill(pid, Some(Signal::SIGABRT));
                return Err(Errno::EACCES);
            }
            Ok(Action::Kill) => {
                warn!("ctx": "verify_elf", "act": Action::Kill,
                    "pid": pid.as_raw(), "path": path.abs(),
                    "tip": format!("configure `force+{path}:<checksum>'"));
                let _ = kill(pid, Some(Signal::SIGKILL));
                return Err(Errno::EACCES);
            }
            Ok(Action::Exit) => {
                error!("ctx": "verify_elf", "act": Action::Exit,
                    "pid": pid.as_raw(), "path": path.abs(),
                    "tip": format!("configure `force+{path}:<checksum>'"));
                std::process::exit(libc::EACCES);
            }
            Ok(Action::Panic) => panic!(),
            Err(IntegrityError::Sys(errno)) => {
                error!("ctx": "verify_elf",
                    "err": format!("system error during ELF checksum calculation: {errno}"),
                    "pid": pid.as_raw(), "path": path.abs(),
                    "tip": format!("configure `force+{path}:<checksum>'"));
                return Err(Errno::EACCES);
            }
            Err(IntegrityError::Hash {
                action,
                expected,
                found,
            }) => {
                if !matches!(action, Action::Allow | Action::Filter) {
                    error!("ctx": "verify_elf", "act": action,
                        "err": format!("ELF checksum mismatch: {found} is not {expected}"),
                        "pid": pid.as_raw(), "path": path.abs(),
                        "tip": format!("configure `force+{path}:<checksum>'"));
                }
                match action {
                    Action::Allow | Action::Warn => {}
                    Action::Filter | Action::Deny => return Err(Errno::EACCES),
                    Action::Stop | Action::Abort | Action::Kill => {
                        let _ = kill(
                            pid,
                            Some(
                                Signal::try_from(
                                    action
                                        .signal()
                                        .map(|sig| sig as i32)
                                        .unwrap_or(libc::SIGKILL),
                                )
                                .unwrap_or(Signal::SIGKILL),
                            ),
                        );
                        return Err(Errno::EACCES);
                    }
                    Action::Panic => panic!(),
                    Action::Exit => std::process::exit(libc::EACCES),
                };
            }
        }
    }

    if chk {
        // AT_EXECVE_CHECK, no need to execute the file.
        // Caller checks this errno value to before calling
        // ptrace_skip_syscall.
        return Err(Errno::ECANCELED);
    }

    Ok((file, exe))
}

pub(crate) fn sys_execve(request: UNotifyEventRequest) -> ScmpNotifResp {
    let arg = SysArg {
        path: Some(0),
        fsflags: FsFlags::MUST_PATH,
        ..Default::default()
    };
    syscall_exec_handler(request, "execve", arg, false)
}

pub(crate) fn sys_execveat(request: UNotifyEventRequest) -> ScmpNotifResp {
    let req = request.scmpreq;

    // SAFETY: Reject undefined/invalid flags.
    let flags = match to_atflags(
        req.data.args[4],
        AtFlags::AT_SYMLINK_NOFOLLOW | AtFlags::AT_EMPTY_PATH | AT_EXECVE_CHECK,
    ) {
        Ok(flags) => flags,
        Err(errno) => return request.fail_syscall(errno),
    };

    let mut fsflags = FsFlags::MUST_PATH;
    if flags.contains(AtFlags::AT_SYMLINK_NOFOLLOW) {
        fsflags.insert(FsFlags::NO_FOLLOW_LAST);
    }

    let chk = flags.contains(AT_EXECVE_CHECK);
    let empty_path = flags.contains(AtFlags::AT_EMPTY_PATH);
    let arg = SysArg {
        dirfd: Some(0),
        path: Some(1),
        flags: if empty_path {
            SysFlags::EMPTY_PATH
        } else {
            SysFlags::empty()
        },
        fsflags,
        ..Default::default()
    };
    syscall_exec_handler(request, "execveat", arg, chk)
}

// SAFETY: This handler only runs with trace/allow_unsafe_ptrace:1,
// and it's vulnerable to TOCTOU. With ptrace on, this is mitigated
// using the TOCTOU-mitigator, see the wait() function for context.
// See: https://bugzilla.kernel.org/show_bug.cgi?id=218501
/// Shared seccomp notify handler for `execve(2)` and `execveat(2)`.
///
/// `syscall_name` is passed to the sandbox access checker, `arg`
/// describes where the path arguments live, and `chk` is true when
/// the call is an `AT_EXECVE_CHECK` request: in that case the syscall
/// returns 0 once all checks pass instead of being continued.
#[allow(clippy::cognitive_complexity)]
fn syscall_exec_handler(
    request: UNotifyEventRequest,
    syscall_name: &str,
    arg: SysArg,
    chk: bool,
) -> ScmpNotifResp {
    syscall_handler!(request, |request: UNotifyEventRequest| {
        // The checks below run regardless of whether Exec sandboxing
        // is enabled. When it is off, only the path access check is
        // skipped; the file type and executability checks
        // (aka AT_EXECVE_CHECK) still apply.
        let sandbox = request.get_sandbox();
        let allow_memfd = sandbox.flags.allow_unsafe_memfd();

        // Fetch the path argument from the sandbox process.
        let (path, _) = request.read_path(&sandbox, arg, false)?;

        // Consult the access checker when Exec sandboxing is on.
        if sandbox.enabled(Capability::CAP_EXEC) {
            let hide = sandbox.enabled(Capability::CAP_STAT);
            sandbox_path(
                Some(&request),
                &sandbox,
                request.scmpreq.pid(), // Unused when request.is_some()
                path.abs(),
                Capability::CAP_EXEC,
                hide,
                syscall_name,
            )?;
        }
        // Done with the sandbox: release the read-lock.
        drop(sandbox);

        // SAFETY: AT_SYMLINK_NOFOLLOW: If the file identified by dirfd
        // and a non-NULL pathname is a symbolic link, then the call
        // fails with the error ELOOP.
        if !arg.fsflags.follow_last()
            && matches!(
                path.typ.as_ref(),
                Some(typ) if typ.is_symlink() || typ.is_magic_link()
            )
        {
            return Err(Errno::ELOOP);
        }

        // SAFETY: Only regular files and, depending on the
        // trace/allow_unsafe_memfd option, memory fds may be executed.
        // Everything else gets EACCES without further processing.
        //
        // Note, Linux returns EACCES for directories too, not EISDIR
        // as the manual page claims. GNU make has a test checking
        // this errno.
        let type_ok = match path.typ.as_ref() {
            Some(FileType::Reg) => true,
            Some(FileType::Mfd) => allow_memfd,
            _ => false,
        };
        if !type_ok {
            return Err(Errno::EACCES);
        }

        // SAFETY:
        // 1. A non-executable file gets EACCES without further
        //    processing.
        // 2. MUST_PATH is set in FsFlags, so path.dir is always Some.
        // This uses AT_EXECVE_CHECK on Linux>=6.14.
        #[allow(clippy::disallowed_methods)]
        let fd = path.dir.as_ref().unwrap();
        if !is_executable(fd) {
            return Err(Errno::EACCES);
        }

        if chk {
            // AT_EXECVE_CHECK, no need to execute the file.
            Ok(request.return_syscall(0))
        } else {
            // SAFETY: This is vulnerable to TOCTOU,
            // See the comment at function header.
            Ok(unsafe { request.continue_syscall() })
        }
    })
}
