#!/bin/bash # Retrust - a script demonstrate "diverse double-compiling" # to counter the "trusting trust" attack. # David A. Wheeler # May 2005 #MONITOR="set; strace -e trace=file" # strace-info is just "strace -e trace=open" (or trace=file); # I made that a separate file, because the tcc makefiles split on "=". MONITOR="strace-info" function copystage { # Copy directory $1 to $2, patch $2, and cd into the new directory of $2. # $1 = original directory with source code # $2 = new directory; start with cp -pr $1 $2 echo echo "#### Copying $1 into $2" cp -pr $1 $2 cd $2 echo "### Now patching for 8-bit casting problem, 0.0 difference" # We don't try to compile c67 any more, so no need to patch around it: # echo "### Have trouble generating c67, so disable c67 generation." # sed -i 's/PROGS=tcc$(EXESUF) c67-tcc$(EXESUF) arm-tcc$(EXESUF)/PROGS=tcc$(EXESUF) arm-tcc$(EXESUF)/' Makefile # Patch: 8-bit casting problem. # tcc and gcc have DIFFERENT semantics about casting, and that # causes serious problems for this process. In this case, it's a bug in tcc. # A subtle bug in tcc causes casts from 8-bit signed to 32-bit unsigned # to go wrong in tcc (gcc does it correctly). # This could result in horrendously wrong code being generated by tcc, # but tcc gets lucky & merely generates inefficient code in some cases. # # The problem shows up in i386-asm.c: # if (sib_reg1 == -1) { # sib_reg1 = 5; # mod = 0x00; # } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) { # mod = 0x00; # } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) { # mod = 0x40; # } else { # mod = 0x80; # } # # More specifically, this line: # } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) { # where op->e.v is of type uint32_t # (This is forced on WIN32 to typedef unsigned int uint32_t;) # (but is usually defined via #include ) # # This is supposed to do sign-extension from an 8-bit value, but doesn't. # Here's a short demo program, which also demos the work-around: # add a temp variable. 
# # /* Demo of tcc bug */ # # #include # #include # # main() { # // This shows what the current tcc code does. 1,0,1 is the expected answer # // and is what gcc produces; but tcc produces 1,1,0. # // tcc 0.9.22 doesn't properly do sign-extension from a value cast to int8_t # uint32_t num; # int8_t tmp; # # printf("Testing tcc and gcc\n"); # num = 1; # printf("0x%x: result=%d (correct=1)\n", (int) num, num == (int8_t) num); # num = 0xfe; # printf("0x%x: result=%d (correct=0)\n", (int) num, num == (int8_t) num); # num = 0xffffffdc; # printf("0x%x: result=%d (correct=1)\n", (int) num, num == (int8_t) num); # # // By inserting an extra temporary variable, the tcc bug is worked around # printf("\nTesting workaround for tcc (excess temporary var)\n"); # num = 1; # tmp = (int8_t) num; # printf("0x%x: result=%d (correct=1)\n", (int) num, num == tmp); # num = 0xfe; # tmp = (int8_t) num; # printf("0x%x: result=%d (correct=0)\n", (int) num, num == tmp); # num = 0xffffffdc; # tmp = (int8_t) num; # printf("0x%x: result=%d (correct=1)\n", (int) num, num == tmp); # } # # # The impact of this bug can actually be quite serious; # since tinycc uses this construct, it will occasionally result # in generating the WRONG ModR/M bytecode when generating i386 code. # "Luckily" (?), what happens when compiling tcc with itself is simply # that the "long" form is chosen, even though the tcc code is written # to try to select a "short" form (and tccc DOES use the short form # if it's been compiled by gcc and not itself). 
# Here's code that shows the difference; it hits when compiling the # runtime library when compiling an in-line asm (sub and sbb): # 1ac: 89 45 c8 mov %eax,0xffffffc8(%ebp) 1ac: 89 45 c8 mov %eax,0xffffffc8(%ebp) # 1af: 8b 45 d0 mov 0xffffffd0(%ebp),%eax 1af: 8b 45 d0 mov 0xffffffd0(%ebp),%eax # 1b2: 8b 4d d4 mov 0xffffffd4(%ebp),%ecx 1b2: 8b 4d d4 mov 0xffffffd4(%ebp),%ecx # 1b5: 2b 4d dc sub 0xffffffdc(%ebp),%ecx | 1b5: 2b 8d dc ff ff ff sub 0xffffffdc(%ebp),%ecx # 1b8: 1b 45 d8 sbb 0xffffffd8(%ebp),%eax | 1bb: 1b 85 d8 ff ff ff sbb 0xffffffd8(%ebp),%eax # 1bb: 89 45 d0 mov %eax,0xffffffd0(%ebp) | 1c1: 89 45 d0 mov %eax,0xffffffd0(%ebp) # 1be: 89 4d d4 mov %ecx,0xffffffd4(%ebp) | 1c4: 89 4d d4 mov %ecx,0xffffffd4(%ebp) # 1c1: e9 08 00 00 00 jmp 1ce <__udivmoddi4+0x1c | 1c7: e9 08 00 00 00 jmp 1d4 <__udivmoddi4+0x1d # # # Modify i386-asm.c to insert an extra temporary # variable, so that we never hit the problem. # Modify static void parse_operand(TCCState *s1, Operand *op) perl -p -i -e 's/int reg, indir;/int reg, indir; int8_t tmp8;/;' i386-asm.c perl -p -i -e 's/if \(op->e\.v == \(int8_t\)op->e\.v\)/if (tmp8 = op->e.v, op->e.v == tmp8)/;' i386-asm.c # static inline void asm_modrm(int reg, Operand *op) # Version .20: perl -p -i -e 's/int mod, reg1, reg2;/int mod, reg1, reg2; int8_t tmp8;/;' i386-asm.c # Versions .21 and .22: perl -p -i -e 's/int mod, reg1, reg2, sib_reg1;/int mod, reg1, reg2, sib_reg1; int8_t tmp8;/;' i386-asm.c perl -p -i -e 's/} else if \(op->e\.v == \(int8_t\)op->e\.v && !op->e.sym\) {/} else if (tmp8 = op->e.v, op->e.v == tmp8 && !op->e.sym) {/;' i386-asm.c # static void asm_opcode(TCCState *s1, int opcode) perl -p -i -e 's/int i, modrm_index, reg, v, op1, is_short_jmp;/int i, modrm_index, reg, v, op1, is_short_jmp; int8_t tmp8;/;' i386-asm.c perl -p -i -e 's/if \(jmp_disp == \(int8_t\)jmp_disp\) {/if (tmp8=jmp_disp, jmp_disp == tmp8) {/;' i386-asm.c # Patch for "0.0" disagreement. 
# The tcc.c line: # if (f2 == 0.0) { # caused problems in making everything the same, because the "0.0" # caused VERY slightly different data to be placed in the data section. # The tcc generated by gcc generates that as 11 ASCIIZ and hex c9, # while tcc compiled thereafter generates 12 ASCIIZ. # Here's an example (the - is from tcc by gcc; the + is tcc by tcc) # 0474520 o n o f ' % s ' \0 m e m o r # 0474540 y f u l l \0 d i v i s i o n # 0474560 b y z e r o i n c o n s t #-0474600 a n t \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 311 \0 #+0474600 a n t \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 \0 # 0474620 d i v i s i o n b y z e r o # 0474640 i n c o n s t a n t \0 c o m # 0474660 p a r i s o n b e t w e e n # # I strongly suspect that the problem is this section of tcc; # note the "XXX: Not portable yet" comment: # if (is_float(vtop->type.t) && # (vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) { # Sym *sym; # int *ptr; # unsigned long offset; # # /* XXX: unify with initializers handling ? */ # /* CPUs usually cannot use float constants, so we store them # generically in data segment */ # printf("DEBUG: Suspicious.\n"); # size = type_size(&vtop->type, &align); # offset = (data_section->data_offset + align - 1) & -align; # data_section->data_offset = offset; # /* XXX: not portable yet */ # ptr = section_ptr_add(data_section, size); # size = size >> 2; # for(i=0;ic.tab[i]; # sym = get_sym_ref(&vtop->type, data_section, offset, size << 2); # vtop->r |= VT_LVAL | VT_SYM; # vtop->sym = sym; # vtop->c.ul = 0; # # The solution for now is to replace 0.0 with (f1-f1), which should # have the same result. perl -p -i -e 's/ 0\.0\)/ (f1-f1) )/;' tcc.c # We DO NOT "cd ..", it is easier this way. 
} # function copystage { # # $1 = original directory with source code # # $2 = new directory; start with cp -pr $1 $2 # # if [ "gcc" = $3 ]; then # ./configure --cc="$MONITOR gcc" # else # ./configure --cc="$MONITOR ../$3/tcc -B../$3 -I../$3 " # fi # # # DO NOT just run "make", it'll do lots of excess work and # # not necessarily what we want. # echo # echo "## Making libtcc1.a" # make libtcc1.a # echo # echo "## Making tcc" # make tcc # # # echo # # echo "### Here the results:" # # echo # # ls -l # # sha1sum * # # md5sum * # # echo # echo "### Result - $2 for tcc (length, SHA1, MD5):" # echo # ls -l tcc # sha1sum tcc # md5sum tcc # echo # echo "### For libtcc1.o (length, SHA1, MD5):" # ls -l libtcc1.o # sha1sum libtcc1.o # md5sum libtcc1.o # # if [ ! -f tcc ]; then # echo "BIG PROBLEM!" # exit 1 # fi # # cd .. # } ############### BEGIN PROCESS ############# echo echo "#### Retrust beginning" echo # Remove old stuff. rm -fr tcc-0.9.2?-* rm -fr tcc-0.9.20 tcc-0.9.21 tcc-0.9.22 # Let's show platform information cat /proc/version rpm -qi gcc gcc --version set # Using "tiny C Compiler" http://fabrice.bellard.free.fr/tcc/ # for i in `seq 0 22` # do # wget http://fabrice.bellard.free.fr/tcc/tcc-0.9.$i.tar.gz # done echo echo "### Hashes of source files:" echo sha1sum *.tar.gz md5sum *.tar.gz ls -l *.tar.gz # Create the unchanged subdirectories. tar xvzf tcc-0.9.20.tar.gz tar xvzf tcc-0.9.21.tar.gz tar xvzf tcc-0.9.22.tar.gz # Do traditional chaining. # Note that output (param 2) become the next param 3. echo echo "### Creating chain of tiny C compilers. First, bootstrap tcc." 
echo
# compilestage tcc-0.9.20 tcc-0.9.20-bootstrap gcc
# compilestage tcc-0.9.20 tcc-0.9.20-byself tcc-0.9.20-bootstrap
# compilestage tcc-0.9.21 tcc-0.9.21-update tcc-0.9.20-byself
# compilestage tcc-0.9.21 tcc-0.9.21-byself tcc-0.9.21-update
# compilestage tcc-0.9.22 tcc-0.9.22-update tcc-0.9.21-byself
# compilestage tcc-0.9.22 tcc-0.9.22-byself tcc-0.9.22-update
# This should produce exactly the same results:
# compilestage tcc-0.9.22 tcc-0.9.22-redemo tcc-0.9.22-byself

# ---------------------------------------------------------------------
# Stage helpers.  Every stage is: copy+patch a source tree (copystage),
# configure it to use some compiler, build libtcc1.a and tcc, go back up.
# A failure anywhere aborts the run, because every later stage (and the
# final hash comparison) would otherwise be built on a missing or stale
# compiler.
# ---------------------------------------------------------------------

# Print a message on stderr and abort the whole run.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Start a stage: copy+patch source tree $1 into new directory $2, enter
# it, and remember it in NEWDIR.
begin_stage() {
  NEWDIR=$2
  copystage "$1" "$NEWDIR"
  # copystage is expected to leave us inside $NEWDIR; never configure or
  # build anywhere else.
  [[ "${PWD##*/}" == "$NEWDIR" ]] || die "BIG PROBLEM! Not inside $NEWDIR after copystage"
}

# Run ./configure with compiler command line $1, wrapped by $MONITOR.
configure_cc() {
  ./configure --cc="$MONITOR $1" || die "BIG PROBLEM! configure failed in $NEWDIR"
}

# Compile with the system gcc.
use_gcc() {
  configure_cc "gcc"
}

# Compile with the previous stage's tcc, using the previous stage's
# runtime library and headers.
use_previous() {
  configure_cc "../$PREVIOUSCOMPILER/tcc -B../$PREVIOUSLIB -I../$PREVIOUSLIB "
}

# Compile with the previous stage's tcc, but against the runtime library
# and headers in the current directory (the ones just built here).
use_previous_with_local_lib() {
  configure_cc "../$PREVIOUSCOMPILER/tcc -B. -I. "
}

# Build one make target.  DO NOT just run "make": it does lots of excess
# work and not necessarily what we want.
build() {
  make "$1" || die "BIG PROBLEM! make $1 failed in $NEWDIR"
}

# Finish a stage: check a compiler was produced, return to the top
# directory, and make this stage the "previous" one for the next stage.
end_stage() {
  [[ -f tcc ]] || die "BIG PROBLEM! No tcc was produced in $NEWDIR"
  cd .. || die "BIG PROBLEM! Cannot leave $NEWDIR"
  PREVIOUSLIB=$NEWDIR
  PREVIOUSCOMPILER=$NEWDIR
}

# Stage built entirely by gcc: $1 = source tree, $2 = new directory.
stage_by_gcc() {
  begin_stage "$1" "$2"
  use_gcc
  build libtcc1.a
  build tcc
  end_stage
}

# Stage whose library AND compiler are both built by the previous stage's
# tcc against the previous stage's library: $1 = source, $2 = new dir.
stage_by_previous() {
  begin_stage "$1" "$2"
  use_previous
  build libtcc1.a
  build tcc
  end_stage
}

# Stage whose library is built against the previous stage's library, and
# whose compiler is then built against that freshly built library:
# $1 = source tree, $2 = new directory.
stage_by_previous_local_lib() {
  begin_stage "$1" "$2"
  use_previous
  build libtcc1.a
  use_previous_with_local_lib
  build tcc
  end_stage
}

# ------------------------- traditional chain -------------------------

# NOTE(review): this stage used to copy tcc-0.9.22 into a directory named
# tcc-0.9.20-chain-bootstrap.  The chain plan above, the directory name and
# the PATCH message below all describe bootstrapping tcc-0.9.20 with gcc,
# so the 0.9.20 tree is used here.  Change this one variable if the 0.9.22
# tree really was intended.
CHAIN_BOOTSTRAP_SRC=tcc-0.9.20

begin_stage "$CHAIN_BOOTSTRAP_SRC" tcc-0.9.20-chain-bootstrap
echo "### PATCH!"
echo "### gcc 3.4.3 won't compile tcc-0.9.20 directly, because"
echo "### gcc's checking is more stringent now. In tcc-0.9.20,"
echo "### functions are defined like this:"
echo "### void *__bound_ptr_add(void *p, int offset) __attribute__((regparm(2)));"
echo "### but in others the __attribute__((regparm(2)) is omitted."
echo "### gcc now perceives that as inconsistent."
echo "### We can pretend we're part of the bootstrap compiler (a preprocessor)"
echo "### and just throw away the regparm() stuff; it's just an optimization."
echo
perl -p -i -e 's/__attribute__\(\(regparm\(.\)\)\)//g;' *.[ch]
use_gcc
build libtcc1.a
build tcc
end_stage

stage_by_previous_local_lib tcc-0.9.20 tcc-0.9.20-chain-stage2
stage_by_previous           tcc-0.9.21 tcc-0.9.21-chain-update
stage_by_previous_local_lib tcc-0.9.21 tcc-0.9.21-chain-stage2
stage_by_previous           tcc-0.9.22 tcc-0.9.22-chain-update
stage_by_previous_local_lib tcc-0.9.22 tcc-0.9.22-chain-stage2

# Okay.  Now tcc-0.9.22-byself was compiled through a chain.

# ---------------------- diverse double-compiling ---------------------

echo
echo "#### Okay! Now let's do diverse double-compiling!"
echo

# compilestage tcc-0.9.22 tcc-0.9.22-bybootstrap gcc
# compilestage tcc-0.9.22 tcc-0.9.22-bytg tcc-0.9.22-bootstrap
# compilestage tcc-0.9.22 tcc-0.9.22-byttg tcc-0.9.22-bytg
# compilestage tcc-0.9.22 tcc-0.9.22-bytttg tcc-0.9.22-byttg
# compilestage tcc-0.9.22 tcc-0.9.22-byttttg tcc-0.9.22-bytttg

# if [ "gcc" = $3 ]; then
#  ./configure --cc="$MONITOR gcc"
# else
#  ./configure --cc="$MONITOR ../$3/tcc -B../$3 -I../$3 "
# fi
# echo "## Making libtcc1.a"
# make libtcc1.a
# echo
# echo "## Making tcc"
# make tcc

stage_by_gcc                tcc-0.9.22 tcc-0.9.22-bootstrap
stage_by_previous_local_lib tcc-0.9.22 tcc-0.9.22-stage2
stage_by_previous           tcc-0.9.22 tcc-0.9.22-stage3
stage_by_previous           tcc-0.9.22 tcc-0.9.22-stage4

echo
echo "### Here are the hash results for compiler (tcc) and runtime (libtcc1.o)"
echo
sha1sum */tcc | sort
echo
echo
sha1sum */libtcc1.o | sort