Saved some more memory by introducing a multi-stage boot process:

1. The STARTUP segment immediately hands control over to the BOOT segment.

2. The BOOT segment will be overwritten by the BSS segment. It relocates the CODE segment into the language card and the INIT segment beyond the BSS segment. Then it hands control over to the INIT segment.

3. The INIT segment will be overwritten by the heap. It initializes the BSS segment (thus destroying the BOOT segment) and the C library. Then it hands control over to main().

With this change a very recent cc65 snapshot with INIT segment support is necessary for building.
diff --git a/contiki-apple2/apple2.cfg b/contiki-apple2/apple2.cfg
index fb618c6..3f5d1bd 100644
--- a/contiki-apple2/apple2.cfg
+++ b/contiki-apple2/apple2.cfg
@@ -1,30 +1,32 @@
 MEMORY {
-    ZP: start = $00, size = $1A, file = "", define = yes;
+    ZP:     start = $0000, size = $001A, file = "", define = yes;
     BUFFER: start = $0800, size = $0400, file = "";
-    HEADER: start = $0, size = $4, file = "contiki";
-    RAM: start = $0C00, size = $8A00, file = "contiki";
-    LC: start = $D400, size = $0C00, file = "contiki";
-    PIC: start = $0, size = $FFFF, file = "contiki";
+    HEADER: start = $0000, size = $0004, file = "contiki";
+    RAM:    start = $0C00, size = $8A00, file = "contiki";
+    PIC:    start = $0000, size = $FFFF, file = "contiki";
+    TMP:    start = $0000, size = $FFFF, file = "contiki";
+    LC:     start = $D400, size = $0C00, file = "contiki";
 }
 SEGMENTS {
-    ZEROPAGE: load = ZP, type = zp;
-    UIPBUF: load = BUFFER, type = bss;
-    EXEHDR: load = HEADER, type = ro;
-    STARTUP: load = RAM, type = ro, define = yes;
-    CONTIKI: load = RAM, type = ro;
-    RODATA: load = RAM, type = ro;
-    DATA: load = RAM, type = rw;
-    BSS: load = RAM, type = bss, define = yes;
-    CODE: load = LC, type = ro, define = yes;
-    TEMP: load = PIC, type = ro, define = yes;
+    ZEROPAGE: load = ZP,             type = zp;
+    UIPBUF:   load = BUFFER,         type = bss;
+    EXEHDR:   load = HEADER,         type = ro;
+    STARTUP:  load = RAM,            type = ro,  define = yes;
+    CONTIKI:  load = RAM,            type = ro;
+    RODATA:   load = RAM,            type = ro;
+    DATA:     load = RAM,            type = rw;
+    BSS:      load = RAM,            type = bss, define = yes;
+    BOOT:     load = PIC,            type = ro,  define = yes;
+    INIT:     load = TMP, run = RAM, type = ro,  define = yes;
+    CODE:     load = LC,             type = ro,  define = yes;
 }
 FEATURES {
-    CONDES: segment = RODATA,
-	    type = constructor,
+    CONDES: segment = INIT,
+	    type  = constructor,
 	    label = __CONSTRUCTOR_TABLE__,
 	    count = __CONSTRUCTOR_COUNT__;
     CONDES: segment = RODATA,
-	    type = destructor,
+	    type  = destructor,
 	    label = __DESTRUCTOR_TABLE__,
 	    count = __DESTRUCTOR_COUNT__;
 }
diff --git a/contiki-apple2/lib/crt0.S b/contiki-apple2/lib/crt0.S
index 21bda0c..9decf48 100644
--- a/contiki-apple2/lib/crt0.S
+++ b/contiki-apple2/lib/crt0.S
@@ -5,11 +5,11 @@
 ;
 
 	.export		_exit
-	.import	   	initlib, donelib
-       	.import	       	__STARTUP_RUN__, __TEMP_SIZE__	; Linker generated
+	.import	   	zerobss, initlib, callmain, donelib
+       	.import	       	__STARTUP_RUN__, __BOOT_SIZE__	; Linker generated
        	.import	       	__BSS_RUN__, __BSS_SIZE__	; Linker generated
+       	.import	       	__INIT_RUN__, __INIT_SIZE__	; Linker generated
        	.import	       	__CODE_RUN__, __CODE_SIZE__	; Linker generated
-	.import		callmain
 	.importzp	sp
 
 ; ------------------------------------------------------------------------
@@ -17,36 +17,17 @@
 
 .segment	"EXEHDR"
 
-       	.word  	__STARTUP_RUN__							; Start address
-       	.word  	__BSS_RUN__ + __CODE_SIZE__ + __TEMP_SIZE__ - __STARTUP_RUN__	; Size
+       	.word  	__STARTUP_RUN__									; Start address
+       	.word  	__BSS_RUN__ + __BOOT_SIZE__ + __INIT_SIZE__ + __CODE_SIZE__ - __STARTUP_RUN__	; Size
 
 ; ------------------------------------------------------------------------
 ; Place the startup code in a special segment.
 
 .segment       	"STARTUP"
 
-; ProDOS TechRefMan, chapter 5.2.1:
-; "For maximum interrupt efficiency, a system program should not use more
-;  than the upper 3/4 of the stack."
+; Forward control to the code in the "BOOT" segment
 
-	ldx	#$FF
-	txs	       		; Init stack pointer
-
-; Call code in the "TEMP" segment
-
-	jsr	__BSS_RUN__ + __CODE_SIZE__
-
-; Switch in LC bank 2 for R/O
-
-	bit	$C080
-
-; Call module constructors
-
-	jsr	initlib
-
-; Push arguments and call main()
-
-	jsr	callmain
+	jmp	__BSS_RUN__
 
 ; Avoid re-entrance of donelib. This is also the _exit entry
 
@@ -78,7 +59,14 @@
 ; ------------------------------------------------------------------------
 ; The linker doesn't calculate where this code runs so it has to be PIC
 
-.segment	"TEMP"
+.segment	"BOOT"
+
+; ProDOS TechRefMan, chapter 5.2.1:
+; "For maximum interrupt efficiency, a system program should not use more
+;  than the upper 3/4 of the stack."
+
+	ldx	#$FF
+	txs	       		; Init stack pointer
 
 ; Switch to 80 column mode
 
@@ -120,15 +108,15 @@
 
 ; Set source start
 
-	lda	#<__BSS_RUN__
-	ldx	#>__BSS_RUN__
+	lda	#<(__BSS_RUN__ + __BOOT_SIZE__ + __INIT_SIZE__)
+	ldx	#>(__BSS_RUN__ + __BOOT_SIZE__ + __INIT_SIZE__)
 	sta	$3C
 	stx	$3D
 
 ; Set source end
 
-	lda	#<(__BSS_RUN__ + __CODE_SIZE__)
-	ldx	#>(__BSS_RUN__ + __CODE_SIZE__)
+	lda	#<(__BSS_RUN__ + __BOOT_SIZE__ + __INIT_SIZE__ + __CODE_SIZE__)
+	ldx	#>(__BSS_RUN__ + __BOOT_SIZE__ + __INIT_SIZE__ + __CODE_SIZE__)
 	sta	$3E
 	stx	$3F
 
@@ -139,38 +127,61 @@
 	sta	$42
 	stx	$43
 
-; Reset index and call MOVE to relocate the CODE segment
+; Reset index and call MOVE to relocate the "CODE" segment
 
 	ldy	#$00
 	jsr	$FE2C
 
 ; Set source start
 
-	lda	#<__BSS_RUN__
-	ldx	#>__BSS_RUN__
+	lda	#<(__BSS_RUN__ + __BOOT_SIZE__)
+	ldx	#>(__BSS_RUN__ + __BOOT_SIZE__)
 	sta	$3C
 	stx	$3D
 
 ; Set source end
 
-	lda	#<(__BSS_RUN__ + __BSS_SIZE__ - 1)
-	ldx	#>(__BSS_RUN__ + __BSS_SIZE__ - 1)
+	lda	#<(__BSS_RUN__ + __BOOT_SIZE__ + __INIT_SIZE__)
+	ldx	#>(__BSS_RUN__ + __BOOT_SIZE__ + __INIT_SIZE__)
 	sta	$3E
 	stx	$3F
 
 ; Set destination
 
-	lda	#<(__BSS_RUN__ + 1)
-	ldx	#>(__BSS_RUN__ + 1)
+	lda	#<__INIT_RUN__
+	ldx	#>__INIT_RUN__
 	sta	$42
 	stx	$43
 
-; Reset index, set source and goto MOVE to clear the BSS
+; Reset index and call MOVE to relocate the "INIT" segment
 
 	ldy	#$00
-	tya
-	sta	($3C),y
-	jmp	$FE2C
+	jsr	$FE2C
+
+; Switch in LC bank 2 for R/O
+
+	bit	$C080
+
+; Forward control to code in the "INIT" segment
+
+	jmp	init
+
+; ------------------------------------------------------------------------
+; The "INIT" segment will be overwritten by the heap
+
+.segment	"INIT"
+
+; Clear the BSS data (and thus overwrite the "BOOT" segment)
+
+init:	jsr	zerobss
+
+; Call module constructors
+
+	jsr	initlib
+
+; Push arguments and call main()
+
+	jmp	callmain
 
 ; ------------------------------------------------------------------------
 ; Data