PulkoMandy | 17fc759 | 2022-07-28 18:27:54 +0200 | [diff] [blame^] | 1 | This chapter documents the Backend for the PowerPC processor family. |
| 2 | |
| 3 | @section Additional options for this version |
| 4 | |
| 5 | This backend provides the following additional options: |
| 6 | |
| 7 | @table @option |
| 8 | |
| 9 | @item -amiga-align |
| 10 | |
| 11 | Do not require any alignments greater than 2 bytes. |
| 12 | This is needed when accessing Amiga system-structures, but |
| 13 | can cause a performance penalty. |
| 14 | |
| 15 | @item -baserel32mos |
| 16 | |
| 17 | Use 32bit base-relative addressing as used by MorphOS. |
| 18 | |
| 19 | @item -baserel32os4 |
| 20 | |
| 21 | Use 32bit base-relative addressing as used by AmigaOS 4. |
| 22 | |
| 23 | @item -const-in-data |
| 24 | |
| 25 | By default constant data will be placed in the @code{.rodata} |
| 26 | section. Using this option it will be placed in the |
| 27 | @code{.data} section. |
| 28 | Note that on operating systems with memory protection this |
| 29 | option will disable write-protection of constant data. |
| 30 | |
| 31 | @item -eabi |
| 32 | Use the PowerPC Embedded ABI (eabi). |
| 33 | |
| 34 | @item -elf |
| 35 | |
| 36 | Do not prefix symbols with '_'. Prefix labels with '.'. |
| 37 | |
| 38 | @item -fsub-zero |
| 39 | |
| 40 | Use fsub to load a floating-point-register with zero. |
| 41 | This is faster but requires all registers to always contain |
| 42 | valid values (i.e. no NaNs etc.) which may not be the case |
| 43 | depending on startup-code, libraries etc. |
| 44 | |
| 45 | @item -gas |
| 46 | Create code suitable for the GNU assembler. |
| 47 | |
| 48 | @item -madd |
| 49 | Use the @code{fmadd/fmsub} instructions for combining |
| 50 | multiplication with addition/subtraction in one instruction. |
| 51 | As these instructions do not round between the operations, |
| 52 | they have increased precision over separate addition and |
| 53 | multiplication. |
| 54 | |
| 55 | While this usually does no harm, it is not ISO conforming |
| 56 | and therefore not the default behaviour. |
| 57 | |
| 58 | @item -merge-constants |
| 59 | |
| 60 | Place identical floating point constants at the same |
| 61 | memory location. This can reduce program size. |
| 62 | |
| 63 | @item -no-align-args |
| 64 | |
| 65 | Do not align function arguments on the stack stricter |
| 66 | than 4 bytes. Default with @option{-poweropen}. |
| 67 | |
| 68 | @item -no-peephole |
| 69 | |
| 70 | Do not perform several peephole optimizations. |
| 71 | Currently includes: |
| 72 | @itemize @minus |
| 73 | @item better use of d16(r) addressing |
| 74 | @item use of indexed addressing modes |
| 75 | @item use of update-flag |
| 76 | @item use of record-flag |
| 77 | @item use of condition-code-registers to avoid certain branches |
| 78 | @end itemize |
| 79 | |
| 80 | @item -no-regnames |
| 81 | |
| 82 | Do not use register names but only numbers in the assembly |
| 83 | output. This is necessary |
| 84 | to avoid name-conflicts when using @option{-elf}. |
| 85 | |
| 86 | @item -poweropen |
| 87 | |
| 88 | Generate code for the PowerOpen ABI as used in AIX. |
| 89 | This does not work correctly yet. |
| 90 | |
| 91 | @item -sc |
| 92 | |
| 93 | Generate code for the modified PowerOpen ABI used in the |
| 94 | StormC compiler (aka WarpOS ABI). |
| 95 | |
| 96 | @item -sd |
| 97 | Place all objects in small data-sections. |
| 98 | |
| 99 | @item -setccs |
| 100 | |
| 101 | The V.4 ABI requires signalling (in a bit of the condition code |
| 102 | register) when arguments to varargs-functions |
| 103 | are passed in floating-point registers. |
| 104 | vbcc usually does not make use of this and |
| 105 | therefore does not set that bit by default. |
| 106 | This may lead to problems when linking objects compiled by |
| 107 | vbcc to objects/libraries created by other |
| 108 | compilers and calling varargs-functions with floating-point |
| 109 | arguments. |
| 110 | @option{-setccs} will fix this problem. |
| 111 | |
| 112 | @item -use-commons |
| 113 | |
| 114 | Use real common symbols instead of bss symbols for |
| 115 | non-initialized external variables. |
| 116 | |
| 117 | @item -use-lmw |
| 118 | |
| 119 | Use @code{lmw/stmw}-instructions. This can significantly reduce |
| 120 | code-size. However these instructions may be slower on |
| 121 | certain PPCs. |
| 122 | |
| 123 | @end table |
| 124 | |
| 125 | @section ABI |
| 126 | |
| 127 | This backend supports the following registers: |
| 128 | |
| 129 | @itemize @minus |
| 130 | @item @code{r0} through @code{r31} for the general purpose registers, |
| 131 | @item @code{f0} through @code{f31} for the floating point registers and |
| 132 | @item @code{cr0} through @code{cr7} for the condition-code registers. |
| 133 | @end itemize |
| 134 | |
| 135 | Additionally, the register pairs @code{r3/r4, r5/r6, r7/r8, r9/r10, |
| 136 | r14/r15, r16/r17, r18/r19, |
| 137 | r20/r21, r22/r23, r24/r25, r26/r27, r28/r29} and @code{r30/r31} are |
| 138 | available. |
| 139 | |
| 140 | @code{r0, r11, r12, f0, f12} and @code{f13} are reserved by the |
| 141 | backend. |
| 142 | |
| 143 | |
| 144 | |
| 145 | The current version generates assembly output for use with @file{vasmppc} |
| 146 | or the GNU assembler. The generated code should |
| 147 | work on 32bit systems based on a PowerPC CPU using the V.4 ABI or the |
| 148 | PowerPC Embedded ABI (eabi). |
| 149 | |
| 150 | |
| 151 | The registers r0, r3-r12, f0-f13 and cr0-cr1 are used as scratch registers |
| 152 | (i.e. they can be destroyed in function calls), all other registers are |
| 153 | preserved. r1 is the stack-pointer and r13 is the small-data-pointer if |
| 154 | small-data-mode is used. |
| 155 | |
| 156 | The first 8 function arguments which have integer or pointer types |
| 157 | are passed in registers r3 through r10 and the first 8 floating-point |
| 158 | arguments are passed in registers f1 through f8. All other arguments |
| 159 | are passed on the stack. |
| 160 | |
| 161 | Integers and pointers are returned in r3 (and r4 for long long), |
| 162 | floats and doubles in f1. |
| 163 | All other types are returned by passing the function the address |
| 164 | of the result as a hidden argument - so when you call such a function |
| 165 | without a proper declaration in scope you can expect a crash. |
| 166 | |
| 167 | The elementary data types are represented as follows: |
| 168 | |
| 169 | @example |
| 170 | type size in bits alignment in bytes (-amiga-align) |
| 171 | |
| 172 | char 8 1 (1) |
| 173 | short 16 2 (2) |
| 174 | int 32 4 (2) |
| 175 | long 32 4 (2) |
| 176 | long long 64 8 (2) |
| 177 | all pointers 32 4 (2) |
| 178 | float 32 4 (2) |
| 179 | double 64 8 (2) |
| 180 | @end example |
| 181 | |
| 182 | @section Target-specific variable-attributes |
| 183 | |
| 184 | The PPC-backend offers the following variable-attributes: |
| 185 | |
| 186 | @table @code |
| 187 | |
| 188 | @item __saveds |
| 189 | Load the pointer to the small data segment at |
| 190 | function-entry. Applicable only to functions. |
| 191 | |
| 192 | @item __chip |
| 193 | Place variable in chip-memory. Only applicable on |
| 194 | AmigaOS to variables with static storage-duration. |
| 195 | |
| 196 | @item __far |
| 197 | Do not place this variable in the small-data segment |
| 198 | in small-data-mode. No effect in large-data-mode. |
| 199 | Only applicable to variables with static |
| 200 | storage-duration. |
| 201 | |
| 202 | @item __near |
| 203 | Currently ignored. |
| 204 | |
| 205 | @item __saveall |
| 206 | Make sure all registers are saved by this function. On lower |
| 207 | optimization levels, all volatile registers will be saved |
| 208 | additionally. On higher levels, only the ones that may be |
| 209 | destroyed are saved. |
| 210 | |
| 211 | @item __interrupt |
| 212 | Return with an @code{rfi}-instruction rather than @code{blr}. |
| 213 | |
| 214 | @item __section("name","attr") |
| 215 | Place this function/object in section "name" with |
| 216 | attributes "attr". |
| 217 | @end table |
| 218 | |
| 219 | |
| 220 | @section Target-specific pragmas |
| 221 | |
| 222 | The PPC-backend offers the following #pragmas: |
| 223 | |
| 224 | @table @code |
| 225 | |
| 226 | @item #pragma amiga-align |
| 227 | Set alignment like the @option{-amiga-align} option. |
| 228 | |
| 229 | @item #pragma natural-align |
| 230 | Align every type to its own size. |
| 231 | |
| 232 | @item #pragma default-align |
| 233 | Set alignment according to command-line options. |
| 234 | |
| 235 | @end table |
| 236 | |
| 237 | @section Predefined Macros |
| 238 | |
| 239 | This backend defines the following macros: |
| 240 | |
| 241 | @table @code |
| 242 | @item __PPC__ |
| 243 | |
| 244 | @item __AMIGADATE__ |
| 245 | This is set to the current date as @code{"(DD.MM.YYYY)"}, |
| 246 | useful with version strings. |
| 247 | @end table |
| 248 | |
| 249 | @section Stack |
| 250 | |
| 251 | If the @option{-stack-check} option is used, every function-prologue will |
| 252 | call the function @code{__stack_check} with the stacksize needed by this |
| 253 | function in register r12. This function has to consider its own |
| 254 | stacksize and must restore all registers. |
| 255 | |
| 256 | @section Stdarg |
| 257 | |
| 258 | A possible <stdarg.h> for V.4 ABI could look like this: |
| 259 | |
| 260 | @example |
| 261 | |
| 262 | typedef struct @{ |
| 263 | int gpr; |
| 264 | int fpr; |
| 265 | char *regbase; |
| 266 | char *membase; |
| 267 | @} va_list; |
| 268 | |
| 269 | char *__va_start(void); |
| 270 | char *__va_regbase(void); |
| 271 | int __va_fixedgpr(void); |
| 272 | int __va_fixedfpr(void); |
| 273 | |
| 274 | #define va_start(vl,dummy) \ |
| 275 | ( \ |
| 276 | vl.gpr=__va_fixedgpr(), \ |
| 277 | vl.fpr=__va_fixedfpr(), \ |
| 278 | vl.regbase=__va_regbase(), \ |
| 279 | vl.membase=__va_start() \ |
| 280 | ) |
| 281 | |
| 282 | #define va_end(vl) ((vl).regbase=(vl).membase=0) |
| 283 | |
| 284 | #define va_copy(new,old) ((new)=(old)) |
| 285 | |
| 286 | #define __va_align(type) (__alignof(type)>=4?__alignof(type):4) |
| 287 | |
| 288 | #define __va_do_align(vl,type) ((vl).membase=(char *)((((unsigned int)((vl).membase))+__va_align(type)-1)/__va_align(type)*__va_align(type))) |
| 289 | |
| 290 | #define __va_mem(vl,type) (__va_do_align((vl),type),(vl).membase+=sizeof(type),((type*)((vl).membase))[-1]) |
| 291 | |
| 292 | #define va_arg(vl,type) \ |
| 293 | ( \ |
| 294 | (__typeof(type)&127)>10? \ |
| 295 | __va_mem((vl),type) \ |
| 296 | : \ |
| 297 | ( \ |
| 298 | (((__typeof(type)&127)>=6&&(__typeof(type)&127)<=8)) ? \ |
| 299 | ( \ |
| 300 | ++(vl).fpr<=8 ? \ |
| 301 | ((type*)((vl).regbase+32))[(vl).fpr-1] \ |
| 302 | : \ |
| 303 | __va_mem((vl),type) \ |
| 304 | ) \ |
| 305 | : \ |
| 306 | ( \ |
| 307 | ++(vl).gpr<=8 ? \ |
| 308 | ((type*)((vl).regbase+0))[(vl).gpr-1] \ |
| 309 | : \ |
| 310 | __va_mem((vl),type) \ |
| 311 | ) \ |
| 312 | ) \ |
| 313 | ) |
| 314 | |
| 315 | @end example |
| 316 | |
| 317 | A possible <stdarg.h> for PowerOpen ABI could look like this: |
| 318 | |
| 319 | @example |
| 320 | |
| 321 | typedef unsigned char *va_list; |
| 322 | |
| 323 | #define __va_align(type) (4) |
| 324 | |
| 325 | #define __va_do_align(vl,type) ((vl)=(char *)((((unsigned int)(vl))+__va_align(type)-1)/__va_align(type)*__va_align(type))) |
| 326 | |
| 327 | #define __va_mem(vl,type) (__va_do_align((vl),type),(vl)+=sizeof(type),((type*)(vl))[-1]) |
| 328 | |
| 329 | #define va_start(ap, lastarg) ((ap)=(va_list)(&lastarg+1)) |
| 330 | |
| 331 | #define va_arg(vl,type) __va_mem(vl,type) |
| 332 | |
| 333 | #define va_end(vl) ((vl)=0) |
| 334 | |
| 335 | #define va_copy(new,old) ((new)=(old)) |
| 336 | |
| 337 | @end example |
| 338 | |
| 339 | |
| 340 | @section Known problems |
| 341 | |
| 342 | @itemize @minus |
| 343 | @item composite types are put on the stack rather than passed via pointer |
| 344 | @item indication of fp-register-args with bit 6 of cr is not done well |
| 345 | @end itemize |
| 346 | |
| 347 | |
| 348 | |
| 349 | |