Tars介绍
Tars是将腾讯内部使用的微服务架构TAF(Total Application Framework)多年的实践成果总结而成的开源项目。它是基于名字服务、使用Tars协议的高性能RPC开发框架,同时配套一体化的服务治理平台,帮助个人或者企业快速地以微服务的方式构建自己稳定可靠的分布式应用。
源码获取
# Fetch the TarsFramework sources, then initialise all nested submodules.
git clone https://github.com/TarsCloud/TarsFramework.git
cd TarsFramework
git submodule update --init --recursive
移植过程
1 原子操作实现
tarscpp/util/include/util/tc_atomic.h中的几个原子操作函数(inc_fast、dec_and_test、add_and_return)都是基于x86内嵌汇编实现的;在ARM64平台下,改用GCC内置的原子操作函数实现,示例如下:
原x86嵌汇编实现:
/**
 * Adds i to _value.counter with an x86 XADD instruction and returns the
 * counter's new value.
 *
 * TARS_LOCK is defined outside this snippet; presumably it expands to the
 * x86 `lock` prefix that makes the read-modify-write atomic -- TODO confirm.
 *
 * @param i  amount to add
 * @return   value of the counter after the addition (old value + i)
 */
int add_and_return(int i)
{
/* Modern 486+ processor */
/* Keep the addend: the asm below overwrites i with the counter's old value. */
int __i = i;
__asm__ __volatile__(
/* xadd: %0 (register) receives the old counter value, %1 (memory) receives old + %0 */
TARS_LOCK "xaddl %0, %1;"
/* output: i is bound to a register and is written by the instruction */
:"=r"(i)
/* inputs: the counter in memory, and i pre-loaded into the same register as %0 */
/* NOTE(review): xadd also writes the counter, yet it is listed only as an "m" */
/* input and there is no "memory" clobber -- confirm the counter is volatile. */
:"m"(_value.counter), "0"(i));
/* old value + addend == new counter value */
return i + __i;
}
支持ARM64平台后的实现:
/**
 * Atomically adds i to _value.counter and returns the counter's new value.
 *
 * AArch64 uses the GCC/Clang __atomic_add_fetch builtin. Every other target
 * takes the x86 XADD path, exactly as before. TARS_LOCK is defined outside
 * this snippet.
 *
 * @param i  amount to add
 * @return   value of the counter after the addition
 */
int add_and_return(int i)
{
#if defined(__aarch64__)
    /* Builtin read-modify-write that yields the post-addition value. */
    return __atomic_add_fetch(&_value.counter, i, __ATOMIC_ACQ_REL);
#else
    /* Modern 486+ processor */
    /* Remember the addend: xadd replaces i with the counter's old value. */
    const int addend = i;
    __asm__ __volatile__(
        TARS_LOCK "xaddl %0, %1;"
        /* Both operands are read AND written by xadd, so both are "+"
         * operands; declaring the counter as an input only would let the
         * compiler assume the memory is unchanged. */
        : "+r"(i), "+m"(_value.counter)
        :
        /* "memory" keeps the compiler from reordering or caching other
         * memory accesses across the atomic operation; "cc" because xadd
         * updates the flags. */
        : "memory", "cc");
    /* old value + addend == new counter value */
    return i + addend;
#endif
}
2 高精度计时器实现
tarscpp/util/include/util/tc_timeprovider.h中实现了基于x86汇编的高精度计时器。其中rdtsc是x86下读取TSC(Time Stamp Counter,时间戳计数器)的指令。在ARM64平台下,我们可以通过mrs指令读取CNTVCT_EL0计数器寄存器来实现,具体实现如下。
原x86嵌汇编实现:
/*
 * rdtsc(low, high): executes the x86 RDTSC instruction.
 * The "=a" constraint binds `low` to EAX and "=d" binds `high` to EDX, so
 * `low` receives the lower 32 bits of the time-stamp counter and `high` the
 * upper 32 bits. Both arguments must be assignable lvalues.
 */
#define rdtsc(low,high) \
__asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
支持ARM64平台后的实现:
/*
 * rdtsc: read a free-running hardware counter.
 *
 *   AArch64 : rdtsc(var)       -- MRS from CNTVCT_EL0 into `var`. `var` must
 *                                 be a 64-bit integer lvalue so that %0 is
 *                                 an X register.
 *   x86     : rdtsc(low, high) -- RDTSC; `low` receives EAX (lower 32 bits)
 *                                 and `high` receives EDX (upper 32 bits).
 *
 * The two forms take a different number of arguments, so call sites need the
 * same architecture guards. The x86 form covers 32-bit and 64-bit x86, as
 * the original unconditional definition did.
 */
#if defined(__aarch64__)
#define rdtsc(var) \
    __asm__ __volatile__("mrs %0, CNTVCT_EL0" : "=r"(var))
#elif defined(__x86_64__) || defined(__i386__)
#define rdtsc(low,high) \
    __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
#endif
3 协程实现
协程是一种用户态的轻量级线程,其调度完全由用户控制。因此,协程调度切换时需要用户自己将寄存器和栈保存到其他地方,再切回来的时候,恢复先前保存的寄存器上下文和栈。在tarscpp/util/src/下的tc_make_x86_64_sysv_elf_gas.s和tc_jump_x86_64_sysv_elf_gas.s实现了基于x86_64架构的协程堆栈初始化和寄存器上下文切换操作。具体实现如下:
/*
 * make_fcontext (x86-64, System V ABI, GAS / AT&T syntax)
 *
 * Builds an initial fcontext_t at the top of a caller-supplied stack.
 *   In:  rdi = upper end of the context stack (fcontext_t is carved out below it)
 *        rsi = size of the context stack
 *        rdx = address of the context function
 *   Out: rax = pointer to the fcontext_t (16-byte aligned)
 *
 * fcontext_t slots written here:
 *   0x30 RSP   0x38 RIP   0x40 stack base   0x48 stack size
 *   0x50 MXCSR 0x54 x87 control word
 */
make_fcontext:
leaq -0x58(%rdi), %rax /* reserve 0x58 bytes for fcontext_t at top of context stack */
/* shift address in RAX to lower 16 byte boundary */
/* == pointer to fcontext_t and address of context stack */
andq $-16, %rax
movq %rdi, 0x40(%rax) /* save address of context stack pointer (base) in fcontext_t */
movq %rsi, 0x48(%rax) /* save context stack size in fcontext_t */
movq %rdx, 0x38(%rax) /* save address of context function in fcontext_t (RIP slot) */
stmxcsr 0x50(%rax) /* save MXCSR (SSE control and status register) */
fnstcw 0x54(%rax) /* save x87 control word */
leaq -0x8(%rax), %rdx /* reserve space for the return address on context stack, (RSP - 0x8) % 16 == 0 */
movq %rdx, 0x30(%rax) /* save address in RDX as stack pointer for context function */
leaq finish(%rip), %rcx /* compute abs address of label finish (RIP-relative) */
movq %rcx, (%rdx) /* save address of finish as return address for context function */
ret /* return pointer to fcontext_t placed on context stack */
/* finish: entered after the context function returns */
finish:
/* RSP points to same address as RSP on entry of context function + 0x8 */
xorq %rdi, %rdi /* exit code is zero */
call _exit@PLT /* exit application */
hlt /* executed only if _exit were to return */
/*
 * jump_fcontext (x86-64, System V ABI, GAS / AT&T syntax)
 *
 * Saves the current execution context and resumes another one.
 *   In:  rdi = fcontext_t that receives the current context
 *        rsi = fcontext_t of the context to resume
 *        rdx = value handed to the resumed context
 *        rcx = non-zero: also save/restore MXCSR and the x87 control word
 *   Out: in the resumed context, rax = rdi = the rdx passed in
 *
 * fcontext_t slots used:
 *   0x00 RBX 0x08 R12 0x10 R13 0x18 R14 0x20 R15 0x28 RBP
 *   0x30 RSP 0x38 RIP 0x50 MXCSR 0x54 x87 control word
 */
jump_fcontext:
/* save the callee-saved registers of the current context */
movq %rbx, (%rdi) /* save RBX */
movq %r12, 0x8(%rdi) /* save R12 */
movq %r13, 0x10(%rdi) /* save R13 */
movq %r14, 0x18(%rdi) /* save R14 */
movq %r15, 0x20(%rdi) /* save R15 */
movq %rbp, 0x28(%rdi) /* save RBP */
/* skip the FPU control state when the fourth argument is zero */
cmp $0, %rcx
je 1f
stmxcsr 0x50(%rdi) /* save MXCSR (SSE control and status register) */
fnstcw 0x54(%rdi) /* save x87 control word */
ldmxcsr 0x50(%rsi) /* restore MXCSR (SSE control and status register) */
fldcw 0x54(%rsi) /* restore x87 control word */
1:
leaq 0x8(%rsp), %rax /* exclude the return address and save as stack pointer */
movq %rax, 0x30(%rdi) /* save as stack pointer */
movq (%rsp), %rax /* save return address */
movq %rax, 0x38(%rdi) /* save return address as RIP */
/* load the callee-saved registers of the target context */
movq (%rsi), %rbx /* restore RBX */
movq 0x8(%rsi), %r12 /* restore R12 */
movq 0x10(%rsi), %r13 /* restore R13 */
movq 0x18(%rsi), %r14 /* restore R14 */
movq 0x20(%rsi), %r15 /* restore R15 */
movq 0x28(%rsi), %rbp /* restore RBP */
movq 0x30(%rsi), %rsp /* restore RSP: from here on we run on the target stack */
movq 0x38(%rsi), %rcx /* fetch the address to return to */
movq %rdx, %rax /* use third arg as return value after jump */
movq %rdx, %rdi /* use third arg as first arg in context function */
jmp %rcx /* indirect jump to context */
通过理解以上x86实现,并结合Procedure Call Standard for the Arm 64-bit Architecture(AAPCS64)规范,我们做了如下支持ARM64平台的实现:
/*
 * make_fcontext (AArch64, GNU as syntax)
 *
 * Builds an initial fcontext_t at the top of a caller-supplied stack.
 *   In:  x0 = upper end of the context stack (fcontext_t is carved out below it)
 *        x1 = size of the context stack
 *        x2 = address of the context function
 *   Out: x0 = pointer to the fcontext_t (16-byte aligned)
 *   Clobbers: x5, x7
 *
 * fcontext_t slots written here (0x88 bytes in total):
 *   0x60 LR   0x68 SP   0x70 stack base   0x78 stack size   0x80 PC
 * Slots 0x00-0x5f (general registers) are left uninitialised.
 */
make_fcontext:
    sub     x5, x0, #0x88           /* reserve room for fcontext_t below the stack top */
    and     x5, x5, #-16            /* round down to a 16-byte boundary */
    str     x0, [x5, #0x70]         /* stack base as passed by the caller */
    str     x1, [x5, #0x78]         /* stack size */
    str     x2, [x5, #0x80]         /* PC slot: entry point of the context function */
    str     x5, [x5, #0x68]         /* SP slot: the context stack grows down from the fcontext_t */
    adr     x7, finish              /* PC-relative address: no literal pool, no absolute relocation */
    str     x7, [x5, #0x60]         /* LR slot: the context function returns into finish */
    mov     x0, x5                  /* return the fcontext_t pointer */
    ret

/* Entered when the context function returns: terminate with exit code 0. */
finish:
    mov     x0, xzr                 /* exit code is zero */
    bl      _exit                   /* same termination call as the x86-64 version; does not return */
/*
 * jump_fcontext (AArch64, GNU as syntax)
 *
 * Saves the current execution context and resumes another one.
 *   In:  x0 = fcontext_t that receives the current context
 *        x1 = fcontext_t of the context to resume
 *        x2 = value handed to the resumed context
 *   Out: in the resumed context, x0 = the x2 passed in
 *   Clobbers: x5, x6
 *
 * fcontext_t slots used:
 *   0x00-0x58 x18..x29 (pairs)   0x60 LR (x30)   0x68 SP   0x80 PC
 *
 * NOTE(review): d8-d15 are callee-saved under AAPCS64 but are neither saved
 * nor restored here, and the fourth argument (x3) is never read, whereas the
 * x86-64 version uses its fourth argument to gate the FPU control state.
 * Confirm that no caller keeps floating-point values live across a switch.
 */
jump_fcontext:
/* save the general registers of the current context */
stp x18, x19, [x0]
stp x20, x21, [x0, 0x10]
stp x22, x23, [x0, 0x20]
stp x24, x25, [x0, 0x30]
stp x26, x27, [x0, 0x40]
stp x28, x29, [x0, 0x50]
str x30, [x0, 0x60]
/* SP cannot be stored directly, so copy it through x5 */
mov x5, sp
str x5, [x0, 0x68]
/* PC slot = return address, so a later switch back resumes at our caller */
str x30, [x0, 0x80]
/* load the general registers of the target context */
ldp x18, x19, [x1]
ldp x20, x21, [x1, 0x10]
ldp x22, x23, [x1, 0x20]
ldp x24, x25, [x1, 0x30]
ldp x26, x27, [x1, 0x40]
ldp x28, x29, [x1, 0x50]
ldr x30, [x1, 0x60]
/* switch to the target stack */
ldr x5, [x1, 0x68]
mov sp, x5
/* fetch the address to continue at */
ldr x6, [x1, 0x80]
/* third argument becomes the return value / first argument of the target */
mov x0, x2
/* indirect jump into the target context */
br x6
如想详细了解Procedure Call Standard for the Arm 64-bit Architecture(AAPCS64)规范,请参考如下链接:https://developer.arm.com/documentation/ihi0055/latest
总结
Tars的移植没有牵涉太多与CPU架构相关的代码,因此工作量不大,但需要我们对ARM64架构有一定的了解,才能确保移植的正确性。本文的几个移植点具有通用性,对于其他应用移植到ARM64平台具有借鉴意义。