借鉴网上的一个例子,分析一下到底什么是memory reordering内存乱序。
源码 & 编译指令:g++ -g -pthread -o fence fence.cpp
#include <assert.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

/* Variables shared between the two worker threads and main(). */
volatile int x, y, r1, r2;
/*
 * Resets x, y, r1 and r2 to 0.
 * main() calls this at the top of every round, before it waits on
 * barrier_start.
 */
void start()
{x = y = r1 = r2 = 0;
}
/*
 * Checks the outcome of one round: the assertion fails when r1 and r2 are
 * both 0. main() calls this after it has passed barrier_end.
 */
void end()
{assert(!(r1 == 0 && r2 == 0));
}
/*
 * Stores 1 to x, then loads y into r1.
 * No barrier separates the store from the load; that store-then-load pair is
 * what the experiment observes. Called from thread1().
 */
void run1()
{x = 1;r1 = y;
}
/*
 * Mirror image of run1(): stores 1 to y, then loads x into r2.
 * No barrier separates the store from the load. Called from thread2().
 */
void run2()
{y = 1;r2 = x;
}static pthread_barrier_t barrier_start; /* waited on before each run1()/run2() call */
/* Waited on after each run1()/run2() call, before main() calls end(). */
static pthread_barrier_t barrier_end;
/*
 * Worker for the run1() side of the experiment.
 * Each round waits on barrier_start, calls run1(), then waits on barrier_end.
 * The loop has no exit condition, so the trailing return is never reached.
 */
static void *thread1(void *)
{
    for (;;) {
        pthread_barrier_wait(&barrier_start);
        run1();
        pthread_barrier_wait(&barrier_end);
    }

    return NULL;
}
/*
 * Worker for the run2() side of the experiment.
 * Each round waits on barrier_start, calls run2(), then waits on barrier_end.
 * The loop has no exit condition, so the trailing return is never reached.
 */
static void *thread2(void *)
{
    for (;;) {
        pthread_barrier_wait(&barrier_start);
        run2();
        pthread_barrier_wait(&barrier_end);
    }

    return NULL;
}
/*
 * Terminates the program with a diagnostic when a pthread call reports an
 * error (non-zero return code).
 *
 * The call being checked must be evaluated outside assert(): assert() expands
 * to nothing when NDEBUG is defined, which would silently drop the call.
 */
static void require_ok(int rc, const char *what)
{
    if (rc != 0) {
        fprintf(stderr, "%s failed with error %d\n", what, rc);
        exit(EXIT_FAILURE);
    }
}

/* Restricts thread `t` to the single CPU numbered `cpu`. */
static void pin_thread_to_cpu(pthread_t t, int cpu)
{
    cpu_set_t cs;

    CPU_ZERO(&cs);
    CPU_SET(cpu, &cs);
    require_ok(pthread_setaffinity_np(t, sizeof(cs), &cs),
               "pthread_setaffinity_np");
}

/*
 * Drives the experiment: creates the two workers, pins them to CPU 0 and
 * CPU 1, then repeats rounds forever. Each round resets the shared variables,
 * releases both workers through barrier_start, waits for them on barrier_end
 * and checks the result with end().
 *
 * The loop never exits on its own; the process stops only when the assertion
 * in end() fails or when a setup call fails.
 */
int main()
{
    pthread_t t1;
    pthread_t t2;

    /* Three parties per barrier: the two workers plus this thread. */
    require_ok(pthread_barrier_init(&barrier_start, NULL, 3),
               "pthread_barrier_init(barrier_start)");
    require_ok(pthread_barrier_init(&barrier_end, NULL, 3),
               "pthread_barrier_init(barrier_end)");

    require_ok(pthread_create(&t1, NULL, thread1, NULL),
               "pthread_create(thread1)");
    require_ok(pthread_create(&t2, NULL, thread2, NULL),
               "pthread_create(thread2)");

    pin_thread_to_cpu(t1, 0);
    pin_thread_to_cpu(t2, 1);

    while (1) {
        start();
        pthread_barrier_wait(&barrier_start);
        pthread_barrier_wait(&barrier_end);
        end();
    }

    return 0;
}
假设两个并发的执行情况
void run1()
{x = 1; // A1r1 = y; // A2
}
void run2()
{y = 1; // B1r2 = x; // B2
}/* ********** 初始状态 ********** */
x = y = r1 = r2 = 0;
// 无乱序时,无论run1和run2如何并发,只会出现下述六种情况;无论哪种情况,r1、r2都不会同时为零。
/* ********** 情况一:A1 A2 B1 B2 ********** */
x = 1;
r1 = y; // r1 = 0;
y = 1;
r2 = x; // r2 = 1;
/* ********** 情况二:B1 A1 A2 B2 ********** */
y = 1;
x = 1;
r1 = y; // r1 = 1;
r2 = x; // r2 = 1;
/* ********** 情况三:B1 B2 A1 A2 ********** */
y = 1;
r2 = x; // r2 = 0;
x = 1;
r1 = y; // r1 = 1;
/* ********** 情况四:A1 B1 B2 A2 ********** */
x = 1;
y = 1;
r2 = x; // r2 = 1;
r1 = y; // r1 = 1;
/* ********** 情况五:A1 B1 A2 B2 ********** */
x = 1;
y = 1;
r1 = y;
r2 = x;
/* ********** 情况六:B1 A1 B2 A2 ********** */
y = 1;
x = 1;
r2 = x;
r1 = y;
可以看到无论哪种情况,都不应该出现r1 == 0 && r2 == 0
但是实际上会发生r1、r2同时为零:
$ ll
total 316
-rw------- 1 x root 17412096 Mar 15 22:12 core.1229 <<----<<----
-rwxr-xr-x 1 x root 17088 Mar 15 22:16 fence
-rw-r--r-- 1 x root 1361 Mar 15 22:16 fence.cpp
core内容:
#0 0x00007fa1910853d7 in raise () from /lib64/libc.so.6
#1 0x00007fa191086ac8 in abort () from /lib64/libc.so.6
#2 0x00007fa19107e1a6 in __assert_fail_base () from /lib64/libc.so.6
#3 0x00007fa19107e252 in __assert_fail () from /lib64/libc.so.6
#4 0x00000000004011cb in end () at fence.cpp:12
#5 0x00000000004014a7 in main () at fence.cpp:67
发生了执行时乱序的情况:
/*
 * Stores 1 to x, then loads y into r1.
 * No barrier separates the store from the load.
 */
void run1()
{x = 1;r1 = y;
}
/*
 * Stores 1 to y, then loads x into r2.
 * No barrier separates the store from the load.
 */
void run2()
{y = 1;r2 = x;
}
/* ********** 乱序情况 逻辑上不应该发生 ********** */
r1 = y // r1 == 0
r2 = x // r2 == 0
x = 1
y = 1
在X86架构下,使用mfence指令即可解决。不再core。
/*
 * run1() with an x86 `mfence` instruction between the store to x and the
 * load of y. The "memory" clobber also stops the compiler from moving memory
 * accesses across the asm statement.
 */
void run1()
{x = 1;__asm__ __volatile__("mfence" : : : "memory");r1 = y;
}
/*
 * run2() with an x86 `mfence` instruction between the store to y and the
 * load of x. The "memory" clobber also stops the compiler from moving memory
 * accesses across the asm statement.
 */
void run2()
{y = 1;__asm__ __volatile__("mfence" : : : "memory");r2 = x;
}
问题:只使用编译器fence可行吗,例如:
/*
 * run1() with a compiler-only barrier between the store to x and the load of
 * y: the asm template is empty, so no instruction is emitted; the "memory"
 * clobber only constrains the compiler.
 */
void run1()
{x = 1;__asm__ __volatile__("" : : : "memory");r1 = y;
}
/*
 * run2() with a compiler-only barrier between the store to y and the load of
 * x: the asm template is empty, so no instruction is emitted; the "memory"
 * clobber only constrains the compiler.
 */
void run2()
{y = 1;__asm__ __volatile__("" : : : "memory");r2 = x;
}
结果:还是会core,不带mfence指令只能保证无编译乱序,无法解决内存乱序。
上一篇:离婚后的女人是不是都很幸福?
下一篇:求网游,同人小说