11. چارچوب‌ها و کتابخانه‌های ساخت Threadها
Functionality | Framework
Asynchronous functions; only C++ | C++11 Threads
Fork/join; C/C++/Fortran; Linux | POSIX Threads
Async tasks, loops, reducers, load balance; C/C++ | Cilk Plus
Trees of tasks, complex patterns; only C++ | TBB
Tasks, loops, reduction, load balancing, affinity, nesting, C/C++/Fortran (+SIMD, offload) | OpenMP
12. مثال ساده "Hello World" در OpenMP در زبان C++
Directiveهای زبان‌های مختلف به صورت زیر است
- C/C++: #pragma omp
- Fortran free form: !$omp
برای کامپایل کردن از سویچ‌های زیر استفاده کنید
- GNU: -fopenmp
- Intel: -qopenmp
- Cray: -h omp
- PGI: -mp[=nonuma,align,allcores,bind]
#include <omp.h>
#include <cstdio>
// Prints the maximum OpenMP thread count once, then one greeting from each
// thread of a parallel region.
int main(){
  // This code is executed by 1 thread
  const int nt=omp_get_max_threads();
  printf("OpenMP with %d threads\n", nt);

  #pragma omp parallel
  { // This code is executed in parallel
    // by multiple threads
    printf("Hello World from thread %d\n",
           omp_get_thread_num());
  }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
13. کامپایل برنامه Hello World در OpenMP
amin@astek% icpc -qopenmp hello_omp.cc
amin@astek% export OMP_NUM_THREADS=5
amin@astek% ./a.out
OpenMP with 5 threads
Hello World from thread 0
Hello World from thread 3
Hello World from thread 1
Hello World from thread 2
Hello World from thread 4
OMP_NUM_THREADS controls number of OpenMP threads (default: logical CPU count)
22. Method 1: using clauses in pragma omp parallel (C, C++, Fortran):
int A, B; // Variables declared at the beginning of a function
#pragma omp parallel private(A) shared(B)
{
// Each thread has its own copy of A, but B is shared
}
int B; // Variable declared outside of parallel scope - shared by default
#pragma omp parallel
{
int A; // Variable declared inside the parallel scope - always private
// Each thread has its own copy of A, but B is shared
}
Method 2: using scoping (only C and C++):
1
2
3
4
5
1
2
3
4
5
6
26. موازی‌سازی مبتنی بر حلقه: حلقه‌های For در OpenMP
کتابخانه OpenMP برای توزیع تکرارهای حلقه بین نخ‌ها از #pragma omp parallel for در زبان C/C++ و !$omp parallel do در Fortran استفاده می‌کند
! Element-wise z = x + y; the iterations of the loop are divided among the
! threads of the enclosing parallel region by the "omp do" directive.
!$omp parallel shared(x,y,z) private(i)
!$omp do
do i = 1, n
   z(i) = x(i) + y(i)
end do
!$omp end do
!$omp end parallel
// Runs the n iterations of the loop in a parallel region; every iteration
// prints its index together with the number of the thread that executed it.
#pragma omp parallel for
for (int i = 0; i < n; i++) {
  printf("Iteration %d is processed by thread %d\n", i,
         omp_get_thread_num());
  // ... iterations will be distributed across available threads...
}
! Element-wise z = x + y; the iterations of the loop are divided among the
! threads of the enclosing parallel region by the "omp do" directive.
!$omp parallel shared(x,y,z) private(i)
!$omp do
do i = 1, n
   z(i) = x(i) + y(i)
end do
!$omp end do
!$omp end parallel
1
2
3
4
5
6
27. موازی‌سازی مبتنی بر حلقه: حلقه‌های For در OpenMP
#pragma omp parallel
{
// Code placed here will be executed by all threads.
// Alternative way to specify private variables:
// declare them in the scope of pragma omp parallel
int private_number=0;
#pragma omp for
for (int i = 0; i < n; i++) {
// ... iterations will be distributed across available threads...
}
// ... code placed here will be executed by all threads
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
30. مثال‌هایی از حالت‌های مختلف زمان‌بندی حلقه
! Element-wise z = x + y with the loop iterations handed out to threads
! using schedule(dynamic,chunk).
!$omp parallel shared(x,y,z) private(i)
!$omp do schedule(dynamic,chunk)
do i = 1, n
   z(i) = x(i) + y(i)
end do
!$omp end do
!$omp end parallel
// Element-wise z = x + y with the loop iterations handed out to threads
// using schedule(dynamic,chunk); i is private to each thread.
#pragma omp parallel shared(x,y,z) private(i)
{
  #pragma omp for schedule(dynamic,chunk)
  for (i = 0; i < n; i++) {
    z[i] = x[i] + y[i];
  }
}
Fortran
C/C++
34. Race Conditions و رفتارهای غیر قابل پیش‌بینی برنامه
int total = 0;
#pragma omp parallel for
for (int i = 0; i < n; i++) {
// Race condition
total = total + i;
}
زمانی رخ می‌دهد که 2 یا بیشتر از Threadها بخواهند به یک آدرس یکسان از حافظه دسترسی یابند و
40. Reduction برای مقابله با RC
نمونه کد برای رفع مشکل RC در فرترن
! Accumulates the sum of x(i)*y(i) into asum. The reduction(+:asum) clause
! avoids the race on asum that a plain shared accumulator would have.
! The construct opened by "parallel do" is closed by "end parallel do".
!$omp parallel do shared(x,y,n) private(i) reduction(+:asum)
do i = 1, n
   asum = asum + x(i)*y(i)
end do
!$omp end parallel do
41. Built-In Reduction
نمونه کد برای رفع مشکل RC در C/C++
// Sums the loop indices 0..n-1 into total; reduction(+: total) removes the
// race that unsynchronized updates of a shared total would cause.
int total = 0;
#pragma omp parallel for reduction(+: total)
for (int i = 0; i < n; i++) {
  total += i;
}
42. Reduction با فضای اختصاصی Thread
int total = 0;
#pragma omp parallel
{
int total_thr = 0;
#pragma omp for
for (int i=0; i<n; i++)
total_thr += i;
#pragma omp atomic
total += total_thr;
}
1
2
3
4
5
6
7
8
9
10
11
61. مثال:reduction using critical
section
فرترن
! Each thread sums its share of x(i)*y(i) into its private psum, then adds
! psum to the shared asum inside the named critical section "dosum".
! The region is opened with PARALLEL, so it is closed with END PARALLEL.
!$OMP PARALLEL SHARED(x,y,n,asum) PRIVATE(i, psum)
psum = 0.0d0
!$OMP DO
do i = 1, n
   psum = psum + x(i)*y(i)
end do
!$OMP END DO
!$OMP CRITICAL(dosum)
asum = asum + psum
!$OMP END CRITICAL(dosum)
!$OMP END PARALLEL
62. مثال:initialization and
output
C/C++
// Loop that repeats while err exceeds tolerance. In every pass err is reset
// inside a master construct, the threads meet at a barrier, err is recomputed
// (elided on the slide), and the new value is printed inside a single
// construct.
#pragma omp parallel
while (err > tolerance) {
  #pragma omp master
  {
    err = 0.0;
  }
  #pragma omp barrier
  // Compute err
  // ...
  #pragma omp single
  printf("Error is now: %5.2f\n", err);
}