// cppamp1.cpp : Defines the entry point for the console application. // #include "stdafx.h" #include <amp.h> #include <iostream> using namespace concurrency; const int size = 5; inline unsigned long Log2 (unsigned long num) restrict(amp) { unsigned long index = 0; while (num > 1) { index++; num = (num+1)>>1; } return index; } void CppAmpMethod() { int aCPP[] = {1, 2, 3, 4, 5}; int bCPP[] = {6, 7, 8, 9, 10}; int sumCPP[size]; // Create C++ AMP objects. array_view<const int, 1> a(size, aCPP); array_view<const int, 1> b(size, bCPP); array_view<int, 1> sum(size, sumCPP); sum.discard_data(); parallel_for_each( // Define the compute domain, which is the set of threads that are created. sum.extent, // Define the code to run on each thread on the accelerator. [=](index<1> idx) restrict(amp) { sum[idx] = a[idx] + b[idx]; } ); // Print the results. The expected output is "7, 9, 11, 13, 15". for (int i = 0; i < size; i++) { std::cout << sum[i] << "\n"; } } void AddElements(index<1> idx, array_view<int, 1> sum, array_view<int, 1> a, array_view<int, 1> b) restrict(amp) { sum[idx] = a[idx] + b[idx] + Log2(b[idx]); } void AddArraysWithFunction() { int aCPP[] = {1, 2, 3, 4, 5}; int bCPP[] = {6, 7, 8, 9, 10}; int sumCPP[5] = {0, 0, 0, 0, 0}; array_view<int, 1> a(5, aCPP); array_view<int, 1> b(5, bCPP); array_view<int, 1> sum(5, sumCPP); parallel_for_each( sum.extent, [=](index<1> idx) restrict(amp) { AddElements(idx, sum, a, b); } ); for (int i = 0; i < 5; i++) { std::cout << sum[i] << "\n"; } } //METHOD 1: tile_static int lds[2][3]; void Idx_TiledAmp(tiled_index<2,3> idx,array_view<int, 2> input, array_view<int, 2> gid, array_view<int, 2> tid, array_view<int, 2> lid, int lds[2][3]) restrict(amp) { lds[idx.local[0]][idx.local[1]] = idx.global[0] | idx.global[1]* 100; idx.barrier.wait(); gid[idx.global] = lds[idx.local[0]][idx.local[1]] ; tid[idx.global] = idx.tile[0] | idx.tile[1] * 10000; lid[idx.global] = idx.local[0] | idx.local[1] * 10000; } //convert to 1d array void Idx_TiledAmp2(tiled_index<2,3> idx,array_view<int, 2> input, array_view<int, 2> gid, array_view<int, 2> tid, array_view<int, 2> lid, int* lds) restrict(amp) { lds[idx.local[0]*2 +idx.local[1]] = idx.global[0] | idx.global[1]* 100; idx.barrier.wait(); gid[idx.global] = lds[idx.local[0]*2+idx.local[1]]; tid[idx.global] = idx.tile[0] | idx.tile[1] * 10000; lid[idx.global] = idx.local[0] | idx.local[1] * 10000; } void TiledAmp() { // Sample data: int sampledata[] = { 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16, 21, 22, 23, 24, 25, 26, 31, 32, 33, 34, 35, 36,}; // The tiles: // 2 2 9 7 1 4 // 4 4 8 8 3 4 // // 1 5 1 2 5 2 // 6 8 3 2 7 2 // Averages: int averagedata[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; int gid_data[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; int tid_data[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; int lid_data[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; array_view<int, 2> sample(4, 6, sampledata); array_view<int, 2> gid(4, 6, gid_data); array_view<int, 2> tid(4, 6, tid_data); array_view<int, 2> lid(4, 6, lid_data); array_view<int, 2> average(4, 6, averagedata); parallel_for_each( // Create threads for sample.extent and divide the extent into 2 x 2 tiles. sample.extent.tile<2,3>(), [=](tiled_index<2,3> idx) restrict(amp) { tile_static int sample2[2][3]; //Idx_TiledAmp(idx, sample, gid, tid, lid, sample2); Idx_TiledAmp2(idx, sample, gid, tid, lid, &sample2[0][0]); } ); std::cout << "sample\n"; for (int i = 0; i < 4; i++) { for (int j = 0; j < 6; j++) { std::cout << sample(i,j) << " "; } std::cout << "\n"; } std::cout << "gid\n"; for (int i = 0; i < 4; i++) { for (int j = 0; j < 6; j++) { std::cout << gid(i,j) << " "; } std::cout << "\n"; } std::cout << "\ntid\n"; for (int i = 0; i < 4; i++) { for (int j = 0; j < 6; j++) { std::cout << tid(i,j) << " "; } std::cout << "\n"; } std::cout << "\nlid\n"; for (int i = 0; i < 4; i++) { for (int j = 0; j < 6; j++) { std::cout << lid(i,j) << " "; } std::cout << "\n"; } } int _tmain(int argc, _TCHAR* argv[]) { //CppAmpMethod(); //AddArraysWithFunction(); int a=0x01020304; BYTE* b; b = (BYTE*)&a; int bb= (int)*b; TiledAmp(); char c = getc(stdin); c = getc(stdin); return 0; }