# 设计一个高效的计数器

### 1. 简单的计数器

``````int count = 0;

void fun() {
//do something
count++;
}

int main() {
//do something
fun();
//do something
printf("%d\n", count);
}
``````

### 2. 并发情况下的计数器

``````#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>
#include "myspinlock.h"

// 是否使用spinlock包含全局变量，默认是不使用，改为1则为使用

#define USE_SPINLOCK 0

sem_t beginSemaArr[500];

sem_t endSema;

int global_count;

spinlock slock = 0;

{

for (;;)
{

for (int i = 0; i < add_loops; i++)
{
#if USE_SPINLOCK
spin_lock(&slock);
global_count++;
spin_unlock(&slock);
#else
global_count++;
#endif
}

sem_post(&endSema);  // Notify transaction complete
}
return NULL;  // Never returns
};

int main()
{
// Initialize the semaphores
for (int i = 0; i < 500; i++)
sem_init(&beginSemaArr[i], 0, 0);

sem_init(&endSema, 0, 0);

for (int i = 0; i < 500; i++)

// Repeat the experiment ad infinitum
for (int iterations = 1; ; iterations++)
{
// 重设 global_count
global_count = 0;

for (int j = 0; j < 500; j++)
sem_post(&beginSemaArr[j]);

for (int j = 0; j < 500; j++)
sem_wait(&endSema);

// threads all finish, print global_count
if (global_count != 5000000)
printf("iter %d: global_count = %d\n", iterations, global_count);
}
return 0;  // Never returns
}
``````

### 3. 原子操作

``````#include <stdio.h>

int main(int argc, char* argv[])
{
int a = 0;
int i = 100;
for ( i = 0; i < 100; i++)
{
}
printf("%d\n", a);
return 0;
}
``````

### 4. 如果竞争很激烈呢？

``````const int N = 7;
int globalcount[N];

void fun() {
//do something
}
``````

### 5. 还能不能继续优化呢？

``````/**************************************************//**
@file include/ut0counter.h

Counter utility class

Created 2012/04/12 by Sunny Bains
*******************************************************/

#ifndef UT0COUNTER_H
#define UT0COUNTER_H

#include "univ.i"
#include <string.h>

/** CPU cache line size */
#define CACHE_LINE_SIZE		64

/** Default number of slots to use in ib_counter_t */
#define IB_N_SLOTS		64

/** Get the offset into the counter array. */
template <typename Type, int N>
struct generic_indexer_t {
/** Default constructor/destructor should be OK. */

/** @return offset within m_counter */
size_t offset(size_t index) const UNIV_NOTHROW {
return(((index % N) + 1) * (CACHE_LINE_SIZE / sizeof(Type)));
}
};

/** Class for using fuzzy counters. The counter is not protected by any
mutex and the results are not guaranteed to be 100% accurate but close
enough. Creates an array of counters and separates each element by the
CACHE_LINE_SIZE bytes */
template <
typename Type,
int N = IB_N_SLOTS,
template<typename, int> class Indexer = thread_id_indexer_t>
class ib_counter_t {
public:
ib_counter_t() { memset(m_counter, 0x0, sizeof(m_counter)); }

~ib_counter_t()
{
}

/** If you can't use a good index id. Increment by 1. */
void inc() UNIV_NOTHROW { add(1); }

/** If you can't use a good index id.
* @param n  - is the amount to increment */
size_t	i = m_policy.offset(m_policy.get_rnd_index());

m_counter[i] += n;
}

/** Use this if you can use a unique indentifier, saves a
call to get_rnd_index().
@param i - index into a slot
@param n - amount to increment */
void add(size_t index, Type n) UNIV_NOTHROW {
size_t	i = m_policy.offset(index);

m_counter[i] += n;
}

/** If you can't use a good index id. Decrement by 1. */
void dec() UNIV_NOTHROW { sub(1); }

/** If you can't use a good index id.
* @param - n is the amount to decrement */
void sub(Type n) UNIV_NOTHROW {
size_t	i = m_policy.offset(m_policy.get_rnd_index());

m_counter[i] -= n;
}

/** Use this if you can use a unique indentifier, saves a
call to get_rnd_index().
@param i - index into a slot
@param n - amount to decrement */
void sub(size_t index, Type n) UNIV_NOTHROW {
size_t	i = m_policy.offset(index);

m_counter[i] -= n;
}

/* @return total value - not 100% accurate, since it is not atomic. */
operator Type() const UNIV_NOTHROW {
Type	total = 0;

for (size_t i = 0; i < N; ++i) {
total += m_counter[m_policy.offset(i)];
}

return(total);
}

private:
/** Indexer into the array */
Indexer<Type, N>m_policy;

/** Slot 0 is unused. */
Type		m_counter[(N + 1) * (CACHE_LINE_SIZE / sizeof(Type))];
};

#endif /* UT0COUNTER_H */
``````

``````Type m_counter[(N + 1) * (CACHE_LINE_SIZE / sizeof(Type))];
``````

1. 为什么CACHE_LINE_SIZE定义成64？因为，高速缓存Cache的读写单位是64字节
2. 为什么需要N+1个元素？因为第一个元素不用
3. 为什么第一个元素不使用？为了与CACHE_LINE_SIZE对齐
4. 多了一个元素，为什么就能与CACHE_LINE_SIZE对齐？

``````byte* srv_pad1[64];
mutex_t* kernel_mutex;