Tuesday, August 27, 2013

Setops


SETOPS: A tiny C library for set operations

Wrote this piece of code for my own use. Like a very good programmer, I googled around a bit before writing this but couldn't find anything as simple and lightweight.
About the code:
Set elements are represented by bits in an array of 64-bit integers. Set operations- union/intersection/set difference etc are performed as bit-wise operations making it pretty fast. My favorite is the bitcnt() function that computes number of bits that are =1 in the given 64-bit number. It uses divide and conquer approach(same as merge sort) to do the job in O(log n) where n is number of bits in given number (Here it is 64: hence 6 steps). Alternately, it computes Hamming weight of a given 64-bit number.
Compilation:
gcc -o <your application> <yourfile>.c setops.c -lm
If you find it useful, please let me know with comments. 

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
/*****************************************************************/
/**************Jayant Apte. Drexel University.8/27/2013***************************************/
/**********************setops.h*********************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <stdint.h>
#include <inttypes.h>
#include <math.h>

typedef struct
{
 uint64_t* elements;
 int nchunks;
} set;

typedef set set_t[1];

void setops_insert_ul(set_t, unsigned long newel);
void setops_delete_ul(set_t, unsigned long delel);
void setops_intersection(set_t,set_t,set_t);
void setops_union(set_t,set_t,set_t);
void setops_setdiff(set_t,set_t,set_t);
void setops_init(set_t);
void setops_printset_bits(set_t);
void setops_printset_pretty(set_t);
unsigned int bitcnt(uint64_t);
unsigned int setops_cardinality(set_t);
int setops_ismember_ul(set_t, unsigned long);
void setops_clear(set_t);
int* setops_set2intarray1(set_t ,unsigned int);

/**********************setops.c*********************************************************/
#include "setops.h"

void setops_init(set_t set1)
{
 set1->elements=calloc(1,sizeof(uint64_t));
 set1->nchunks=1;
}

void setops_printset_bits(set_t set1)
{
 int i,j;
 uint64_t n;
 for(i=0;i<set1->nchunks;i++)
 {
  n=set1->elements[i];
  for(j=0;j<64;j++)
  {
   if ((n & ((UINT64_MAX-1)/2)+1)>0LLU)
   {
    printf("1");
   }
   else
   {
    printf("0");
   }
   n <<= 1;
  }
 }
printf("\n");
}

void setops_insert_ul(set_t set1, unsigned long newel)
{
 uint64_t n;
 float k;
 int oldnchunks=set1->nchunks;
 int chunkindex,bitindex,i;
 chunkindex=(int)(newel/64) +2;
 //printf("chunkindex is %d",chunkindex);
 if (chunkindex>set1-> nchunks) // we need to augment!
 {
  set1->elements=realloc(set1->elements,chunkindex*sizeof(uint64_t));
  set1->nchunks=chunkindex;
 }
 for(i=oldnchunks;i<set1->nchunks;i++)
 {
  set1->elements[i]=0LLU;
 }
 set1->elements[chunkindex-1]|=1LLU<<(64-(newel%64));
} 

int setops_ismember_ul(set_t set1, unsigned long el)
{
 uint64_t n;
 float k;
 int chunkindex,bitindex,j,i;
 chunkindex=(int)(el/64) +2;
 if (chunkindex>set1->nchunks) // not a member
 {
  return 0;
 }
 else
 {
  if((set1->elements[chunkindex-1] & (1LLU<<(64-(el%64))))>0LLU) // member!
  {
   return 1;
  }
  else // not a member
  {
   return 0;
  }
 }
}

void setops_delete_ul(set_t set1, unsigned long delel)
{
 uint64_t n;
 float k;
 int chunkindex,bitindex;
 chunkindex=ceil(delel/64) +1;
 if (chunkindex>set1-> nchunks) // delel is not in the set
 {
  return;
 }
 n=(uint64_t)pow(2,64-(delel%64));
 set1->elements[chunkindex-1]=set1->elements[chunkindex-1] &= ~(1LLU << (64-(delel%64)));
} 

void setops_intersection(set_t intersection,set_t set1,set_t set2)
{
 // no need to initialize intersection
 int i,k;
 int oldnchunks=set1->nchunks;
 setops_init(intersection);
 k=set1->nchunks<set2->nchunks ? set1->nchunks: set2->nchunks;
 if(k>intersection->nchunks) //augment intersection if needed 
 {
  intersection->elements=realloc(intersection->elements,k*sizeof(uint64_t));
  intersection->nchunks=k;
 }
 for(i=oldnchunks;i<intersection->nchunks;i++)
 {
  intersection->elements[i]=0LLU;
 }
 for(i=0;i<k;i++)
 {
  intersection->elements[i]=set1->elements[i] & set2->elements[i];
 }
}


unsigned int bitcnt(uint64_t num)
{
 // a simple divide and conquer approach thats will find number of 1s(hamming weight)
 // in an n-bit number in log_2(n) steps
 unsigned int k;
 uint64_t x=num;
 x = (x & 0x5555555555555555) + ((x >> 1) & 0x5555555555555555);
 x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
 x = (x & 0x0F0F0F0F0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F0F0F0F0F);
 x = (x & 0x00FF00FF00FF00FF) + ((x >> 8) & 0x00FF00FF00FF00FF);
 x = (x & 0x0000FFFF0000FFFF) + ((x >> 16) & 0x0000FFFF0000FFFF);
 x = (x & 0x00000000FFFFFFFF) + ((x >> 32) & 0x00000000FFFFFFFF);
 k=(unsigned int)x;// we can do this since the number will never exceed 64
 return x;
}

unsigned int setops_cardinality(set_t set1)
{
 int i;
 unsigned int cnt=0;
 for(i=0;i<set1->nchunks;i++)
 {
  cnt+=bitcnt(set1->elements[i]);
 }
 return cnt;
}

void setops_union(set_t setunion,set_t set1,set_t set2)
{
 // no need to initialize setunion
 uint64_t* elements1,*elements2;
 int i,k,l;
 setops_init(setunion);
 k=set1->nchunks<set2->nchunks ? set2->nchunks: set1->nchunks;//max chunks
 elements1=set1->nchunks<set2->nchunks ? set2->elements: set1->elements;// elements of larger set
 l=set1->nchunks<set2->nchunks ? set1->nchunks: set2->nchunks;//min chunks
 elements2=set1->nchunks<set2->nchunks ? set1->elements: set2->elements;// elements of smaller set
 if(k>setunion->nchunks) //augment intersection if needed 
 {
  setunion->elements=realloc(setunion->elements,k*sizeof(uint64_t));
  setunion->nchunks=k;
 }
 for(i=0;i<k;i++)
 {
  if(i<l)
  {
   setunion->elements[i]=elements1[i] | elements2[i];
  }
  else
  {
   setunion->elements[i]=elements1[i];
  }
 }
}

void setops_setdiff(set_t setdiff,set_t set1,set_t set2)
{
 int i;
 setdiff->elements=calloc(set1->nchunks,sizeof(uint64_t)); 
 setdiff->nchunks=set1->nchunks;
 for(i=0;i<set1->nchunks;i++)
 {
  setdiff->elements[i]=set1->elements[i] & (~set2->elements[i]);
 }
}

void setops_printset_pretty(set_t set1)
{
 int i;
 long j;
 printf("\n{");
 for(i=0;i<set1->nchunks;i++)
 {
  for(j=0;j<64;j++)
  {
   if(setops_ismember_ul(set1,i*64+j+1)==1)
   {
    printf("  %ld",i*64+j+1);
   }
  }
 }
 printf("  }\n");
}

void setops_clear(set_t set1)
{
 free(set1->elements);
}

int* setops_set2intarray1(set_t set1,unsigned int cardinality)
{
 int i,k=0;
 long j;
 int* setarray;
 setarray=malloc(cardinality*sizeof(int));
 for(i=0;i<set1->nchunks;i++)
 {
  for(j=0;j<64;j++)
  {
   if(setops_ismember_ul(set1,i*64+j+1)==1)
   {
    setarray[k]=i*64+j+1;
    k++;
   }
  }
 }
 return setarray;
}

No comments:

Post a Comment

About Me

My photo
This is Jayant Apte. I am a Ph.D. student at Drexel University. I am interested in variety of problems on the intersection of information theory and computer science. Lately I have been working on multi-source network coding problem. Polyhedra and matroids are some of the things I have been recently working on. I write a lot of code. It is mostly C, OpenMPI or CUDA C. I am an avid Chelsea supporter. I don't get much time to watch all their games but I do catch the highlights when I can.

Blog Archive