// File AesGen.cs --- an implementation of the Advanced Encryption
// Standard (AES) using runtime code generation (RTCG)
// Peter Sestoft * sestoft@itu.dk * 2002-05-27

// The RTCG generated code is 35 per cent faster than the best I could
// do by hand (file AesFast.cs).  The same IL generator generates both
// the encryption routine and the decryption routine.  It works for
// all three key sizes (128, 192, 256 bit).

// With key size 128 the RTCG code can encrypt or decrypt 134 Mbit/s
// under the MS CLR under Win 2000 under VmWare under Linux on a 850
// MHz Mobile P3 laptop.  Extrapolating from data given by Rijmen and
// Daemen, a highly optimized implementation by Brian Gladman using
// VC++ and native Pentium 3 rotate instructions (not expressible in
// ANSI C) can do 280-300 Mbit/s.

// For information about AES/Rijndael, see
// http://csrc.nist.gov/encryption/aes/
// For AES/Rijndael performance figures, see
// http://www.tcs.hut.fi/~helger/aes/rijndael.html and
// http://www.gladman.uk.net/

using System;
using System.Reflection;
using System.Reflection.Emit;

public class AesGen {
  // Number of 32-bit columns in a block.
  private static readonly int BC = 4;           // Only 128-bit blocks

  // Logarithm and inverse logarithms support multiplication in GF(2^m).
  // mul() adds the logtable entries of its two non-zero operands, reduces
  // the sum modulo 255, and maps the result back through alogtable.
  // Both tables have 256 entries, one per byte value.
  private static readonly byte[] logtable = {
    0,0,25,1,50,2,26,198,75,199,27,104,51,238,223,3,
    100,4,224,14,52,141,129,239,76,113,8,200,248,105,28,193,
    125,194,29,181,249,185,39,106,77,228,166,114,154,201,9,120,
    101,47,138,5,33,15,225,36,18,240,130,69,53,147,218,142,
    150,143,219,189,54,208,206,148,19,92,210,241,64,70,131,56,
    102,221,253,48,191,6,139,98,179,37,226,152,34,136,145,16,
    126,110,72,195,163,182,30,66,58,107,40,84,250,133,61,186,
    43,121,10,21,155,159,94,202,78,212,172,229,243,115,167,87,
    175,88,168,80,244,234,214,116,79,174,233,213,231,230,173,232,
    44,215,117,122,235,22,11,245,89,203,95,176,156,169,81,160,
    127,12,246,111,23,196,73,236,216,67,31,45,164,118,123,183,
    204,187,62,90,251,96,177,134,59,82,161,108,170,85,41,157,
    151,178,135,144,97,190,220,252,188,149,207,205,55,63,91,209,
    83,57,132,60,65,162,109,71,20,42,158,93,86,242,211,171,
    68,17,146,217,35,32,46,137,180,124,184,38,119,153,227,165,
    103,74,237,222,197,49,254,24,13,99,140,128,192,247,112,7,
  };

  // Inverse of logtable: maps a logarithm (0..254, plus a final wrap-around
  // entry at index 255) back to the field element.
  private static readonly byte[] alogtable = {
    1,3,5,15,17,51,85,255,26,46,114,150,161,248,19,53,
    95,225,56,72,216,115,149,164,247,2,6,10,30,34,102,170,
    229,52,92,228,55,89,235,38,106,190,217,112,144,171,230,49,
    83,245,4,12,20,60,68,204,79,209,104,184,211,110,178,205,
    76,212,103,169,224,59,77,215,98,166,241,8,24,40,120,136,
    131,158,185,208,107,189,220,127,129,152,179,206,73,219,118,154,
    181,196,87,249,16,48,80,240,11,29,39,105,187,214,97,163,
    254,25,43,125,135,146,173,236,47,113,147,174,233,32,96,160,
    251,22,58,78,210,109,183,194,93,231,50,86,250,21,63,65,
    195,94,226,61,71,201,64,192,91,237,44,116,156,191,218,117,
    159,186,213,100,172,239,42,126,130,157,188,223,122,142,137,128,
    155,182,193,88,232,35,101,175,234,37,111,177,200,67,197,84,
    252,31,33,99,165,244,7,9,27,45,119,153,176,203,70,202,
    69,207,74,222,121,139,134,145,168,227,62,66,198,81,243,14,
    18,54,90,238,41,123,141,140,143,138,133,148,167,242,13,23,
    57,75,221,124,132,151,162,253,28,36,108,180,199,82,246,1,
  };

  // The S-box and its inverse.
  // Both are public because the generated code loads them by field name
  // (see CryptGen, which looks up "S" or "Si" via reflection).
  public static readonly byte[] S = {
    99,124,119,123,242,107,111,197,48,1,103,43,254,215,171,118,
    202,130,201,125,250,89,71,240,173,212,162,175,156,164,114,192,
    183,253,147,38,54,63,247,204,52,165,229,241,113,216,49,21,
    4,199,35,195,24,150,5,154,7,18,128,226,235,39,178,117,
    9,131,44,26,27,110,90,160,82,59,214,179,41,227,47,132,
    83,209,0,237,32,252,177,91,106,203,190,57,74,76,88,207,
    208,239,170,251,67,77,51,133,69,249,2,127,80,60,159,168,
    81,163,64,143,146,157,56,245,188,182,218,33,16,255,243,210,
    205,12,19,236,95,151,68,23,196,167,126,61,100,93,25,115,
    96,129,79,220,34,42,144,136,70,238,184,20,222,94,11,219,
    224,50,58,10,73,6,36,92,194,211,172,98,145,149,228,121,
    231,200,55,109,141,213,78,169,108,86,244,234,101,122,174,8,
186,120,37,46,28,166,180,198,232,221,116,31,75,189,139,138,
    112,62,181,102,72,3,246,14,97,53,87,185,134,193,29,158,
    225,248,152,17,105,217,142,148,155,30,135,233,206,85,40,223,
    140,161,137,13,191,230,66,104,65,153,45,15,176,84,187,22,
  };

  // Inverse S-box; public for the same reason as S (CryptGen loads it by
  // field name when generating the decryption routine).
  public static readonly byte[] Si = {
    82,9,106,213,48,54,165,56,191,64,163,158,129,243,215,251,
    124,227,57,130,155,47,255,135,52,142,67,68,196,222,233,203,
    84,123,148,50,166,194,35,61,238,76,149,11,66,250,195,78,
    8,46,161,102,40,217,36,178,118,91,162,73,109,139,209,37,
    114,248,246,100,134,104,152,22,212,164,92,204,93,101,182,146,
    108,112,72,80,253,237,185,218,94,21,70,87,167,141,157,132,
    144,216,171,0,140,188,211,10,247,228,88,5,184,179,69,6,
    208,44,30,143,202,63,15,2,193,175,189,3,1,19,138,107,
    58,145,17,65,79,103,220,234,151,242,207,206,240,180,230,115,
    150,172,116,34,231,173,53,133,226,249,55,232,28,117,223,110,
    71,241,26,113,29,41,197,137,111,183,98,14,170,24,190,27,
    252,86,62,75,198,210,121,32,154,219,192,254,120,205,90,244,
    31,221,168,51,136,7,199,49,177,18,16,89,39,128,236,95,
    96,81,127,169,25,181,74,13,45,229,122,159,147,201,156,239,
    160,224,59,77,174,42,245,176,200,235,187,60,131,83,153,97,
    23,43,4,126,186,119,214,38,225,105,20,99,85,33,12,125
  };

  // Round key constants
  private static readonly byte[] rcon = {
    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
    0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6,
    0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91
  };

  // Multiply two field elements using the log/antilog tables.
  // Returns 0 when either operand is 0 (0 has no logarithm); otherwise
  // adds the two logarithms modulo 255 and looks up the antilogarithm.
  private static byte mul(byte a, byte b) {
    // Multiply two elements of GF(2^m)
    if (a!=0 && b!=0)
      return alogtable[(logtable[a] + logtable[b]) % 255];
    else
      return 0;
  }

  // Multiply the four bytes of word b by a 4x4 matrix whose rows are the
  // byte-rotations of coeff.  For each result byte i (3 down to 0) the four
  // byte lanes of coeff and b are multiplied with mul() and XORed together;
  // coeff is then rotated right by 8 bits to form the next row.
  private static uint innerProd(uint coeff, uint b) {
    uint res = 0;
    for (int i = 3; i >= 0; i--) {
      byte sum = 0;
      // XOR of the lane-wise products, from the top byte (s = 24) down
      for (int s = 24; s >= 0; s -= 8)
        sum ^= mul((byte)(coeff >> s), (byte)(b >> s));
      res |= (uint)(sum << (i * 8));
      // Rotate coeff right by one byte
      coeff = coeff << 24 | coeff >> 8;
    }
    return res;
  }

  // Apply innerProd with the coefficient word 0x0e0b0d09 to each of the
  // four words a[0..3]; returns a new 4-element array, a is not modified.
  private static uint[] InvMixColumn(uint[] a) {
    uint coeff = 0x0e0b0d09;
    uint[] b = new uint[4];
    for (int j = 0; j < 4; j++)
      b[j] = innerProd(coeff, a[j]);
    return b;
  }

  // The T0, T1, T2, T3 tables contain packets of multiplied S-box entries
  public readonly static uint[]
    T0 = new uint[256], T1 = new uint[256],
    T2 = new uint[256], T3 = new uint[256];

  // The U0, U1, U2, U3 tables contain packets of multiplied Si-box entries
  public readonly static uint[]
    U0 = new uint[256], U1 = new uint[256],
    U2 = new uint[256], U3 = new uint[256];

  // Initialize static tables
  // T0[i] packs, from high byte to low byte: 2*S[i], S[i], S[i], 3*S[i]
  // (products taken with mul()).  U0[i] packs 0x0e, 0x09, 0x0d and 0x0b
  // times Si[i] in the same byte order.  Each following table (T1..T3,
  // U1..U3) is the previous one rotated right by 8 bits.
  static AesGen() {
    for (int i=0; i<256; i++) {
      uint p;
      T0[i] = p = (uint)(mul(2,S[i])<<24 | S[i]<<16 | S[i]<<8 | mul(3,S[i]));
      T1[i] = p = p << 24 | p >> 8;
      T2[i] = p = p << 24 | p >> 8;
      T3[i] = p << 24 | p >> 8;
      U0[i] = p = (uint)(mul(0x0e, Si[i])<<24 | mul(0x09, Si[i])<<16 | mul(0x0d, Si[i])<< 8 | mul(0x0b, Si[i]));
      U1[i] = p = p << 24 | p >> 8;
      U2[i] = p = p << 24 | p >> 8;
      U3[i] = p << 24 | p >> 8;
    }
  }

  // Create the round keys from a given key k, for encryption and decryption
  // The given key must be 128, 192, or 256 bit (16, 24, or 32 bytes)
  // Any other key length throws an Exception with message
  // "AesGen.KeySchedule".
  public static void KeySchedule (byte[] k, out uint[][] W, out uint[][] Wi) {
    // KC = number of 32-bit key columns, ROUNDS = number of cipher rounds
    int KC, ROUNDS;
    switch (k.Length) {
    case 16: KC = 4; ROUNDS = 10; break;
    case 24: KC = 6; ROUNDS = 12; break;
    case 32: KC = 8; ROUNDS = 14; break;
    default: throw new Exception("AesGen.KeySchedule");
    }
    // Copy the key into a 4 x KC byte matrix, column by column:
    // key byte i+4*j goes to row i, column j
    byte[,] tk = new byte[4, KC];
    for (int i = 0; i < 4; i++)
      for (int j = 0; j < KC; j++)
        tk[i,j] = k[i+4*j];
    byte[][,] Wt = new byte[ROUNDS+1][,];
    // NOTE(review): the text below is damaged.  Everything between the loop
    // condition "r" and the "1) {" that follows it is missing, so the rest of
    // KeySchedule and the beginning of the next routine are not present, and
    // one fragment appears twice.  The remaining fragments reference args,
    // Timer and Crypter delegates, so they presumably belong to a timing
    // driver (Main) -- restore this region from an intact copy of the file.
    // The damaged text is kept verbatim.
    for (int r = 0; r 1) { int methods = int.Parse(args[1]); Timer t = new Timer(); for (int i=0; i 1) { int methods = int.Parse(args[1]); Timer t = new Timer(); for (int i=0; i0; i--) encrypt(datablock); Console.WriteLine("Encrypting {0:F1} Kbit/second", count * 0.128 / t.Check()); Print("Encrypted plaintext", datablock); } MethodInfo mydecrypt = ty.GetMethod("MyDecrypt"); { Timer t = new Timer(); Crypter decrypt = (Crypter)Delegate.CreateDelegate(typeof(Crypter), mydecrypt); for (int i=count; i>0; i--) decrypt(datablock); Console.WriteLine("Decrypting {0:F1} 
Kbit/second", count * 0.128 / t.Check());
      Print("Decrypted ciphertext", datablock);
    }
  }

  // Generate the IL body of an AES encryption (encrypt == true) or
  // decryption (encrypt == false) routine specialized to the round keys rk.
  //
  //   ilg      IL generator of the method being built.  The generated code
  //            reads its input from argument 0, which must be a byte array
  //            (elements 0..15 are used), and writes the result back into
  //            the same array.
  //   encrypt  selects tables T0-T3 and S (true) or U0-U3 and Si (false),
  //            and the direction in which state words are combined.
  //   rk       round keys; rk.Length - 1 is the number of rounds and each
  //            rk[r] supplies four 32-bit words.  The key words are embedded
  //            in the generated code as constants.
  //
  // The emitted code is straight-line: all loops below run at generation
  // time, so the rounds are fully unrolled.
  public static void CryptGen(ILGenerator ilg, bool encrypt, uint[][] rk) {
    int ROUNDS = rk.Length - 1;
    uint[] k = rk[0];

    // Allocate local variables a0-a3 (current state, one word per column)
    LocalBuilder[] a = new LocalBuilder[4];
    for (int j = 0; j < 4; j++)
      a[j] = ilg.DeclareLocal(typeof(uint));

    // Allocate local variables t0-t3 (next state while a round is computed)
    LocalBuilder[] t = new LocalBuilder[4];
    for (int j = 0; j < 4; j++)
      t[j] = ilg.DeclareLocal(typeof(uint));

    // Allocate local variables T0-T3 or U0-U3, and bind to static fields
    LocalBuilder[] T = new LocalBuilder[4];
    for (int j = 0; j < 4; j++)
      T[j] = ilg.DeclareLocal(typeof(uint[]));
    String TOrU = encrypt ? "T" : "U";
    for (int j = 0; j < 4; j++) {
      ilg.Emit(OpCodes.Ldsfld, typeof(AesGen).GetField(TOrU+j));
      ilg.Emit(OpCodes.Stloc, T[j]);
    }

    // Generate code to add first round key:
    // a[j] = (bytes 4j..4j+3 of the argument, first byte highest) ^ k[j]
    for (int j = 0; j < 4; j++) {
      ilg.Emit(OpCodes.Ldarg_0);
      ilg.Emit(OpCodes.Ldc_I4, j * 4);
      ilg.Emit(OpCodes.Ldelem_U1);
      ilg.Emit(OpCodes.Ldc_I4, 24);
      ilg.Emit(OpCodes.Shl);
      for (int i = 1; i < 4; i++) {
        ilg.Emit(OpCodes.Ldarg_0);
        ilg.Emit(OpCodes.Ldc_I4, i + j * 4);
        ilg.Emit(OpCodes.Ldelem_U1);
        ilg.Emit(OpCodes.Ldc_I4, 24 - i * 8);
        ilg.Emit(OpCodes.Shl);
        ilg.Emit(OpCodes.Or);
      }
      EmitUInt32Const(ilg, k[j]);
      ilg.Emit(OpCodes.Xor);
      ilg.Emit(OpCodes.Stloc, a[j]);
    }

    // Generate code for the intermediate rounds:
    // t[j] = k[j] ^ T0[byte 0 of a[..]] ^ T1[byte 1 of a[..]] ^ ...
    for (int r = 1; r < ROUNDS; r++) {
      k = rk[r];
      for (int j = 0; j < 4; j++) {
        EmitUInt32Const(ilg, k[j]);
        for (int i = 0; i < 4; i++) {
          ilg.Emit(OpCodes.Ldloc, T[i]);
          ilg.Emit(OpCodes.Ldloc, a[encrypt ? (i+j) % 4 : (j+4-i) % 4]);
          EmitSelectByte(ilg, i);
          ilg.Emit(OpCodes.Ldelem_U4);
          ilg.Emit(OpCodes.Xor);
        }
        ilg.Emit(OpCodes.Stloc, t[j]);
      }
      // Copy t0-t3 back to a0-a3 only after all four have been computed,
      // since every t[j] reads several of the a[] words
      for (int j = 0; j < 4; j++) {
        ilg.Emit(OpCodes.Ldloc, t[j]);
        ilg.Emit(OpCodes.Stloc, a[j]);
      }
    }

    // Allocate local variable S or Si, and bind to static field
    LocalBuilder S = ilg.DeclareLocal(typeof(byte[]));
    String SOrSinv = encrypt ? "S" : "Si";
    ilg.Emit(OpCodes.Ldsfld, typeof(AesGen).GetField(SOrSinv));
    ilg.Emit(OpCodes.Stloc, S);

    // Generate code for the last round: each output byte is an S-box (or
    // inverse S-box) lookup XORed with one byte of the last round key, and
    // is stored directly into the argument array
    k = rk[ROUNDS];
    for (int j = 0; j < 4; j++) {
      for (int i = 0; i < 4; i++) {
        ilg.Emit(OpCodes.Ldarg_0);
        ilg.Emit(OpCodes.Ldc_I4, i + 4 * j);
        ilg.Emit(OpCodes.Ldloc, S);
        ilg.Emit(OpCodes.Ldloc, a[encrypt ? (i+j) % 4 : (j+4-i) % 4]);
        EmitSelectByte(ilg, i);
        ilg.Emit(OpCodes.Ldelem_U1);
        EmitUInt32Const(ilg, (k[j] >> (24 - 8 * i)) & 0xFF);
        ilg.Emit(OpCodes.Xor);
        ilg.Emit(OpCodes.Stelem_I1);
      }
    }
    ilg.Emit(OpCodes.Ret);
  }

  // Emit ldc.i4 with a 32-bit constant held in a uint.  ILGenerator.Emit has
  // no uint overload, so passing the uint directly binds to
  // Emit(OpCode, long), which writes an 8-byte operand after an opcode that
  // takes a 4-byte one; the four surplus zero bytes then decode as nop
  // instructions.  Casting to int selects Emit(OpCode, int), which writes
  // exactly the 4-byte operand with the same bit pattern.
  private static void EmitUInt32Const(ILGenerator ilg, uint value) {
    ilg.Emit(OpCodes.Ldc_I4, unchecked((int)value));
  }

  // Emit code that replaces the uint on top of the evaluation stack by its
  // byte number i, counting from the most significant byte (i = 0).
  // The shift is skipped for the lowest byte and the mask is skipped for
  // the highest byte, where they would have no effect.
  private static void EmitSelectByte(ILGenerator ilg, int i) {
    if (i != 3) {
      ilg.Emit(OpCodes.Ldc_I4, 24 - 8 * i);
      ilg.Emit(OpCodes.Shr_Un);
    }
    if (i != 0) {
      ilg.Emit(OpCodes.Ldc_I4, 0xFF);
      ilg.Emit(OpCodes.And);
    }
  }

  // The type of encryption and decryption functions
  public delegate void Crypter(byte[] a);

  // Converting from 4x4 byte array to 4 uint array
  // Word j is built from column j of bs, with row 0 in the highest byte
  // and row 3 in the lowest.
  public static uint[] ToUints(byte[,] bs) {
    uint[] ws = new uint[4];
    for (int j=0; j<4; j++)
      ws[j] = (uint)(bs[0,j] << 24 | bs[1,j] << 16 | bs[2,j] << 8 | bs[3,j]);
    return ws;
  }

  public static void Print(String name, byte[,] a) {
    Console.WriteLine("\n" + name + ":\n");
    int imax = a.GetLength(0), jmax = a.GetLength(1);
    for (int i=0; i