summary refs log tree commit diff
diff options
context:
space:
mode:
authortzlil <tzlils@protonmail.com>2023-10-31 12:44:26 +0200
committertzlil <tzlils@protonmail.com>2023-10-31 12:44:26 +0200
commit3a114d7766919b3a26fad4b228c60f34f6f46c20 (patch)
tree1b6ff97d72284045ef6608931479636e8bae9564
initial commit
-rw-r--r--Simulator.hs79
-rw-r--r--compiler.c333
-rw-r--r--instructions.txt908
3 files changed, 1320 insertions, 0 deletions
diff --git a/Simulator.hs b/Simulator.hs
new file mode 100644
index 0000000..1718b60
--- /dev/null
+++ b/Simulator.hs
@@ -0,0 +1,79 @@
+module Main where
+
+import qualified Data.Sequence as S
+import Control.Monad.State
+import Data.Word ( Word16 )
+import Data.Maybe (isNothing)
+import Data.Bits ( Bits((.&.), complement, shiftR) )
+import Data.Sequence ((!?))
+import Debug.Trace
+
+
+data Machine = Machine {
+    pc :: Int,
+    registers :: S.Seq Int,
+    memory :: S.Seq Int
+} deriving Show
+
+mkMachine :: [String] -> Machine
+mkMachine f = Machine 0 (S.replicate 8 0) (S.fromList $ map read f)
+
+data MachineError = Halt | MemoryAccessError deriving (Show,Eq)
+execute :: State Machine (Maybe MachineError)
+execute = do
+    inst <- gets (S.lookup.pc<*>memory)
+    -- increment pc
+    -- modify $ Machine.((+1).pc)<*>memory<*>registers
+    regs <- gets registers
+    mem <- gets memory
+    pc <- gets $ (+1).pc
+    case inst of
+        Nothing -> return $ Just MemoryAccessError
+        Just inst ->
+            let Just regA = S.lookup (inst `shiftR` 19 .&. 0b111) regs
+                Just regB = S.lookup (inst `shiftR` 16 .&. 0b111) regs
+                destReg = inst .&. 0b111
+                offsetField = inst .&. 0b1111111111111111
+                wrapAdd x y = (fromIntegral (fromIntegral (x+y) :: Word16) :: Int)
+                 in case inst `shiftR` 22 .&. 0b111 of
+                -- ADD
+                0 -> do
+                    put $ Machine pc (S.update destReg (regA+regB) regs) mem
+                    return Nothing
+                -- NAND
+                1 -> do
+                    put $ Machine pc (S.update destReg (complement regA.&.regB) regs) mem
+                    return Nothing
+                -- LW
+                2 -> do
+                    case S.lookup (wrapAdd offsetField regA) mem of
+                        Nothing -> return $ Just MemoryAccessError
+                        Just v -> do
+                            put $ Machine pc (S.update (inst `shiftR` 16 .&. 0b11) v regs) mem
+                            return Nothing
+                -- SW
+                3 -> do
+                    put $ Machine pc regs (S.update (wrapAdd offsetField regA) regB mem)
+                    return Nothing
+                --- BEQ
+                4 -> do
+                    if regA==regB then 
+                        put $ Machine (wrapAdd offsetField pc)  regs mem
+                    else put $ Machine pc regs mem
+                    return Nothing
+                -- XXX
+                5 -> return Nothing
+                -- HALT
+                6 -> return $ Just Halt
+                -- NOOP
+                7 -> return Nothing
+
+
+main :: IO ()
+main = do
+    input <- getContents
+    let m = until ((/=Nothing).evalState execute) (execState execute) $ mkMachine (lines input)
+    case evalState execute m of
+        Just Halt -> putStrLn "Machine halted"
+        Just MemoryAccessError -> putStrLn "Out of bounds memory access"
+    print m
\ No newline at end of file
diff --git a/compiler.c b/compiler.c
new file mode 100644
index 0000000..18983be
--- /dev/null
+++ b/compiler.c
@@ -0,0 +1,333 @@
+/* Assembler for LC */

+

+#include <stdlib.h>

+#include <stdio.h>

+#include <string.h>

+#define MAXLINELENGTH 1000

+#define MAXNUMLABELS 65536

+#define MAXLABELLENGTH 7 /* includes the null character termination */

+

+#define ADD 0

+#define NAND 1

+#define LW 2

+#define SW 3

+#define BEQ 4

+#define JALR 5

+#define HALT 6

+#define NOOP 7

+

+/*

+ * Read and parse a line of the assembly-language file.  Fields are returned

+ * in label, opcode, arg0, arg1, arg2 (these strings must have memory already

+ * allocated to them).

+ *

+ * Return values:

+ *     0 if reached end of file

+ *     1 if all went well

+ *

+ * exit(1) if line is too long.

+ */

+int

+readAndParse(FILE *inFilePtr, char *label, char *opcode, char *arg0,

+    char *arg1, char *arg2)

+{

+    char line[MAXLINELENGTH];

+    char *ptr = line;

+

+    /* delete prior values */

+    label[0] = opcode[0] = arg0[0] = arg1[0] = arg2[0] = '\0';

+

+    /* read the line from the assembly-language file */

+    if (fgets(line, MAXLINELENGTH, inFilePtr) == NULL) {

+	/* reached end of file */

+        return(0);

+    }

+

+    /* check for line too long */

+    if (strlen(line) == MAXLINELENGTH-1) {

+	printf("error: line too long\n");

+	exit(1);

+    }

+

+    /* is there a label? */

+    ptr = line;

+    if (sscanf(ptr, "%[^\t\n ]", label)) {

+	/* successfully read label; advance pointer over the label */

+        ptr += strlen(label);

+    }

+

+    /*

+     * Parse the rest of the line.  Would be nice to have real regular

+     * expressions, but scanf will suffice.

+     */

+    sscanf(ptr, "%*[\t\n\r ]%[^\t\n\r ]%*[\t\n\r ]%[^\t\n\r ]%*[\t\n\r ]%[^\t\n\r ]%*[\t\n\r ]%[^\t\n\r ]",

+        opcode, arg0, arg1, arg2);

+    return(1);

+}

+

+int

+translateSymbol(char labelArray[MAXNUMLABELS][MAXLABELLENGTH],

+    int labelAddress[MAXNUMLABELS], int numLabels, char *symbol)

+{

+    int i;

+

+    /* search through address label table */

+    for (i=0; i<numLabels && strcmp(symbol, labelArray[i]); i++) {

+    }

+

+    if (i>=numLabels) {

+	printf("error: missing label %s\n", symbol);

+	exit(1);

+    }

+

+    return(labelAddress[i]);

+}

+

+int

+isNumber(char *string)

+{

+    /* return 1 if string is a number */

+    int i;

+    return( (sscanf(string, "%d", &i)) == 1);

+}

+

+/*

+ * Test register argument; make sure it's in range and has no bad characters.

+ */

+void

+testRegArg(char *arg)

+{

+    int num;

+    char c;

+

+    if (atoi(arg) < 0 || atoi(arg) > 7) {

+	printf("error: register out of range\n");

+	exit(2);

+    }

+    if (sscanf(arg, "%d%c", &num, &c) != 1) {

+	printf("bad character in register argument\n");

+	exit(2);

+    }

+}

+

+/*

+ * Test addressField argument.

+ */

+void

+testAddrArg(char *arg)

+{

+    int num;

+    char c;

+

+    /* test numeric addressField */

+    if (isNumber(arg)) {

+	if (sscanf(arg, "%d%c", &num, &c) != 1) {

+	    printf("bad character in addressField\n");

+	    exit(2);

+	}

+    }

+}

+

+/*

+ * main function

+ */

+int

+main(int argc, char *argv[])

+{

+    char *inFileString, *outFileString;

+    FILE *inFilePtr, *outFilePtr;

+    int address;

+    char label[MAXLINELENGTH], opcode[MAXLINELENGTH], arg0[MAXLINELENGTH],

+	arg1[MAXLINELENGTH], arg2[MAXLINELENGTH], argTmp[MAXLINELENGTH];

+    int i;

+    int numLabels=0;

+    int num;

+    int addressField;

+

+    char labelArray[MAXNUMLABELS][MAXLABELLENGTH];

+    int labelAddress[MAXNUMLABELS];

+

+    if (argc != 3) {

+	printf("error: usage: %s <assembly-code-file> <machine-code-file>\n",

+	    argv[0]);

+	exit(1);

+    }

+

+    inFileString = argv[1];

+    outFileString = argv[2];

+

+    inFilePtr = fopen(inFileString, "r");

+    if (inFilePtr == NULL) {

+	printf("error in opening %s\n", inFileString);

+	exit(1);

+    }

+    outFilePtr = fopen(outFileString, "w");

+    if (outFilePtr == NULL) {

+	printf("error in opening %s\n", outFileString);

+	exit(1);

+    }

+

+    /* map symbols to addresses */

+

+    /* assume address start at 0 */

+    for (address=0; readAndParse(inFilePtr, label, opcode, arg0, arg1, arg2);

+	    address++) {

+	/*

+	printf("%d: label=%s, opcode=%s, arg0=%s, arg1=%s, arg2=%s\n",

+	    address, label, opcode, arg0, arg1, arg2);

+	*/

+

+	/* check for illegal opcode */

+	if (strcmp(opcode, "add") && strcmp(opcode, "nand") &&

+	        strcmp(opcode, "lw") && strcmp(opcode, "sw") &&

+		strcmp(opcode, "beq") && strcmp(opcode, "jalr") &&

+		strcmp(opcode, "halt") && strcmp(opcode, "noop") &&

+		strcmp(opcode, ".fill") ) {

+	    printf("error: unrecognized opcode %s at address %d\n", opcode,

+		    address);

+	    exit(1);

+	}

+

+	/* check register fields */

+	if (!strcmp(opcode, "add") || !strcmp(opcode, "nand") ||

+		!strcmp(opcode, "lw") || !strcmp(opcode, "sw") ||

+		!strcmp(opcode, "beq") || !strcmp(opcode, "jalr")) {

+	    testRegArg(arg0);

+	    testRegArg(arg1);

+	}

+	if (!strcmp(opcode, "add") || !strcmp(opcode, "nand")) {

+	    testRegArg(arg2);

+	}

+

+	/* check addressField */

+	if (!strcmp(opcode, "lw") || !strcmp(opcode, "sw") ||

+		!strcmp(opcode, "beq")) {

+	    testAddrArg(arg2);

+	}

+	if (!strcmp(opcode, ".fill")) {

+	    testAddrArg(arg0);

+	}

+

+	/* check for enough arguments */

+	if ( (strcmp(opcode, "halt") && strcmp(opcode, "noop") &&

+	      strcmp(opcode, ".fill")  && strcmp(opcode, "jalr")

+	      && arg2[0]=='\0') ||

+	     (!strcmp(opcode, "jalr") && arg1[0]=='\0') ||

+	     (!strcmp(opcode, ".fill") && arg0[0]=='\0')) {

+	    printf("error at address %d: not enough arguments\n", address);

+	    exit(2);

+	}

+

+	if (label[0] != '\0') {

+	    /* check for labels that are too long */

+	    if (strlen(label) >= MAXLABELLENGTH) {

+		printf("label too long\n");

+		exit(2);

+	    }

+

+	    /* make sure label starts with letter */

+	    if (! sscanf(label, "%[a-zA-Z]", argTmp) ) {

+	        printf("label doesn't start with letter\n");

+		exit(2);

+	    }

+

+	    /* make sure label consists of only letters and numbers */

+	    sscanf(label, "%[a-zA-Z0-9]", argTmp);

+	    if (strcmp(argTmp, label)) {

+	        printf("label has character other than letters and numbers\n");

+		exit(2);

+	    }

+

+	    /* look for duplicate label */

+	    for (i=0; i<numLabels; i++) {

+		if (!strcmp(label, labelArray[i])) {

+		    printf("error: duplicate label %s at address %d\n",

+			label, address);

+		    exit(1);

+		}

+	    }

+	    /* see if there are too many labels */

+	    if (numLabels >= MAXNUMLABELS) {

+		printf("error: too many labels (label=%s)\n", label);

+		exit(2);

+	    }

+

+	    strcpy(labelArray[numLabels], label);

+	    labelAddress[numLabels++] = address;

+	}

+    }

+

+    for (i=0; i<numLabels; i++) {

+	/* printf("%s = %d\n", labelArray[i], labelAddress[i]); */

+    }

+

+    /* now do second pass (print machine code, with symbols filled in as

+	addresses) */

+    rewind(inFilePtr);

+    for (address=0; readAndParse(inFilePtr, label, opcode, arg0, arg1, arg2);

+	    address++) {

+	if (!strcmp(opcode, "add")) {

+	    num = (ADD << 22) | (atoi(arg0) << 19) | (atoi(arg1) << 16)

+		    | atoi(arg2);

+	} else if (!strcmp(opcode, "nand")) {

+	    num = (NAND << 22) | (atoi(arg0) << 19) | (atoi(arg1) << 16)

+		    | atoi(arg2);

+	} else if (!strcmp(opcode, "jalr")) {

+	    num = (JALR << 22) | (atoi(arg0) << 19) | (atoi(arg1) << 16);

+	} else if (!strcmp(opcode, "halt")) {

+	    num = (HALT << 22);

+	} else if (!strcmp(opcode, "noop")) {

+	    num = (NOOP << 22);

+	} else if (!strcmp(opcode, "lw") || !strcmp(opcode, "sw") ||

+		   !strcmp(opcode, "beq")) {

+	    /* if arg2 is symbolic, then translate into an address */

+	    if (!isNumber(arg2)) {

+		addressField = translateSymbol(labelArray, labelAddress,

+					    numLabels, arg2);

+		/*

+		printf("%s being translated into %d\n", arg2, addressField);

+		*/

+		if (!strcmp(opcode, "beq")) {

+		    addressField = addressField-address-1;

+		}

+	    } else {

+		addressField = atoi(arg2);

+	    }

+

+

+	    if (addressField < -32768 || addressField > 32767) {

+		printf("error: offset %d out of range\n", addressField);

+		exit(1);

+	    }

+

+	    /* truncate the offset field, in case it's negative */

+	    addressField = addressField & 0xFFFF;

+

+	    if (!strcmp(opcode, "beq")) {

+		num = (BEQ << 22) | (atoi(arg0) << 19) | (atoi(arg1) << 16)

+		    | addressField;

+	    } else {

+		/* lw or sw */

+		if (!strcmp(opcode, "lw")) {

+		    num = (LW << 22) | (atoi(arg0) << 19) |

+			    (atoi(arg1) << 16) | addressField;

+		} else {

+		    num = (SW << 22) | (atoi(arg0) << 19) |

+			    (atoi(arg1) << 16) | addressField;

+		}

+	    }

+	} else if (!strcmp(opcode, ".fill")) {

+	    if (!isNumber(arg0)) {

+		num = translateSymbol(labelArray, labelAddress, numLabels,

+					arg0);

+	    } else {

+		num = atoi(arg0);

+	    }

+	}

+	/* printf("(address %d): %d (hex 0x%x)\n", address, num, num); */

+	fprintf(outFilePtr, "%d\n", num);

+    }

+

+    exit(0);

+}

+

diff --git a/instructions.txt b/instructions.txt
new file mode 100644
index 0000000..71093b4
--- /dev/null
+++ b/instructions.txt
@@ -0,0 +1,908 @@
+			Project 1-- CDA 3100 Worth: 10 points
+
+1. Purpose
+
+This project is intended to help you understand the instructions of a very
+simple assembly language and how to simulate the execution of the resulting
+machine code representation of a program.
+
+2. Problem
+
+In this project you will write a behavioral simulator for the machine code
+created using the assembler code provided.  This simulator will read in a text file 
+consisting of LC3100 machine code instructions (represented as decimal values),
+and execute the program, then display the values of register files and memory
+after each instruction is completed. Running any reasonable length program will
+generate a large amount of output, but it will make debugging easier.
+
+3. LC3100 Instruction-Set Architecture
+
+For the first several projects, you will be gradually "building" the LC3100
+(Little Computer for  CDA 3100). The LC3100 is very simple, but it is general
+enough to solve complex problems. For this project, you will only need to know
+the instruction set and instruction format of the LC3100.
+
+The LC3100 is an 8-register, 32-bit computer.  All addresses are
+word-addresses.  The LC3100 has 65536 words of memory.  By assembly-language
+convention, register 0 will always contain 0 (i.e. the machine will not enforce
+this, but no assembly-language program should ever change register 0 from its
+initial value of 0).
+
+There are 4 instruction formats (bit 0 is the least-significant bit).  Bits
+31-25 are unused for all instructions, and should always be 0.
+
+R-type instructions (add, nand):
+    bits 24-22: opcode
+    bits 21-19: reg A
+    bits 18-16: reg B
+    bits 15-3:  unused (should all be 0)
+    bits 2-0:   destReg
+
+I-type instructions (lw, sw, beq):
+    bits 24-22: opcode
+    bits 21-19: reg A
+    bits 18-16: reg B
+    bits 15-0:  offsetField (an 16-bit, 2's complement number with a range of
+		    -32768 to 32767)
+
+O-type instructions (halt, noop):
+    bits 24-22: opcode
+    bits 21-0:  unused (should all be 0)
+
+-------------------------------------------------------------------------------
+Table 1: Description of Machine Instructions
+-------------------------------------------------------------------------------
+Assembly language 	Opcode in binary		Action
+name for instruction	(bits 24, 23, 22)
+-------------------------------------------------------------------------------
+add (R-type format)	000 			add contents of regA with
+						contents of regB, store
+						results in destReg.
+
+nand (R-type format)	001			nand contents of regA with
+						contents of regB, store
+						results in destReg.
+
+lw (I-type format)	010			load regB from memory. Memory
+						address is formed by adding
+						offsetField with the contents of
+						regA.
+
+sw (I-type format)	011			store regB into memory. Memory
+						address is formed by adding
+						offsetField with the contents of
+						regA.
+
+beq (I-type format)	100			if the contents of regA and
+						regB are the same, then branch
+						to the address PC+1+offsetField,
+						where PC is the address of the
+						beq instruction.
+
+xxx (O-type format)	101			Unused for this assignment.
+
+halt (O-type format)	110			increment the PC (as with all
+						instructions), then halt the
+						machine (let the simulator
+						notice that the machine
+						halted).
+
+noop (O-type format)	111			do nothing.
+-------------------------------------------------------------------------------
+
+4. Assembly Code
+
+For this (and later assignments, you are provided an assembler to enable you to 
+write test cases in LC3100 assembly code instead of LC3100 machine code. 
+
+The format for a line of assembly code is (<white> means a series of tabs
+and/or spaces):
+
+label<white>instruction<white>field0<white>field1<white>field2<white>comments
+
+The leftmost field on a line is the label field.  Valid labels contain a
+maximum of 6 characters and can consist of letters and numbers (but must start
+with a letter). The label is optional (the white space following the label
+field is required).  Labels make it much easier to write assembly-language
+programs, since otherwise you would need to modify all address fields each time
+you added a line to your assembly-language program!
+
+After the optional label is white space.  Then follows the instruction field,
+where the instruction can be any of the assembly-language instruction names
+listed in the above table.  After more white space comes a series of fields.
+All fields are given as decimal numbers or labels.  The number of fields
+depends on the instruction, and unused fields should be ignored (treat them
+like comments).
+
+    R-type instructions (add, nand) instructions require 3 fields: field0
+    is regA, field1 is regB, and field2 is destReg.
+
+    I-type instructions (lw, sw, beq) require 3 fields: field0 is regA, field1
+    is regB, and field2 is either a numeric value for offsetField or a symbolic
+    address.  Numeric offsetFields can be positive or negative; symbolic
+    addresses are discussed below.
+
+    O-type instructions (noop and halt) require no fields.
+
+Symbolic addresses refer to labels.  For lw or sw instructions, the assembler
+should compute offsetField to be equal to the address of the label.  This could
+be used with a zero base register to refer to the label, or could be used with
+a non-zero base register to index into an array starting at the label.  For beq
+instructions, the assembler should translate the label into the numeric
+offsetField needed to branch to that label.
+
+After the last used field comes more white space, then any comments.  The
+comment field ends at the end of a line.  Comments are vital to creating
+understandable assembly-language programs, because the instructions themselves
+are rather cryptic.
+
+In addition to LC3100 instructions, an assembly-language program may contain
+directions for the assembler. The only assembler directive we will use is .fill
+(note the leading period). .fill tells the assembler to put a number into the
+place where the instruction would normally be stored. .fill instructions use
+one field, which can be either a numeric value or a symbolic address.  For
+example, ".fill 32" puts the value 32 where the instruction would normally be
+stored.  .fill with a symbolic address will store the address of the label.
+In the example below, ".fill start" will store the value 2, because the label
+"start" is at address 2.
+
+The assembler makes two passes over the assembly-language program. In the
+first pass, it will calculate the address for every symbolic label.  Assume
+that the first instruction is at address 0.  In the second pass, it will
+generate a machine-language instruction (in decimal) for each line of assembly
+language.  For example, here is an assembly-language program (that counts down
+from 5, stopping when it hits 0).
+
+	lw	0	1	five	load reg1 with 5 (uses symbolic address)
+	lw	1	2	3	load reg2 with -1 (uses numeric address)
+start	add	1	2	1	decrement reg1
+	beq	0	1	2	goto end of program when reg1==0
+	beq	0	0	start	go back to the beginning of the loop
+	noop
+done	halt				end of program
+five	.fill	5
+neg1	.fill	-1
+stAddr	.fill	start			will contain the address of start (2)
+
+Here is the symbol table generated at the end of pass one (note that the symbol
+table will not be written to the file - unless you do it for debugging purposes)
+
+start   2
+done    6
+five    7
+neg1    8
+stAddr  9
+
+And here is the corresponding machine language:
+
+(address 0): 8454151 (hex 0x810007)
+(address 1): 9043971 (hex 0x8a0003)
+(address 2): 655361 (hex 0xa0001)
+(address 3): 16842754 (hex 0x1010002)
+(address 4): 16842749 (hex 0x100fffd)
+(address 5): 29360128 (hex 0x1c00000)
+(address 6): 25165824 (hex 0x1800000)
+(address 7): 5 (hex 0x5)
+(address 8): -1 (hex 0xffffffff)
+(address 9): 2 (hex 0x2)
+
+Be sure you understand how the above assembly-language program got translated
+to machine language.
+
+Since your programs will always start at address 0, your program should only
+output the contents, not the addresses.
+
+8454151
+9043971
+655361
+16842754
+16842749
+29360128
+25165824
+5
+-1
+2
+
+5. Behavioral Simulator
+
+The first assignment is to write a program that can simulate any
+legal LC3100 machine-code program.  The input for this part will be the
+machine-code file that you created with your assembler.  With a program name
+of "simulate" and a machine-code file of "program.mc", your program should be
+run as follows:
+
+    simulate program.mc > output
+
+This directs all printfs to the file "output".
+
+The simulator should begin by initializing all registers and the program
+counter to 0.  The simulator will then simulate the program until the program
+executes a halt.
+
+The simulator should call printState (included below) before executing each
+instruction and once just before exiting the program.  This function prints the
+current state of the machine (program counter, registers, memory).  printState
+will print the memory contents for memory locations defined in the machine-code
+file (addresses 0-9 in the example used in assignment 1).
+
+5.1 Test Cases
+
+You will write a suite of test cases to validate any LC3100 simulator.
+
+The test cases for the simulator part of this project will be short
+assembly-language programs that, after being assembled into machine code, serve
+as input to a simulator.  You will submit your suite of test cases together
+with your simulator, and we will grade your test suite according to how
+thoroughly it exercises an LC3100 simulator.  Each test case may execute at
+most 200 instructions on a correct simulator, and your test suite may contain
+up to 20 test cases.  These limits are much larger than needed for full credit
+(the solution test suite is composed of a couple test cases, each executing
+less than 40 instructions). 
+graded.
+
+5.2. Simulator Hints
+
+While this assigmnent is fairly easy, the next ones will not be.  I suggest that
+you do incremental testing of your program.  This means start with confirming
+that you are able to load the machine code and the initial print state is correct.
+Then chose one instruction at a time to test. The instruction "halt" should be the
+first test. Create a machine code program consisting of just the halt instruction,
+assemble then simulate it.  My advice is to do this well before you have started 
+programming the simulator to handle any other instructions.  Once this works, you
+can add another instruction to your simulator ("add" would be a good choice) and
+write an assembly language program that consists of two instructions - add followed
+by halt.  Assemble, simulate and debug as necessary.  This incremental development
+style is better in general and for the complex projects will will soon be doing,
+it is necessary (at least it will minimize frustration).
+
+Also, be careful how you handle offsetField for lw, sw, and beq. Remember that it's
+a 2's complement 16-bit number, so you need to convert a negative offsetField
+to a negative 32-bit integer on the Sun workstations (by sign extending it).
+To do this, use the following function.
+
+    int
+    convertNum(int num)
+    {
+	/* convert a 16-bit number into a 32-bit Sun integer */
+	if (num & (1<<15) ) {
+	    num -= (1<<16);
+	}
+	return(num);
+    }
+
+An example run of the simulator (not for the specified task of multiplication)
+is included at the end of this posting.
+
+6. Grading, Formatting and Test Cases
+
+We will grade primarily on functionality, correctly simulating all
+instructions, input and output format, and comprehensiveness of the
+test suites.
+
+To help you validate your project, your submission will be graded
+using scripts The results from the grader will not be very illuminating;
+they won't tell you where your problem is or give you the test programs. 
+The best way to debug your program is to generate your own test cases,
+figure out the correct answers, and compare your program's output to the
+correct answers.  This is also one of the best ways to learn the concepts
+in the project.
+
+The student suite of test cases for the simulator project will be graded
+according to how thoroughly they test an LC3100 simulator.  We will judge
+thoroughness of the test suite by how well it exposes bugs in our simulator.
+
+For your simulator test suite, the grader will correctly assemble each
+test case, then use it as input to our "buggy" simulator which tests for
+common implementation errors.  A test case exposes a buggy simulator by
+causing it to generate a different answer from a correct simulator. 
+The test suite is graded based on how many of the buggy simulators were
+exposed by at least one test case.
+
+Because all programs will be graded in a semi-automated manner using scripts,
+you must be careful to follow the exact formatting rules in the project description:
+
+    1) Don't modify printState or stateStruct at all.  Download
+	this handout into your program electronically (don't re-type it) to
+	avoid typos.
+
+    2) Call printState exactly once before each instruction
+	executes and once just before the simulator exits.  Do not call
+	printState at any other time.
+
+    3) Don't print the sequence "@@@" anywhere except in printState.
+
+    4) state.numMemory must be equal to the number of lines in the
+	machine-code file.
+    
+    5) Initialize all registers to 0.
+
+
+7. Turning in the Project
+
+Use the canvas link for this project to submit your program that simulates the
+LC3100 machine code as well as a set of assembly language files that constitute
+your test cases.
+
+8. Code Fragment for Simulator
+
+Here is some C code that may help you write the simulator.  Again, you should
+take this merely as a hint.  You may have to re-code this to make it do exactly
+what you want, but this should help you get started.  Remember not to
+change stateStruct or printState.
+
+/* instruction-level simulator for LC3100 */
+
+#include <stdio.h>
+#include <string.h>
+
+#define NUMMEMORY 65536 /* maximum number of words in memory */
+#define NUMREGS 8 /* number of machine registers */
+#define MAXLINELENGTH 1000
+
+typedef struct stateStruct {
+    int pc;
+    int mem[NUMMEMORY];
+    int reg[NUMREGS];
+    int numMemory;
+} stateType;
+
+void printState(stateType *);
+
+int
+main(int argc, char *argv[])
+{
+    char line[MAXLINELENGTH];
+    stateType state;
+    FILE *filePtr;
+
+    if (argc != 2) {
+	printf("error: usage: %s <machine-code file>\n", argv[0]);
+	exit(1);
+    }
+
+    filePtr = fopen(argv[1], "r");
+    if (filePtr == NULL) {
+	printf("error: can't open file %s", argv[1]);
+	perror("fopen");
+	exit(1);
+    }
+
+    /* read in the entire machine-code file into memory */
+    for (state.numMemory = 0; fgets(line, MAXLINELENGTH, filePtr) != NULL;
+	state.numMemory++) {
+	if (sscanf(line, "%d", state.mem+state.numMemory) != 1) {
+	    printf("error in reading address %d\n", state.numMemory);
+	    exit(1);
+	}
+	printf("memory[%d]=%d\n", state.numMemory, state.mem[state.numMemory]);
+    }
+
+    return(0);
+}
+
+void
+printState(stateType *statePtr)
+{
+    int i;
+    printf("\n@@@\nstate:\n");
+    printf("\tpc %d\n", statePtr->pc);
+    printf("\tmemory:\n");
+	for (i=0; i<statePtr->numMemory; i++) {
+	    printf("\t\tmem[ %d ] %d\n", i, statePtr->mem[i]);
+	}
+    printf("\tregisters:\n");
+	for (i=0; i<NUMREGS; i++) {
+	    printf("\t\treg[ %d ] %d\n", i, statePtr->reg[i]);
+	}
+    printf("end state\n");
+}
+
+11. Programming Tips
+
+Here are a few programming tips for writing C, C++ or python programs to
+manipulate bits:
+
+1) To indicate a hexadecimal constant in C, precede the number by 0x. For
+example, 27 decimal is 0x1b in hexadecimal.
+
+2) The value of the expression (a >> b) is the number "a" shifted right by "b"
+bits. Neither a nor b are changed. E.g. (25 >> 2) is 6. Note that 25 is 11001 in
+binary, and 6 is 110 in binary.
+
+3) The value of the expression (a << b) is the number "a" shifted left by "b"
+bits. Neither a nor b are changed. E.g. (25 << 2) is 100. Note that 25 is 11001
+in binary, and 100 is 1100100 in binary.
+
+4) To find the value of the expression (a & b), perform a logical AND on each
+bit of a and b (i.e. bit 31 of a ANDED with bit 31 of b, bit 30 of a ANDED with
+bit 30 of b, etc.). E.g.  (25 & 11) is 9, since:
+
+    11001 (binary) 
+  & 01011 (binary)
+---------------------
+ =  01001 (binary), which is 9 decimal.
+
+5) To find the value of the expression (a | b), perform a logical OR on each bit
+of a and b (i.e. bit 31 of a ORED with bit 31 of b, bit 30 of a ORED with bit 30
+of b, etc.). E.g.  (25 | 11) is 27, since:
+
+    11001 (binary) 
+  & 01011 (binary)
+---------------------
+ =  11011 (binary), which is 27 decimal.
+
+6) ~a is the bit-wise complement of a (a is not changed).
+
+Use these operations to create and manipulate machine-code. E.g. to look at bit
+3 of the variable a, you might do: (a>>3) & 0x1. To look at bits (bits 15-12) of
+a 16-bit word, you could do: (a>>12) & 0xF. To put a 6 into bits 5-3 and a 3
+into bits 2-1, you could do: (6<<3) | (3<<1). If you're not sure what an
+operation is doing, print some intermediate results to help you debug.
+-------------------------------------------------------------------------------
+
+9. Example Run of Simulator
+
+memory[0]=8454151
+memory[1]=9043971
+memory[2]=655361
+memory[3]=16842754
+memory[4]=16842749
+memory[5]=29360128
+memory[6]=25165824
+memory[7]=5
+memory[8]=-1
+memory[9]=2
+
+
+@@@
+state:
+	pc 0
+	memory:
+		mem[ 0 ] 8454151
+		mem[ 1 ] 9043971
+		mem[ 2 ] 655361
+		mem[ 3 ] 16842754
+		mem[ 4 ] 16842749
+		mem[ 5 ] 29360128
+		mem[ 6 ] 25165824
+		mem[ 7 ] 5
+		mem[ 8 ] -1
+		mem[ 9 ] 2
+	registers:
+		reg[ 0 ] 0
+		reg[ 1 ] 0
+		reg[ 2 ] 0
+		reg[ 3 ] 0
+		reg[ 4 ] 0
+		reg[ 5 ] 0
+		reg[ 6 ] 0
+		reg[ 7 ] 0
+end state
+
+@@@
+state:
+	pc 1
+	memory:
+		mem[ 0 ] 8454151
+		mem[ 1 ] 9043971
+		mem[ 2 ] 655361
+		mem[ 3 ] 16842754
+		mem[ 4 ] 16842749
+		mem[ 5 ] 29360128
+		mem[ 6 ] 25165824
+		mem[ 7 ] 5
+		mem[ 8 ] -1
+		mem[ 9 ] 2
+	registers:
+		reg[ 0 ] 0
+		reg[ 1 ] 5
+		reg[ 2 ] 0
+		reg[ 3 ] 0
+		reg[ 4 ] 0
+		reg[ 5 ] 0
+		reg[ 6 ] 0
+		reg[ 7 ] 0
+end state
+
+@@@
+state:
+	pc 2
+	memory:
+		mem[ 0 ] 8454151
+		mem[ 1 ] 9043971
+		mem[ 2 ] 655361
+		mem[ 3 ] 16842754
+		mem[ 4 ] 16842749
+		mem[ 5 ] 29360128
+		mem[ 6 ] 25165824
+		mem[ 7 ] 5
+		mem[ 8 ] -1
+		mem[ 9 ] 2
+	registers:
+		reg[ 0 ] 0
+		reg[ 1 ] 5
+		reg[ 2 ] -1
+		reg[ 3 ] 0
+		reg[ 4 ] 0
+		reg[ 5 ] 0
+		reg[ 6 ] 0
+		reg[ 7 ] 0
+end state
+
+@@@
+state:
+	pc 3
+	memory:
+		mem[ 0 ] 8454151
+		mem[ 1 ] 9043971
+		mem[ 2 ] 655361
+		mem[ 3 ] 16842754
+		mem[ 4 ] 16842749
+		mem[ 5 ] 29360128
+		mem[ 6 ] 25165824
+		mem[ 7 ] 5
+		mem[ 8 ] -1
+		mem[ 9 ] 2
+	registers:
+		reg[ 0 ] 0
+		reg[ 1 ] 4
+		reg[ 2 ] -1
+		reg[ 3 ] 0
+		reg[ 4 ] 0
+		reg[ 5 ] 0
+		reg[ 6 ] 0
+		reg[ 7 ] 0
+end state
+
+@@@
+state:
+	pc 4
+	memory:
+		mem[ 0 ] 8454151
+		mem[ 1 ] 9043971
+		mem[ 2 ] 655361
+		mem[ 3 ] 16842754
+		mem[ 4 ] 16842749
+		mem[ 5 ] 29360128
+		mem[ 6 ] 25165824
+		mem[ 7 ] 5
+		mem[ 8 ] -1
+		mem[ 9 ] 2
+	registers:
+		reg[ 0 ] 0
+		reg[ 1 ] 4
+		reg[ 2 ] -1
+		reg[ 3 ] 0
+		reg[ 4 ] 0
+		reg[ 5 ] 0
+		reg[ 6 ] 0
+		reg[ 7 ] 0
+end state
+
+@@@
+state:
+	pc 2
+	memory:
+		mem[ 0 ] 8454151
+		mem[ 1 ] 9043971
+		mem[ 2 ] 655361
+		mem[ 3 ] 16842754
+		mem[ 4 ] 16842749
+		mem[ 5 ] 29360128
+		mem[ 6 ] 25165824
+		mem[ 7 ] 5
+		mem[ 8 ] -1
+		mem[ 9 ] 2
+	registers:
+		reg[ 0 ] 0
+		reg[ 1 ] 4
+		reg[ 2 ] -1
+		reg[ 3 ] 0
+		reg[ 4 ] 0
+		reg[ 5 ] 0
+		reg[ 6 ] 0
+		reg[ 7 ] 0
+end state
+
+@@@
+state:
+	pc 3
+	memory:
+		mem[ 0 ] 8454151
+		mem[ 1 ] 9043971
+		mem[ 2 ] 655361
+		mem[ 3 ] 16842754
+		mem[ 4 ] 16842749
+		mem[ 5 ] 29360128
+		mem[ 6 ] 25165824
+		mem[ 7 ] 5
+		mem[ 8 ] -1
+		mem[ 9 ] 2
+	registers:
+		reg[ 0 ] 0
+		reg[ 1 ] 3
+		reg[ 2 ] -1
+		reg[ 3 ] 0
+		reg[ 4 ] 0
+		reg[ 5 ] 0
+		reg[ 6 ] 0
+		reg[ 7 ] 0
+end state
+
+@@@
+state:
+	pc 4
+	memory:
+		mem[ 0 ] 8454151
+		mem[ 1 ] 9043971
+		mem[ 2 ] 655361
+		mem[ 3 ] 16842754
+		mem[ 4 ] 16842749
+		mem[ 5 ] 29360128
+		mem[ 6 ] 25165824
+		mem[ 7 ] 5
+		mem[ 8 ] -1
+		mem[ 9 ] 2
+	registers:
+		reg[ 0 ] 0
+		reg[ 1 ] 3
+		reg[ 2 ] -1
+		reg[ 3 ] 0
+		reg[ 4 ] 0
+		reg[ 5 ] 0
+		reg[ 6 ] 0
+		reg[ 7 ] 0
+end state
+
+@@@
+state:
+	pc 2
+	memory:
+		mem[ 0 ] 8454151
+		mem[ 1 ] 9043971
+		mem[ 2 ] 655361
+		mem[ 3 ] 16842754
+		mem[ 4 ] 16842749
+		mem[ 5 ] 29360128
+		mem[ 6 ] 25165824
+		mem[ 7 ] 5
+		mem[ 8 ] -1
+		mem[ 9 ] 2
+	registers:
+		reg[ 0 ] 0
+		reg[ 1 ] 3
+		reg[ 2 ] -1
+		reg[ 3 ] 0
+		reg[ 4 ] 0
+		reg[ 5 ] 0
+		reg[ 6 ] 0
+		reg[ 7 ] 0
+end state
+
+@@@
+state:
+	pc 3
+	memory:
+		mem[ 0 ] 8454151
+		mem[ 1 ] 9043971
+		mem[ 2 ] 655361
+		mem[ 3 ] 16842754
+		mem[ 4 ] 16842749
+		mem[ 5 ] 29360128
+		mem[ 6 ] 25165824
+		mem[ 7 ] 5
+		mem[ 8 ] -1
+		mem[ 9 ] 2
+	registers:
+		reg[ 0 ] 0
+		reg[ 1 ] 2
+		reg[ 2 ] -1
+		reg[ 3 ] 0
+		reg[ 4 ] 0
+		reg[ 5 ] 0
+		reg[ 6 ] 0
+		reg[ 7 ] 0
+end state
+
+@@@
+state:
+	pc 4
+	memory:
+		mem[ 0 ] 8454151
+		mem[ 1 ] 9043971
+		mem[ 2 ] 655361
+		mem[ 3 ] 16842754
+		mem[ 4 ] 16842749
+		mem[ 5 ] 29360128
+		mem[ 6 ] 25165824
+		mem[ 7 ] 5
+		mem[ 8 ] -1
+		mem[ 9 ] 2
+	registers:
+		reg[ 0 ] 0
+		reg[ 1 ] 2
+		reg[ 2 ] -1
+		reg[ 3 ] 0
+		reg[ 4 ] 0
+		reg[ 5 ] 0
+		reg[ 6 ] 0
+		reg[ 7 ] 0
+end state
+
+@@@
+state:
+	pc 2
+	memory:
+		mem[ 0 ] 8454151
+		mem[ 1 ] 9043971
+		mem[ 2 ] 655361
+		mem[ 3 ] 16842754
+		mem[ 4 ] 16842749
+		mem[ 5 ] 29360128
+		mem[ 6 ] 25165824
+		mem[ 7 ] 5
+		mem[ 8 ] -1
+		mem[ 9 ] 2
+	registers:
+		reg[ 0 ] 0
+		reg[ 1 ] 2
+		reg[ 2 ] -1
+		reg[ 3 ] 0
+		reg[ 4 ] 0
+		reg[ 5 ] 0
+		reg[ 6 ] 0
+		reg[ 7 ] 0
+end state
+
+@@@
+state:
+	pc 3
+	memory:
+		mem[ 0 ] 8454151
+		mem[ 1 ] 9043971
+		mem[ 2 ] 655361
+		mem[ 3 ] 16842754
+		mem[ 4 ] 16842749
+		mem[ 5 ] 29360128
+		mem[ 6 ] 25165824
+		mem[ 7 ] 5
+		mem[ 8 ] -1
+		mem[ 9 ] 2
+	registers:
+		reg[ 0 ] 0
+		reg[ 1 ] 1
+		reg[ 2 ] -1
+		reg[ 3 ] 0
+		reg[ 4 ] 0
+		reg[ 5 ] 0
+		reg[ 6 ] 0
+		reg[ 7 ] 0
+end state
+
+@@@
+state:
+	pc 4
+	memory:
+		mem[ 0 ] 8454151
+		mem[ 1 ] 9043971
+		mem[ 2 ] 655361
+		mem[ 3 ] 16842754
+		mem[ 4 ] 16842749
+		mem[ 5 ] 29360128
+		mem[ 6 ] 25165824
+		mem[ 7 ] 5
+		mem[ 8 ] -1
+		mem[ 9 ] 2
+	registers:
+		reg[ 0 ] 0
+		reg[ 1 ] 1
+		reg[ 2 ] -1
+		reg[ 3 ] 0
+		reg[ 4 ] 0
+		reg[ 5 ] 0
+		reg[ 6 ] 0
+		reg[ 7 ] 0
+end state
+
+@@@
+state:
+	pc 2
+	memory:
+		mem[ 0 ] 8454151
+		mem[ 1 ] 9043971
+		mem[ 2 ] 655361
+		mem[ 3 ] 16842754
+		mem[ 4 ] 16842749
+		mem[ 5 ] 29360128
+		mem[ 6 ] 25165824
+		mem[ 7 ] 5
+		mem[ 8 ] -1
+		mem[ 9 ] 2
+	registers:
+		reg[ 0 ] 0
+		reg[ 1 ] 1
+		reg[ 2 ] -1
+		reg[ 3 ] 0
+		reg[ 4 ] 0
+		reg[ 5 ] 0
+		reg[ 6 ] 0
+		reg[ 7 ] 0
+end state
+
+@@@
+state:
+	pc 3
+	memory:
+		mem[ 0 ] 8454151
+		mem[ 1 ] 9043971
+		mem[ 2 ] 655361
+		mem[ 3 ] 16842754
+		mem[ 4 ] 16842749
+		mem[ 5 ] 29360128
+		mem[ 6 ] 25165824
+		mem[ 7 ] 5
+		mem[ 8 ] -1
+		mem[ 9 ] 2
+	registers:
+		reg[ 0 ] 0
+		reg[ 1 ] 0
+		reg[ 2 ] -1
+		reg[ 3 ] 0
+		reg[ 4 ] 0
+		reg[ 5 ] 0
+		reg[ 6 ] 0
+		reg[ 7 ] 0
+end state
+
+@@@
+state:
+	pc 6
+	memory:
+		mem[ 0 ] 8454151
+		mem[ 1 ] 9043971
+		mem[ 2 ] 655361
+		mem[ 3 ] 16842754
+		mem[ 4 ] 16842749
+		mem[ 5 ] 29360128
+		mem[ 6 ] 25165824
+		mem[ 7 ] 5
+		mem[ 8 ] -1
+		mem[ 9 ] 2
+	registers:
+		reg[ 0 ] 0
+		reg[ 1 ] 0
+		reg[ 2 ] -1
+		reg[ 3 ] 0
+		reg[ 4 ] 0
+		reg[ 5 ] 0
+		reg[ 6 ] 0
+		reg[ 7 ] 0
+end state
+machine halted
+total of 17 instructions executed
+final state of machine:
+
+@@@
+state:
+	pc 7
+	memory:
+		mem[ 0 ] 8454151
+		mem[ 1 ] 9043971
+		mem[ 2 ] 655361
+		mem[ 3 ] 16842754
+		mem[ 4 ] 16842749
+		mem[ 5 ] 29360128
+		mem[ 6 ] 25165824
+		mem[ 7 ] 5
+		mem[ 8 ] -1
+		mem[ 9 ] 2
+	registers:
+		reg[ 0 ] 0
+		reg[ 1 ] 0
+		reg[ 2 ] :
+		reg[ 3 ] 0
+		reg[ 4 ] 0
+		reg[ 5 ] 0
+		reg[ 6 ] 0
+		reg[ 7 ] 0
+end state