#!/usr/bin/perl
#
# Sarcasm Z80 Assembler
#
# Current web site: http://www.ecstaticlyrics.com/electronics/Z80/sarcasm/
#
# If all else fails, search for "Sarcasm Z80 Assembler" with the quotes.
#

# Information is always nice to have...
if (@ARGV == 0) {
    print STDERR "Usage: ./sarcasm.pl source.asm ...possibly more source files...\r\n";
    print STDERR "Output files are now specified with 'output' in the source files.\r\n";
    exit 1;
}

# Create a hash of reserved words for easy lookups.
foreach my $peanut ('output', 'section', 'namespace', 'goto', 'data', 'bytes', 'words') {
    $peanuts{$peanut} = '';
}

# Read that supercool opcodes file.  Without it we know nothing!
#
# Each useful line is "<HEX> <instruction text>", e.g. a hex string followed
# by the normalized instruction it encodes.  The results are:
#   %opcodes - instruction text => packed binary encoding
#   %eggs    - every individual word seen in an instruction (mnemonics,
#              register names, operand placeholders), used as reserved terms
#
# NOTE(review): the name of the opcode file was lost from this copy of the
# source (the text between '<' and '>' was stripped).  'opcodes.txt' is an
# assumption -- confirm against the original distribution.
my $opcodefile = 'opcodes.txt';
open my $opcode_fh, '<', $opcodefile
    or die "Error opening '$opcodefile' for input: $!\r\n";
while (my $line = <$opcode_fh>) {
    $line =~ s/[\r\n]//g;     # Strip out line terminators.
    $line =~ s/\#.*//g;       # Remove comments.
    $line =~ s/[ \t]+/ /g;    # Change whitespaces to single spaces.
    $line =~ s/^ +//g;        # Remove leading spaces.
    $line =~ s/ +$//g;        # Remove trailing spaces.
    next if $line eq '';      # Skip empty lines.

    # Record the opcode encoding in a hash...
    my ($hex, $ops) = $line =~ /([0-9A-F]+) (.+)/;
    # A line that does not parse used to be stored under an empty key with an
    # empty encoding; fail loudly instead.
    die "Opcodes File Error: Cannot parse line '$line'\r\n" unless defined $ops;
    die "Opcodes File Error: Duplicate Entry for '$ops'\r\n" if exists $opcodes{$ops};
    $opcodes{$ops} = pack 'H' . length($hex), $hex;

    # Create a list of reserved terms, e.g. 'bc', 'de', 'hl', etc.
    $eggs{$_} = '' foreach split / /, $ops;
}
close $opcode_fh;

# Read the source files...
#
# Every file named on the command line is slurped whole.  The results are:
#   @sources - file names, in command-line order
#   %sources - file name => file text with every line ending replaced by NUL
foreach my $sourcefile (@ARGV) {
    my $source;
    # Three-argument open: the file name is never interpreted as a mode or pipe.
    if (open my $source_fh, '<', $sourcefile) {
        local $/;    # slurp mode for this read only
        $source = <$source_fh>;
        close $source_fh;
    }
    else {
        print STDERR "Error opening '$sourcefile' for input: $!\r\n";
        $fatal = 'unfortunately';
        next;        # nothing was read, so there is nothing to register
    }
    $source = '' unless defined $source;

    # Convert all line endings to null characters for portability...
    # (CRLF is tried first so it becomes a single terminator.)
    $source =~ s/\r\n|\n|\r/\x00/g;
    $source .= "\x00" unless $source =~ /\x00$/;

    push @sources, $sourcefile;
    $sources{$sourcefile} = $source;
}
exit(1) if $fatal;

# Parse the source files...
# (Yes, there are some things that cannot be done with regular expressions.)
#
# This part of Sarcasm removes comments from the source, and formats it nicely
# so that the rest of the code has no problem parsing it.  It ensures that
# strings are properly quoted, that square brackets are used correctly,
# it removes unnecessary spaces, and it splits multiple instructions on a line
# into multiple lines, making a note of where each piece of code came from
# in case any error messages are necessary later.
#
# The result is %codes: file name => list of [location, statement] pairs.
foreach my $sourcefile (@sources) {
    @{ $codes{$sourcefile} } = parse_source($sourcefile, $sources{$sourcefile});
}
exit(1) if $fatal;

# Function: parse_source($sourcefile, $source)
#
# Splits NUL-terminated source text into statements and returns a list of
# [location, statement] array references.  The location is "file: line" or
# "file: line.part" when a line holds several ';'-separated statements.
# Syntax problems are printed and recorded in the global $fatal.
#
# Each call builds its own list, so statements from one file never end up
# in the list of the next file.
sub parse_source {
    my ($sourcefile, $source) = @_;
    $source = '' unless defined $source;
    # The scanner only emits a statement when it sees a terminator.
    $source .= "\x00" unless $source =~ /\x00\z/;

    my @pieces;
    my ($line, $part) = (1, 1);
    my ($string, $quote, $bracket, $comment) = ('', '', '', '');

    foreach my $position (0 .. length($source) - 1) {
        my $character = substr($source, $position, 1);

        # End of line: comments end here, and anything still open is an error.
        if ($character eq "\x00") {
            $comment = '';
            if ($quote ne '') {
                print "$sourcefile: $line, $part -- $string\r\n -- Unterminated String\r\n";
                $fatal = 'unfortunately';
                $quote = '';
            }
            if ($bracket ne '') {
                print "$sourcefile: $line, $part -- $string\r\n -- Incomplete Square Bracket Set\r\n";
                $fatal = 'unfortunately';
                $bracket = '';
            }
        }

        if ($comment) {
            # Sarcasm is the awesomest assembler ever!
        }
        elsif ($character eq "\x00") {
            $string =~ s/\s*$//;
            if ($string) {
                # A lone statement on a line is reported without a part number.
                my $where = ($part eq 1) ? $line : $line . '.' . $part;
                push @pieces, [$sourcefile . ': ' . $where, $string];
            }
            $line++;
            $part = 1;
            $string = '';
        }
        elsif ($quote eq '"') {
            $string .= $character;
            $quote = '' if $character eq $quote;
        }
        elsif ($quote eq "'") {
            $string .= $character;
            # A single quote only closes the string when followed by a
            # separator, so apostrophes inside the text are kept.
            if (substr($source, $position + 1, 1) =~ /^[\s\,\x00]?$/) {
                $quote = '' if $character eq $quote;
            }
        }
        elsif ($character eq '#') {
            $comment = '#';
        }
        elsif ($character eq ';') {
            if ($bracket) {
                print "$sourcefile: $line, $part -- $string$character...\r\n -- Square Bracket Error\r\n";
                $fatal = 'unfortunately';
                $bracket = '';
            }
            $string =~ s/\s*$//;
            push @pieces, [$sourcefile . ': ' . $line . '.' . $part, $string] if $string;
            $part++;
            $string = '';
        }
        elsif ($bracket) {
            # Inside [...] all spaces and commas are dropped.
            $string .= $character if $character !~ /[\s\,]/;
            $bracket = '' if $character eq $bracket;
        }
        elsif ($character =~ /[\'\"]/) {
            $string .= $character;
            $quote = $character;
        }
        elsif ($character eq '[') {
            $string .= $character;
            $bracket = ']';
        }
        elsif ($character eq ']') {
            $string .= $character;
            print "$sourcefile: $line, $part -- $string$character...\r\n -- Unopened Square Brackets\r\n";
            $fatal = 'unfortunately';
        }
        elsif ($character =~ /[\s\,]/) {
            # Runs of separators collapse to one space; leading ones vanish.
            $string .= ' ' if $string !~ /\s$/ and $string ne '';
        }
        else {
            $string .= $character;
        }
    }

    return @pieces;
}

# Here we look for things like "mov a -6 + label" and turn them into
# "mov a -6+label"  It's tricky because we don't combine just anything.
# Stuff in quotes has to be left alone, register names, instruction names
# and reserved words cannot be formed into an equation, and negative symbols
# have to be distinguished from subtraction.  Finally, consecutive
# '+' or '-' symbols are reduced to a single symbol.
foreach my $sourcefile (@sources) {
    foreach my $entry (@{ $codes{$sourcefile} }) {
        $$entry[1] = combine_terms($$entry[1]);
    }
}

# Function: reduce_signs($text)
#
# Replaces every run of two or more '+'/'-' characters with the single sign
# it is equivalent to ('-' when the run holds an odd number of minus signs).
sub reduce_signs {
    my ($text) = @_;
    $text =~ s/([\+\-]{2,})/ (($1 =~ tr{-}{}) % 2) ? '-' : '+' /ge;
    return $text;
}

# Function: join_terms($expression, $word)
#
# Appends $word to $expression without a space.  When the expression ends in
# a sign and the word starts with one, the two signs are merged so that
# "label-" followed by "-2" becomes "label+2" rather than "label--2".
sub join_terms {
    my ($expression, $word) = @_;
    if ($expression =~ /[\+\-]$/ and $word =~ /^[\+\-]/) {
        my $sign = (chop($expression) eq substr($word, 0, 1)) ? '+' : '-';
        return $expression . $sign . substr($word, 1);
    }
    return $expression . $word;
}

# Function: combine_terms($statement)
#
# Takes one space-separated statement and returns it with the pieces of each
# arithmetic expression glued together.  Reads the global %peanuts (reserved
# words) and %eggs (opcode-file terms) to decide what may not be glued.
sub combine_terms {
    my ($statement) = @_;
    my @words = split / /, $statement;
    my $new = '';
    my $scratch = '';    # quoted string being collected across words

    foreach my $word (@words) {
        # Quoted text is passed through untouched, spaces included.
        if ($scratch ne '' or $word =~ /^[\'\"]/) {
            $scratch = ($scratch ne '') ? $scratch . ' ' . $word : $word;
            if (length($scratch) > 1 and substr($scratch, 0, 1) eq substr($scratch, -1)) {
                $new .= ' ' . $scratch;
                $scratch = '';
            }
            next;
        }

        $word = reduce_signs($word);

        my $previous = (split / /, $new)[-1];
        $previous = '' unless defined $previous;

        if ($word =~ /^\[.*\]$/ or exists $peanuts{$previous} or exists $eggs{$previous}) {
            # Bracketed operands, and anything following a reserved term,
            # always start a new operand.
            $new .= ' ' . $word;
        }
        elsif ($new =~ /[\+\-]$/) {
            $new = join_terms($new, $word);
        }
        elsif ($word =~ /^[\+\-]/) {
            if ($word =~ /^.[0-9]/) {
                # A signed number is taken as a negative/positive literal.
                $new .= ' ' . $word;
            }
            else {
                $new = join_terms($new, $word);
            }
        }
        else {
            $new .= ' ' . $word;
        }
    }

    # Text that opened with a quote but never closed is kept, not carried
    # into the next statement; the assembler reports it later.
    $new .= ' ' . $scratch if $scratch ne '';

    $new =~ s/^ +//g;
    $new =~ s/ +$//g;
    return $new;
}

# Function: addcode($data)
#
# This function adds assembled code or data to the memory image in the current
# section, and warns if the addition exceeds the section limit.
#
# It writes into the global $memory at the current section's pointer
# ($sec{$section}[2]) and advances that pointer.  The overflow warning is
# printed once only (tracked in the global $exceeded) and sets $fatal.
sub addcode {
    my ($data) = @_;
    my $size = length $data;

    substr($memory, $sec{$section}[2], $size, $data);
    $sec{$section}[2] += $size;

    # $sec{$section}[1] is the last valid address, so the pointer may sit
    # one past it after the final byte.
    if ($sec{$section}[2] > $sec{$section}[1] + 1) {
        unless ($exceeded) {
            print "$$piece[0] -- $$piece[1]\r\n -- $section section's limit exceeded.\r\n";
            $exceeded = 'yes';
            $fatal = 'unfortunately';
        }
    }
}

# Function: solve($datatype, $equation)
#
# This function solves equations such as "label + 12" and such, returning
# the specified data type.  'xxxx' is a word, 'xx' is a byte, 'zz' is a
# signed byte, 'rr' is a signed byte relative to the current code pointer,
# and 'word' is a word used by Sarcasm which isn't assembled into code.
# Type 'word' returns a number, everything else returns binary data.
sub solve {
    # Usage: solve($datatype, $equation)
    #
    # Evaluates an equation made of terms joined by '+' and '-' and returns
    # it in the requested form:
    #   'xxxx' - 16-bit little-endian word (binary)
    #   'xx'   - byte (binary)
    #   'zz'   - signed byte (binary)
    #   'rr'   - signed byte relative to the current code pointer (binary)
    #   'word' - plain number, used by the assembler itself
    #
    # A term may be a label, a local label ('.name'), '$' (the current
    # section pointer), hex ('$1F' or '0x1F') or decimal.  Labels are only
    # resolved in pass two, or always for type 'word'.
    #
    # Reads the globals $pass, %labels, $namespace, $prefix, %sec, $section
    # and $piece; problems are printed and recorded in $fatal.
    my ($type, $equation) = @_;
    $equation = '' unless defined $equation;
    my $value = 0;

    if (($pass == 2 and $equation ne '') or $type eq 'word') {
        # Split the equation into optionally signed terms.  The match is
        # anchored and must consume something, so a dangling operator such
        # as "5+" is reported instead of looping forever.
        my @terms;
        my $rest = $equation;
        while ($rest ne '') {
            if ($rest =~ s/^([\+\-]?[^\+\-]+)//) {
                push @terms, $1;
            }
            else {
                print "$$piece[0] -- $$piece[1]\r\n -- I cannot make sense of '$rest' in '$equation'.\r\n";
                $fatal = 'unfortunately';
                last;
            }
        }

        foreach my $signed_term (@terms) {
            my ($math, $term) = $signed_term =~ /([\+\-])?(.*)/;
            my $junk = 0;    # a term that cannot be resolved contributes nothing

            if ($term =~ /\./) {
                if ($term =~ /^\./) {
                    # Local label: looked up under the current prefix.
                    $term = substr($term, 1);
                    if (exists $labels{"$namespace.$prefix.$term"}) {
                        $junk = $labels{"$namespace.$prefix.$term"};
                    }
                    else {
                        print "$$piece[0] -- $$piece[1]\r\n -- Local label '.$term' does not exist under prefix '$prefix'.\r\n";
                        $fatal = 'unfortunately';
                    }
                }
                else {
                    # "a.b" may be prefix.local in this namespace,
                    # namespace.label, or a fully spelled-out key.
                    if (exists $labels{"$namespace.$term"}) {
                        $junk = $labels{"$namespace.$term"};
                    }
                    elsif (exists $labels{"$term."}) {
                        $junk = $labels{"$term."};
                    }
                    elsif (exists $labels{"$term"}) {
                        $junk = $labels{"$term"};
                    }
                    else {
                        my ($space, $label) = $term =~ /(.*?)\.(.*)/;
                        # Yes, two errors for the price of one!
                        print "$$piece[0] -- $$piece[1]\r\n";
                        $fatal = 'unfortunately';
                        print " -- Label '$term' does not exist in namespace '$namespace'.\r\n";
                        print " -- Label '$label' does not exist in namespace '$space'.\r\n";
                    }
                }
            }
            elsif (exists $labels{"$namespace.$term."}) {
                $junk = $labels{"$namespace.$term."};
            }
            elsif ($term eq '$') {
                $junk = $sec{$section}[2];
            }
            elsif ($term =~ /^\$[0-9A-Fa-f]+$/) {
                $junk = hex substr($term, 1);
            }
            elsif ($term =~ /^0x[0-9A-Fa-f]+$/) {
                $junk = hex substr($term, 2);
            }
            elsif ($term =~ /^[0-9]+$/) {
                # Anchored at both ends: "12abc" is not the number 12.
                $junk = $term;
            }
            else {
                # This should probably be a double error as well, of sorts...
                print "$$piece[0] -- $$piece[1]\r\n";
                $fatal = 'unfortunately';
                print " -- I cannot figure out how to turn '$term' into a number,\r\n";
                print " -- nor is there a label '$term' in namespace '$namespace'.\r\n";
            }

            if (defined $math and $math eq '-') {
                $value -= $junk;
            }
            else {
                $value += $junk;
            }
        }
    }
    else {
        # We don't know the value of labels in pass one, so always return zero.
        # For 'rr' the current pointer is used so the relative range check
        # below cannot fail spuriously.
        $value = 0;
        $value = $sec{$section}[2] if $type eq 'rr';
    }

    if ($type eq 'xxxx') {
        if ($value > 65535 or $value < -32768) {
            print "$$piece[0] -- $$piece[1]\r\n -- $value does not fit into a word\r\n";
            $fatal = 'unfortunately';
        }
        $value += 65536 if $value < 0;
        return pack 'v', $value;
    }
    elsif ($type eq 'xx') {
        if ($value > 255 or $value < -128) {
            print "$$piece[0] -- $$piece[1]\r\n -- $value does not fit into a byte\r\n";
            $fatal = 'unfortunately';
        }
        $value += 256 if $value < 0;
        return pack 'C', $value;
    }
    elsif ($type eq 'zz') {
        if ($value > 127 or $value < -128) {
            print "$$piece[0] -- $$piece[1]\r\n -- $value is not within -128 to +127\r\n";
            $fatal = 'unfortunately';
        }
        return pack 'c', $value;
    }
    elsif ($type eq 'rr') {
        # Distance from the end of the two-byte instruction, wrapped to 16 bits
        # and then read back as a signed number.
        $value = ($value - $sec{$section}[2] - 2) & 65535;
        $value -= 65536 if $value > 32767;
        if ($value > 127 or $value < -128) {
            print "$$piece[0] -- $$piece[1]\r\n -- $value bytes is too far away\r\n";
            $fatal = 'unfortunately';
        }
        return pack 'c', $value;
    }
    elsif ($type eq 'word') {
        if ($value > 65535 or $value < 0) {
            print "$$piece[0] -- $$piece[1]\r\n -- $value does not fit into a word\r\n";
            $fatal = 'unfortunately';
        }
        return $value;
    }
    return;
}

# The assembler!!!  Yes, this is where the fun happens!
#
# Assembly is done in two passes.  The first pass is just to determine the
# value of labels, the second pass does the final assembly.
# Both passes walk every statement of every source file.  Pass one records
# label addresses in %labels; pass two emits the final bytes and writes the
# output files.  $pass and $piece are deliberately package variables: solve()
# and addcode() read them.
foreach $pass (1, 2) {
    # Section table: name => [first address, last address, current pointer].
    %sec = ();
    %namespaces = ();
    $memory = "\x00" x 65536;
    $section = 'default';
    $sec{$section} = [0x0000, 0xFFFF, 0x0000];

    foreach my $sourcefile (@sources) {
        # Each file starts in its own namespace, named after the file.
        $namespace = "[$sourcefile]";
        $namespaces{$namespace} = $sourcefile;
        $prefix = '[no local prefix]' if $pass == 2;

        foreach $piece (@{ $codes{$sourcefile} }) {
            my $statement = $$piece[1];

            if ($statement =~ /^output (.*) (\S+) (\S+)$/) {
                # output <file> <first address> <last address>
                my ($filename, $first, $last) = ($1, $2, $3);
                my $base = solve('word', $first);
                my $limit = solve('word', $last);
                my $size = $limit - $base + 1;
                if ($filename =~ /^[\'\"].*[\'\"]$/ and substr($filename, 0, 1) eq substr($filename, -1)) {
                    $filename = substr($filename, 1, length($filename) - 2);
                }
                if ($pass == 2) {
                    # Three-argument open takes the name literally; binmode
                    # keeps the image byte-exact on platforms that translate
                    # line endings.
                    if (open my $output_fh, '>', $filename) {
                        binmode $output_fh;
                        my $written = print {$output_fh} substr($memory, $base, $size);
                        my $closed = close $output_fh;
                        if ($written and $closed) {
                            $successfuloutput = 'true' unless $successfuloutput eq 'false';
                        }
                        else {
                            print STDERR "$$piece[0] -- $$piece[1]\r\n -- Error writing '$filename': $!\r\n";
                            $fatal = 'unfortunately';
                            $successfuloutput = 'false';
                        }
                    }
                    else {
                        print STDERR "$$piece[0] -- $$piece[1]\r\n -- Error opening '$filename' for output: $!\r\n";
                        $fatal = 'unfortunately';
                        $successfuloutput = 'false';
                    }
                }
            }
            elsif ($statement =~ /^section (\S+) (\S+) (\S+)$/) {
                # section <name> <offset> <size>: define and enter a section.
                my ($name, $start, $length) = ($1, $2, $3);
                if (exists $sec{$name}) {
                    print "$$piece[0] -- $$piece[1]\r\n -- Section $name already defined.\r\n";
                    $fatal = 'unfortunately';
                }
                else {
                    my $offset = solve('word', $start);
                    my $size = solve('word', $length);
                    my $limit = $offset + $size - 1;
                    if ($limit < 65536) {
                        $section = $name;
                        $sec{$section} = [$offset, $limit, $offset];
                    }
                    else {
                        print "$$piece[0] -- $$piece[1]\r\n -- Offset + Size must be < 64k.\r\n";
                        $fatal = 'unfortunately';
                    }
                }
            }
            elsif ($statement =~ /^section (\S+)$/) {
                # section <name>: switch back to a section defined earlier.
                my $name = $1;
                if (exists $sec{$name}) {
                    $section = $name;
                }
                else {
                    print "$$piece[0] -- $$piece[1]\r\n -- Section $name not defined.\r\n";
                    $fatal = 'unfortunately';
                }
            }
            elsif ($statement =~ /^namespace (\S+)$/) {
                $namespace = $1;
                $namespaces{$namespace} = $sourcefile;
                $prefix = '[no local prefix]' if $pass == 2;
            }
            elsif ($statement =~ /^goto (\S+)$/) {
                # goto <address>: move the pointer within the current section.
                my $address = solve('word', $1);
                if ($sec{$section}[0] <= $address and $sec{$section}[1] >= $address) {
                    $sec{$section}[2] = $address;
                }
                else {
                    print "$$piece[0] -- $$piece[1]\r\n -- Address $address is not in section $section.\r\n";
                    $fatal = 'unfortunately';
                }
            }
            elsif ($statement =~ /^words \S+/) {
                my @values = split / /, $statement;
                shift @values;
                addcode(solve('xxxx', $_)) foreach @values;
            }
            elsif ($statement =~ /^bytes \S+/) {
                my @values = split / /, $statement;
                shift @values;
                addcode(solve('xx', $_)) foreach @values;
            }
            elsif ($statement =~ /^data \S+/) {
                # data accepts $xx, $xxxx, !hexstring and quoted strings.
                # A quoted string containing spaces arrives split over several
                # words and is stitched back together here.
                my @words = split / /, $statement;
                shift @words;
                my ($quote, $scratch) = ('', '');
                foreach my $word (@words) {
                    if ($quote ne '') {
                        $scratch .= ' ' . $word;
                        if ($quote eq substr($word, -1)) {
                            $word = $scratch;
                            $quote = '';
                            $scratch = '';
                        }
                    }
                    elsif ($word =~ /^[\'\"]/) {
                        if ($word !~ /^\'.*\'$/ and $word !~ /^\".*\"$/) {
                            $scratch = $word;
                            ($quote) = $word =~ /^([\'\"])/;
                        }
                    }
                    next if $quote ne '';    # still collecting a string

                    if ($word =~ /^\$[0-9A-Fa-f]{2}$/) {
                        addcode(pack "C", hex substr($word, 1, 2));
                    }
                    elsif ($word =~ /^\$[0-9A-Fa-f]{4}$/) {
                        addcode(pack "v", hex substr($word, 1, 4));
                    }
                    elsif ($word =~ /^\!([0-9A-Fa-f]{2})+$/) {
                        my $len = (length $word) - 1;
                        addcode(pack "H$len", substr($word, 1));
                    }
                    elsif ($word =~ /^\".*\"$/ or $word =~ /^\'.*\'$/) {
                        addcode(substr($word, 1, (length $word) - 2));
                    }
                    else {
                        print "$$piece[0] -- $$piece[1]\r\n -- Data statement requires numbers in the form of \$xx or \$xxxx.\r\n";
                        $fatal = 'unfortunately';
                    }
                }
                if ($quote ne '') {
                    # Text that opened a quote and never closed it used to be
                    # dropped and to leak into the next data statement.
                    print "$$piece[0] -- $$piece[1]\r\n -- Unterminated string in data statement.\r\n";
                    $fatal = 'unfortunately';
                }
            }
            elsif (exists $opcodes{$statement}) {
                # The statement is spelled exactly as in the opcode table.
                addcode($opcodes{$statement});
            }
            else {
                my @operands = split / /, $statement;
                my $cream = shift @operands;

                if (@operands) {
                    # Rebuild the statement as an opcode-table key: reserved
                    # terms stay, every other operand becomes a placeholder.
                    my $sugar = '';    # assembled [xxxx] address operands
                    my $toast = '';    # the one free-form operand, if any
                    my $syrup = '';    # index-register displacement bytes
                    foreach my $operand (@operands) {
                        if (exists $eggs{$operand}) {
                            $cream .= ' ' . $operand;
                        }
                        elsif ($operand =~ /^\[st\b(.*)\]$/) {
                            my $displacement = $1;
                            $cream .= ' [st+zz]';
                            $syrup .= solve('zz', $displacement);
                        }
                        elsif ($operand =~ /^\[uv\b(.*)\]$/) {
                            my $displacement = $1;
                            $cream .= ' [uv+zz]';
                            $syrup .= solve('zz', $displacement);
                        }
                        elsif ($operand =~ /^\[(.*)\]$/) {
                            my $address = $1;
                            $cream .= ' [xxxx]';
                            $sugar .= solve('xxxx', $address);
                        }
                        else {
                            # An error may occur here, but the error message is generated elsewhere.
                            $cream .= ' ***';
                            $toast = $operand;
                        }
                    }

                    if ($toast eq '') {
                        if (exists $opcodes{$cream}) {
                            $sugar = $opcodes{$cream} . $sugar;
                            # The displacement goes right after the first two
                            # opcode bytes.
                            substr($sugar, 2, 0, $syrup) if $syrup ne '';
                            addcode($sugar);
                        }
                        else {
                            print "$$piece[0] -- $$piece[1]\r\n -- \"$cream\" not found\r\n";
                            $fatal = 'unfortunately';
                        }
                    }
                    else {
                        # Try the free-form operand as a word, then a byte,
                        # then a relative offset, in that order.
                        (my $xxxx = $cream) =~ s/\*\*\*/xxxx/;
                        (my $xx = $cream) =~ s/\*\*\*/xx/;
                        (my $rr = $cream) =~ s/\*\*\*/rr/;
                        if (exists $opcodes{$xxxx}) {
                            addcode($opcodes{$xxxx} . solve('xxxx', $toast));
                        }
                        elsif (exists $opcodes{$xx}) {
                            $sugar = $opcodes{$xx} . solve('xx', $toast);
                            substr($sugar, 2, 0, $syrup) if $syrup ne '';
                            addcode($sugar);
                        }
                        elsif (exists $opcodes{$rr}) {
                            addcode($opcodes{$rr} . solve('rr', $toast));
                        }
                        else {
                            print "$$piece[0] -- $$piece[1]\r\n -- \"$cream\" not found\r\n";
                            $fatal = 'unfortunately';
                        }
                    }
                }
                else {
                    # A lone word is a label.  "name" sets the local prefix,
                    # ".name" is local to the current prefix, and "a.b" names
                    # both.  Labels are stored as "namespace.prefix.local".
                    if ($cream !~ /\./) {
                        $prefix = $cream;
                        $cream = '';
                    }
                    elsif ($cream =~ /^\.[^\.]*$/) {
                        $cream = substr($cream, 1);
                    }
                    elsif ($cream =~ /^([^\.]+)\.([^\.]+)$/) {
                        ($prefix, $cream) = ($1, $2);
                    }
                    else {
                        print "$$piece[0] -- $$piece[1]\r\n -- I hate this label name! Choose a new one!\r\n";
                        # Give the bad label a unique key so it cannot also
                        # trigger a duplicate-label error.
                        $cream = "\x00" . $hate++;
                        $fatal = 'unfortunately';
                    }
                    if ($pass == 1) {
                        my $key = "$namespace.$prefix.$cream";
                        if (exists $labels{$key}) {
                            print "$$piece[0] -- $$piece[1]\r\n -- Label '$key' is also at " . $definitions{$key} . "\r\n";
                            $fatal = 'unfortunately';
                        }
                        else {
                            $labels{$key} = $sec{$section}[2];
                            $definitions{$key} = $$piece[0];
                        }
                    }
                }
            }
        }
    }
    exit(1) if $fatal;    # If pass one fails, skip pass two.
}

# Well, with any luck...
unless ($successfuloutput) {
    print "No errors, but lack of any 'output' statement means a lack of any output.\r\n";
}