#!/usr/bin/perl -w use strict; ###################################################################### # Artemis Marfez Reverse Engineer, (C) 2015-2019 Nosey Nick Waterman, # https://noseynick.org/artemis/ # All wrong righted, all rights reserved. Licensed under thev GNU # Affero General Public License v3.0 https://www.gnu.org/licenses/agpl.txt # with Commons Clause https://commonsclause.com/ v1.0 ###################################################################### # Given a load of Artemis hex on stdin (deadbeef headers stripped, # payloads only). This prog will analyse the packets and try to # reverse-engineer their formats. It will also look in more detail # at the 0x80803df9 packets for different object types, and attempt # to discover the field lengths. ###################################################################### # This program is named in honour of Zefram Cochrane, inventor # of the Earth's first Warp Drive. It seemed only fitting to # reverse Zefram's name for our reverse-enngineering tool :-) ###################################################################### use Data::Dumper; # only for Data::Dumper::qquote? use bytes; # OK there's SOME UTF16 kicking around, but honestly... # Assume the following types have the following length bitmasks # (only needed if this prog can't determine them from observations): my %assume = ( # 0x06 => 2, # observed bf20 should PROBABLY be ff30 ); $| = 1; # output autoflush my (%bits, %minid, %maxid, %dat, %pkts, $n); while (<>) { print "#" unless $n++ % 1000; print " $n packets...\n" unless $n % 100000; # Strip channel numbers. Also the DEADBEEF header # if you didn't read the instructions above :-p s/^(?:[CS]\d*\s*)?(?:efbeadde................................)?//; next unless /^([0-9a-f]{8,})/i; # ignore non-hex my ($pkttype, $payload) = unpack('V a*', pack('H*', $1)); next unless defined $pkttype; if ($pkttype == 0x80803df9) { objectBitStream($payload) } else { packet($pkttype, $payload) } } sub packet { my ($pkttype, $payload) = @_; # +++++++ definitely need to be able to handle packets with SUBTYPES but... push @{$pkts{sprintf('0x%08x', $pkttype)}}, $payload; } sub objectBitStream { my $payload = shift; my ($type, $id, $bits) = unpack("C V a*", $payload); return unless $type; # (We think) we have a good objectBitStream packet my $terminator = substr($payload, -4, 4, ''); unless ($terminator eq "\0\0\0\0") { $payload .= $terminator; print "Terminator wasn't \\0\\0\\0\\0 in... ", unpack('H*', $payload), "\n"; } push @{$dat{$type}}, $payload; $bits{$type} |= substr($bits,0,20); $minid{$type} = $id if $id < ($minid{$type} // 9999999999); $maxid{$type} = $id if $id > ($maxid{$type} // 0); } ###################################################################### print " $n packets scanned\n"; print "# Object analysis:\n" if %bits; my ($min, $max); for my $type (sort {$a <=> $b} keys %bits) { my $tmin = $minid{$type}; $min = $tmin if $tmin < ($min // 9999999999); my $tmax = $maxid{$type}; $max = $tmax if $tmax > ($max // 0); my $mask = $bits{$type}; my ($bits, $bytes, $unsure, $bitpat); while ($mask =~ s/^\xff//) { $bits += 8; $bytes++; $bitpat .= '.'; } if ($mask =~ /^\x7f/ ) { $bits += 7; $bytes++; $bitpat .= '[\0-\x7f]'; } elsif ($mask =~ /^\x3f/ ) { $bits += 6; $bytes++; $bitpat .= '[\0-\x3f]'; } elsif ($mask =~ /^\x1f/ ) { $bits += 5; $bytes++; $bitpat .= '[\0-\x1f]'; } elsif ($mask =~ /^\x0f/ ) { $bits += 4; $bytes++; $bitpat .= '[\0-\x0f]'; } elsif ($mask =~ /^\x07/ ) { $bits += 3; $bytes++; $bitpat .= '[\0-\7]'; } elsif ($mask =~ /^\x03/ ) { $bits += 2; $bytes++; $bitpat .= '[\0-\3]'; } elsif ($mask =~ /^\x01/ ) { $bits += 1; $bytes++; $bitpat .= '[\0\1]'; } elsif ($mask =~ /^\x00/ ) { $bytes++; $bitpat .= '\0'; } else { $bytes++; $bytes = $assume{$type} || $bytes; printf "#### ASSUMING %d bits (%d byte%s) due to poor mask %s:\n", $bytes*8, $bytes, $bytes > 1 ? 's' : '', unpack('H*', $bits{$type}); $bits = 8*$bytes; $bitpat = '.' x $bytes; } my @packets = sort { (length($a) <=> length($b)) || ($a cmp $b) } @{ delete $dat{$type}}; printf "# ObjType 0x%02x IDs %d..%d %s-bit field (%d byte%s) (%d packets)\n", $type, $tmin, $tmax, $bits, $bytes, $bytes > 1 ? 's' : '', scalar(@packets); my $typat = sprintf('\x%02x', $type); my $idpat; $tmax += 10; # to catch a few extra IDs that only appear later ++++++++++ MAY NOT BE ENOUGH for (0 .. 3) { # ++++ "Could do better" by collecting ACTUAL values for bytes 0..3? if ($tmin == $tmax) { $idpat .= sprintf '\x%02x', $tmin & 255; } elsif (($tmin | 255) == ($tmax | 255)) { # $idpat .= sprintf '[\x%02x-\x%02x]', $tmin & 255, $tmax & 255; $idpat .= '.'; # ++++++++++ $tmax += 10 might have been NOWHERE NEAR enough? } else { $idpat .= '.'; } $tmin = $tmin >> 8; $tmax = $tmax >> 8; } my %obj; print " ### Matching ^f93d8080 ($typat)($idpat)($bitpat):\n"; for my $packet (@packets) { my ($first, $ty, $id, $bits, $data, @rest) = split /($typat)($idpat)($bitpat)/s, $packet; print " NON-EMPTY ", unpack('H*', $first.$ty.$id.$bits.$data), "\n" if $first; while (defined $bits) { push @{ $obj{$bits} }, $data; ($ty, $id, $bits, $data, @rest) = @rest; } } for my $mask ( sort { (length(mask2bits($a)) <=> length(mask2bits($b))) || (mask2bits($a) cmp mask2bits($b)) } keys %obj ) { my $bits = mask2bits($mask); printf " Mask %s: (bits %s):\n", unpack('H*', $mask), $bits; analyse(@{ $obj{$mask} }); } } print "# ID range $min .. $max\n" if defined($min) && defined($max); if (%pkts) { for my $type (sort keys %pkts) { print "# Packet type $type:\n"; analyse(@{ $pkts{$type} }); }} sub analyse { my @lens; for (@_) { $lens[length $_]++; } print" lengths:"; for (0 .. scalar(@lens)-1) { print " ${_}B(x", $lens[$_], ")" if $lens[$_]; } print "\n"; # Look for possible strings, floats, u8, u16, u32: my @poss; for my $one (@_) { my $off = -1; my $adjoff = -1; while (++$off < length($one)) { $adjoff++; $poss[$adjoff] //= {}; my $is = $poss[$adjoff]; canbe($is, 'u8', unpack('C', substr($one, $off))); canbe($is, 'u16', unpack('v', substr($one, $off))); my $u32 = unpack('V', substr($one, $off)) // next; my $float = unpack('f<', substr($one, $off)); if ($u32 == 0) { # zero float OR zero u32 canbe($is, 'float', $float); canbe($is, 'u32', $u32); next; # don't try to check for 0-len str } elsif ($u32 == 0x80000000) { # -0 float? canbe($is, 'float', $float); canbe($is, 'u32', $u32); next; # not gonna be a string } elsif (($u32 & 0x70000000)==0x30000000 # small float 0.X or ($u32 & 0x78000000)==0x40000000 ) # big float XXXX.XXX { canbe($is, 'float', $float); # doubly likely # COULD be but less likely: for (-3 .. 3) { next unless $adjoff+$_ >= 0; $poss[$adjoff+$_]->{not_float}+=.5 if $_; $poss[$adjoff+$_]->{not_u32}++; $poss[$adjoff+$_]->{not_u16}++ if $_ > -2; $poss[$adjoff+$_]->{not_u8}++ if $_ > -1;; } next; # not gonna be a string } $is->{not_float}++; # or stupidly tiny one? canbe($is, 'u32', $u32); # How about the possibility of strings? Check u32 length: next unless ($off + 4 + $u32 + $u32) <= length($one); my @str = unpack('V/v', substr($one, $off)); # does this look like a vlid string? ... next unless scalar(@str) == $u32; # ,,, which we can parse? next if pop(@str); # ... with terminating zero? next if grep {$_ >= 127} @str; # all ASCII? next unless grep {$_ > 31} @str; # SOME printable? not 0s/1s? # OK, we SEEM to have a viable string. Store it, and adjust # offset for future floats/strings. The adjusted offset # attempts to remove PREVIOUS strings, in the hope that they # will all start in the same place, not influenced by # length of strings earlier in the packet. $is->{'str'}++; $is->{Data::Dumper::qquote(pack('U*', @str))}++; # COULD be but less likely: for (-3 .. 3) { next unless $adjoff+$_ >= 0; $poss[$adjoff+$_]->{not_float}++ if $_; $poss[$adjoff+$_]->{not_u32}++; $poss[$adjoff+$_]->{not_u16}++ if $_ > -2; $poss[$adjoff+$_]->{not_u8}++ if $_ > -1; } $off += $u32 + $u32; # but NOT $adjoff } } for (0 .. scalar(@poss)-1) { my $is = $poss[$_]; print " payload[$_]:"; for my $type (qw(u8 u16 u32 float str)) { my $count = (delete $is->{$type} // 0); my $not = (delete $is->{"not_$type"} // 0); $count += -$not; my $min = delete $is->{$type.'_min'}; my $max = delete $is->{$type.'_max'}; next unless $count > 0; print " $type("; print "x$count" if $count > 1; print " " if ($count > 1) && defined($min) && defined($max); print "$min..$max" if defined($min) && defined($max); print ")"; } for my $str (sort keys %$is) { print " ${str} (x", $is->{$str}, ")"; } print "\n"; } } sub canbe { my ($is, $type, $val, $add) = @_; return unless defined $val; $is->{$type} += $add // 1; $is->{$type.'_min'} //= $val; $is->{$type.'_min'} = $val if $val < $is->{$type.'_min'}; $is->{$type.'_max'} //= $val; $is->{$type.'_max'} = $val if $val > $is->{$type.'_max'}; } sub mask2bits { my (@ret, $byte); for my $b (unpack('C*', shift)) { $byte++; push @ret, "$byte.1" if $b & 1; push @ret, "$byte.2" if $b & 2; push @ret, "$byte.3" if $b & 4; push @ret, "$byte.4" if $b & 8; push @ret, "$byte.5" if $b & 16; push @ret, "$byte.6" if $b & 32; push @ret, "$byte.7" if $b & 64; push @ret, "$byte.8" if $b & 128; } return "@ret"; }