Convert output symbol table back to a table

It looks like at some point the output symbol
table was converted from an actual table of SYMREC
structs to an in-memory copy of a COFF/a.out
symbol table. This entailed relying on the
assumption that all symbols had an entry in the
string table when using OSTLookup()'s return
value as anything other than a boolean value,
as is done in the relocation processing logic.

In preparation for adding support for debug
symbols, which often have no string table entry,
revert to using an intermediate output symbol
table representation and serializing it one symbol
at a time when writing the output file. This
simplifies various code paths, but potentially
slows down writing COFF symbol tables to disk.
Fortunately, this table is not written with
default options, and is rather small unless
using debug symbols, so this shouldn't
significantly affect the runtime of most
existing use cases.
This commit is contained in:
James Jones 2022-07-26 03:36:50 -07:00 committed by Shamus Hammons
parent a63bf68077
commit 0d71dcf002
2 changed files with 125 additions and 164 deletions

276
rln.c
View File

@ -55,13 +55,12 @@ char * arPtr[512];
uint32_t arIndex = 0;
struct HREC * htable[NBUCKETS]; // Hash table
struct HREC * unresolved = NULL; // Pointer to unresolved hash list
char * ost; // Output symbol table
char * ost_ptr; // Output symbol table; current pointer
char * ost_end; // Output symbol table; end pointer
char * oststr; // Output string table
char * oststr_ptr; // Output string table; current pointer
char * oststr_end; // Output string table; end pointer
int ost_index = 0; // Index of next ost addition
struct SYMREC * ost; // Output symbol table
char * oststr = NULL; // Output string table
char * oststr_ptr = NULL; // Output string table; current pointer
char * oststr_end = NULL; // Output string table; end pointer
int ost_index = 0; // Index of next free ost entry
int ost_size = 0; // Size of ost
uint8_t nullStr[1] = "\x00"; // Empty string
struct HREC * arSymbol = NULL; // Pointer to AR symbol table
@ -139,6 +138,7 @@ int DoSymbols(struct OFILE * ofile)
int type;
long value;
int index;
char *string;
int j;
struct HREC * hptr;
uint32_t tsoSave, dsoSave, bsoSave;
@ -166,6 +166,7 @@ int DoSymbols(struct OFILE * ofile)
index = GetLong(symptr + 0); // Obtain symbol string index
type = GetLong(symptr + 4); // Obtain symbol type
value = GetLong(symptr + 8); // Obtain symbol value
string = index ? symend + index : "";
// Global/External symbols have a pre-processing stage
// N.B.: This destroys the t/d/bsegoffset discovered above. So if a
@ -176,21 +177,21 @@ int DoSymbols(struct OFILE * ofile)
// Obtain the string table index for the relocation symbol, look
// for it in the globals hash table to obtain information on that
// symbol.
hptr = LookupHREC(symend + index);
hptr = LookupHREC(string);
if (hptr == NULL)
{
// Try to find it in the OST
int ostIndex = OSTLookup(symend + index);
int ostIndex = OSTLookup(string);
if (ostIndex == -1)
{
printf("DoSymbols(): Symbol not found in hash table: '%s' (%s)\n", symend + index, ofile->o_name);
printf("DoSymbols(): Symbol not found in hash table: '%s' (%s)\n", string, ofile->o_name);
return 1;
}
if (vflag > 1)
printf("DoSymbols(): Skipping symbol '%s' (%s) found in OST...\n", symend + index, ofile->o_name);
printf("DoSymbols(): Skipping symbol '%s' (%s) found in OST...\n", string, ofile->o_name);
// If the symbol is not in any .a or .o units, it must be one
// of the injected ones (_TEXT_E, _DATA_E, or _BSS_E), so skip
@ -230,7 +231,7 @@ int DoSymbols(struct OFILE * ofile)
break;
default:
if (vflag > 1)
printf("DoSymbols: No adjustment made for symbol: %s (%s) = %X\n", symend + index, ofile->o_name, hptr->h_value);
printf("DoSymbols: No adjustment made for symbol: %s (%s) = %X\n", string, ofile->o_name, hptr->h_value);
}
}
}
@ -311,13 +312,13 @@ int DoSymbols(struct OFILE * ofile)
if (isglobal(type) || lflag)
{
if (vflag > 1)
printf("DoSymbols: Adding symbol: %s (%s) to OST...\n", symend + index, ofile->o_name);
printf("DoSymbols: Adding symbol: %s (%s) to OST...\n", string, ofile->o_name);
index = OSTAdd(symend + index, type, value);
index = OSTAdd(index ? string : NULL, type, value);
if (index == -1)
{
printf("DoSymbols(): Failed to add symbol '%s' to OST!\n", symend + index);
printf("DoSymbols(): Failed to add symbol '%s' to OST!\n", string);
return 1;
}
}
@ -394,77 +395,9 @@ long DoCommon(void)
//
int OSTAdd(char * name, int type, long value)
{
int ost_offset_p, ost_offset_e = 0; // OST table offsets for position calcs
int ost_offset_p = 0, ost_offset_e; // OST table offsets for position calcs
int ostresult; // OST index result
int slen = strlen(name);
// If the OST or OST string table has not been initialised then do so
if (ost_index == 0)
{
ost = malloc(OST_BLOCK);
oststr = malloc(OST_BLOCK);
if (ost == NULL)
{
printf("OST memory allocation error.\n");
return -1;
}
if (oststr == NULL)
{
printf("OSTSTR memory allocation error.\n");
return -1;
}
ost_ptr = ost; // Set OST start pointer
ost_end = ost + OST_BLOCK; // Set OST end pointer
PutLong(oststr, 0x00000004); // Just null long for now
oststr_ptr = oststr + 4; // Skip size of str table long (incl null long)
PutLong(oststr_ptr, 0x00000000); // Null terminating long
oststr_end = oststr + OST_BLOCK;
}
else
{
// If next symbol record exceeds current allocation then expand symbol
// table and/or symbol string table.
ost_offset_p = (ost_ptr - ost);
ost_offset_e = (ost_end - ost);
// 3 x uint32_t (12 bytes)
if ((ost_ptr + 12) > ost_end)
{
// We want to allocate the current size of the OST + another block.
ost = realloc(ost, ost_offset_e + OST_BLOCK);
if (ost == NULL)
{
printf("OST memory reallocation error.\n");
return -1;
}
ost_ptr = ost + ost_offset_p;
ost_end = (ost + ost_offset_e) + OST_BLOCK;
}
ost_offset_p = (oststr_ptr - oststr);
ost_offset_e = (oststr_end - oststr);
// string length + terminating NULL + uint32_t (terminal long)
if ((oststr_ptr + (slen + 1 + 4)) > oststr_end)
{
oststr = realloc(oststr, ost_offset_e + OST_BLOCK);
if (oststr == NULL)
{
printf("OSTSTR memory reallocation error.\n");
return -1;
}
oststr_ptr = oststr + ost_offset_p;
oststr_end = (oststr + ost_offset_e) + OST_BLOCK;
}
}
int slen; // String length, including terminator
// If this is a debug symbol and the include debug symbol flag (-g) is not
// set then do nothing
@ -474,47 +407,90 @@ int OSTAdd(char * name, int type, long value)
return 0;
}
if (!name || !name[0])
slen = 0;
else
slen = strlen(name) + 1;
// Get symbol index in OST, if any (-1 if not found)
ostresult = OSTLookup(name);
ostresult = slen ? OSTLookup(name) : -1;
// If the symbol is in the output symbol table and the bflag is set
// (don't remove multiply defined locals) and this is not an
// external/global symbol *** OR *** the symbol is not in the output
// symbol table then add it.
if (((ostresult != -1) && bflag && !(type & 0x01000000))
|| ((ostresult != -1) && gflag && (type & 0xF0000000))
|| (ostresult == -1))
// external/global symbol, or the gflag (output debug symbols) is
// set and this is a debug symbol, *** OR *** the symbol is not in the
// output symbol table then add it.
if ((ostresult != -1) && !(bflag && !(type & 0x01000000))
&& !(gflag && (type & 0xF0000000)))
{
if ((type & 0xF0000000) == 0x40000000)
PutLong(ost_ptr, 0x00000000); // Zero string table offset for dbg line
else
PutLong(ost_ptr, (oststr_ptr - oststr)); // String table offset of symbol string
PutLong(ost_ptr + 4, type);
PutLong(ost_ptr + 8, value);
ost_ptr += 12;
// If the symbol type is anything but a debug line information
// symbol then write the symbol string to the string table
if ((type & 0xF0000000) != 0x40000000)
{
strcpy(oststr_ptr, name); // Put symbol name in string table
*(oststr_ptr + slen) = '\0'; // Add null terminating character
oststr_ptr += (slen + 1);
PutLong(oststr_ptr, 0x00000000); // Null terminating long
PutLong(oststr, (oststr_ptr - oststr)); // Update size of string table
}
if (vflag > 1)
printf("OSTAdd: (%s), type=$%08X, val=$%08lX\n", name, type, value);
// is ost_index pointing one past?
// does this return the same regardless of if its ++n or n++?
// no. it returns the value of ost_index *before* it's incremented.
return ++ost_index;
return ostresult;
}
return ostresult;
// If the OST has not been initialised, or more space is needed, then
// allocate it.
if ((ost_index + 1) > ost_size)
{
if (ost_size == 0)
ost_size = OST_SIZE_INIT;
ost_size *= 2;
ost = realloc(ost, ost_size * sizeof(ost[0]));
if (ost == NULL)
{
printf("OST memory allocation error.\n");
return -1;
}
}
if (slen)
{
ost_offset_p = (oststr_ptr - oststr);
ost_offset_e = (oststr_end - oststr);
// If the OST data has been exhausted, allocate another chunk.
if (((oststr_ptr + slen + 4) > oststr_end))
{
// string length + terminating NULL + uint32_t (terminal long)
if ((oststr_ptr + (slen + 1 + 4)) > oststr_end)
{
oststr = realloc(oststr, ost_offset_e + OST_BLOCK);
if (oststr == NULL)
{
printf("OSTSTR memory reallocation error.\n");
return -1;
}
oststr_ptr = oststr + ost_offset_p;
oststr_end = (oststr + ost_offset_e) + OST_BLOCK;
// On the first alloc, reserve space for the string table
// size field.
if (ost_offset_e == 0)
oststr_ptr += 4;
}
}
strcpy(oststr_ptr, name); // Put symbol name in string table
oststr_ptr += slen;
oststr_ptr[-1] = '\0'; // Add null terminating character
PutLong(oststr_ptr, 0x00000000); // Null terminating long
PutLong(oststr, (oststr_ptr - oststr)); // Update size of string table
}
ostresult = ost_index++;
ost[ostresult].s_idx = ost_offset_p;
ost[ostresult].s_type = type;
ost[ostresult].s_value = value;
if (vflag > 1)
printf("OSTAdd: (%s), type=$%08X, val=$%08lX\n",
slen ? name : "", type, value);
return ost_index;
}
@ -525,14 +501,11 @@ int OSTAdd(char * name, int type, long value)
int OSTLookup(char * sym)
{
int i;
int stro = 4; // Offset in string table
for(i=0; i<ost_index; i++)
{
if (strcmp(oststr + stro, sym) == 0)
if (ost[i].s_idx && (strcmp(oststr + ost[i].s_idx, sym) == 0))
return i + 1;
stro += strlen(oststr + stro) + 1;
}
return -1;
@ -689,7 +662,7 @@ int RelocateSegment(struct OFILE * ofile, int flag)
strcpy(sym, symbols + symidx);
olddata = newdata = 0; // Initialise old and new segment data
ssidx = OSTLookup(sym);
newdata = GetLong(ost + ((ssidx - 1) * 12) + 8);
newdata = ost[ssidx - 1].s_value;
}
// Obtain the existing long word (or word) segment data and flip words
@ -1076,10 +1049,8 @@ int WriteOutputFile(struct OHEADER * header)
int i, j; // Iterators
char himage[0x168]; // Header image (COF = 0xA8)
uint32_t tsoff, dsoff, bsoff; // Segment offset values
unsigned index, type, value; // Symbol table index, type and value
short abstype; // ABS symbol type
char symbol[14]; // Symbol record for ABS files
int slen; // Symbol string length
char symbol[14]; // raw symbol record
symoffset = 0; // Initialise symbol offset
@ -1267,8 +1238,15 @@ int WriteOutputFile(struct OHEADER * header)
{
if (header->ssize)
{
if (fwrite(ost, (ost_ptr - ost), 1, fd) != 1)
goto werror;
for (i = 0; i < ost_index; i++)
{
PutLong(symbol, ost[i].s_idx);
PutLong(symbol + 4, ost[i].s_type);
PutLong(symbol + 8, ost[i].s_value);
if (fwrite(symbol, 12, 1, fd) != 1)
goto werror;
}
if (fwrite(oststr, (oststr_ptr - oststr), 1, fd) != 1)
goto werror;
@ -1288,32 +1266,16 @@ int WriteOutputFile(struct OHEADER * header)
{
memset(symbol, 0, 14); // Initialise symbol record
abstype = 0; // Initialise ABS symbol type
slen = 0; // Initialise symbol string length
index = GetLong(ost + (i * 12)); // Get symbol index
type = GetLong((ost + (i * 12)) + 4); // Get symbol type
// Skip debug symbols
if (type & 0xF0000000)
if (ost[i].s_type & 0xF0000000)
continue;
// Get symbol value
value = GetLong((ost + (i * 12)) + 8);
slen = strlen(oststr + index);
// Get symbol string (maximum 8 chars)
if (slen > 8)
{
for(j=0; j<8; j++)
*(symbol + j) = *(oststr + index + j);
}
else
{
for(j=0; j<slen; j++)
*(symbol + j) = *(oststr + index + j);
}
strncpy(symbol, oststr + ost[i].s_idx, 8);
// Modify to ABS symbol type
switch (type)
switch (ost[i].s_type)
{
case 0x02000000: abstype = (short)ABST_DEFINED; break;
case 0x04000000: abstype = (short)ABST_DEFINED | ABST_TEXT; break;
@ -1323,13 +1285,13 @@ int WriteOutputFile(struct OHEADER * header)
case 0x08000000: abstype = (short)ABST_DEFINED | ABST_BSS; break;
case 0x09000000: abstype = (short)ABST_DEFINED | ABST_GLOBAL | ABST_BSS; break;
default:
printf("warning (WriteOutputFile): ABS, cannot determine symbol type ($%08X) [%s]\n", type, symbol);
printf("warning (WriteOutputFile): ABS, cannot determine symbol type ($%08X) [%s]\n", ost[i].s_type, symbol);
// type = 0;
break;
}
PutWord(symbol + 8, abstype); // Write back new ABS type
PutLong(symbol + 10, value); // Write back value
PutWord(symbol + 8, abstype); // Write back new ABS type
PutLong(symbol + 10, ost[i].s_value); // Write back value
// Write symbol record
if (fwrite(symbol, 14, 1, fd) != 1)
@ -1391,10 +1353,10 @@ int ShowSymbolLoadMap(struct OHEADER * header)
// Inner loop to process each record in the symbol table
for(i=0; i<(unsigned)ost_index; i++)
{
index = GetLong(ost + (i * 12)); // Get symbol string index
type = GetLong(ost + (i * 12) + 4); // Get symbol type
value = GetLong(ost + (i * 12) + 8); // Get symbol value
symbol = oststr + index; // Get symbol string
index = ost[i].s_idx; // Get symbol string index
type = ost[i].s_type; // Get symbol type
value = ost[i].s_value; // Get symbol value
symbol = index ? oststr + index : ""; // Get symbol string
// Display only three columns
if (c == 3)
@ -1645,8 +1607,8 @@ struct OHEADER * MakeOutputObject()
header->tsize = textsize; // TEXT segment size
header->dsize = datasize; // DATA segment size
header->bsize = bsssize; // BSS segment size
header->ssize = (ost_ptr - ost); // Symbol table size
header->ostbase = ost; // Output symbol table base address
header->ssize = ost_index * 12; // Symbol table size
header->ostbase = NULL; // Output symbol table base address
// For each object file, relocate its TEXT and DATA segments. OR the result
// into ret so all files get moved (and errors reported) before returning

13
rln.h
View File

@ -250,18 +250,15 @@ struct OFILE
// Symbol Record
// SYMREC: Used by builddir for the lists of exports and imports, and by the
// linker for the output symbol table (that's why there are type and value
// fields, unused in builddir)
// SYMREC: Used by the linker for the output symbol table
#define SYMLEN 100 // Symbol name size (incl. null)
#define OST_SIZE_INIT 8 // Half the initial output symbol table size
struct SYMREC
{
uint8_t s_name[SYMLEN]; // Including null terminator
uint16_t s_type;
uint32_t s_idx;
uint32_t s_type;
uint32_t s_value;
struct SYMREC * s_next;
};
#define new_symrec() (struct SYMREC *)malloc(sizeof(struct SYMREC))
@ -272,6 +269,8 @@ struct SYMREC
// and Globals share a hash table, but their value fields are interpreted
// differently.
#define SYMLEN 100 // Symbol name size (incl. null)
struct HREC
{
uint8_t h_sym[SYMLEN];