34 std::list<irep_idt> linker_defined_symbols;
37 linker_defined_symbols,
40 linker_def_outfile());
49 error() <<
"Problem parsing linker script JSON data" <<
eom;
56 error() <<
"Malformed linker-script JSON document" <<
eom;
69 error() <<
"Unable to read goto binary for linker script merging" <<
eom;
106 error() <<
"Could not pointerize all linker-defined expressions" <<
eom;
113 error() <<
"Could not write linkerscript-augmented binary" <<
eom;
120 const std::string &elf_binary,
121 const std::string &goto_binary,
125 elf_binary(elf_binary), goto_binary(goto_binary),
127 replacement_predicates(
134 return expr.
id()==ID_address_of &&
135 expr.
type().
id()==ID_pointer &&
137 expr.
op0().
id()==ID_index &&
143 expr.
op0().
op1().
id()==ID_constant &&
151 return expr.
id()==ID_address_of &&
152 expr.
type().
id()==ID_pointer &&
154 expr.
op0().
id()==ID_symbol &&
162 return expr.
id()==ID_address_of &&
163 expr.
type().
id()==ID_pointer &&
165 expr.
op0().
id()==ID_symbol &&
166 ns.follow(expr.
op0().
type()).
id()==ID_struct;
173 return expr.
id()==ID_symbol &&
174 expr.
type().
id()==ID_array;
181 return expr.
id()==ID_symbol &&
182 expr.
type().
id()==ID_pointer;
196 for(
const auto &pair : linker_values)
198 const auto maybe_symbol=symbol_table.
get_writeable(pair.first);
204 entry.
value=pair.second.second;
209 for(
const auto &pair : symbol_table.
symbols)
211 std::list<symbol_exprt> to_pointerize;
214 if(to_pointerize.empty())
216 debug() <<
"Pointerizing the symbol-table value of symbol " << pair.first
223 if(to_pointerize.empty() && fail==0)
226 for(
const auto &sym : to_pointerize)
227 error() <<
" Could not pointerize '" << sym.get_identifier()
228 <<
"' in symbol table entry " << pair.first <<
". Pretty:\n" 229 << sym.pretty() <<
"\n";
240 for(
exprt *insts : std::list<exprt *>({&(iit->code), &(iit->guard)}))
242 std::list<symbol_exprt> to_pointerize;
244 if(to_pointerize.empty())
246 debug() <<
"Pointerizing a program expression..." <<
eom;
248 *insts, to_pointerize, linker_values, ns);
249 if(to_pointerize.empty() && fail==0)
252 for(
const auto &sym : to_pointerize)
254 error() <<
" Could not pointerize '" << sym.get_identifier()
255 <<
"' in function " << gf.first <<
". Pretty:\n" 256 << sym.pretty() <<
"\n";
271 const std::string &shape)
273 auto it=linker_values.find(ident);
274 if(it==linker_values.end())
276 error() <<
"Could not find a new expression for linker script-defined " 277 <<
"symbol '" << ident <<
"'" <<
eom;
282 debug() <<
"Pointerizing linker-defined symbol '" << ident <<
"' of shape <" 283 << shape <<
">." <<
eom;
290 std::list<symbol_exprt> &to_pointerize,
295 for(
auto const &pair : linker_values)
298 if(!pattern.match(expr, ns))
301 const symbol_exprt inner_symbol=pattern.inner_symbol(expr);
304 tmp=
replace_expr(expr, linker_values, inner_symbol, pair.first,
305 pattern.description());
307 auto result=std::find(to_pointerize.begin(), to_pointerize.end(),
309 if(
result==to_pointerize.end())
316 to_pointerize.erase(
result);
335 std::list<symbol_exprt> &to_pointerize)
const 337 for(
const auto &op : expr.
operands())
339 if(op.id()!=ID_symbol)
343 if(pair!=linker_values.end())
344 to_pointerize.push_back(sym_exp);
346 for(
const auto &op : expr.
operands())
351 The current implementation of
this function is less precise than the
352 commented-out version below. To understand the difference between these
353 implementations, consider the following example:
355 Suppose we have a section in the linker script, 100 bytes long, where the
356 address of the symbol sec_start is the start of the section (value 4096) and the
357 address of sec_end is the end of that section (value 4196).
359 The current implementation synthesizes the
goto-version of the following C:
361 char __sec_array[100];
362 char *sec_start=(&__sec_array[0]);
363 char *sec_end=((&__sec_array[0])+100);
367 This is imprecise
for the following reason: the actual address of the array and
368 the pointers shall be some random CBMC-
internal address, instead of being 4096
369 and 4196. The linker script, on the other hand, would have specified the exact
370 position of the section, and we even know what the actual values of sec_start
371 and sec_end are from the
object file (these values are in the `addresses` list
372 of the `
data` argument to
this function). If the correctness of the code depends
373 on these actual values, then CBMCs model of the code is too imprecise to verify
376 The commented-out version of
this function below synthesizes the following:
378 char *sec_start=4096;
382 This code has both the actual addresses of the start and end of the section and
383 tells CBMC that the intermediate region is valid. However, the allocated_memory
384 macro does not currently allocate an actual
object at the address 4096, so
385 symbolic execution can fail. In particular, the
'allocated memory' is part of
386 __CPROVER_memory, which does not have a bounded size;
this means that (
for 387 example) calls to memcpy or memset fail, because the first and third arguments
388 do not have know n size. The commented-out implementation should be reinstated
391 In either
case, no other changes to the rest of the code (outside
this function)
392 should be necessary. The rest of
this file converts expressions containing the
393 linker-defined symbol into pointer types
if they were not already, and
this is
394 the right behaviour
for both implementations.
398 const std::string &linker_script,
405 std::map<irep_idt, std::size_t> truncated_symbols;
406 for(
auto &d :
data[
"regions"].array)
408 bool has_end=d[
"has-end-symbol"].is_true();
412 << d[
"start-symbol"].value;
421 warning() <<
"Object section '" << d[
"section"].value <<
"' of size " 422 << array_size <<
" is too large to model. Truncating to " 435 std::ostringstream array_comment;
436 array_comment <<
"Object section '" << d[
"section"].value <<
"' of size " 438 array_loc.set_comment(array_comment.str());
439 array_expr.add_source_location()=array_loc;
447 linker_values[d[
"start-symbol"].value]=std::make_pair(start_sym,
452 auto it=std::find_if(
data[
"addresses"].array.begin(),
453 data[
"addresses"].array.end(),
455 {
return add[
"sym"].value==d[
"start-symbol"].value; });
456 if(it==
data[
"addresses"].array.end())
458 error() <<
"Start: Could not find address corresponding to symbol '" 459 << d[
"start-symbol"].value <<
"' (start of section)" << eom;
464 std::ostringstream start_comment;
465 start_comment <<
"Pointer to beginning of object section '" 466 << d[
"section"].value <<
"'. Original address in object file" 467 <<
" is " << (*it)[
"val"].value;
477 initialize_instructions.push_front(start_assign_i);
481 plus_exprt array_end(array_start, array_size_expr);
484 linker_values[d[
"end-symbol"].value]=std::make_pair(end_sym, array_end);
486 auto it=std::find_if(
data[
"addresses"].array.begin(),
487 data[
"addresses"].array.end(),
489 {
return add[
"sym"].value==d[
"end-symbol"].value; });
490 if(it==
data[
"addresses"].array.end())
492 error() <<
"Could not find address corresponding to symbol '" 493 << d[
"end-symbol"].value <<
"' (end of section)" << eom;
498 std::ostringstream end_comment;
499 end_comment <<
"Pointer to end of object section '" 500 << d[
"section"].value <<
"'. Original address in object file" 501 <<
" is " << (*it)[
"val"].value;
510 initialize_instructions.push_front(end_assign_i);
521 array_sym.
type=array_type;
523 symbol_table.
add(array_sym);
534 initialize_instructions.push_front(array_assign_i);
542 for(
const auto &d :
data[
"addresses"].array)
545 if(it!=linker_values.end())
552 const auto &pair=truncated_symbols.find(d[
"sym"].value);
553 if(pair==truncated_symbols.end())
554 symbol_value=d[
"val"].value;
557 debug() <<
"Truncating the value of symbol " << d[
"sym"].value <<
" from " 559 <<
" because it corresponds to the size of a too-large section." 565 loc.set_file(linker_script);
566 loc.set_comment(
"linker script-defined symbol: char *"+
567 d[
"sym"].value+
"="+
"(char *)"+
id2string(symbol_value)+
"u;");
579 linker_values[
irep_idt(d[
"sym"].value)]=std::make_pair(lhs, rhs_tc);
586 initialize_instructions.push_front(assign_i);
593 for(
const auto &d :
data[
"regions"].array)
606 loc.set_file(linker_script);
607 loc.set_comment(
"linker script-defined region:\n"+d[
"commt"].value+
":\n"+
613 initialize_instructions.push_front(i);
624 symbol_table.
add(sym);
627 for(
const auto &d :
data[
"addresses"].array)
630 loc.set_file(linker_script);
631 loc.set_comment(
"linker script-defined symbol: char *"+
632 d[
"sym"].value+
"="+
"(char *)"+d[
"val"].value+
"u;");
644 linker_values[
irep_idt(d[
"sym"].value)]=std::make_pair(lhs, rhs_tc);
647 assign.add_source_location()=
loc;
650 initialize_instructions.push_front(assign_i);
657 std::list<irep_idt> &linker_defined_symbols,
659 const std::string &out_file,
660 const std::string &def_out_file)
662 for(
auto const &pair : symbol_table.
symbols)
663 if(pair.second.is_extern && pair.second.value.is_nil() &&
664 pair.second.name!=
"__CPROVER_memory")
665 linker_defined_symbols.push_back(pair.second.name);
667 std::ostringstream linker_def_str;
669 linker_defined_symbols.begin(),
670 linker_defined_symbols.end(),
671 std::ostream_iterator<irep_idt>(linker_def_str,
"\n"));
672 debug() <<
"Linker-defined symbols: [" << linker_def_str.str() <<
"]\n" 676 std::ofstream linker_def_file(linker_def_infile());
677 linker_def_file << linker_def_str.str();
678 linker_def_file.close();
680 std::vector<std::string> argv=
684 "--object", out_file,
685 "--sym-file", linker_def_infile(),
686 "--out-file", def_out_file
690 argv.push_back(
"--very-verbose");
692 argv.push_back(
"--verbose");
695 for(std::size_t i=0; i<argv.size(); i++)
696 debug() <<
" " << argv[i];
699 int rc=
run(argv[0], argv, linker_def_infile(), def_out_file);
701 warning() <<
"Problem parsing linker script" <<
eom;
707 const std::list<irep_idt> &linker_defined_symbols,
711 for(
const auto &sym : linker_defined_symbols)
712 if(linker_values.find(sym)==linker_values.end())
715 error() <<
"Variable '" << sym <<
"' was declared extern but never given " 716 <<
"a value, even in a linker script" <<
eom;
719 for(
const auto &pair : linker_values)
721 auto it=std::find(linker_defined_symbols.begin(),
722 linker_defined_symbols.end(), pair.first);
723 if(it==linker_defined_symbols.end())
726 error() <<
"Linker script-defined symbol '" << pair.first <<
"' was " 727 <<
"either defined in the C source code, not declared extern in " 728 <<
"the C source code, or does not appear in the C source code" 737 return (!(
data.is_object() &&
738 data.object.find(
"regions")!=
data.object.end() &&
739 data.object.find(
"addresses")!=
data.object.end() &&
740 data[
"regions"].is_array() &&
741 data[
"addresses"].is_array() &&
742 std::all_of(
data[
"addresses"].array.begin(),
743 data[
"addresses"].array.end(),
746 return j.is_object() &&
747 j.object.find(
"val")!=j.object.end() &&
748 j.object.find(
"sym")!=j.object.end() &&
749 j[
"val"].is_number() &&
750 j[
"sym"].is_string();
752 std::all_of(
data[
"regions"].array.begin(),
753 data[
"regions"].array.end(),
756 return j.is_object() &&
757 j.object.find(
"start")!=j.object.end() &&
758 j.object.find(
"size")!=j.object.end() &&
759 j.object.find(
"annot")!=j.object.end() &&
760 j.object.find(
"commt")!=j.object.end() &&
761 j.object.find(
"start-symbol")!=j.object.end() &&
762 j.object.find(
"has-end-symbol")!=j.object.end() &&
763 j.object.find(
"section")!=j.object.end() &&
764 j[
"start"].is_number() &&
765 j[
"size"].is_number() &&
766 j[
"annot"].is_string() &&
767 j[
"start-symbol"].is_string() &&
768 j[
"section"].is_string() &&
769 j[
"commt"].is_string() &&
770 ( (j[
"has-end-symbol"].is_true() &&
771 j.object.find(
"end-symbol")!=j.object.end() &&
772 j[
"end-symbol"].is_string())
773 ||(j[
"has-end-symbol"].
is_false() &&
774 j.object.find(
"size-symbol")!=j.object.end() &&
775 j.object.find(
"end-symbol")==j.object.end() &&
776 j[
"size-symbol"].is_string()));
symbol_tablet symbol_table
irep_idt name
The unique identifier.
const std::string & id2string(const irep_idt &d)
pointer_typet pointer_type(const typet &subtype)
const mp_integer string2integer(const std::string &n, unsigned base)
void symbols_to_pointerize(const linker_valuest &linker_values, const exprt &expr, std::list< symbol_exprt > &to_pointerize) const
fill to_pointerize with names of linker symbols appearing in expr
unsignedbv_typet unsigned_int_type()
void set_comment(const irep_idt &comment)
const irep_idt & get_identifier() const
int run(const std::string &what, const std::vector< std::string > &argv, const std::string &std_input, const std::string &std_output, const std::string &std_error)
linker_script_merget(compilet &, const std::string &elf_binary, const std::string &goto_binary, const cmdlinet &, message_handlert &)
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
exprt value
Initial value of symbol.
std::string get_value(char option) const
function_mapt function_map
irep_idt pretty_name
Language-specific display name.
exprt::operandst argumentst
unsignedbv_typet size_type()
Symbol table entry.This is a symbol in the symbol table, stored in an object of type symbol_tablet...
Magic numbers used throughout the codebase.
A constant literal expression.
static mstreamt & eom(mstreamt &m)
int pointerize_linker_defined_symbols(goto_functionst &goto_functions, symbol_tablet &symbol_table, const linker_valuest &linker_values)
convert the type of linker script-defined symbols to char*
std::map< irep_idt, std::pair< symbol_exprt, exprt > > linker_valuest
void make_function_call(const codet &_code)
bool parse_json(std::istream &in, const std::string &filename, message_handlert &message_handler, jsont &dest)
mstreamt & warning() const
This class represents an instruction in the GOTO intermediate representation.
virtual symbolt * get_writeable(const irep_idt &name) override
Find a symbol in the symbol table for read-write access.
std::list< instructiont > instructionst
int pointerize_subexprs_of(exprt &expr, std::list< symbol_exprt > &to_pointerize, const linker_valuest &linker_values, const namespacet &ns)
Expression Initialization.
bool read_goto_binary(const std::string &filename, goto_modelt &dest, message_handlert &message_handler)
const irep_idt & id() const
void set_value(const irep_idt &value)
bool is_false(const literalt &l)
virtual bool isset(char option) const
const std::string & elf_binary
source_locationt source_location
void set_file(const irep_idt &file)
#define INITIALIZE_FUNCTION
instructionst instructions
The list of instructions in the goto program.
Patterns of expressions that should be replaced.
std::list< replacement_predicatet > replacement_predicates
The "shapes" of expressions to be replaced by a pointer.
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast a generic exprt to a symbol_exprt.
Operator to return the address of an object.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool write_object_file(const std::string &, const symbol_tablet &, goto_functionst &)
writes the goto functions in the function table to a binary format object file.
exprt zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns, message_handlert &message_handler)
int linker_data_is_malformed(const jsont &data) const
Validate output of the scripts/ls_parse.py tool.
A generic container class for the GOTO intermediate representation of one function.
unsigned integer2unsigned(const mp_integer &n)
int replace_expr(exprt &old_expr, const linker_valuest &linker_values, const symbol_exprt &old_symbol, const irep_idt &ident, const std::string &shape)
do the actual replacement of an expr with a new pointer expr
unsigned safe_string2unsigned(const std::string &str, int base)
typet type
Type of symbol.
source_locationt location
Source code location of definition of symbol.
message_handlert & get_message_handler()
mstreamt & result() const
source_locationt source_location
The location of the instruction in the source file.
Base class for all expressions.
int add_linker_script_definitions()
Add values of linkerscript-defined symbols to the goto-binary.
std::string to_string(const string_constraintt &expr)
Used for debug printing.
const source_locationt & source_location() const
int goto_and_object_mismatch(const std::list< irep_idt > &linker_defined_symbols, const linker_valuest &linker_values)
one-to-one correspondence between extern & linker symbols
const std::string integer2binary(const mp_integer &n, std::size_t width)
source_locationt & add_source_location()
#define Forall_goto_program_instructions(it, program)
Expression to hold a symbol (variable)
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
goto_programt coverage_criteriont message_handlert & message_handler
std::size_t integer2size_t(const mp_integer &n)
Merge linker script-defined symbols into a goto-program.
int ls_data2instructions(jsont &data, const std::string &linker_script, goto_programt &gp, symbol_tablet &symbol_table, linker_valuest &linker_values)
Write a list of definitions derived from data into gp's instructions member.
const std::size_t MAX_FLATTENED_ARRAY_SIZE
const std::string & goto_binary
void make_typecast(const typet &_type)
const irept & find(const irep_namet &name) const
bitvector_typet char_type()
int get_linker_script_data(std::list< irep_idt > &linker_defined_symbols, const symbol_tablet &symbol_table, const std::string &out_file, const std::string &def_out_file)
Write linker script definitions to linker_data.
std::string array_name(const namespacet &ns, const exprt &expr)
goal_filters add(util_make_unique< internal_goals_filtert >(message_handler))