38 auto original_goto_model =
41 if(!original_goto_model.has_value())
43 log.
error() <<
"Unable to read goto binary for linker script merging"
49 std::list<irep_idt> linker_defined_symbols;
51 linker_defined_symbols,
52 original_goto_model->symbol_table,
54 linker_def_outfile());
79 if(pair == original_goto_model->goto_functions.function_map.end())
89 original_goto_model->symbol_table,
109 log.
error() <<
"Could not pointerize all linker-defined expressions"
116 *original_goto_model,
122 log.
error() <<
"Could not write linkerscript-augmented binary"
130 const std::string &elf_binary,
131 const std::string &goto_binary,
134 : elf_binary(elf_binary),
135 goto_binary(goto_binary),
137 log(message_handler),
138 replacement_predicates(
140 "address of array's first member",
145 [](
const exprt &expr) {
146 return expr.
id() == ID_address_of &&
147 expr.
type().
id() == ID_pointer &&
155 .
id() == ID_symbol &&
163 .
id() == ID_constant &&
167 .
id() == ID_signedbv;
174 [](
const exprt &expr) {
175 return expr.
id() == ID_address_of &&
176 expr.
type().
id() == ID_pointer &&
186 [](
const exprt &expr) {
187 return expr.
id() == ID_address_of &&
188 expr.
type().
id() == ID_pointer &&
200 [](
const exprt &expr) {
201 return expr.
id() == ID_symbol && expr.
type().
id() == ID_array;
204 "pointer (does not need pointerizing)",
208 [](
const exprt &expr) {
209 return expr.
id() == ID_symbol && expr.
type().
id() == ID_pointer;
221 for(
const auto &pair : linker_values)
229 entry.
value=pair.second.second;
236 std::list<symbol_exprt> to_pointerize;
239 if(to_pointerize.empty())
241 log.
debug() <<
"Pointerizing the symbol-table value of symbol "
247 if(to_pointerize.empty() && fail==0)
250 for(
const auto &sym : to_pointerize)
252 log.
error() <<
" Could not pointerize '" << sym.get_identifier()
253 <<
"' in symbol table entry " << pair.first <<
". Pretty:\n"
254 << sym.pretty() <<
"\n";
266 for(
exprt *insts : std::list<exprt *>(
267 {&(instruction.code_nonconst()), &(instruction.guard)}))
269 std::list<symbol_exprt> to_pointerize;
271 if(to_pointerize.empty())
275 if(to_pointerize.empty() && fail==0)
278 for(
const auto &sym : to_pointerize)
280 log.
error() <<
" Could not pointerize '" << sym.get_identifier()
281 <<
"' in function " << gf.first <<
". Pretty:\n"
282 << sym.pretty() <<
"\n";
297 const std::string &shape)
299 auto it=linker_values.find(ident);
300 if(it==linker_values.end())
302 log.
error() <<
"Could not find a new expression for linker script-defined "
308 log.
debug() <<
"Pointerizing linker-defined symbol '" << ident
316 std::list<symbol_exprt> &to_pointerize,
320 for(
auto const &pair : linker_values)
323 if(!pattern.match(expr))
326 const symbol_exprt inner_symbol=pattern.inner_symbol(expr);
329 tmp=
replace_expr(expr, linker_values, inner_symbol, pair.first,
330 pattern.description());
332 auto result=std::find(to_pointerize.begin(), to_pointerize.end(),
334 if(result==to_pointerize.end())
341 to_pointerize.erase(result);
360 std::list<symbol_exprt> &to_pointerize)
const
362 for(
const auto &op : expr.
operands())
364 if(op.id()!=ID_symbol)
368 if(pair!=linker_values.end())
369 to_pointerize.push_back(sym_exp);
371 for(
const auto &op : expr.
operands())
376 The current implementation of
this function is less precise than the
377 commented-out version below. To understand the difference between these
378 implementations, consider the following example:
380 Suppose we have a section in the linker script, 100 bytes long, where the
381 address of the symbol sec_start is the start of the section (value 4096) and the
382 address of sec_end is the end of that section (value 4196).
384 The current implementation synthesizes the
goto-version of the following C:
386 char __sec_array[100];
387 char *sec_start=(&__sec_array[0]);
388 char *sec_end=((&__sec_array[0])+100);
392 This is imprecise
for the following reason: the actual address of the array and
393 the pointers shall be some random CBMC-
internal address, instead of being 4096
394 and 4196. The linker script, on the other hand, would have specified the exact
395 position of the section, and we even know what the actual values of sec_start
396 and sec_end are from the
object file (these values are in the `addresses` list
397 of the `
data` argument to
this function). If the correctness of the code depends
398 on these actual values, then CBMCs model of the code is too imprecise to verify
401 The commented-out version of
this function below synthesizes the following:
403 char *sec_start=4096;
407 This code has both the actual addresses of the start and end of the section and
408 tells CBMC that the intermediate region is valid. However, the allocated_memory
409 macro does not currently allocate an actual
object at the address 4096, so
410 symbolic execution can fail. In particular, the
'allocated memory' is part of
411 __CPROVER_memory, which does not have a bounded size;
this means that (
for
412 example) calls to memcpy or memset fail, because the first and third arguments
413 do not have know n size. The commented-out implementation should be reinstated
416 In either
case,
no other changes to the rest of the code (outside
this function)
417 should be necessary. The rest of
this file converts expressions containing the
418 linker-defined symbol into pointer types
if they were not already, and
this is
419 the right behaviour
for both implementations.
423 const std::string &linker_script,
430 std::map<irep_idt, std::size_t> truncated_symbols;
433 bool has_end=d[
"has-end-symbol"].is_true();
437 << d[
"start-symbol"].value;
446 log.warning() <<
"Object section '" << d[
"section"].value <<
"' of size "
447 << array_size <<
" is too large to model. Truncating to "
460 std::ostringstream array_comment;
461 array_comment <<
"Object section '" << d[
"section"].value <<
"' of size "
462 << array_size <<
" bytes";
472 linker_values.emplace(
473 d[
"start-symbol"].value, std::make_pair(start_sym, array_start));
477 auto it = std::find_if(
480 [&d](
const jsont &add) {
481 return add[
"sym"].
value == d[
"start-symbol"].value;
485 log.error() <<
"Start: Could not find address corresponding to symbol '"
486 << d[
"start-symbol"].value <<
"' (start of section)"
492 std::ostringstream start_comment;
493 start_comment <<
"Pointer to beginning of object section '"
494 << d[
"section"].value <<
"'. Original address in object file"
495 <<
" is " << (*it)[
"val"].value;
504 initialize_instructions.push_front(start_assign_i);
508 plus_exprt array_end(array_start, array_size_expr);
511 linker_values.emplace(
512 d[
"end-symbol"].value, std::make_pair(end_sym, array_end));
514 auto entry = std::find_if(
517 [&d](
const jsont &add) {
518 return add[
"sym"].
value == d[
"end-symbol"].value;
522 log.debug() <<
"Could not find address corresponding to symbol '"
523 << d[
"end-symbol"].value <<
"' (end of section)"
529 std::ostringstream end_comment;
530 end_comment <<
"Pointer to end of object section '" << d[
"section"].value
531 <<
"'. Original address in object file"
532 <<
" is " << (*entry)[
"val"].value;
540 initialize_instructions.push_front(end_assign_i);
551 array_sym.
type=array_type;
553 symbol_table.
add(array_sym);
564 initialize_instructions.push_front(array_assign_i);
574 auto it=linker_values.find(
irep_idt(d[
"sym"].value));
575 if(it!=linker_values.end())
582 const auto &pair=truncated_symbols.find(d[
"sym"].value);
583 if(pair==truncated_symbols.end())
584 symbol_value=d[
"val"].value;
588 <<
"Truncating the value of symbol " << d[
"sym"].value <<
" from "
590 <<
" because it corresponds to the size of a too-large section."
597 loc.
set_comment(
"linker script-defined symbol: char *"+
598 d[
"sym"].value+
"="+
"(char *)"+
id2string(symbol_value)+
"u;");
610 linker_values.emplace(
611 irep_idt(d[
"sym"].value), std::make_pair(lhs, rhs_tc));
617 initialize_instructions.push_front(assign_i);
636 loc.set_comment(
"linker script-defined region:\n"+d[
"commt"].value+
":\n"+
638 f.add_source_location()=loc;
641 i.make_function_call(f);
642 initialize_instructions.push_front(i);
653 symbol_table.
add(sym);
660 loc.
set_comment(
"linker script-defined symbol: char *"+
661 d[
"sym"].value+
"="+
"(char *)"+d[
"val"].value+
"u;");
673 linker_values.emplace(
674 irep_idt(d[
"sym"].value), std::make_pair(lhs, rhs_tc));
677 assign.add_source_location()=loc;
679 assign_i.make_assignment(assign);
680 initialize_instructions.push_front(assign_i);
687 std::list<irep_idt> &linker_defined_symbols,
689 const std::string &out_file,
690 const std::string &def_out_file)
692 for(
auto const &pair : symbol_table.
symbols)
695 pair.second.is_extern && pair.second.value.is_nil() &&
698 linker_defined_symbols.push_back(pair.second.name);
702 std::ostringstream linker_def_str;
704 linker_defined_symbols.begin(),
705 linker_defined_symbols.end(),
706 std::ostream_iterator<irep_idt>(linker_def_str,
"\n"));
707 log.
debug() <<
"Linker-defined symbols: [" << linker_def_str.str() <<
"]\n"
711 std::ofstream linker_def_file(linker_def_infile());
712 linker_def_file << linker_def_str.str();
713 linker_def_file.close();
715 std::vector<std::string> argv=
719 "--object", out_file,
720 "--sym-file", linker_def_infile(),
721 "--out-file", def_out_file
725 argv.push_back(
"--very-verbose");
727 argv.push_back(
"--verbose");
730 for(std::size_t i=0; i<argv.size(); i++)
734 int rc =
run(argv[0], argv, linker_def_infile(), def_out_file,
"");
742 const std::list<irep_idt> &linker_defined_symbols,
746 for(
const auto &sym : linker_defined_symbols)
747 if(linker_values.find(sym)==linker_values.end())
751 <<
"' was declared extern but never given "
755 for(
const auto &pair : linker_values)
757 auto it=std::find(linker_defined_symbols.begin(),
758 linker_defined_symbols.end(), pair.first);
759 if(it==linker_defined_symbols.end())
763 <<
"Linker script-defined symbol '" << pair.first <<
"' was "
764 <<
"either defined in the C source code, not declared extern in "
765 <<
"the C source code, or does not appear in the C source code"
774 if(!
data.is_object())
779 !(data_object.
find(
"regions") != data_object.
end() &&
780 data_object.
find(
"addresses") != data_object.
end() &&
781 data[
"regions"].is_array() &&
data[
"addresses"].is_array() &&
790 return address.
find(
"val") != address.end() &&
791 address.find(
"sym") != address.end() &&
792 address[
"val"].is_number() && address[
"sym"].is_string();
802 return region.
find(
"start") != region.end() &&
803 region.find(
"size") != region.end() &&
804 region.find(
"annot") != region.end() &&
805 region.find(
"commt") != region.end() &&
806 region.find(
"start-symbol") != region.end() &&
807 region.find(
"has-end-symbol") != region.end() &&
808 region.find(
"section") != region.end() &&
809 region[
"start"].is_number() && region[
"size"].is_number() &&
810 region[
"annot"].is_string() &&
811 region[
"start-symbol"].is_string() &&
812 region[
"section"].is_string() && region[
"commt"].is_string() &&
813 ((region[
"has-end-symbol"].is_true() &&
814 region.find(
"end-symbol") != region.end() &&
815 region[
"end-symbol"].is_string()) ||
816 (region[
"has-end-symbol"].is_false() &&
817 region.find(
"size-symbol") != region.end() &&
818 region.find(
"end-symbol") == region.end() &&
819 region[
"size-symbol"].is_string()));
std::string array_name(const namespacet &ns, const exprt &expr)
unsignedbv_typet unsigned_int_type()
unsignedbv_typet size_type()
pointer_typet pointer_type(const typet &subtype)
bitvector_typet char_type()
Operator to return the address of an object.
std::string get_value(char option) const
virtual bool isset(char option) const
A codet representing an assignment in the program.
codet representation of a function call statement.
static bool write_bin_object_file(const std::string &file_name, const goto_modelt &src_goto_model, bool validate_goto_model, message_handlert &message_handler)
Writes the goto functions of src_goto_model to a binary format object file.
A constant literal expression.
void set_value(const irep_idt &value)
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Base class for all expressions.
source_locationt & add_source_location()
const source_locationt & source_location() const
typet & type()
Return the type of the expression.
function_mapt function_map
symbol_tablet symbol_table
Symbol table.
goto_functionst goto_functions
GOTO functions.
This class represents an instruction in the GOTO intermediate representation.
A generic container class for the GOTO intermediate representation of one function.
instructionst instructions
The list of instructions in the goto program.
static instructiont make_assignment(const code_assignt &_code, const source_locationt &l=source_locationt::nil())
Create an assignment instruction.
std::list< instructiont > instructionst
const irep_idt & id() const
iterator find(const std::string &key)
linker_script_merget(const std::string &elf_binary, const std::string &goto_binary, const cmdlinet &, message_handlert &)
int pointerize_linker_defined_symbols(goto_modelt &, const linker_valuest &)
convert the type of linker script-defined symbols to char*
void symbols_to_pointerize(const linker_valuest &linker_values, const exprt &expr, std::list< symbol_exprt > &to_pointerize) const
fill to_pointerize with names of linker symbols appearing in expr
int add_linker_script_definitions()
Add values of linkerscript-defined symbols to the goto-binary.
const std::string & elf_binary
int get_linker_script_data(std::list< irep_idt > &linker_defined_symbols, const symbol_tablet &symbol_table, const std::string &out_file, const std::string &def_out_file)
Write linker script definitions to linker_data.
std::map< irep_idt, std::pair< symbol_exprt, exprt > > linker_valuest
int pointerize_subexprs_of(exprt &expr, std::list< symbol_exprt > &to_pointerize, const linker_valuest &linker_values)
std::list< replacement_predicatet > replacement_predicates
The "shapes" of expressions to be replaced by a pointer.
int ls_data2instructions(jsont &data, const std::string &linker_script, goto_programt &gp, symbol_tablet &symbol_table, linker_valuest &linker_values)
Write a list of definitions derived from data into gp's instructions member.
int replace_expr(exprt &old_expr, const linker_valuest &linker_values, const symbol_exprt &old_symbol, const irep_idt &ident, const std::string &shape)
do the actual replacement of an expr with a new pointer expr
int goto_and_object_mismatch(const std::list< irep_idt > &linker_defined_symbols, const linker_valuest &linker_values)
one-to-one correspondence between extern & linker symbols
int linker_data_is_malformed(const jsont &data) const
Validate output of the scripts/ls_parse.py tool.
const std::string & goto_binary
unsigned get_verbosity() const
source_locationt source_location
mstreamt & warning() const
message_handlert & get_message_handler()
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
The plus expression Associativity is not specified.
Patterns of expressions that should be replaced.
void set_comment(const irep_idt &comment)
void set_file(const irep_idt &file)
Expression to hold a symbol (variable)
const irep_idt & get_identifier() const
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
const symbolst & symbols
Read-only field, used to look up symbols given their names.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
virtual symbolt * get_writeable(const irep_idt &name) override
Find a symbol in the symbol table for read-write access.
source_locationt location
Source code location of definition of symbol.
typet type
Type of symbol.
irep_idt name
The unique identifier.
irep_idt pretty_name
Language-specific display name.
exprt value
Initial value of symbol.
Semantic type conversion.
static exprt conditional_cast(const exprt &expr, const typet &type)
Compile and link source and object files.
optionalt< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Expression Initialization.
const std::string & id2string(const irep_idt &d)
json_arrayt & to_json_array(jsont &json)
json_objectt & to_json_object(jsont &json)
bool parse_json(std::istream &in, const std::string &filename, message_handlert &message_handler, jsont &dest)
Merge linker script-defined symbols into a goto-program.
Magic numbers used throughout the codebase.
const std::size_t MAX_FLATTENED_ARRAY_SIZE
const mp_integer string2integer(const std::string &n, unsigned base)
API to expression classes for Pointers.
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast an exprt to an address_of_exprt.
static bool read_goto_binary(const std::string &filename, symbol_tablet &, goto_functionst &, message_handlert &)
Read a goto binary from a file, but do not update config.
int run(const std::string &what, const std::vector< std::string > &argv)
#define CHECK_RETURN(CONDITION)
#define INITIALIZE_FUNCTION
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
unsigned safe_string2unsigned(const std::string &str, int base)
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.