cprover
linker_script_merge.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Linker Script Merging
4 
5 Author: Kareem Khazem <karkhaz@karkhaz.com>, 2017
6 
7 \*******************************************************************/
8 
9 #include "linker_script_merge.h"
10 
11 #include <algorithm>
12 #include <fstream>
13 #include <iterator>
14 
15 #include <util/arith_tools.h>
16 #include <util/c_types.h>
17 #include <util/cmdline.h>
18 #include <util/expr_initializer.h>
19 #include <util/magic.h>
20 #include <util/pointer_expr.h>
21 #include <util/run.h>
22 #include <util/tempfile.h>
23 
24 #include <json/json_parser.h>
25 
27 
30 
31 #include "compile.h"
32 
34 {
35  if(!cmdline.isset('T'))
36  return 0;
37 
38  auto original_goto_model =
40 
41  if(!original_goto_model.has_value())
42  {
43  log.error() << "Unable to read goto binary for linker script merging"
44  << messaget::eom;
45  return 1;
46  }
47 
48  temporary_filet linker_def_outfile("goto-cc-linker-info", ".json");
49  std::list<irep_idt> linker_defined_symbols;
50  int fail = get_linker_script_data(
51  linker_defined_symbols,
52  original_goto_model->symbol_table,
53  elf_binary,
54  linker_def_outfile());
55  // ignore linker script parsing failures until the code is tested more widely
56  if(fail!=0)
57  return 0;
58 
59  jsont data;
60  fail = parse_json(linker_def_outfile(), log.get_message_handler(), data);
61  if(fail!=0)
62  {
63  log.error() << "Problem parsing linker script JSON data" << messaget::eom;
64  return fail;
65  }
66 
68  if(fail!=0)
69  {
70  log.error() << "Malformed linker-script JSON document" << messaget::eom;
71  data.output(log.error());
72  return fail;
73  }
74 
75  fail=1;
76  linker_valuest linker_values;
77  const auto &pair =
78  original_goto_model->goto_functions.function_map.find(INITIALIZE_FUNCTION);
79  if(pair == original_goto_model->goto_functions.function_map.end())
80  {
81  log.error() << "No " << INITIALIZE_FUNCTION << " found in goto_functions"
82  << messaget::eom;
83  return fail;
84  }
85  fail = ls_data2instructions(
86  data,
87  cmdline.get_value('T'),
88  pair->second.body,
89  original_goto_model->symbol_table,
90  linker_values);
91  if(fail!=0)
92  {
93  log.error() << "Could not add linkerscript defs to " INITIALIZE_FUNCTION
94  << messaget::eom;
95  return fail;
96  }
97 
98  fail=goto_and_object_mismatch(linker_defined_symbols, linker_values);
99  if(fail!=0)
100  return fail;
101 
102  // The keys of linker_values are exactly the elements of
103  // linker_defined_symbols, so iterate over linker_values from now on.
104 
105  fail = pointerize_linker_defined_symbols(*original_goto_model, linker_values);
106 
107  if(fail!=0)
108  {
109  log.error() << "Could not pointerize all linker-defined expressions"
110  << messaget::eom;
111  return fail;
112  }
113 
115  goto_binary,
116  *original_goto_model,
117  cmdline.isset("validate-goto-model"),
119 
120  if(fail!=0)
121  {
122  log.error() << "Could not write linkerscript-augmented binary"
123  << messaget::eom;
124  }
125 
126  return fail;
127 }
128 
130  const std::string &elf_binary,
131  const std::string &goto_binary,
132  const cmdlinet &cmdline,
133  message_handlert &message_handler)
134  : elf_binary(elf_binary),
135  goto_binary(goto_binary),
136  cmdline(cmdline),
137  log(message_handler),
138  replacement_predicates(
140  "address of array's first member",
141  [](const exprt &expr) -> const symbol_exprt & {
142  return to_symbol_expr(
143  to_index_expr(to_address_of_expr(expr).object()).index());
144  },
145  [](const exprt &expr) {
146  return expr.id() == ID_address_of &&
147  expr.type().id() == ID_pointer &&
148 
149  to_address_of_expr(expr).object().id() == ID_index &&
150  to_address_of_expr(expr).object().type().id() ==
151  ID_unsignedbv &&
152 
153  to_index_expr(to_address_of_expr(expr).object())
154  .array()
155  .id() == ID_symbol &&
156  to_index_expr(to_address_of_expr(expr).object())
157  .array()
158  .type()
159  .id() == ID_array &&
160 
161  to_index_expr(to_address_of_expr(expr).object())
162  .index()
163  .id() == ID_constant &&
164  to_index_expr(to_address_of_expr(expr).object())
165  .index()
166  .type()
167  .id() == ID_signedbv;
168  }),
170  "address of array",
171  [](const exprt &expr) -> const symbol_exprt & {
172  return to_symbol_expr(to_address_of_expr(expr).object());
173  },
174  [](const exprt &expr) {
175  return expr.id() == ID_address_of &&
176  expr.type().id() == ID_pointer &&
177 
178  to_address_of_expr(expr).object().id() == ID_symbol &&
179  to_address_of_expr(expr).object().type().id() == ID_array;
180  }),
182  "address of struct",
183  [](const exprt &expr) -> const symbol_exprt & {
184  return to_symbol_expr(to_address_of_expr(expr).object());
185  },
186  [](const exprt &expr) {
187  return expr.id() == ID_address_of &&
188  expr.type().id() == ID_pointer &&
189 
190  to_address_of_expr(expr).object().id() == ID_symbol &&
191  (to_address_of_expr(expr).object().type().id() == ID_struct ||
192  to_address_of_expr(expr).object().type().id() ==
193  ID_struct_tag);
194  }),
196  "array variable",
197  [](const exprt &expr) -> const symbol_exprt & {
198  return to_symbol_expr(expr);
199  },
200  [](const exprt &expr) {
201  return expr.id() == ID_symbol && expr.type().id() == ID_array;
202  }),
204  "pointer (does not need pointerizing)",
205  [](const exprt &expr) -> const symbol_exprt & {
206  return to_symbol_expr(expr);
207  },
208  [](const exprt &expr) {
209  return expr.id() == ID_symbol && expr.type().id() == ID_pointer;
210  })})
211 {}
212 
214  goto_modelt &goto_model,
215  const linker_valuest &linker_values)
216 {
217  const namespacet ns(goto_model.symbol_table);
218 
219  int ret=0;
220  // First, pointerize the actual linker-defined symbols
221  for(const auto &pair : linker_values)
222  {
223  const auto maybe_symbol = goto_model.symbol_table.get_writeable(pair.first);
224  if(!maybe_symbol)
225  continue;
226  symbolt &entry=*maybe_symbol;
227  entry.type=pointer_type(char_type());
228  entry.is_extern=false;
229  entry.value=pair.second.second;
230  }
231 
232  // Next, find all occurrences of linker-defined symbols that are _values_
233  // of some symbol in the symbol table, and pointerize them too
234  for(const auto &pair : goto_model.symbol_table.symbols)
235  {
236  std::list<symbol_exprt> to_pointerize;
237  symbols_to_pointerize(linker_values, pair.second.value, to_pointerize);
238 
239  if(to_pointerize.empty())
240  continue;
241  log.debug() << "Pointerizing the symbol-table value of symbol "
242  << pair.first << messaget::eom;
243  int fail = pointerize_subexprs_of(
244  goto_model.symbol_table.get_writeable_ref(pair.first).value,
245  to_pointerize,
246  linker_values);
247  if(to_pointerize.empty() && fail==0)
248  continue;
249  ret=1;
250  for(const auto &sym : to_pointerize)
251  {
252  log.error() << " Could not pointerize '" << sym.get_identifier()
253  << "' in symbol table entry " << pair.first << ". Pretty:\n"
254  << sym.pretty() << "\n";
255  }
256  log.error() << messaget::eom;
257  }
258 
259  // Finally, pointerize all occurrences of linker-defined symbols in the
260  // goto program
261  for(auto &gf : goto_model.goto_functions.function_map)
262  {
263  goto_programt &program=gf.second.body;
264  for(auto &instruction : program.instructions)
265  {
266  for(exprt *insts : std::list<exprt *>(
267  {&(instruction.code_nonconst()), &(instruction.guard)}))
268  {
269  std::list<symbol_exprt> to_pointerize;
270  symbols_to_pointerize(linker_values, *insts, to_pointerize);
271  if(to_pointerize.empty())
272  continue;
273  log.debug() << "Pointerizing a program expression..." << messaget::eom;
274  int fail = pointerize_subexprs_of(*insts, to_pointerize, linker_values);
275  if(to_pointerize.empty() && fail==0)
276  continue;
277  ret=1;
278  for(const auto &sym : to_pointerize)
279  {
280  log.error() << " Could not pointerize '" << sym.get_identifier()
281  << "' in function " << gf.first << ". Pretty:\n"
282  << sym.pretty() << "\n";
283  log.error().source_location = instruction.source_location;
284  }
285  log.error() << messaget::eom;
286  }
287  }
288  }
289  return ret;
290 }
291 
293  exprt &old_expr,
294  const linker_valuest &linker_values,
295  const symbol_exprt &old_symbol,
296  const irep_idt &ident,
297  const std::string &shape)
298 {
299  auto it=linker_values.find(ident);
300  if(it==linker_values.end())
301  {
302  log.error() << "Could not find a new expression for linker script-defined "
303  << "symbol '" << ident << "'" << messaget::eom;
304  return 1;
305  }
306  symbol_exprt new_expr=it->second.first;
307  new_expr.add_source_location()=old_symbol.source_location();
308  log.debug() << "Pointerizing linker-defined symbol '" << ident
309  << "' of shape <" << shape << ">." << messaget::eom;
310  old_expr=new_expr;
311  return 0;
312 }
313 
315  exprt &expr,
316  std::list<symbol_exprt> &to_pointerize,
317  const linker_valuest &linker_values)
318 {
319  int fail=0, tmp=0;
320  for(auto const &pair : linker_values)
321  for(auto const &pattern : replacement_predicates)
322  {
323  if(!pattern.match(expr))
324  continue;
325  // take a copy, expr will be changed below
326  const symbol_exprt inner_symbol=pattern.inner_symbol(expr);
327  if(pair.first!=inner_symbol.get_identifier())
328  continue;
329  tmp=replace_expr(expr, linker_values, inner_symbol, pair.first,
330  pattern.description());
331  fail=tmp?tmp:fail;
332  auto result=std::find(to_pointerize.begin(), to_pointerize.end(),
333  inner_symbol);
334  if(result==to_pointerize.end())
335  {
336  fail=1;
337  log.error() << "Too many removals of '" << inner_symbol.get_identifier()
338  << "'" << messaget::eom;
339  }
340  else
341  to_pointerize.erase(result);
342  // If we get here, we've just pointerized this expression. That expression
343  // will be a symbol possibly wrapped in some unary junk, but won't contain
344  // other symbols as subexpressions that might need to be pointerized. So
345  // it's safe to bail out here.
346  return fail;
347  }
348 
349  for(auto &op : expr.operands())
350  {
351  tmp = pointerize_subexprs_of(op, to_pointerize, linker_values);
352  fail=tmp?tmp:fail;
353  }
354  return fail;
355 }
356 
358  const linker_valuest &linker_values,
359  const exprt &expr,
360  std::list<symbol_exprt> &to_pointerize) const
361 {
362  for(const auto &op : expr.operands())
363  {
364  if(op.id()!=ID_symbol)
365  continue;
366  const symbol_exprt &sym_exp=to_symbol_expr(op);
367  const auto &pair=linker_values.find(sym_exp.get_identifier());
368  if(pair!=linker_values.end())
369  to_pointerize.push_back(sym_exp);
370  }
371  for(const auto &op : expr.operands())
372  symbols_to_pointerize(linker_values, op, to_pointerize);
373 }
374 
375 #if 0
376 The current implementation of this function is less precise than the
377  commented-out version below. To understand the difference between these
378  implementations, consider the following example:
379 
380 Suppose we have a section in the linker script, 100 bytes long, where the
381 address of the symbol sec_start is the start of the section (value 4096) and the
382 address of sec_end is the end of that section (value 4196).
383 
384 The current implementation synthesizes the goto-version of the following C:
385 
386  char __sec_array[100];
387  char *sec_start=(&__sec_array[0]);
388  char *sec_end=((&__sec_array[0])+100);
389  // Yes, it is 100 not 99. We're pointing to the end of the memory occupied
390  // by __sec_array, not the last element of __sec_array.
391 
392 This is imprecise for the following reason: the actual address of the array and
393 the pointers shall be some random CBMC-internal address, instead of being 4096
394 and 4196. The linker script, on the other hand, would have specified the exact
395 position of the section, and we even know what the actual values of sec_start
396 and sec_end are from the object file (these values are in the `addresses` list
397 of the `data` argument to this function). If the correctness of the code depends
398 on these actual values, then CBMCs model of the code is too imprecise to verify
399 this.
400 
401 The commented-out version of this function below synthesizes the following:
402 
403  char *sec_start=4096;
404  char *sec_end=4196;
405  __CPROVER_allocated_memory(4096, 100);
406 
407 This code has both the actual addresses of the start and end of the section and
408 tells CBMC that the intermediate region is valid. However, the allocated_memory
409 macro does not currently allocate an actual object at the address 4096, so
410 symbolic execution can fail. In particular, the 'allocated memory' is part of
411 __CPROVER_memory, which does not have a bounded size; this means that (for
412 example) calls to memcpy or memset fail, because the first and third arguments
413 do not have know n size. The commented-out implementation should be reinstated
414 once this limitation of __CPROVER_allocated_memory has been fixed.
415 
416 In either case, no other changes to the rest of the code (outside this function)
417 should be necessary. The rest of this file converts expressions containing the
418 linker-defined symbol into pointer types if they were not already, and this is
419 the right behaviour for both implementations.
420 #endif
422  jsont &data,
423  const std::string &linker_script,
424  goto_programt &gp,
425  symbol_tablet &symbol_table,
426  linker_valuest &linker_values)
427 #if 1
428 {
429  goto_programt::instructionst initialize_instructions=gp.instructions;
430  std::map<irep_idt, std::size_t> truncated_symbols;
431  for(auto &d : to_json_array(data["regions"]))
432  {
433  bool has_end=d["has-end-symbol"].is_true();
434 
435  std::ostringstream array_name;
436  array_name << CPROVER_PREFIX << "linkerscript-array_"
437  << d["start-symbol"].value;
438  if(has_end)
439  array_name << "-" << d["end-symbol"].value;
440 
441 
442  // Array symbol_exprt
443  mp_integer array_size = string2integer(d["size"].value);
444  if(array_size > MAX_FLATTENED_ARRAY_SIZE)
445  {
446  log.warning() << "Object section '" << d["section"].value << "' of size "
447  << array_size << " is too large to model. Truncating to "
448  << MAX_FLATTENED_ARRAY_SIZE << " bytes" << messaget::eom;
449  array_size=MAX_FLATTENED_ARRAY_SIZE;
450  if(!has_end)
451  truncated_symbols[d["size-symbol"].value]=MAX_FLATTENED_ARRAY_SIZE;
452  }
453 
454  constant_exprt array_size_expr=from_integer(array_size, size_type());
455  array_typet array_type(char_type(), array_size_expr);
456  symbol_exprt array_expr(array_name.str(), array_type);
457  source_locationt array_loc;
458 
459  array_loc.set_file(linker_script);
460  std::ostringstream array_comment;
461  array_comment << "Object section '" << d["section"].value << "' of size "
462  << array_size << " bytes";
463  array_loc.set_comment(array_comment.str());
464  array_expr.add_source_location()=array_loc;
465 
466  // Array start address
467  index_exprt zero_idx(array_expr, from_integer(0, size_type()));
468  address_of_exprt array_start(zero_idx);
469 
470  // Linker-defined symbol_exprt pointing to start address
471  symbol_exprt start_sym(d["start-symbol"].value, pointer_type(char_type()));
472  linker_values.emplace(
473  d["start-symbol"].value, std::make_pair(start_sym, array_start));
474 
475  // Since the value of the pointer will be a random CBMC address, write a
476  // note about the real address in the object file
477  auto it = std::find_if(
478  to_json_array(data["addresses"]).begin(),
479  to_json_array(data["addresses"]).end(),
480  [&d](const jsont &add) {
481  return add["sym"].value == d["start-symbol"].value;
482  });
483  if(it == to_json_array(data["addresses"]).end())
484  {
485  log.error() << "Start: Could not find address corresponding to symbol '"
486  << d["start-symbol"].value << "' (start of section)"
487  << messaget::eom;
488  return 1;
489  }
490  source_locationt start_loc;
491  start_loc.set_file(linker_script);
492  std::ostringstream start_comment;
493  start_comment << "Pointer to beginning of object section '"
494  << d["section"].value << "'. Original address in object file"
495  << " is " << (*it)["val"].value;
496  start_loc.set_comment(start_comment.str());
497  start_sym.add_source_location()=start_loc;
498 
499  // Instruction for start-address pointer in __CPROVER_initialize
500  code_assignt start_assign(start_sym, array_start);
501  start_assign.add_source_location()=start_loc;
502  goto_programt::instructiont start_assign_i =
503  goto_programt::make_assignment(start_assign, start_loc);
504  initialize_instructions.push_front(start_assign_i);
505 
506  if(has_end) // Same for pointer to end of array
507  {
508  plus_exprt array_end(array_start, array_size_expr);
509 
510  symbol_exprt end_sym(d["end-symbol"].value, pointer_type(char_type()));
511  linker_values.emplace(
512  d["end-symbol"].value, std::make_pair(end_sym, array_end));
513 
514  auto entry = std::find_if(
515  to_json_array(data["addresses"]).begin(),
516  to_json_array(data["addresses"]).end(),
517  [&d](const jsont &add) {
518  return add["sym"].value == d["end-symbol"].value;
519  });
520  if(entry == to_json_array(data["addresses"]).end())
521  {
522  log.debug() << "Could not find address corresponding to symbol '"
523  << d["end-symbol"].value << "' (end of section)"
524  << messaget::eom;
525  return 1;
526  }
527  source_locationt end_loc;
528  end_loc.set_file(linker_script);
529  std::ostringstream end_comment;
530  end_comment << "Pointer to end of object section '" << d["section"].value
531  << "'. Original address in object file"
532  << " is " << (*entry)["val"].value;
533  end_loc.set_comment(end_comment.str());
534  end_sym.add_source_location()=end_loc;
535 
536  code_assignt end_assign(end_sym, array_end);
537  end_assign.add_source_location()=end_loc;
538  goto_programt::instructiont end_assign_i =
539  goto_programt::make_assignment(end_assign, end_loc);
540  initialize_instructions.push_front(end_assign_i);
541  }
542 
543  // Add the array to the symbol table. We don't add the pointer(s) to the
544  // symbol table because they were already there, but declared as extern and
545  // probably with a different type. We change the externness and type in
546  // pointerize_linker_defined_symbols.
547  symbolt array_sym;
548  array_sym.name=array_name.str();
549  array_sym.pretty_name=array_name.str();
550  array_sym.is_lvalue=array_sym.is_static_lifetime=true;
551  array_sym.type=array_type;
552  array_sym.location=array_loc;
553  symbol_table.add(array_sym);
554 
555  // Push the array initialization to the front now, so that it happens before
556  // the initialization of the symbols that point to it.
557  namespacet ns(symbol_table);
558  const auto zi = zero_initializer(array_type, array_loc, ns);
559  CHECK_RETURN(zi.has_value());
560  code_assignt array_assign(array_expr, *zi);
561  array_assign.add_source_location()=array_loc;
562  goto_programt::instructiont array_assign_i =
563  goto_programt::make_assignment(array_assign, array_loc);
564  initialize_instructions.push_front(array_assign_i);
565  }
566 
567  // We've added every linker-defined symbol that marks the start or end of a
568  // region. But there might be other linker-defined symbols with some random
569  // address. These will have been declared extern too, so we need to give them
570  // a value also. Here, we give them the actual value that they have in the
571  // object file, since we're not assigning any object to them.
572  for(const auto &d : to_json_array(data["addresses"]))
573  {
574  auto it=linker_values.find(irep_idt(d["sym"].value));
575  if(it!=linker_values.end())
576  // sym marks the start or end of a region; already dealt with.
577  continue;
578 
579  // Perhaps this is a size symbol for a section whose size we truncated
580  // earlier.
581  irep_idt symbol_value;
582  const auto &pair=truncated_symbols.find(d["sym"].value);
583  if(pair==truncated_symbols.end())
584  symbol_value=d["val"].value;
585  else
586  {
587  log.debug()
588  << "Truncating the value of symbol " << d["sym"].value << " from "
589  << d["val"].value << " to " << MAX_FLATTENED_ARRAY_SIZE
590  << " because it corresponds to the size of a too-large section."
591  << messaget::eom;
593  }
594 
595  source_locationt loc;
596  loc.set_file(linker_script);
597  loc.set_comment("linker script-defined symbol: char *"+
598  d["sym"].value+"="+"(char *)"+id2string(symbol_value)+"u;");
599 
600  symbol_exprt lhs(d["sym"].value, pointer_type(char_type()));
601 
602  constant_exprt rhs(
604  string2integer(id2string(symbol_value)),
605  unsigned_int_type().get_width()),
607 
608  typecast_exprt rhs_tc(rhs, pointer_type(char_type()));
609 
610  linker_values.emplace(
611  irep_idt(d["sym"].value), std::make_pair(lhs, rhs_tc));
612 
613  code_assignt assign(lhs, rhs_tc);
614  assign.add_source_location()=loc;
615  goto_programt::instructiont assign_i =
616  goto_programt::make_assignment(assign, loc);
617  initialize_instructions.push_front(assign_i);
618  }
619  return 0;
620 }
621 #else
622 {
623  goto_programt::instructionst initialize_instructions=gp.instructions;
624  for(const auto &d : to_json_array(data["regions"]))
625  {
626  unsigned start=safe_string2unsigned(d["start"].value);
627  unsigned size=safe_string2unsigned(d["size"].value);
628  constant_exprt first=from_integer(start, size_type());
629  constant_exprt second=from_integer(size, size_type());
630  const code_typet void_t({}, empty_typet());
632  symbol_exprt(CPROVER_PREFIX "allocated_memory", void_t), {first, second});
633 
634  source_locationt loc;
635  loc.set_file(linker_script);
636  loc.set_comment("linker script-defined region:\n"+d["commt"].value+":\n"+
637  d["annot"].value);
638  f.add_source_location()=loc;
639 
641  i.make_function_call(f);
642  initialize_instructions.push_front(i);
643  }
644 
645  if(!symbol_table.has_symbol(CPROVER_PREFIX "allocated_memory"))
646  {
647  symbolt sym;
648  sym.name=CPROVER_PREFIX "allocated_memory";
649  sym.pretty_name=CPROVER_PREFIX "allocated_memory";
650  sym.is_lvalue=sym.is_static_lifetime=true;
651  const code_typet void_t({}, empty_typet());
652  sym.type=void_t;
653  symbol_table.add(sym);
654  }
655 
656  for(const auto &d : to_json_array(data["addresses"]))
657  {
658  source_locationt loc;
659  loc.set_file(linker_script);
660  loc.set_comment("linker script-defined symbol: char *"+
661  d["sym"].value+"="+"(char *)"+d["val"].value+"u;");
662 
663  symbol_exprt lhs(d["sym"].value, pointer_type(char_type()));
664 
665  constant_exprt rhs;
667  string2integer(d["val"].value), unsigned_int_type().get_width()));
668  rhs.type()=unsigned_int_type();
669 
670  exprt rhs_tc =
672 
673  linker_values.emplace(
674  irep_idt(d["sym"].value), std::make_pair(lhs, rhs_tc));
675 
676  code_assignt assign(lhs, rhs_tc);
677  assign.add_source_location()=loc;
679  assign_i.make_assignment(assign);
680  initialize_instructions.push_front(assign_i);
681  }
682  return 0;
683 }
684 #endif
685 
687  std::list<irep_idt> &linker_defined_symbols,
688  const symbol_tablet &symbol_table,
689  const std::string &out_file,
690  const std::string &def_out_file)
691 {
692  for(auto const &pair : symbol_table.symbols)
693  {
694  if(
695  pair.second.is_extern && pair.second.value.is_nil() &&
696  pair.second.name != CPROVER_PREFIX "memory")
697  {
698  linker_defined_symbols.push_back(pair.second.name);
699  }
700  }
701 
702  std::ostringstream linker_def_str;
703  std::copy(
704  linker_defined_symbols.begin(),
705  linker_defined_symbols.end(),
706  std::ostream_iterator<irep_idt>(linker_def_str, "\n"));
707  log.debug() << "Linker-defined symbols: [" << linker_def_str.str() << "]\n"
708  << messaget::eom;
709 
710  temporary_filet linker_def_infile("goto-cc-linker-defs", "");
711  std::ofstream linker_def_file(linker_def_infile());
712  linker_def_file << linker_def_str.str();
713  linker_def_file.close();
714 
715  std::vector<std::string> argv=
716  {
717  "ls_parse.py",
718  "--script", cmdline.get_value('T'),
719  "--object", out_file,
720  "--sym-file", linker_def_infile(),
721  "--out-file", def_out_file
722  };
723 
725  argv.push_back("--very-verbose");
727  argv.push_back("--verbose");
728 
729  log.debug() << "RUN:";
730  for(std::size_t i=0; i<argv.size(); i++)
731  log.debug() << " " << argv[i];
732  log.debug() << messaget::eom;
733 
734  int rc = run(argv[0], argv, linker_def_infile(), def_out_file, "");
735  if(rc!=0)
736  log.warning() << "Problem parsing linker script" << messaget::eom;
737 
738  return rc;
739 }
740 
742  const std::list<irep_idt> &linker_defined_symbols,
743  const linker_valuest &linker_values)
744 {
745  int fail=0;
746  for(const auto &sym : linker_defined_symbols)
747  if(linker_values.find(sym)==linker_values.end())
748  {
749  fail=1;
750  log.error() << "Variable '" << sym
751  << "' was declared extern but never given "
752  << "a value, even in a linker script" << messaget::eom;
753  }
754 
755  for(const auto &pair : linker_values)
756  {
757  auto it=std::find(linker_defined_symbols.begin(),
758  linker_defined_symbols.end(), pair.first);
759  if(it==linker_defined_symbols.end())
760  {
761  fail=1;
762  log.error()
763  << "Linker script-defined symbol '" << pair.first << "' was "
764  << "either defined in the C source code, not declared extern in "
765  << "the C source code, or does not appear in the C source code"
766  << messaget::eom;
767  }
768  }
769  return fail;
770 }
771 
773 {
774  if(!data.is_object())
775  return true;
776 
777  const json_objectt &data_object = to_json_object(data);
778  return (
779  !(data_object.find("regions") != data_object.end() &&
780  data_object.find("addresses") != data_object.end() &&
781  data["regions"].is_array() && data["addresses"].is_array() &&
782  std::all_of(
783  to_json_array(data["addresses"]).begin(),
784  to_json_array(data["addresses"]).end(),
785  [](const jsont &j) {
786  if(!j.is_object())
787  return false;
788 
789  const json_objectt &address = to_json_object(j);
790  return address.find("val") != address.end() &&
791  address.find("sym") != address.end() &&
792  address["val"].is_number() && address["sym"].is_string();
793  }) &&
794  std::all_of(
795  to_json_array(data["regions"]).begin(),
796  to_json_array(data["regions"]).end(),
797  [](const jsont &j) {
798  if(!j.is_object())
799  return false;
800 
801  const json_objectt &region = to_json_object(j);
802  return region.find("start") != region.end() &&
803  region.find("size") != region.end() &&
804  region.find("annot") != region.end() &&
805  region.find("commt") != region.end() &&
806  region.find("start-symbol") != region.end() &&
807  region.find("has-end-symbol") != region.end() &&
808  region.find("section") != region.end() &&
809  region["start"].is_number() && region["size"].is_number() &&
810  region["annot"].is_string() &&
811  region["start-symbol"].is_string() &&
812  region["section"].is_string() && region["commt"].is_string() &&
813  ((region["has-end-symbol"].is_true() &&
814  region.find("end-symbol") != region.end() &&
815  region["end-symbol"].is_string()) ||
816  (region["has-end-symbol"].is_false() &&
817  region.find("size-symbol") != region.end() &&
818  region.find("end-symbol") == region.end() &&
819  region["size-symbol"].is_string()));
820  })));
821 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
irep_idt integer2bvrep(const mp_integer &src, std::size_t width)
convert an integer to bit-vector representation with given width This uses two's complement for negat...
std::string array_name(const namespacet &ns, const exprt &expr)
Definition: array_name.cpp:19
unsignedbv_typet unsigned_int_type()
Definition: c_types.cpp:44
unsignedbv_typet size_type()
Definition: c_types.cpp:58
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:243
bitvector_typet char_type()
Definition: c_types.cpp:114
Operator to return the address of an object.
Definition: pointer_expr.h:341
exprt & object()
Definition: pointer_expr.h:350
Arrays with given size.
Definition: std_types.h:763
std::string get_value(char option) const
Definition: cmdline.cpp:47
virtual bool isset(char option) const
Definition: cmdline.cpp:29
A codet representing an assignment in the program.
Definition: std_code.h:293
codet representation of a function call statement.
Definition: std_code.h:1213
Base type of functions.
Definition: std_types.h:539
static bool write_bin_object_file(const std::string &file_name, const goto_modelt &src_goto_model, bool validate_goto_model, message_handlert &message_handler)
Writes the goto functions of src_goto_model to a binary format object file.
Definition: compile.cpp:577
A constant literal expression.
Definition: std_expr.h:2753
void set_value(const irep_idt &value)
Definition: std_expr.h:2766
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
The empty type.
Definition: std_types.h:51
Base class for all expressions.
Definition: expr.h:54
source_locationt & add_source_location()
Definition: expr.h:235
const source_locationt & source_location() const
Definition: expr.h:230
typet & type()
Return the type of the expression.
Definition: expr.h:82
operandst & operands()
Definition: expr.h:92
function_mapt function_map
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:30
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:33
This class represents an instruction in the GOTO intermediate representation.
Definition: goto_program.h:178
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:71
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:652
static instructiont make_assignment(const code_assignt &_code, const source_locationt &l=source_locationt::nil())
Create an assignment instruction.
std::list< instructiont > instructionst
Definition: goto_program.h:644
Array index operator.
Definition: std_expr.h:1328
exprt & array()
Definition: std_expr.h:1344
exprt & index()
Definition: std_expr.h:1354
const irep_idt & id() const
Definition: irep.h:407
iterator find(const std::string &key)
Definition: json.h:356
iterator end()
Definition: json.h:386
Definition: json.h:27
std::string value
Definition: json.h:132
bool is_object() const
Definition: json.h:56
linker_script_merget(const std::string &elf_binary, const std::string &goto_binary, const cmdlinet &, message_handlert &)
int pointerize_linker_defined_symbols(goto_modelt &, const linker_valuest &)
convert the type of linker script-defined symbols to char*
void symbols_to_pointerize(const linker_valuest &linker_values, const exprt &expr, std::list< symbol_exprt > &to_pointerize) const
fill to_pointerize with names of linker symbols appearing in expr
int add_linker_script_definitions()
Add values of linkerscript-defined symbols to the goto-binary.
const std::string & elf_binary
int get_linker_script_data(std::list< irep_idt > &linker_defined_symbols, const symbol_tablet &symbol_table, const std::string &out_file, const std::string &def_out_file)
Write linker script definitions to linker_data.
std::map< irep_idt, std::pair< symbol_exprt, exprt > > linker_valuest
int pointerize_subexprs_of(exprt &expr, std::list< symbol_exprt > &to_pointerize, const linker_valuest &linker_values)
const cmdlinet & cmdline
std::list< replacement_predicatet > replacement_predicates
The "shapes" of expressions to be replaced by a pointer.
int ls_data2instructions(jsont &data, const std::string &linker_script, goto_programt &gp, symbol_tablet &symbol_table, linker_valuest &linker_values)
Write a list of definitions derived from data into gp's instructions member.
int replace_expr(exprt &old_expr, const linker_valuest &linker_values, const symbol_exprt &old_symbol, const irep_idt &ident, const std::string &shape)
do the actual replacement of an expr with a new pointer expr
int goto_and_object_mismatch(const std::list< irep_idt > &linker_defined_symbols, const linker_valuest &linker_values)
one-to-one correspondence between extern & linker symbols
int linker_data_is_malformed(const jsont &data) const
Validate output of the scripts/ls_parse.py tool.
const std::string & goto_binary
unsigned get_verbosity() const
Definition: message.h:54
source_locationt source_location
Definition: message.h:247
mstreamt & error() const
Definition: message.h:399
mstreamt & warning() const
Definition: message.h:404
mstreamt & debug() const
Definition: message.h:429
message_handlert & get_message_handler()
Definition: message.h:184
@ M_DEBUG
Definition: message.h:171
@ M_RESULT
Definition: message.h:170
static eomt eom
Definition: message.h:297
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:91
The plus expression Associativity is not specified.
Definition: std_expr.h:914
Patterns of expressions that should be replaced.
void set_comment(const irep_idt &comment)
void set_file(const irep_idt &file)
Expression to hold a symbol (variable)
Definition: std_expr.h:80
const irep_idt & get_identifier() const
Definition: std_expr.h:109
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
const symbolst & symbols
Read-only field, used to look up symbols given their names.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
The symbol table.
Definition: symbol_table.h:14
virtual symbolt * get_writeable(const irep_idt &name) override
Find a symbol in the symbol table for read-write access.
Definition: symbol_table.h:90
Symbol table entry.
Definition: symbol.h:28
bool is_extern
Definition: symbol.h:66
bool is_static_lifetime
Definition: symbol.h:65
source_locationt location
Source code location of definition of symbol.
Definition: symbol.h:37
typet type
Type of symbol.
Definition: symbol.h:31
irep_idt name
The unique identifier.
Definition: symbol.h:40
irep_idt pretty_name
Language-specific display name.
Definition: symbol.h:52
bool is_lvalue
Definition: symbol.h:66
exprt value
Initial value of symbol.
Definition: symbol.h:34
Semantic type conversion.
Definition: std_expr.h:1866
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition: std_expr.h:1874
Compile and link source and object files.
void __CPROVER_allocated_memory(__CPROVER_size_t address, __CPROVER_size_t extent)
#define CPROVER_PREFIX
optionalt< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Expression Initialization.
Symbol Table + CFG.
dstringt irep_idt
Definition: irep.h:37
const std::string & id2string(const irep_idt &d)
Definition: irep.h:49
json_arrayt & to_json_array(jsont &json)
Definition: json.h:426
json_objectt & to_json_object(jsont &json)
Definition: json.h:444
bool parse_json(std::istream &in, const std::string &filename, message_handlert &message_handler, jsont &dest)
Definition: json_parser.cpp:16
Merge linker script-defined symbols into a goto-program.
Magic numbers used throughout the codebase.
const std::size_t MAX_FLATTENED_ARRAY_SIZE
Definition: magic.h:11
const mp_integer string2integer(const std::string &n, unsigned base)
Definition: mp_arith.cpp:54
API to expression classes for Pointers.
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast an exprt to an address_of_exprt.
Definition: pointer_expr.h:378
static bool read_goto_binary(const std::string &filename, symbol_tablet &, goto_functionst &, message_handlert &)
Read a goto binary from a file, but do not update config.
Read Goto Programs.
int run(const std::string &what, const std::vector< std::string > &argv)
Definition: run.cpp:48
BigInt mp_integer
Definition: smt_terms.h:12
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
#define INITIALIZE_FUNCTION
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:189
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
Definition: std_expr.h:1382
unsigned safe_string2unsigned(const std::string &str, int base)
Definition: string2int.cpp:16
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
Definition: kdev_t.h:24
Definition: kdev_t.h:19