cprover
goto_symex.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Symbolic Execution
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
12 #include "goto_symex.h"
13 
14 #include "expr_skeleton.h"
15 #include "symex_assign.h"
16 
17 #include <util/arith_tools.h>
18 #include <util/c_types.h>
19 #include <util/format_expr.h>
20 #include <util/fresh_symbol.h>
21 #include <util/mathematical_expr.h>
24 #include <util/simplify_expr.h>
25 #include <util/simplify_utils.h>
26 #include <util/string_expr.h>
27 #include <util/string_utils.h>
28 
29 #include <climits>
30 
32 
34 {
36  simplify(expr, ns);
37 }
38 
40  statet &state,
41  const exprt &o_lhs,
42  const exprt &o_rhs)
43 {
44  exprt lhs = clean_expr(o_lhs, state, true);
45  exprt rhs = clean_expr(o_rhs, state, false);
46 
48  lhs.type() == rhs.type(), "assignments must be type consistent");
49 
51  log.debug(), [this, &lhs](messaget::mstreamt &mstream) {
52  mstream << "Assignment to " << format(lhs) << " ["
53  << pointer_offset_bits(lhs.type(), ns).value_or(0) << " bits]"
54  << messaget::eom;
55  });
56 
57  // rvalues present within the lhs (for example, "some_array[this_rvalue]" or
58  // "byte_extract <type> from an_lvalue offset this_rvalue") can affect whether
59  // we use field-sensitive symbols or not, so L2-rename them up front:
60  lhs = state.l2_rename_rvalues(lhs, ns);
61  do_simplify(lhs);
62  lhs = state.field_sensitivity.apply(ns, state, std::move(lhs), true);
63 
64  if(rhs.id() == ID_side_effect)
65  {
66  const side_effect_exprt &side_effect_expr = to_side_effect_expr(rhs);
67  const irep_idt &statement = side_effect_expr.get_statement();
68 
69  if(
70  statement == ID_cpp_new || statement == ID_cpp_new_array ||
71  statement == ID_java_new_array_data)
72  {
73  symex_cpp_new(state, lhs, side_effect_expr);
74  }
75  else if(statement == ID_allocate)
76  symex_allocate(state, lhs, side_effect_expr);
77  else if(statement == ID_va_start)
78  symex_va_start(state, lhs, side_effect_expr);
79  else
81  }
82  else
83  {
85 
86  // Let's hide return value assignments.
87  if(
88  lhs.id() == ID_symbol &&
89  id2string(to_symbol_expr(lhs).get_identifier()).find("#return_value!") !=
90  std::string::npos)
91  {
93  }
94 
95  // We hide if we are in a hidden function.
96  if(state.call_stack().top().hidden_function)
98 
99  // We hide if we are executing a hidden instruction.
100  if(state.source.pc->source_location.get_hide())
101  assignment_type = symex_targett::assignment_typet::HIDDEN;
102 
104  state, assignment_type, ns, symex_config, target};
105 
106  // Try to constant propagate potential side effects of the assignment, when
107  // simplification is turned on and there is one thread only. Constant
108  // propagation is only sound for sequential code as here we do not take into
109  // account potential writes from other threads when propagating the values.
110  if(symex_config.simplify_opt && state.threads.size() <= 1)
111  {
113  state, symex_assign, lhs, rhs))
114  return;
115  }
116 
117  exprt::operandst lhs_if_then_else_conditions;
118  symex_assign.assign_rec(
119  lhs, expr_skeletont{}, rhs, lhs_if_then_else_conditions);
120  }
121 }
122 
130 static std::string get_alnum_string(const array_exprt &char_array)
131 {
132  const auto &ibv_type =
134 
135  const std::size_t n_bits = ibv_type.get_width();
136  CHECK_RETURN(n_bits % 8 == 0);
137 
138  static_assert(CHAR_BIT == 8, "bitwidth of char assumed to be 8");
139 
140  const std::size_t n_chars = n_bits / 8;
141 
142  INVARIANT(
143  sizeof(std::size_t) >= n_chars,
144  "size_t shall be large enough to represent a character");
145 
146  std::string result;
147 
148  for(const auto &c : char_array.operands())
149  {
150  const std::size_t c_val = numeric_cast_v<std::size_t>(to_constant_expr(c));
151 
152  for(std::size_t i = 0; i < n_chars; i++)
153  {
154  const char c_chunk = static_cast<char>((c_val >> (i * 8)) & 0xff);
155  result.push_back(c_chunk);
156  }
157  }
158 
159  return escape_non_alnum(result);
160 }
161 
163  statet &state,
164  symex_assignt &symex_assign,
165  const exprt &lhs,
166  const exprt &rhs)
167 {
168  if(rhs.id() == ID_function_application)
169  {
171 
172  if(f_l1.function().id() == ID_symbol)
173  {
174  const irep_idt &func_id =
176 
177  if(func_id == ID_cprover_string_concat_func)
178  {
179  return constant_propagate_string_concat(state, symex_assign, f_l1);
180  }
181  else if(func_id == ID_cprover_string_empty_string_func)
182  {
183  // constant propagating the empty string always succeeds as it does
184  // not depend on potentially non-constant inputs
186  return true;
187  }
188  else if(func_id == ID_cprover_string_substring_func)
189  {
191  }
192  else if(
193  func_id == ID_cprover_string_of_int_func ||
194  func_id == ID_cprover_string_of_long_func)
195  {
197  }
198  else if(func_id == ID_cprover_string_delete_char_at_func)
199  {
200  return constant_propagate_delete_char_at(state, symex_assign, f_l1);
201  }
202  else if(func_id == ID_cprover_string_delete_func)
203  {
204  return constant_propagate_delete(state, symex_assign, f_l1);
205  }
206  else if(func_id == ID_cprover_string_set_length_func)
207  {
208  return constant_propagate_set_length(state, symex_assign, f_l1);
209  }
210  else if(func_id == ID_cprover_string_char_set_func)
211  {
212  return constant_propagate_set_char_at(state, symex_assign, f_l1);
213  }
214  else if(func_id == ID_cprover_string_trim_func)
215  {
216  return constant_propagate_trim(state, symex_assign, f_l1);
217  }
218  else if(func_id == ID_cprover_string_to_lower_case_func)
219  {
220  return constant_propagate_case_change(state, symex_assign, f_l1, false);
221  }
222  else if(func_id == ID_cprover_string_to_upper_case_func)
223  {
224  return constant_propagate_case_change(state, symex_assign, f_l1, true);
225  }
226  else if(func_id == ID_cprover_string_replace_func)
227  {
228  return constant_propagate_replace(state, symex_assign, f_l1);
229  }
230  }
231  }
232 
233  return false;
234 }
235 
237  statet &state,
238  symex_assignt &symex_assign,
239  const ssa_exprt &length,
240  const constant_exprt &new_length,
241  const ssa_exprt &char_array,
242  const array_exprt &new_char_array)
243 {
244  // We need to make sure that the length of the previous array isn't
245  // unconstrained, otherwise it could be arbitrarily large which causes
246  // invalid traces
247  symex_assume(state, equal_exprt{length, from_integer(0, length.type())});
248 
249  // assign length of string
250  symex_assign.assign_symbol(length, expr_skeletont{}, new_length, {});
251 
252  const std::string aux_symbol_name =
253  get_alnum_string(new_char_array) + "_constant_char_array";
254 
255  const bool string_constant_exists =
256  state.symbol_table.has_symbol(aux_symbol_name);
257 
258  const symbolt &aux_symbol =
259  string_constant_exists
260  ? state.symbol_table.lookup_ref(aux_symbol_name)
262  state, symex_assign, aux_symbol_name, char_array, new_char_array);
263 
264  INVARIANT(
265  aux_symbol.value == new_char_array,
266  "symbol shall have value derived from char array content");
267 
268  const address_of_exprt string_data(
269  index_exprt(aux_symbol.symbol_expr(), from_integer(0, index_type())));
270 
271  symex_assign.assign_symbol(char_array, expr_skeletont{}, string_data, {});
272 
273  if(!string_constant_exists)
274  {
276  state, symex_assign, new_char_array, string_data);
277  }
278 }
279 
281  statet &state,
282  symex_assignt &symex_assign,
283  const std::string &aux_symbol_name,
284  const ssa_exprt &char_array,
285  const array_exprt &new_char_array)
286 {
287  array_typet new_char_array_type = new_char_array.type();
288  new_char_array_type.set(ID_C_constant, true);
289 
290  symbolt &new_aux_symbol = get_fresh_aux_symbol(
291  new_char_array_type,
292  "",
293  aux_symbol_name,
295  ns.lookup(to_symbol_expr(char_array.get_original_expr())).mode,
296  ns,
297  state.symbol_table);
298 
299  CHECK_RETURN(new_aux_symbol.is_state_var);
300  CHECK_RETURN(new_aux_symbol.is_lvalue);
301 
302  new_aux_symbol.is_static_lifetime = true;
303  new_aux_symbol.is_file_local = false;
304  new_aux_symbol.is_thread_local = false;
305 
306  new_aux_symbol.value = new_char_array;
307 
308  const exprt arr = state.rename(new_aux_symbol.symbol_expr(), ns).get();
309 
310  symex_assign.assign_symbol(
311  to_ssa_expr(arr).get_l1_object(), expr_skeletont{}, new_char_array, {});
312 
313  return new_aux_symbol;
314 }
315 
317  statet &state,
318  symex_assignt &symex_assign,
319  const array_exprt &new_char_array,
320  const address_of_exprt &string_data)
321 {
322  const symbolt &function_symbol =
323  ns.lookup(ID_cprover_associate_array_to_pointer_func);
324 
325  const function_application_exprt array_to_pointer_app{
326  function_symbol.symbol_expr(), {new_char_array, string_data}};
327 
328  const symbolt &return_symbol = get_fresh_aux_symbol(
329  to_mathematical_function_type(function_symbol.type).codomain(),
330  "",
331  "return_value",
333  function_symbol.mode,
334  ns,
335  state.symbol_table);
336 
337  const ssa_exprt ssa_expr(return_symbol.symbol_expr());
338 
339  symex_assign.assign_symbol(
340  ssa_expr, expr_skeletont{}, array_to_pointer_app, {});
341 }
342 
345  const statet &state,
346  const exprt &content)
347 {
348  if(content.id() != ID_symbol)
349  {
350  return {};
351  }
352 
353  const auto s_pointer_opt =
354  state.propagation.find(to_symbol_expr(content).get_identifier());
355 
356  if(!s_pointer_opt)
357  {
358  return {};
359  }
360 
361  return try_get_string_data_array(s_pointer_opt->get(), ns);
362 }
363 
366 {
367  if(expr.id() != ID_symbol)
368  {
369  return {};
370  }
371 
372  const auto constant_expr_opt =
373  state.propagation.find(to_symbol_expr(expr).get_identifier());
374 
375  if(!constant_expr_opt || constant_expr_opt->get().id() != ID_constant)
376  {
377  return {};
378  }
379 
381  to_constant_expr(constant_expr_opt->get()));
382 }
383 
385  statet &state,
386  symex_assignt &symex_assign,
387  const function_application_exprt &f_l1)
388 {
389  const auto &f_type = f_l1.function_type();
390  const auto &length_type = f_type.domain().at(0);
391  const auto &char_type = to_pointer_type(f_type.domain().at(1)).subtype();
392 
393  const constant_exprt length = from_integer(0, length_type);
394 
395  const array_typet char_array_type(char_type, length);
396 
398  f_l1.arguments().size() >= 2,
399  "empty string primitive requires two output arguments");
400 
401  const array_exprt char_array({}, char_array_type);
402 
404  state,
405  symex_assign,
406  to_ssa_expr(f_l1.arguments().at(0)),
407  length,
408  to_ssa_expr(f_l1.arguments().at(1)),
409  char_array);
410 }
411 
413  statet &state,
414  symex_assignt &symex_assign,
415  const function_application_exprt &f_l1)
416 {
417  const auto &f_type = f_l1.function_type();
418  const auto &length_type = f_type.domain().at(0);
419  const auto &char_type = to_pointer_type(f_type.domain().at(1)).subtype();
420 
421  const refined_string_exprt &s1 = to_string_expr(f_l1.arguments().at(2));
422  const auto s1_data_opt = try_evaluate_constant_string(state, s1.content());
423 
424  if(!s1_data_opt)
425  return false;
426 
427  const array_exprt &s1_data = s1_data_opt->get();
428 
429  const refined_string_exprt &s2 = to_string_expr(f_l1.arguments().at(3));
430  const auto s2_data_opt = try_evaluate_constant_string(state, s2.content());
431 
432  if(!s2_data_opt)
433  return false;
434 
435  const array_exprt &s2_data = s2_data_opt->get();
436 
437  const std::size_t new_size =
438  s1_data.operands().size() + s2_data.operands().size();
439 
440  const constant_exprt new_char_array_length =
441  from_integer(new_size, length_type);
442 
443  const array_typet new_char_array_type(char_type, new_char_array_length);
444 
445  exprt::operandst operands(s1_data.operands());
446  operands.insert(
447  operands.end(), s2_data.operands().begin(), s2_data.operands().end());
448 
449  const array_exprt new_char_array(std::move(operands), new_char_array_type);
450 
452  state,
453  symex_assign,
454  to_ssa_expr(f_l1.arguments().at(0)),
455  new_char_array_length,
456  to_ssa_expr(f_l1.arguments().at(1)),
457  new_char_array);
458 
459  return true;
460 }
461 
463  statet &state,
464  symex_assignt &symex_assign,
465  const function_application_exprt &f_l1)
466 {
467  const std::size_t num_operands = f_l1.arguments().size();
468 
469  PRECONDITION(num_operands >= 4);
470  PRECONDITION(num_operands <= 5);
471 
472  const auto &f_type = f_l1.function_type();
473  const auto &length_type = f_type.domain().at(0);
474  const auto &char_type = to_pointer_type(f_type.domain().at(1)).subtype();
475 
476  const refined_string_exprt &s = to_string_expr(f_l1.arguments().at(2));
477  const auto s_data_opt = try_evaluate_constant_string(state, s.content());
478 
479  if(!s_data_opt)
480  return false;
481 
482  const array_exprt &s_data = s_data_opt->get();
483 
484  mp_integer end_index;
485 
486  if(num_operands == 5)
487  {
488  const auto end_index_expr_opt =
489  try_evaluate_constant(state, f_l1.arguments().at(4));
490 
491  if(!end_index_expr_opt)
492  {
493  return false;
494  }
495 
496  end_index =
497  numeric_cast_v<mp_integer>(to_constant_expr(*end_index_expr_opt));
498 
499  if(end_index < 0 || end_index > s_data.operands().size())
500  {
501  return false;
502  }
503  }
504  else
505  {
506  end_index = s_data.operands().size();
507  }
508 
509  const auto start_index_expr_opt =
510  try_evaluate_constant(state, f_l1.arguments().at(3));
511 
512  if(!start_index_expr_opt)
513  {
514  return false;
515  }
516 
517  const mp_integer start_index =
518  numeric_cast_v<mp_integer>(to_constant_expr(*start_index_expr_opt));
519 
520  if(start_index < 0 || start_index > end_index)
521  {
522  return false;
523  }
524 
525  const constant_exprt new_char_array_length =
526  from_integer(end_index - start_index, length_type);
527 
528  const array_typet new_char_array_type(char_type, new_char_array_length);
529 
530  exprt::operandst operands(
531  std::next(
532  s_data.operands().begin(), numeric_cast_v<std::size_t>(start_index)),
533  std::next(
534  s_data.operands().begin(), numeric_cast_v<std::size_t>(end_index)));
535 
536  const array_exprt new_char_array(std::move(operands), new_char_array_type);
537 
539  state,
540  symex_assign,
541  to_ssa_expr(f_l1.arguments().at(0)),
542  new_char_array_length,
543  to_ssa_expr(f_l1.arguments().at(1)),
544  new_char_array);
545 
546  return true;
547 }
548 
550  statet &state,
551  symex_assignt &symex_assign,
552  const function_application_exprt &f_l1)
553 {
554  // The function application expression f_l1 takes the following arguments:
555  // - result string length (output parameter)
556  // - result string data array (output parameter)
557  // - integer to convert to a string
558  // - radix (optional, default 10)
559  const std::size_t num_operands = f_l1.arguments().size();
560 
561  PRECONDITION(num_operands >= 3);
562  PRECONDITION(num_operands <= 4);
563 
564  const auto &f_type = f_l1.function_type();
565  const auto &length_type = f_type.domain().at(0);
566  const auto &char_type = to_pointer_type(f_type.domain().at(1)).subtype();
567 
568  const auto &integer_opt =
569  try_evaluate_constant(state, f_l1.arguments().at(2));
570 
571  if(!integer_opt)
572  {
573  return false;
574  }
575 
576  const mp_integer integer = numeric_cast_v<mp_integer>(integer_opt->get());
577 
578  unsigned base = 10;
579 
580  if(num_operands == 4)
581  {
582  const auto &base_constant_opt =
583  try_evaluate_constant(state, f_l1.arguments().at(3));
584 
585  if(!base_constant_opt)
586  {
587  return false;
588  }
589 
590  const auto base_opt = numeric_cast<unsigned>(base_constant_opt->get());
591 
592  if(!base_opt)
593  {
594  return false;
595  }
596 
597  base = *base_opt;
598  }
599 
600  std::string s = integer2string(integer, base);
601 
602  const constant_exprt new_char_array_length =
603  from_integer(s.length(), length_type);
604 
605  const array_typet new_char_array_type(char_type, new_char_array_length);
606 
607  exprt::operandst operands;
608 
609  std::transform(
610  s.begin(),
611  s.end(),
612  std::back_inserter(operands),
613  [&char_type](const char c) { return from_integer(tolower(c), char_type); });
614 
615  const array_exprt new_char_array(std::move(operands), new_char_array_type);
616 
618  state,
619  symex_assign,
620  to_ssa_expr(f_l1.arguments().at(0)),
621  new_char_array_length,
622  to_ssa_expr(f_l1.arguments().at(1)),
623  new_char_array);
624 
625  return true;
626 }
627 
629  statet &state,
630  symex_assignt &symex_assign,
631  const function_application_exprt &f_l1)
632 {
633  // The function application expression f_l1 takes the following arguments:
634  // - result string length (output parameter)
635  // - result string data array (output parameter)
636  // - string to delete char from
637  // - index of char to delete
638  PRECONDITION(f_l1.arguments().size() == 4);
639 
640  const auto &f_type = f_l1.function_type();
641  const auto &length_type = f_type.domain().at(0);
642  const auto &char_type = to_pointer_type(f_type.domain().at(1)).subtype();
643 
644  const refined_string_exprt &s = to_string_expr(f_l1.arguments().at(2));
645  const auto s_data_opt = try_evaluate_constant_string(state, s.content());
646 
647  if(!s_data_opt)
648  {
649  return false;
650  }
651 
652  const array_exprt &s_data = s_data_opt->get();
653 
654  const auto &index_opt = try_evaluate_constant(state, f_l1.arguments().at(3));
655 
656  if(!index_opt)
657  {
658  return false;
659  }
660 
661  const mp_integer index = numeric_cast_v<mp_integer>(index_opt->get());
662 
663  if(index < 0 || index >= s_data.operands().size())
664  {
665  return false;
666  }
667 
668  const constant_exprt new_char_array_length =
669  from_integer(s_data.operands().size() - 1, length_type);
670 
671  const array_typet new_char_array_type(char_type, new_char_array_length);
672 
673  exprt::operandst operands;
674  operands.reserve(s_data.operands().size() - 1);
675 
676  const std::size_t i = numeric_cast_v<std::size_t>(index);
677 
678  operands.insert(
679  operands.end(),
680  s_data.operands().begin(),
681  std::next(s_data.operands().begin(), i));
682 
683  operands.insert(
684  operands.end(),
685  std::next(s_data.operands().begin(), i + 1),
686  s_data.operands().end());
687 
688  const array_exprt new_char_array(std::move(operands), new_char_array_type);
689 
691  state,
692  symex_assign,
693  to_ssa_expr(f_l1.arguments().at(0)),
694  new_char_array_length,
695  to_ssa_expr(f_l1.arguments().at(1)),
696  new_char_array);
697 
698  return true;
699 }
700 
702  statet &state,
703  symex_assignt &symex_assign,
704  const function_application_exprt &f_l1)
705 {
706  // The function application expression f_l1 takes the following arguments:
707  // - result string length (output parameter)
708  // - result string data array (output parameter)
709  // - string to delete substring from
710  // - index of start of substring to delete (inclusive)
711  // - index of end of substring to delete (exclusive)
712  PRECONDITION(f_l1.arguments().size() == 5);
713 
714  const auto &f_type = f_l1.function_type();
715  const auto &length_type = f_type.domain().at(0);
716  const auto &char_type = to_pointer_type(f_type.domain().at(1)).subtype();
717 
718  const refined_string_exprt &s = to_string_expr(f_l1.arguments().at(2));
719  const auto s_data_opt = try_evaluate_constant_string(state, s.content());
720 
721  if(!s_data_opt)
722  {
723  return false;
724  }
725 
726  const array_exprt &s_data = s_data_opt->get();
727 
728  const auto &start_opt = try_evaluate_constant(state, f_l1.arguments().at(3));
729 
730  if(!start_opt)
731  {
732  return false;
733  }
734 
735  const mp_integer start = numeric_cast_v<mp_integer>(start_opt->get());
736 
737  if(start < 0 || start > s_data.operands().size())
738  {
739  return false;
740  }
741 
742  const auto &end_opt = try_evaluate_constant(state, f_l1.arguments().at(4));
743 
744  if(!end_opt)
745  {
746  return false;
747  }
748 
749  const mp_integer end = numeric_cast_v<mp_integer>(end_opt->get());
750 
751  if(start > end)
752  {
753  return false;
754  }
755 
756  const std::size_t start_index = numeric_cast_v<std::size_t>(start);
757 
758  const std::size_t end_index =
759  std::min(numeric_cast_v<std::size_t>(end), s_data.operands().size());
760 
761  const std::size_t new_size =
762  s_data.operands().size() - end_index + start_index;
763 
764  const constant_exprt new_char_array_length =
765  from_integer(new_size, length_type);
766 
767  const array_typet new_char_array_type(char_type, new_char_array_length);
768 
769  exprt::operandst operands;
770  operands.reserve(new_size);
771 
772  operands.insert(
773  operands.end(),
774  s_data.operands().begin(),
775  std::next(s_data.operands().begin(), start_index));
776 
777  operands.insert(
778  operands.end(),
779  std::next(s_data.operands().begin(), end_index),
780  s_data.operands().end());
781 
782  const array_exprt new_char_array(std::move(operands), new_char_array_type);
783 
785  state,
786  symex_assign,
787  to_ssa_expr(f_l1.arguments().at(0)),
788  new_char_array_length,
789  to_ssa_expr(f_l1.arguments().at(1)),
790  new_char_array);
791 
792  return true;
793 }
794 
796  statet &state,
797  symex_assignt &symex_assign,
798  const function_application_exprt &f_l1)
799 {
800  // The function application expression f_l1 takes the following arguments:
801  // - result string length (output parameter)
802  // - result string data array (output parameter)
803  // - current string
804  // - new length of the string
805  PRECONDITION(f_l1.arguments().size() == 4);
806 
807  const auto &f_type = f_l1.function_type();
808  const auto &length_type = f_type.domain().at(0);
809  const auto &char_type = to_pointer_type(f_type.domain().at(1)).subtype();
810 
811  const auto &new_length_opt =
812  try_evaluate_constant(state, f_l1.arguments().at(3));
813 
814  if(!new_length_opt)
815  {
816  return false;
817  }
818 
819  const mp_integer new_length =
820  numeric_cast_v<mp_integer>(new_length_opt->get());
821 
822  if(new_length < 0)
823  {
824  return false;
825  }
826 
827  const std::size_t new_size = numeric_cast_v<std::size_t>(new_length);
828 
829  const constant_exprt new_char_array_length =
830  from_integer(new_size, length_type);
831 
832  const array_typet new_char_array_type(char_type, new_char_array_length);
833 
834  exprt::operandst operands;
835 
836  if(new_size != 0)
837  {
838  operands.reserve(new_size);
839 
840  const refined_string_exprt &s = to_string_expr(f_l1.arguments().at(2));
841  const auto s_data_opt = try_evaluate_constant_string(state, s.content());
842 
843  if(!s_data_opt)
844  {
845  return false;
846  }
847 
848  const array_exprt &s_data = s_data_opt->get();
849 
850  operands.insert(
851  operands.end(),
852  s_data.operands().begin(),
853  std::next(
854  s_data.operands().begin(),
855  std::min(new_size, s_data.operands().size())));
856 
857  operands.insert(
858  operands.end(),
859  new_size - std::min(new_size, s_data.operands().size()),
860  from_integer(0, char_type));
861  }
862 
863  const array_exprt new_char_array(std::move(operands), new_char_array_type);
864 
866  state,
867  symex_assign,
868  to_ssa_expr(f_l1.arguments().at(0)),
869  new_char_array_length,
870  to_ssa_expr(f_l1.arguments().at(1)),
871  new_char_array);
872 
873  return true;
874 }
875 
877  statet &state,
878  symex_assignt &symex_assign,
879  const function_application_exprt &f_l1)
880 {
881  // The function application expression f_l1 takes the following arguments:
882  // - result string length (output parameter)
883  // - result string data array (output parameter)
884  // - current string
885  // - index of char to set
886  // - new char
887  PRECONDITION(f_l1.arguments().size() == 5);
888 
889  const auto &f_type = f_l1.function_type();
890  const auto &length_type = f_type.domain().at(0);
891  const auto &char_type = to_pointer_type(f_type.domain().at(1)).subtype();
892 
893  const refined_string_exprt &s = to_string_expr(f_l1.arguments().at(2));
894  const auto s_data_opt = try_evaluate_constant_string(state, s.content());
895 
896  if(!s_data_opt)
897  {
898  return false;
899  }
900 
901  array_exprt s_data = s_data_opt->get();
902 
903  const auto &index_opt = try_evaluate_constant(state, f_l1.arguments().at(3));
904 
905  if(!index_opt)
906  {
907  return false;
908  }
909 
910  const mp_integer index = numeric_cast_v<mp_integer>(index_opt->get());
911 
912  if(index < 0 || index >= s_data.operands().size())
913  {
914  return false;
915  }
916 
917  const auto &new_char_opt =
918  try_evaluate_constant(state, f_l1.arguments().at(4));
919 
920  if(!new_char_opt)
921  {
922  return false;
923  }
924 
925  const constant_exprt new_char_array_length =
926  from_integer(s_data.operands().size(), length_type);
927 
928  const array_typet new_char_array_type(char_type, new_char_array_length);
929 
930  s_data.operands()[numeric_cast_v<std::size_t>(index)] = new_char_opt->get();
931 
932  const array_exprt new_char_array(
933  std::move(s_data.operands()), new_char_array_type);
934 
936  state,
937  symex_assign,
938  to_ssa_expr(f_l1.arguments().at(0)),
939  new_char_array_length,
940  to_ssa_expr(f_l1.arguments().at(1)),
941  new_char_array);
942 
943  return true;
944 }
945 
947  statet &state,
948  symex_assignt &symex_assign,
949  const function_application_exprt &f_l1,
950  bool to_upper)
951 {
952  const auto &f_type = f_l1.function_type();
953  const auto &length_type = f_type.domain().at(0);
954  const auto &char_type = to_pointer_type(f_type.domain().at(1)).subtype();
955 
956  const refined_string_exprt &s = to_string_expr(f_l1.arguments().at(2));
957  const auto s_data_opt = try_evaluate_constant_string(state, s.content());
958 
959  if(!s_data_opt)
960  return false;
961 
962  array_exprt string_data = s_data_opt->get();
963 
964  auto &operands = string_data.operands();
965  for(auto &operand : operands)
966  {
967  auto &constant_value = to_constant_expr(operand);
968  auto character = numeric_cast_v<unsigned int>(constant_value);
969 
970  // Can't guarantee matches against non-ASCII characters.
971  if(character >= 128)
972  return false;
973 
974  if(isalpha(character))
975  {
976  if(to_upper)
977  {
978  if(islower(character))
979  constant_value =
980  from_integer(toupper(character), constant_value.type());
981  }
982  else
983  {
984  if(isupper(character))
985  constant_value =
986  from_integer(tolower(character), constant_value.type());
987  }
988  }
989  }
990 
991  const constant_exprt new_char_array_length =
992  from_integer(operands.size(), length_type);
993 
994  const array_typet new_char_array_type(char_type, new_char_array_length);
995  const array_exprt new_char_array(std::move(operands), new_char_array_type);
996 
998  state,
999  symex_assign,
1000  to_ssa_expr(f_l1.arguments().at(0)),
1001  new_char_array_length,
1002  to_ssa_expr(f_l1.arguments().at(1)),
1003  new_char_array);
1004 
1005  return true;
1006 }
1007 
1009  statet &state,
1010  symex_assignt &symex_assign,
1011  const function_application_exprt &f_l1)
1012 {
1013  const auto &f_type = f_l1.function_type();
1014  const auto &length_type = f_type.domain().at(0);
1015  const auto &char_type = to_pointer_type(f_type.domain().at(1)).subtype();
1016 
1017  const refined_string_exprt &s = to_string_expr(f_l1.arguments().at(2));
1018  const auto s_data_opt = try_evaluate_constant_string(state, s.content());
1019 
1020  if(!s_data_opt)
1021  return false;
1022 
1023  auto &new_data = f_l1.arguments().at(4);
1024  auto &old_data = f_l1.arguments().at(3);
1025 
1026  array_exprt::operandst characters_to_find;
1027  array_exprt::operandst characters_to_replace;
1028 
1029  // Two main ways to perform a replace: characters or strings.
1030  bool is_single_character = new_data.type().id() == ID_unsignedbv &&
1031  old_data.type().id() == ID_unsignedbv;
1032  if(is_single_character)
1033  {
1034  const auto new_char_pointer = try_evaluate_constant(state, new_data);
1035  const auto old_char_pointer = try_evaluate_constant(state, old_data);
1036 
1037  if(!new_char_pointer || !old_char_pointer)
1038  {
1039  return {};
1040  }
1041 
1042  characters_to_find.emplace_back(old_char_pointer->get());
1043  characters_to_replace.emplace_back(new_char_pointer->get());
1044  }
1045  else
1046  {
1047  auto &new_char_array = to_string_expr(new_data);
1048  auto &old_char_array = to_string_expr(old_data);
1049 
1050  const auto new_char_array_opt =
1051  try_evaluate_constant_string(state, new_char_array.content());
1052 
1053  const auto old_char_array_opt =
1054  try_evaluate_constant_string(state, old_char_array.content());
1055 
1056  if(!new_char_array_opt || !old_char_array_opt)
1057  {
1058  return {};
1059  }
1060 
1061  characters_to_find = old_char_array_opt->get().operands();
1062  characters_to_replace = new_char_array_opt->get().operands();
1063  }
1064 
1065  // Copy data, then do initial search for a replace sequence.
1066  array_exprt existing_data = s_data_opt->get();
1067  auto found_pattern = std::search(
1068  existing_data.operands().begin(),
1069  existing_data.operands().end(),
1070  characters_to_find.begin(),
1071  characters_to_find.end());
1072 
1073  // If we've found a match, proceed to perform a replace on all instances.
1074  while(found_pattern != existing_data.operands().end())
1075  {
1076  // Find the difference between our first/last match iterator.
1077  auto match_end = found_pattern + characters_to_find.size();
1078 
1079  // Erase them.
1080  found_pattern = existing_data.operands().erase(found_pattern, match_end);
1081 
1082  // Insert our replacement characters, then move the iterator to the end of
1083  // our new sequence.
1084  found_pattern = existing_data.operands().insert(
1085  found_pattern,
1086  characters_to_replace.begin(),
1087  characters_to_replace.end()) +
1088  characters_to_replace.size();
1089 
1090  // Then search from there for any additional matches.
1091  found_pattern = std::search(
1092  found_pattern,
1093  existing_data.operands().end(),
1094  characters_to_find.begin(),
1095  characters_to_find.end());
1096  }
1097 
1098  const constant_exprt new_char_array_length =
1099  from_integer(existing_data.operands().size(), length_type);
1100 
1101  const array_typet new_char_array_type(char_type, new_char_array_length);
1102  const array_exprt new_char_array(
1103  std::move(existing_data.operands()), new_char_array_type);
1104 
1106  state,
1107  symex_assign,
1108  to_ssa_expr(f_l1.arguments().at(0)),
1109  new_char_array_length,
1110  to_ssa_expr(f_l1.arguments().at(1)),
1111  new_char_array);
1112 
1113  return true;
1114 }
1115 
1117  statet &state,
1118  symex_assignt &symex_assign,
1119  const function_application_exprt &f_l1)
1120 {
1121  const auto &f_type = f_l1.function_type();
1122  const auto &length_type = f_type.domain().at(0);
1123  const auto &char_type = to_pointer_type(f_type.domain().at(1)).subtype();
1124 
1125  const refined_string_exprt &s = to_string_expr(f_l1.arguments().at(2));
1126  const auto s_data_opt = try_evaluate_constant_string(state, s.content());
1127 
1128  if(!s_data_opt)
1129  return false;
1130 
1131  auto is_not_whitespace = [](const exprt &expr) {
1132  auto character = numeric_cast_v<unsigned int>(to_constant_expr(expr));
1133  return character > ' ';
1134  };
1135 
1136  // Note the points where a trim would trim too.
1137  auto &operands = s_data_opt->get().operands();
1138  auto end_iter =
1139  std::find_if(operands.rbegin(), operands.rend(), is_not_whitespace);
1140  auto start_iter =
1141  std::find_if(operands.begin(), operands.end(), is_not_whitespace);
1142 
1143  // Then copy in the string with leading/trailing whitespace removed.
1144  // Note: if start_iter == operands.end it means the entire string is
1145  // whitespace, so we'll trim it to be empty anyway.
1146  exprt::operandst new_operands;
1147  if(start_iter != operands.end())
1148  new_operands = exprt::operandst{start_iter, end_iter.base()};
1149 
1150  const constant_exprt new_char_array_length =
1151  from_integer(new_operands.size(), length_type);
1152 
1153  const array_typet new_char_array_type(char_type, new_char_array_length);
1154  const array_exprt new_char_array(
1155  std::move(new_operands), new_char_array_type);
1156 
1158  state,
1159  symex_assign,
1160  to_ssa_expr(f_l1.arguments().at(0)),
1161  new_char_array_length,
1162  to_ssa_expr(f_l1.arguments().at(1)),
1163  new_char_array);
1164 
1165  return true;
1166 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
int16_t s2
Definition: bytecode_info.h:60
int8_t s1
Definition: bytecode_info.h:59
bitvector_typet index_type()
Definition: c_types.cpp:16
bitvector_typet char_type()
Definition: c_types.cpp:114
Operator to return the address of an object.
Definition: pointer_expr.h:200
Array constructor from list of elements.
Definition: std_expr.h:1382
const array_typet & type() const
Definition: std_expr.h:1389
Arrays with given size.
Definition: std_types.h:968
framet & top()
Definition: call_stack.h:17
A constant literal expression.
Definition: std_expr.h:2668
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
Equality.
Definition: std_expr.h:1140
Expression in which some part is missing and can be substituted for another expression.
Definition: expr_skeleton.h:26
Base class for all expressions.
Definition: expr.h:54
std::vector< exprt > operandst
Definition: expr.h:56
typet & type()
Return the type of the expression.
Definition: expr.h:82
operandst & operands()
Definition: expr.h:96
NODISCARD exprt apply(const namespacet &ns, goto_symex_statet &state, exprt expr, bool write) const
Turn an expression expr into a field-sensitive SSA expression.
Application of (mathematical) function.
const mathematical_function_typet & function_type() const
This helper method provides the type of the expression returned by function.
sharing_mapt< irep_idt, exprt > propagation
Definition: goto_state.h:67
Central data structure: state.
NODISCARD renamedt< exprt, level > rename(exprt expr, const namespacet &ns)
Rewrites symbol expressions in exprt, applying a suffix to each symbol reflecting its most recent ver...
NODISCARD exprt l2_rename_rvalues(exprt lvalue, const namespacet &ns)
call_stackt & call_stack()
symbol_tablet symbol_table
contains symbols that are minted during symbolic execution, such as dynamically created objects etc.
field_sensitivityt field_sensitivity
symex_targett::sourcet source
std::vector< threadt > threads
virtual void symex_assume(statet &state, const exprt &cond)
Symbolically execute an ASSUME instruction or simulate such an execution for a synthetic assumption.
Definition: symex_main.cpp:202
bool constant_propagate_delete_char_at(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1)
Attempt to constant propagate deleting a character from a string.
Definition: goto_symex.cpp:628
bool constant_propagate_set_char_at(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1)
Attempt to constant propagate setting the char at the given index.
Definition: goto_symex.cpp:876
bool constant_propagate_assignment_with_side_effects(statet &state, symex_assignt &symex_assign, const exprt &lhs, const exprt &rhs)
Attempt to constant propagate side effects of the assignment (if any)
Definition: goto_symex.cpp:162
bool constant_propagate_delete(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1)
Attempt to constant propagate deleting a substring from a string.
Definition: goto_symex.cpp:701
void constant_propagate_empty_string(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1)
Create an empty string constant.
Definition: goto_symex.cpp:384
symex_target_equationt & target
The equation that this execution is building up.
Definition: goto_symex.h:264
bool constant_propagate_case_change(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1, bool to_upper)
Attempt to constant propagate case changes, both upper and lower.
Definition: goto_symex.cpp:946
virtual void symex_allocate(statet &state, const exprt &lhs, const side_effect_exprt &code)
Symbolically execute an assignment instruction that has an allocate on the right hand side.
bool constant_propagate_integer_to_string(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1)
Attempt to constant propagate converting an integer to a string.
Definition: goto_symex.cpp:549
exprt clean_expr(exprt expr, statet &state, bool write)
Clean up an expression.
bool constant_propagate_trim(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1)
Attempt to constant propagate trim operations.
static optionalt< std::reference_wrapper< const constant_exprt > > try_evaluate_constant(const statet &state, const exprt &expr)
Definition: goto_symex.cpp:365
virtual void symex_va_start(statet &, const exprt &lhs, const side_effect_exprt &)
namespacet ns
Initialized just before symbolic execution begins, to point to both outer_symbol_table and the symbol...
Definition: goto_symex.h:256
static unsigned dynamic_counter
A monotonically increasing index for each created dynamic object.
Definition: goto_symex.h:789
void symex_assign(statet &state, const exprt &lhs, const exprt &rhs)
Symbolically execute an ASSIGN instruction or simulate such an execution for a synthetic assignment.
Definition: goto_symex.cpp:39
const symbolt & get_new_string_data_symbol(statet &state, symex_assignt &symex_assign, const std::string &aux_symbol_name, const ssa_exprt &char_array, const array_exprt &new_char_array)
Installs a new symbol in the symbol table to represent the given character array, and assigns the cha...
Definition: goto_symex.cpp:280
void associate_array_to_pointer(statet &state, symex_assignt &symex_assign, const array_exprt &new_char_array, const address_of_exprt &string_data)
Generate array to pointer association primitive.
Definition: goto_symex.cpp:316
virtual void do_simplify(exprt &expr)
Definition: goto_symex.cpp:33
virtual void symex_cpp_new(statet &state, const exprt &lhs, const side_effect_exprt &code)
Symbolically execute an assignment instruction that has a CPP new or new array or a Java new array on...
bool constant_propagate_set_length(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1)
Attempt to constant propagate setting the length of a string.
Definition: goto_symex.cpp:795
messaget log
The messaget to write log messages to.
Definition: goto_symex.h:276
const symex_configt symex_config
The configuration to use for this symbolic execution.
Definition: goto_symex.h:183
bool constant_propagate_string_substring(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1)
Attempt to constant propagate getting a substring of a string.
Definition: goto_symex.cpp:462
bool constant_propagate_replace(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1)
Attempt to constant proagate character replacement.
bool constant_propagate_string_concat(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1)
Attempt to constant propagate string concatenation.
Definition: goto_symex.cpp:412
void assign_string_constant(statet &state, symex_assignt &symex_assign, const ssa_exprt &length, const constant_exprt &new_length, const ssa_exprt &char_array, const array_exprt &new_char_array)
Assign constant string length and string data given by a char array to given ssa variables.
Definition: goto_symex.cpp:236
optionalt< std::reference_wrapper< const array_exprt > > try_evaluate_constant_string(const statet &state, const exprt &content)
Definition: goto_symex.cpp:344
Array index operator.
Definition: std_expr.h:1243
void set(const irep_namet &name, const irep_idt &value)
Definition: irep.h:431
const irep_idt & id() const
Definition: irep.h:407
const irep_idt & get(const irep_namet &name) const
Definition: irep.cpp:51
typet & codomain()
Return the codomain, i.e., the set of values that the function maps to (the "target").
void conditional_output(mstreamt &mstream, const std::function< void(mstreamt &)> &output_generator) const
Generate output to message_stream using output_generator if the configured verbosity is at least as h...
Definition: message.cpp:138
mstreamt & debug() const
Definition: message.h:429
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
Definition: namespace.cpp:140
const exprt & content() const
Definition: string_expr.h:138
An expression containing a side effect.
Definition: std_code.h:1898
const irep_idt & get_statement() const
Definition: std_code.h:1920
Expression providing an SSA-renamed symbol of expressions.
Definition: ssa_expr.h:17
const exprt & get_original_expr() const
Definition: ssa_expr.h:33
const irep_idt & get_identifier() const
Definition: std_expr.h:110
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
Symbol table entry.
Definition: symbol.h:28
bool is_file_local
Definition: symbol.h:66
bool is_static_lifetime
Definition: symbol.h:65
bool is_thread_local
Definition: symbol.h:65
bool is_state_var
Definition: symbol.h:62
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:122
typet type
Type of symbol.
Definition: symbol.h:31
bool is_lvalue
Definition: symbol.h:66
exprt value
Initial value of symbol.
Definition: symbol.h:34
irep_idt mode
Language mode.
Definition: symbol.h:49
Functor for symex assignment.
Definition: symex_assign.h:26
const typet & subtype() const
Definition: type.h:47
Expression skeleton.
symbolt & get_fresh_aux_symbol(const typet &type, const std::string &name_prefix, const std::string &basename_prefix, const source_locationt &source_location, const irep_idt &symbol_mode, const namespacet &ns, symbol_table_baset &symbol_table)
Installs a fresh-named symbol with respect to the given namespace ns with the requested name pattern ...
Fresh auxiliary symbol creation.
static std::string get_alnum_string(const array_exprt &char_array)
Maps the given array expression containing constant characters to a string containing only alphanumer...
Definition: goto_symex.cpp:130
Symbolic Execution.
const std::string & id2string(const irep_idt &d)
Definition: irep.h:49
API to expression classes for 'mathematical' expressions.
const function_application_exprt & to_function_application_expr(const exprt &expr)
Cast an exprt to a function_application_exprt.
Mathematical types.
const mathematical_function_typet & to_mathematical_function_type(const typet &type)
Cast a typet to a mathematical_function_typet.
const std::string integer2string(const mp_integer &n, unsigned base)
Definition: mp_arith.cpp:106
BigInt mp_integer
Definition: mp_arith.h:19
nonstd::optional< T > optionalt
Definition: optional.h:35
Pointer Logic.
bool simplify(exprt &expr, const namespacet &ns)
optionalt< std::reference_wrapper< const array_exprt > > try_get_string_data_array(const exprt &content, const namespacet &ns)
Get char sequence from content field of a refined string expression.
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:496
#define UNREACHABLE
This should be used to mark dead code.
Definition: invariant.h:504
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:511
#define PRECONDITION(CONDITION)
Definition: invariant.h:464
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition: invariant.h:424
const ssa_exprt & to_ssa_expr(const exprt &expr)
Cast a generic exprt to an ssa_exprt.
Definition: ssa_expr.h:145
side_effect_exprt & to_side_effect_expr(exprt &expr)
Definition: std_code.h:1954
const constant_exprt & to_constant_expr(const exprt &expr)
Cast an exprt to a constant_exprt.
Definition: std_expr.h:2701
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:190
const array_typet & to_array_type(const typet &type)
Cast a typet to an array_typet.
Definition: std_types.h:1018
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: std_types.h:1533
const integer_bitvector_typet & to_integer_bitvector_type(const typet &type)
Cast a typet to an integer_bitvector_typet.
Definition: std_types.h:1206
String expressions for the string solver.
refined_string_exprt & to_string_expr(exprt &expr)
Definition: string_expr.h:150
std::string escape_non_alnum(const std::string &to_escape)
Replace non-alphanumeric characters with _xx escapes, where xx are hex digits.
bool hidden_function
Definition: frame.h:34
goto_programt::const_targett pc
Definition: symex_target.h:43
Symbolic Execution of assignments.