cprover
string_builtin_function.cpp
Go to the documentation of this file.
1 
5 
6 #include <algorithm>
7 #include <iterator>
8 
10 
13  const exprt &return_code,
14  const std::vector<exprt> &fun_args,
15  array_poolt &array_pool)
16  : string_builtin_functiont(return_code, array_pool)
17 {
18  PRECONDITION(fun_args.size() > 2);
19  const auto arg1 = expr_checked_cast<struct_exprt>(fun_args[2]);
20  input = array_pool.find(arg1.op1(), arg1.op0());
21  result = array_pool.find(fun_args[1], fun_args[0]);
22 }
23 
25  const array_string_exprt &a,
26  const std::function<exprt(const exprt &)> &get_value)
27 {
28  if(a.id() == ID_if)
29  {
30  const if_exprt &ite = to_if_expr(a);
31  const exprt cond = get_value(ite.cond());
32  if(!cond.is_constant())
33  return {};
34  return cond.is_true()
35  ? eval_string(to_array_string_expr(ite.true_case()), get_value)
36  : eval_string(to_array_string_expr(ite.false_case()), get_value);
37  }
38 
39  const exprt content = get_value(a.content());
40  const auto &array = expr_try_dynamic_cast<array_exprt>(content);
41  if(!array)
42  return {};
43 
44  const auto &ops = array->operands();
45  std::vector<mp_integer> result;
46  const mp_integer unknown('?');
47  const auto &insert = std::back_inserter(result);
49  ops.begin(), ops.end(), insert, [unknown](const exprt &e) { // NOLINT
50  if(const auto i = numeric_cast<mp_integer>(e))
51  return *i;
52  return unknown;
53  });
54  return result;
55 }
56 
57 template <typename Iter>
58 static array_string_exprt
59 make_string(Iter begin, Iter end, const array_typet &array_type)
60 {
61  const typet &char_type = array_type.subtype();
62  array_exprt array_expr({}, array_type);
63  const auto &insert = std::back_inserter(array_expr.operands());
64  std::transform(begin, end, insert, [&](const mp_integer &i) {
65  return from_integer(i, char_type);
66  });
67  return to_array_string_expr(array_expr);
68 }
69 
71 make_string(const std::vector<mp_integer> &array, const array_typet &array_type)
72 {
73  return make_string(array.begin(), array.end(), array_type);
74 }
75 
77  const std::function<exprt(const exprt &)> &get_value) const
78 {
79  auto input_opt = eval_string(input, get_value);
80  if(!input_opt.has_value())
81  return {};
82  const mp_integer char_val = [&] {
83  if(const auto val = numeric_cast<mp_integer>(get_value(character)))
84  return *val;
85  INVARIANT(
86  get_value(character).id() == ID_unknown,
87  "character valuation should only contain constants and unknown");
89  }();
90  input_opt.value().push_back(char_val);
91  const auto length =
92  from_integer(input_opt.value().size(), result.length_type());
93  const array_typet type(result.type().subtype(), length);
94  return make_string(input_opt.value(), type);
95 }
96 
104  string_constraint_generatort &generator) const
105 {
108  constraints.universal.push_back([&] {
109  const symbol_exprt idx =
110  generator.fresh_symbol("QA_index_concat_char", result.length_type());
111  const exprt upper_bound =
113  return string_constraintt(
114  idx, upper_bound, equal_exprt(input[idx], result[idx]));
115  }());
116  constraints.existential.push_back(
118  constraints.existential.push_back(
120  return constraints;
121 }
122 
124 {
126 }
127 
129  const std::function<exprt(const exprt &)> &get_value) const
130 {
131  auto input_opt = eval_string(input, get_value);
132  const auto char_opt = numeric_cast<mp_integer>(get_value(character));
133  const auto position_opt = numeric_cast<mp_integer>(get_value(position));
134  if(!input_opt || !char_opt || !position_opt)
135  return {};
136  if(0 <= *position_opt && *position_opt < input_opt.value().size())
137  input_opt.value()[numeric_cast_v<std::size_t>(*position_opt)] = *char_opt;
138  const auto length =
139  from_integer(input_opt.value().size(), result.length_type());
140  const array_typet type(result.type().subtype(), length);
141  return make_string(input_opt.value(), type);
142 }
143 
154  string_constraint_generatort &generator) const
155 {
159  and_exprt(
164  constraints.universal.push_back([&] {
165  const symbol_exprt q =
166  generator.fresh_symbol("QA_char_set", position.type());
167  const equal_exprt a3_body(result[q], input[q]);
168  return string_constraintt(
169  q,
172  a3_body);
173  }());
174  constraints.universal.push_back([&] {
175  const symbol_exprt q2 =
176  generator.fresh_symbol("QA_char_set2", position.type());
177  const equal_exprt a4_body(result[q2], input[q2]);
178  return string_constraintt(
179  q2,
182  a4_body);
183  }());
184  return constraints;
185 }
186 
188 {
189  const exprt out_of_bounds = or_exprt(
193  position, ID_lt, from_integer(0, input.length_type())));
194  const exprt return_value = if_exprt(
195  out_of_bounds,
198  return and_exprt(
199  equal_exprt(
202  equal_exprt(return_code, return_value));
203 }
204 
205 static bool eval_is_upper_case(const mp_integer &c)
206 {
207  // Characters between 'A' and 'Z' are upper-case
208  // Characters between 0xc0 (latin capital A with grave)
209  // and 0xd6 (latin capital O with diaeresis) are upper-case
210  // Characters between 0xd8 (latin capital O with stroke)
211  // and 0xde (latin capital thorn) are upper-case
212  return ('A' <= c && c <= 'Z') || (0xc0 <= c && c <= 0xd6) ||
213  (0xd8 <= c && c <= 0xde);
214 }
215 
217  const std::function<exprt(const exprt &)> &get_value) const
218 {
219  auto input_opt = eval_string(input, get_value);
220  if(!input_opt)
221  return {};
222  for(mp_integer &c : input_opt.value())
223  {
224  if(eval_is_upper_case(c))
225  c += 0x20;
226  }
227  const auto length =
228  from_integer(input_opt.value().size(), result.length_type());
229  const array_typet type(result.type().subtype(), length);
230  return make_string(input_opt.value(), type);
231 }
232 
235 static exprt is_upper_case(const exprt &character)
236 {
237  const exprt char_A = from_integer('A', character.type());
238  const exprt char_Z = from_integer('Z', character.type());
239  exprt::operandst upper_case;
240  // Characters between 'A' and 'Z' are upper-case
241  upper_case.push_back(and_exprt(
242  binary_relation_exprt(char_A, ID_le, character),
243  binary_relation_exprt(character, ID_le, char_Z)));
244 
245  // Characters between 0xc0 (latin capital A with grave)
246  // and 0xd6 (latin capital O with diaeresis) are upper-case
247  upper_case.push_back(and_exprt(
249  from_integer(0xc0, character.type()), ID_le, character),
251  character, ID_le, from_integer(0xd6, character.type()))));
252 
253  // Characters between 0xd8 (latin capital O with stroke)
254  // and 0xde (latin capital thorn) are upper-case
255  upper_case.push_back(and_exprt(
257  from_integer(0xd8, character.type()), ID_le, character),
259  character, ID_le, from_integer(0xde, character.type()))));
260  return disjunction(upper_case);
261 }
262 
265 static exprt is_lower_case(const exprt &character)
266 {
267  return is_upper_case(
268  minus_exprt(character, from_integer(0x20, character.type())));
269 }
270 
282  string_constraint_generatort &generator) const
283 {
284  // \todo for now, only characters in Basic Latin and Latin-1 supplement
285  // are supported (up to 0x100), we should add others using case mapping
286  // information from the UnicodeData file.
289  constraints.universal.push_back([&] {
290  const symbol_exprt idx =
291  generator.fresh_symbol("QA_lower_case", result.length_type());
292  const exprt conditional_convert = [&] {
293  // The difference between upper-case and lower-case for the basic
294  // latin and latin-1 supplement is 0x20.
295  const typet &char_type = result.type().subtype();
296  const exprt diff = from_integer(0x20, char_type);
297  const exprt converted =
298  equal_exprt(result[idx], plus_exprt(input[idx], diff));
299  const exprt non_converted = equal_exprt(result[idx], input[idx]);
300  return if_exprt(is_upper_case(input[idx]), converted, non_converted);
301  }();
302  return string_constraintt(
303  idx,
305  conditional_convert);
306  }());
307  return constraints;
308 }
309 
311  const std::function<exprt(const exprt &)> &get_value) const
312 {
313  auto input_opt = eval_string(input, get_value);
314  if(!input_opt)
315  return {};
316  for(mp_integer &c : input_opt.value())
317  {
318  if(eval_is_upper_case(c - 0x20))
319  c -= 0x20;
320  }
321  const auto length =
322  from_integer(input_opt.value().size(), result.length_type());
323  const array_typet type(result.type().subtype(), length);
324  return make_string(input_opt.value(), type);
325 }
326 
337  symbol_generatort &fresh_symbol) const
338 {
341  constraints.universal.push_back([&] {
342  const symbol_exprt idx =
343  fresh_symbol("QA_upper_case", result.length_type());
344  const typet &char_type = input.content().type().subtype();
345  const exprt converted =
346  minus_exprt(input[idx], from_integer(0x20, char_type));
347  return string_constraintt(
348  idx,
350  equal_exprt(
351  result[idx],
352  if_exprt(is_lower_case(input[idx]), converted, input[idx])));
353  }());
354  return constraints;
355 }
356 
363  const exprt &return_code,
364  const std::vector<exprt> &fun_args,
365  array_poolt &array_pool)
366  : string_builtin_functiont(return_code, array_pool)
367 {
368  PRECONDITION(fun_args.size() >= 3);
369  result = array_pool.find(fun_args[1], fun_args[0]);
370  arg = fun_args[2];
371 }
372 
374  const std::function<exprt(const exprt &)> &get_value) const
375 {
376  const auto arg_value = numeric_cast<mp_integer>(get_value(arg));
377  if(!arg_value)
378  return {};
379  static std::string digits = "0123456789abcdefghijklmnopqrstuvwxyz";
380  const auto radix_value = numeric_cast<mp_integer>(get_value(radix));
381  if(!radix_value || *radix_value > digits.length())
382  return {};
383 
384  mp_integer current_value = *arg_value;
385  std::vector<mp_integer> right_to_left_characters;
386  if(current_value < 0)
387  current_value = -current_value;
388  if(current_value == 0)
389  right_to_left_characters.emplace_back('0');
390  while(current_value > 0)
391  {
392  const auto digit_value = (current_value % *radix_value).to_ulong();
393  right_to_left_characters.emplace_back(digits.at(digit_value));
394  current_value /= *radix_value;
395  }
396  if(*arg_value < 0)
397  right_to_left_characters.emplace_back('-');
398 
399  const auto length = right_to_left_characters.size();
400  const auto length_expr = from_integer(length, result.length_type());
401  const array_typet type(result.type().subtype(), length_expr);
402  return make_string(
403  right_to_left_characters.rbegin(), right_to_left_characters.rend(), type);
404 }
405 
407  string_constraint_generatort &generator) const
408 {
409  auto pair =
411  pair.second.existential.push_back(equal_exprt(pair.first, return_code));
412  return pair.second;
413 }
414 
416 {
417  const typet &type = result.length_type();
418  const auto radix_opt = numeric_cast<std::size_t>(radix);
419  const auto radix_value = radix_opt.has_value() ? radix_opt.value() : 2;
420  const std::size_t upper_bound =
421  max_printed_string_length(arg.type(), radix_value);
422  const exprt negative_arg =
423  binary_relation_exprt(arg, ID_le, from_integer(0, type));
424  const exprt absolute_arg =
425  if_exprt(negative_arg, unary_minus_exprt(arg), arg);
426 
427  exprt size_expr = from_integer(1, type);
428  exprt min_int_with_current_size = from_integer(1, radix.type());
429  for(std::size_t current_size = 2; current_size <= upper_bound + 1;
430  ++current_size)
431  {
432  min_int_with_current_size = mult_exprt(radix, min_int_with_current_size);
433  const exprt at_least_current_size =
434  binary_relation_exprt(absolute_arg, ID_ge, min_int_with_current_size);
435  size_expr = if_exprt(
436  at_least_current_size, from_integer(current_size, type), size_expr);
437  }
438 
439  const exprt size_expr_with_sign = if_exprt(
440  negative_arg, plus_exprt(size_expr, from_integer(1, type)), size_expr);
441  return equal_exprt(
442  array_pool.get_or_create_length(result), size_expr_with_sign);
443 }
444 
446  const exprt &return_code,
448  array_poolt &array_pool)
449  : string_builtin_functiont(return_code, array_pool), function_application(f)
450 {
451  const std::vector<exprt> &fun_args = f.arguments();
452  std::size_t i = 0;
453  if(fun_args.size() >= 2 && fun_args[1].type().id() == ID_pointer)
454  {
455  string_res = array_pool.find(fun_args[1], fun_args[0]);
456  i = 2;
457  }
458 
459  for(; i < fun_args.size(); ++i)
460  {
461  const auto arg = expr_try_dynamic_cast<struct_exprt>(fun_args[i]);
462  // TODO: use is_refined_string_type ?
463  if(
464  arg && arg->operands().size() == 2 &&
465  arg->op1().type().id() == ID_pointer)
466  {
467  INVARIANT(is_refined_string_type(arg->type()), "should be a string");
468  string_args.push_back(array_pool.find(arg->op1(), arg->op0()));
469  }
470  else
471  args.push_back(fun_args[i]);
472  }
473 }
474 
476  string_constraint_generatort &generator) const
477 {
478  auto pair =
480  pair.second.existential.push_back(equal_exprt(pair.first, return_code));
481  return pair.second;
482 }
static abstract_object_pointert transform(const exprt &expr, const std::vector< abstract_object_pointert > &operands, const abstract_environmentt &environment, const namespacet &ns)
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
bitvector_typet char_type()
Definition: c_types.cpp:114
Boolean AND.
Definition: std_expr.h:1920
Array constructor from list of elements.
Definition: std_expr.h:1467
Correspondance between arrays and pointers string representations.
Definition: array_pool.h:42
exprt get_or_create_length(const array_string_exprt &s)
Get the length of an array_string_exprt from the array_pool.
Definition: array_pool.cpp:26
array_string_exprt find(const exprt &pointer, const exprt &length)
Creates a new array if the pointer is not pointing to an array.
Definition: array_pool.cpp:184
const typet & length_type() const
Definition: string_expr.h:69
exprt & content()
Definition: string_expr.h:74
Arrays with given size.
Definition: std_types.h:763
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition: std_expr.h:674
Equality.
Definition: std_expr.h:1225
Base class for all expressions.
Definition: expr.h:54
std::vector< exprt > operandst
Definition: expr.h:56
bool is_true() const
Return whether the expression is a constant representing true.
Definition: expr.cpp:33
typet & type()
Return the type of the expression.
Definition: expr.h:82
bool is_constant() const
Return whether the expression is a constant.
Definition: expr.cpp:26
Application of (mathematical) function.
The trinary if-then-else operator.
Definition: std_expr.h:2172
exprt & true_case()
Definition: std_expr.h:2199
exprt & false_case()
Definition: std_expr.h:2209
exprt & cond()
Definition: std_expr.h:2189
Boolean implication.
Definition: std_expr.h:1983
const irep_idt & id() const
Definition: irep.h:407
Binary minus.
Definition: std_expr.h:973
Binary multiplication Associativity is not specified.
Definition: std_expr.h:1019
Boolean OR.
Definition: std_expr.h:2028
The plus expression Associativity is not specified.
Definition: std_expr.h:914
std::vector< array_string_exprt > string_args
optionalt< array_string_exprt > string_res
string_builtin_function_with_no_evalt(const exprt &return_code, const function_application_exprt &f, array_poolt &array_pool)
function_application_exprt function_application
string_constraintst constraints(string_constraint_generatort &generator) const override
Add constraints ensuring that the value of result expression of the builtin function corresponds to t...
Base class for string functions that are built in the solver.
optionalt< exprt > eval(const std::function< exprt(const exprt &)> &get_value) const override
Given a function get_value which gives a valuation to expressions, attempt to find the result of the ...
string_constraintst constraints(string_constraint_generatort &generator) const override
Set of constraints enforcing that result is the concatenation of input with character.
exprt length_constraint() const override
Constraint ensuring that the length of the strings are coherent with the function call.
std::pair< exprt, string_constraintst > add_axioms_for_string_of_int_with_radix(const array_string_exprt &res, const exprt &input_int, const exprt &radix, size_t max_size)
Add axioms enforcing that the string corresponds to the result of String.valueOf(II) or String....
std::pair< exprt, string_constraintst > add_axioms_for_function_application(const function_application_exprt &expr)
strings contained in this call are converted to objects of type string_exprt, through adding axioms.
string_creation_builtin_functiont(const exprt &return_code, const std::vector< exprt > &fun_args, array_poolt &array_pool)
Constructor from arguments of a function application.
exprt length_constraint() const override
Constraint ensuring that the length of the strings are coherent with the function call.
string_constraintst constraints(string_constraint_generatort &generator) const override
Add constraints ensuring that the value of result expression of the builtin function corresponds to t...
optionalt< exprt > eval(const std::function< exprt(const exprt &)> &get_value) const override
Given a function get_value which gives a valuation to expressions, attempt to find the result of the ...
exprt length_constraint() const override
Constraint ensuring that the length of the strings are coherent with the function call.
optionalt< exprt > eval(const std::function< exprt(const exprt &)> &get_value) const override
Given a function get_value which gives a valuation to expressions, attempt to find the result of the ...
string_constraintst constraints(string_constraint_generatort &generator) const override
Set of constraints ensuring that result is similar to input where the character at index position is ...
optionalt< exprt > eval(const std::function< exprt(const exprt &)> &get_value) const override
Given a function get_value which gives a valuation to expressions, attempt to find the result of the ...
string_constraintst constraints(string_constraint_generatort &generator) const override
Set of constraints ensuring result corresponds to input in which uppercase characters have been conve...
exprt length_constraint() const override
Constraint ensuring that the length of the strings are coherent with the function call.
string_constraintst constraints(class symbol_generatort &fresh_symbol) const
Set of constraints ensuring result corresponds to input in which lowercase characters of Basic Latin ...
optionalt< exprt > eval(const std::function< exprt(const exprt &)> &get_value) const override
Given a function get_value which gives a valuation to expressions, attempt to find the result of the ...
exprt length_constraint() const override
Constraint ensuring that the length of the strings are coherent with the function call.
string_transformation_builtin_functiont(exprt return_code, array_string_exprt result, array_string_exprt input, array_poolt &array_pool)
Expression to hold a symbol (variable)
Definition: std_expr.h:80
Generation of fresh symbols of a given type.
Definition: array_pool.h:22
The type of an expression, extends irept.
Definition: type.h:28
const typet & subtype() const
Definition: type.h:47
The unary minus expression.
Definition: std_expr.h:390
nonstd::optional< T > optionalt
Definition: optional.h:35
bool is_refined_string_type(const typet &type)
BigInt mp_integer
Definition: smt_terms.h:12
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
exprt disjunction(const exprt::operandst &op)
1) generates a disjunction for two or more operands 2) for one operand, returns the operand 3) return...
Definition: std_expr.cpp:22
const if_exprt & to_if_expr(const exprt &expr)
Cast an exprt to an if_exprt.
Definition: std_expr.h:2237
static exprt is_lower_case(const exprt &character)
Expression which is true for lower_case characters of the Basic Latin and Latin-1 supplement of unico...
static array_string_exprt make_string(Iter begin, Iter end, const array_typet &array_type)
static exprt is_upper_case(const exprt &character)
Expression which is true for uppercase characters of the Basic Latin and Latin-1 supplement of unicod...
static bool eval_is_upper_case(const mp_integer &c)
optionalt< std::vector< mp_integer > > eval_string(const array_string_exprt &a, const std::function< exprt(const exprt &)> &get_value)
Given a function get_value which gives a valuation to expressions, attempt to find the current value ...
#define CHARACTER_FOR_UNKNOWN
Module: String solver Author: Diffblue Ltd.
exprt length_constraint_for_concat_char(const array_string_exprt &res, const array_string_exprt &s1, array_poolt &array_pool)
Add axioms enforcing that the length of res is that of the concatenation of s1 with.
Generates string constraints to link results from string functions with their arguments.
exprt minimum(const exprt &a, const exprt &b)
exprt zero_if_negative(const exprt &expr)
Returns a non-negative version of the argument.
size_t max_printed_string_length(const typet &type, unsigned long ul_radix)
Calculate the string length needed to represent any value of the given type using the given radix.
array_string_exprt & to_array_string_expr(exprt &expr)
Definition: string_expr.h:95
Collection of constraints of different types: existential formulas, universal formulas,...
std::vector< exprt > existential
std::vector< string_constraintt > universal