cprover
java_bytecode_convert_class.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: JAVA Bytecode Language Conversion
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
13 
14 #ifdef DEBUG
15 #include <iostream>
16 #endif
17 
18 #include "java_root_class.h"
19 #include "java_types.h"
21 #include "java_bytecode_language.h"
22 #include "java_utils.h"
23 
24 #include <util/arith_tools.h>
25 #include <util/c_types.h>
26 #include <util/expr_initializer.h>
27 #include <util/namespace.h>
28 #include <util/std_expr.h>
29 #include <util/suffix.h>
30 
32 {
33 public:
35  symbol_tablet &_symbol_table,
36  message_handlert &_message_handler,
37  size_t _max_array_length,
39  java_string_library_preprocesst &_string_preprocess,
40  const std::unordered_set<std::string> &no_load_classes)
41  : messaget(_message_handler),
42  symbol_table(_symbol_table),
43  max_array_length(_max_array_length),
45  string_preprocess(_string_preprocess),
47  {
48  }
49 
65  void operator()(
67  {
68  PRECONDITION(!parse_trees.empty());
69  const java_bytecode_parse_treet &parse_tree = parse_trees.front();
70 
71  // Add array types to the symbol table
73 
74  const bool loading_success =
75  parse_tree.loading_successful &&
76  !no_load_classes.count(id2string(parse_tree.parsed_class.name));
77  if(loading_success)
78  {
79  overlay_classest overlay_classes;
80  for(auto overlay_class_it = std::next(parse_trees.begin());
81  overlay_class_it != parse_trees.end();
82  ++overlay_class_it)
83  {
84  overlay_classes.push_front(std::cref(overlay_class_it->parsed_class));
85  }
86  convert(parse_tree.parsed_class, overlay_classes);
87  }
88 
89  // Add as string type if relevant
90  const irep_idt &class_name = parse_tree.parsed_class.name;
93  else if(!loading_success)
94  // Generate stub if couldn't load from bytecode and wasn't string type
96  class_name,
100  }
101 
106 
107 private:
109  const size_t max_array_length;
112 
113  // conversion
114  typedef std::list<std::reference_wrapper<const classt>> overlay_classest;
115  void convert(const classt &c, const overlay_classest &overlay_classes);
116  void convert(symbolt &class_symbol, const fieldt &f);
117 
118  // see definition below for more info
120 
121  static bool is_overlay_method(const methodt &method);
122 
123  bool check_field_exists(
124  const fieldt &field,
125  const irep_idt &qualified_fieldname,
126  const struct_union_typet::componentst &fields) const;
127 
128  std::unordered_set<std::string> no_load_classes;
129 };
130 
143 {
144  if(signature.has_value())
145  {
146  // skip the (potential) list of generic parameters at the beginning of the
147  // signature
148  const size_t start =
149  signature.value().front() == '<'
150  ? find_closing_delimiter(signature.value(), 0, '<', '>') + 1
151  : 0;
152 
153  // extract the superclass reference
154  const size_t end =
155  find_closing_semi_colon_for_reference_type(signature.value(), start);
156  const std::string superclass_ref =
157  signature.value().substr(start, (end - start) + 1);
158 
159  // if the superclass is generic then the reference is of form
160  // `Lsuperclass-name<generic-types;>;` if it is implicitly generic, then the
161  // reference is of the form
162  // `Lsuperclass-name<Tgeneric-types;>.Innerclass-Name`
163  if(superclass_ref.find('<') != std::string::npos)
164  return superclass_ref;
165  }
166  return {};
167 }
168 
182  const optionalt<std::string> &signature,
183  const std::string &interface_name)
184 {
185  if(signature.has_value())
186  {
187  // skip the (potential) list of generic parameters at the beginning of the
188  // signature
189  size_t start =
190  signature.value().front() == '<'
191  ? find_closing_delimiter(signature.value(), 0, '<', '>') + 1
192  : 0;
193 
194  // skip the superclass reference (if there is at least one interface
195  // reference in the signature, then there is a superclass reference)
196  start =
197  find_closing_semi_colon_for_reference_type(signature.value(), start) + 1;
198 
199  // if the interface name includes package name, convert dots to slashes
200  std::string interface_name_slash_to_dot = interface_name;
201  std::replace(
202  interface_name_slash_to_dot.begin(),
203  interface_name_slash_to_dot.end(),
204  '.',
205  '/');
206 
207  start =
208  signature.value().find("L" + interface_name_slash_to_dot + "<", start);
209  if(start != std::string::npos)
210  {
211  const size_t &end =
212  find_closing_semi_colon_for_reference_type(signature.value(), start);
213  return signature.value().substr(start, (end - start) + 1);
214  }
215  }
216  return {};
217 }
218 
223  const classt &c,
224  const overlay_classest &overlay_classes)
225 {
226  std::string qualified_classname="java::"+id2string(c.name);
227  if(symbol_table.has_symbol(qualified_classname))
228  {
229  debug() << "Skip class " << c.name << " (already loaded)" << eom;
230  return;
231  }
232 
233  java_class_typet class_type;
234  if(c.signature.has_value() && c.signature.value()[0]=='<')
235  {
236  java_generic_class_typet generic_class_type;
237 #ifdef DEBUG
238  std::cout << "INFO: found generic class signature "
239  << c.signature.value()
240  << " in parsed class "
241  << c.name << "\n";
242 #endif
243  try
244  {
245  const std::vector<typet> &generic_types=java_generic_type_from_string(
246  id2string(c.name),
247  c.signature.value());
248  for(const typet &t : generic_types)
249  {
250  generic_class_type.generic_types()
251  .push_back(to_java_generic_parameter(t));
252  }
253  class_type=generic_class_type;
254  }
256  {
257  warning() << "Class: " << c.name
258  << "\n could not parse signature: " << c.signature.value()
259  << "\n " << e.what() << "\n ignoring that the class is generic"
260  << eom;
261  }
262  }
263 
264  class_type.set_tag(c.name);
265  class_type.set(ID_base_name, c.name);
266  class_type.set(ID_abstract, c.is_abstract);
267  class_type.set(ID_is_annotation, c.is_annotation);
268  class_type.set(ID_interface, c.is_interface);
269  class_type.set(ID_synthetic, c.is_synthetic);
270  class_type.set_final(c.is_final);
271  if(c.is_enum)
272  {
274  {
275  warning() << "Java Enum " << c.name << " won't work properly because max "
276  << "array length (" << max_array_length << ") is less than the "
277  << "enum size (" << c.enum_elements << ")" << eom;
278  }
279  class_type.set(
280  ID_java_enum_static_unwind,
282  class_type.set(ID_enumeration, true);
283  }
284 
285  if(c.is_public)
286  class_type.set_access(ID_public);
287  else if(c.is_protected)
288  class_type.set_access(ID_protected);
289  else if(c.is_private)
290  class_type.set_access(ID_private);
291  else
292  class_type.set_access(ID_default);
293 
294  if(!c.extends.empty())
295  {
296  const symbol_typet base("java::" + id2string(c.extends));
297 
298  // if the superclass is generic then the class has the superclass reference
299  // including the generic info in its signature
300  // e.g., signature for class 'A<T>' that extends
301  // 'Generic<Integer>' is '<T:Ljava/lang/Object;>LGeneric<LInteger;>;'
302  const optionalt<std::string> &superclass_ref =
304  if(superclass_ref.has_value())
305  {
306  try
307  {
308  const java_generic_symbol_typet generic_base(
309  base, superclass_ref.value(), qualified_classname);
310  class_type.add_base(generic_base);
311  }
313  {
314  warning() << "Superclass: " << c.extends << " of class: " << c.name
315  << "\n could not parse signature: " << superclass_ref.value()
316  << "\n " << e.what()
317  << "\n ignoring that the superclass is generic" << eom;
318  class_type.add_base(base);
319  }
320  }
321  else
322  {
323  class_type.add_base(base);
324  }
325  class_typet::componentt base_class_field;
326  base_class_field.type() = class_type.bases().at(0).type();
327  base_class_field.set_name("@"+id2string(c.extends));
328  base_class_field.set_base_name("@"+id2string(c.extends));
329  base_class_field.set_pretty_name("@"+id2string(c.extends));
330  class_type.components().push_back(base_class_field);
331  }
332 
333  // interfaces are recorded as bases
334  for(const auto &interface : c.implements)
335  {
336  const symbol_typet base("java::" + id2string(interface));
337 
338  // if the interface is generic then the class has the interface reference
339  // including the generic info in its signature
340  // e.g., signature for class 'A implements GenericInterface<Integer>' is
341  // 'Ljava/lang/Object;LGenericInterface<LInteger;>;'
342  const optionalt<std::string> interface_ref =
344  if(interface_ref.has_value())
345  {
346  try
347  {
348  const java_generic_symbol_typet generic_base(
349  base, interface_ref.value(), qualified_classname);
350  class_type.add_base(generic_base);
351  }
353  {
354  warning() << "Interface: " << interface << " of class: " << c.name
355  << "\n could not parse signature: " << interface_ref.value()
356  << "\n " << e.what()
357  << "\n ignoring that the interface is generic" << eom;
358  class_type.add_base(base);
359  }
360  }
361  else
362  {
363  class_type.add_base(base);
364  }
365  }
366 
367  // now do lambda method handles (bootstrap methods)
368  for(const auto &lambda_entry : c.lambda_method_handle_map)
369  {
370  // if the handle is of unknown type, we still need to store it to preserve
371  // the correct indexing (invokedynamic instructions will retrieve
372  // method handles by index)
373  lambda_entry.second.is_unknown_handle()
374  ? class_type.add_unknown_lambda_method_handle()
375  : class_type.add_lambda_method_handle(
376  "java::" + id2string(lambda_entry.second.lambda_method_ref));
377  }
378 
379  // Load annotations
380  if(!c.annotations.empty())
381  convert_annotations(c.annotations, class_type.get_annotations());
382 
383  // the base name is the name of the class without the package
384  const irep_idt base_name = [](const std::string &full_name) {
385  const size_t last_dot = full_name.find_last_of('.');
386  return last_dot == std::string::npos
387  ? full_name
388  : std::string(full_name, last_dot + 1, std::string::npos);
389  }(id2string(c.name));
390 
391  // produce class symbol
392  symbolt new_symbol;
393  new_symbol.base_name = base_name;
394  new_symbol.pretty_name=c.name;
395  new_symbol.name=qualified_classname;
396  class_type.set(ID_name, new_symbol.name);
397  new_symbol.type=class_type;
398  new_symbol.mode=ID_java;
399  new_symbol.is_type=true;
400 
401  symbolt *class_symbol;
402 
403  // add before we do members
404  debug() << "Adding symbol: class '" << c.name << "'" << eom;
405  if(symbol_table.move(new_symbol, class_symbol))
406  {
407  error() << "failed to add class symbol " << new_symbol.name << eom;
408  throw 0;
409  }
410 
411  // Now do fields
412  const class_typet::componentst &fields =
413  to_class_type(class_symbol->type).components();
414  // Include fields from overlay classes as they will be required by the
415  // methods defined there
416  for(auto overlay_class : overlay_classes)
417  {
418  for(const auto &field : overlay_class.get().fields)
419  {
420  std::string field_id = qualified_classname + "." + id2string(field.name);
421  if(check_field_exists(field, field_id, fields))
422  {
423  std::string err =
424  "Duplicate field definition for " + field_id + " in overlay class";
425  // TODO: This could just be a warning if the types match
426  error() << err << eom;
427  throw err.c_str();
428  }
429  debug()
430  << "Adding symbol from overlay class: field '" << field.name << "'"
431  << eom;
432  convert(*class_symbol, field);
433  POSTCONDITION(check_field_exists(field, field_id, fields));
434  }
435  }
436  for(const auto &field : c.fields)
437  {
438  std::string field_id = qualified_classname + "." + id2string(field.name);
439  if(check_field_exists(field, field_id, fields))
440  {
441  // TODO: This could be a warning if the types match
442  error()
443  << "Field definition for " << field_id
444  << " already loaded from overlay class" << eom;
445  continue;
446  }
447  debug() << "Adding symbol: field '" << field.name << "'" << eom;
448  convert(*class_symbol, field);
449  POSTCONDITION(check_field_exists(field, field_id, fields));
450  }
451 
452  // Now do methods
453  std::set<irep_idt> overlay_methods;
454  for(auto overlay_class : overlay_classes)
455  {
456  for(const methodt &method : overlay_class.get().methods)
457  {
458  const irep_idt method_identifier =
459  qualified_classname + "." + id2string(method.name)
460  + ":" + method.descriptor;
461  if(method_bytecode.contains_method(method_identifier))
462  {
463  // This method has already been discovered and added to method_bytecode
464  // while parsing an overlay class that appears later in the classpath
465  // (we're working backwards)
466  // Warn the user if the definition already found was not an overlay,
467  // otherwise simply don't load this earlier definition
468  if(overlay_methods.count(method_identifier) == 0)
469  {
470  // This method was defined in a previous class definition without
471  // being marked as an overlay method
472  warning()
473  << "Method " << method_identifier
474  << " exists in an overlay class without being marked as an "
475  "overlay and also exists in another overlay class that appears "
476  "earlier in the classpath"
477  << eom;
478  }
479  continue;
480  }
481  // Always run the lazy pre-stage, as it symbol-table
482  // registers the function.
483  debug()
484  << "Adding symbol from overlay class: method '" << method_identifier
485  << "'" << eom;
486  java_bytecode_convert_method_lazy(
487  *class_symbol,
488  method_identifier,
489  method,
490  symbol_table,
491  get_message_handler());
492  method_bytecode.add(qualified_classname, method_identifier, method);
493  if(is_overlay_method(method))
494  overlay_methods.insert(method_identifier);
495  }
496  }
497  for(const methodt &method : c.methods)
498  {
499  const irep_idt method_identifier=
500  qualified_classname + "." + id2string(method.name)
501  + ":" + method.descriptor;
502  if(method_bytecode.contains_method(method_identifier))
503  {
504  // This method has already been discovered while parsing an overlay
505  // class
506  // If that definition is an overlay then we simply don't load this
507  // original definition and we remove it from the list of overlays
508  if(overlay_methods.erase(method_identifier) == 0)
509  {
510  // This method was defined in a previous class definition without
511  // being marked as an overlay method
512  warning()
513  << "Method " << method_identifier
514  << " exists in an overlay class without being marked as an overlay "
515  "and also exists in the underlying class"
516  << eom;
517  }
518  continue;
519  }
520  // Always run the lazy pre-stage, as it symbol-table
521  // registers the function.
522  debug() << "Adding symbol: method '" << method_identifier << "'" << eom;
523  java_bytecode_convert_method_lazy(
524  *class_symbol,
525  method_identifier,
526  method,
527  symbol_table,
528  get_message_handler());
529  method_bytecode.add(qualified_classname, method_identifier, method);
530  if(is_overlay_method(method))
531  {
532  warning()
533  << "Method " << method_identifier
534  << " marked as an overlay where defined in the underlying class" << eom;
535  }
536  }
537  if(!overlay_methods.empty())
538  {
539  error()
540  << "Overlay methods defined in overlay classes did not exist in the "
541  "underlying class:\n";
542  for(const irep_idt &method_id : overlay_methods)
543  error() << " " << method_id << "\n";
544  error() << eom;
545  }
546 
547  // is this a root class?
548  if(c.extends.empty())
549  java_root_class(*class_symbol);
550 }
551 
552 bool java_bytecode_convert_classt::check_field_exists(
553  const java_bytecode_parse_treet::fieldt &field,
554  const irep_idt &qualified_fieldname,
555  const struct_union_typet::componentst &fields) const
556 {
557  if(field.is_static)
558  return symbol_table.has_symbol(qualified_fieldname);
559 
560  auto existing_field = std::find_if(
561  fields.begin(),
562  fields.end(),
563  [&field](const struct_union_typet::componentt &f)
564  {
565  return f.get_name() == field.name;
566  });
567  return existing_field != fields.end();
568 }
569 
573 void java_bytecode_convert_classt::convert(
574  symbolt &class_symbol,
575  const fieldt &f)
576 {
577  typet field_type;
578  if(f.signature.has_value())
579  {
580  field_type=java_type_from_string_with_exception(
581  f.descriptor,
582  f.signature,
583  id2string(class_symbol.name));
584 
586  if(is_java_generic_parameter(field_type))
587  {
588 #ifdef DEBUG
589  std::cout << "fieldtype: generic "
590  << to_java_generic_parameter(field_type).type_variable()
591  .get_identifier()
592  << " name " << f.name << "\n";
593 #endif
594  }
595 
598  else if(is_java_generic_type(field_type))
599  {
600  java_generic_typet &with_gen_type=
601  to_java_generic_type(field_type);
602 #ifdef DEBUG
603  std::cout << "fieldtype: generic container type "
604  << std::to_string(with_gen_type.generic_type_arguments().size())
605  << " type " << with_gen_type.id()
606  << " name " << f.name
607  << " subtype id " << with_gen_type.subtype().id() << "\n";
608 #endif
609  field_type=with_gen_type;
610  }
611  }
612  else
613  field_type=java_type_from_string(f.descriptor);
614 
615  // is this a static field?
616  if(f.is_static)
617  {
618  // Create the symbol; we won't add to the struct type.
619  symbolt new_symbol;
620 
621  new_symbol.is_static_lifetime=true;
622  new_symbol.is_lvalue=true;
623  new_symbol.is_state_var=true;
624  new_symbol.name=id2string(class_symbol.name)+"."+id2string(f.name);
625  new_symbol.base_name=f.name;
626  new_symbol.type=field_type;
627  // Annotating the type with ID_C_class to provide a static field -> class
628  // link matches the method used by java_bytecode_convert_method::convert
629  // for methods.
630  new_symbol.type.set(ID_C_class, class_symbol.name);
631  new_symbol.pretty_name=id2string(class_symbol.pretty_name)+
632  "."+id2string(f.name);
633  new_symbol.mode=ID_java;
634  new_symbol.is_type=false;
635 
636  // These annotations use `ID_C_access` instead of `ID_access` like methods
637  // to avoid type clashes in expressions like `some_static_field = 0`, where
638  // with ID_access the constant '0' would need to have an access modifier
639  // too, or else appear to have incompatible type.
640  if(f.is_public)
641  new_symbol.type.set(ID_C_access, ID_public);
642  else if(f.is_protected)
643  new_symbol.type.set(ID_C_access, ID_protected);
644  else if(f.is_private)
645  new_symbol.type.set(ID_C_access, ID_private);
646  else
647  new_symbol.type.set(ID_C_access, ID_default);
648 
649  const namespacet ns(symbol_table);
650  new_symbol.value=
651  zero_initializer(
652  field_type,
653  class_symbol.location,
654  ns,
655  get_message_handler());
656 
657  // Load annotations
658  if(!f.annotations.empty())
659  {
660  convert_annotations(
661  f.annotations,
662  type_checked_cast<annotated_typet>(new_symbol.type).get_annotations());
663  }
664 
665  // Do we have the static field symbol already?
666  const auto s_it=symbol_table.symbols.find(new_symbol.name);
667  if(s_it!=symbol_table.symbols.end())
668  symbol_table.erase(s_it); // erase, we stubbed it
669 
670  if(symbol_table.add(new_symbol))
671  assert(false && "failed to add static field symbol");
672  }
673  else
674  {
675  class_typet &class_type=to_class_type(class_symbol.type);
676 
677  class_type.components().emplace_back();
678  class_typet::componentt &component=class_type.components().back();
679 
680  component.set_name(f.name);
681  component.set_base_name(f.name);
682  component.set_pretty_name(f.name);
683  component.type()=field_type;
684 
685  if(f.is_private)
686  component.set_access(ID_private);
687  else if(f.is_protected)
688  component.set_access(ID_protected);
689  else if(f.is_public)
690  component.set_access(ID_public);
691  else
692  component.set_access(ID_default);
693 
694  // Load annotations
695  if(!f.annotations.empty())
696  {
697  convert_annotations(
698  f.annotations,
699  static_cast<annotated_typet &>(component.type()).get_annotations());
700  }
701  }
702 }
703 
706 void java_bytecode_convert_classt::add_array_types(symbol_tablet &symbol_table)
707 {
708  const std::string letters="ijsbcfdza";
709 
710  for(const char l : letters)
711  {
712  symbol_typet symbol_type=
713  to_symbol_type(java_array_type(l).subtype());
714 
715  const irep_idt &symbol_type_identifier=symbol_type.get_identifier();
716  if(symbol_table.has_symbol(symbol_type_identifier))
717  return;
718 
719  class_typet class_type;
720  // we have the base class, java.lang.Object, length and data
721  // of appropriate type
722  class_type.set_tag(symbol_type_identifier);
723  // Note that non-array types don't have "java::" at the beginning of their
724  // tag, and their name is "java::" + their tag. Since arrays do have
725  // "java::" at the beginning of their tag we set the name to be the same as
726  // the tag.
727  class_type.set(ID_name, symbol_type_identifier);
728 
729  class_type.components().reserve(3);
730  class_typet::componentt base_class_component(
731  "@java.lang.Object", symbol_typet("java::java.lang.Object"));
732  base_class_component.set_pretty_name("@java.lang.Object");
733  base_class_component.set_base_name("@java.lang.Object");
734  class_type.components().push_back(base_class_component);
735 
736  class_typet::componentt length_component("length", java_int_type());
737  length_component.set_pretty_name("length");
738  length_component.set_base_name("length");
739  class_type.components().push_back(length_component);
740 
741  class_typet::componentt data_component(
742  "data", java_reference_type(java_type_from_char(l)));
743  data_component.set_pretty_name("data");
744  data_component.set_base_name("data");
745  class_type.components().push_back(data_component);
746 
747  class_type.add_base(symbol_typet("java::java.lang.Object"));
748 
749  INVARIANT(
750  is_valid_java_array(class_type),
751  "Constructed a new type representing a Java Array "
752  "object that doesn't match expectations");
753 
754  symbolt symbol;
755  symbol.name=symbol_type_identifier;
756  symbol.base_name=symbol_type.get(ID_C_base_name);
757  symbol.is_type=true;
758  symbol.type = class_type;
759  symbol.mode = ID_java;
760  symbol_table.add(symbol);
761 
762  // Also provide a clone method:
763  // ----------------------------
764 
765  const irep_idt clone_name=
766  id2string(symbol_type_identifier)+".clone:()Ljava/lang/Object;";
767  code_typet::parametert this_param;
768  this_param.set_identifier(id2string(clone_name)+"::this");
769  this_param.set_base_name("this");
770  this_param.set_this();
771  this_param.type()=java_reference_type(symbol_type);
772  const code_typet clone_type({this_param}, java_lang_object_type());
773 
774  parameter_symbolt this_symbol;
775  this_symbol.name=this_param.get_identifier();
776  this_symbol.base_name=this_param.get_base_name();
777  this_symbol.pretty_name=this_symbol.base_name;
778  this_symbol.type=this_param.type();
779  this_symbol.mode=ID_java;
780  symbol_table.add(this_symbol);
781 
782  const irep_idt local_name=
783  id2string(clone_name)+"::cloned_array";
784  auxiliary_symbolt local_symbol;
785  local_symbol.name=local_name;
786  local_symbol.base_name="cloned_array";
787  local_symbol.pretty_name=local_symbol.base_name;
788  local_symbol.type=java_reference_type(symbol_type);
789  local_symbol.mode=ID_java;
790  symbol_table.add(local_symbol);
791  const auto &local_symexpr=local_symbol.symbol_expr();
792 
793  code_blockt clone_body;
794 
795  code_declt declare_cloned(local_symexpr);
796  clone_body.move_to_operands(declare_cloned);
797 
798  side_effect_exprt java_new_array(
799  ID_java_new_array,
800  java_reference_type(symbol_type));
801  dereference_exprt old_array(this_symbol.symbol_expr(), symbol_type);
802  dereference_exprt new_array(local_symexpr, symbol_type);
803  member_exprt old_length(
804  old_array, length_component.get_name(), length_component.type());
805  java_new_array.copy_to_operands(old_length);
806  code_assignt create_blank(local_symexpr, java_new_array);
807  clone_body.move_to_operands(create_blank);
808 
809  member_exprt old_data(
810  old_array, data_component.get_name(), data_component.type());
811  member_exprt new_data(
812  new_array, data_component.get_name(), data_component.type());
813 
814  /*
815  // TODO use this instead of a loop.
816  codet array_copy;
817  array_copy.set_statement(ID_array_copy);
818  array_copy.move_to_operands(new_data);
819  array_copy.move_to_operands(old_data);
820  clone_body.move_to_operands(array_copy);
821  */
822 
823  // Begin for-loop to replace:
824 
825  const irep_idt index_name=
826  id2string(clone_name)+"::index";
827  auxiliary_symbolt index_symbol;
828  index_symbol.name=index_name;
829  index_symbol.base_name="index";
830  index_symbol.pretty_name=index_symbol.base_name;
831  index_symbol.type = length_component.type();
832  index_symbol.mode=ID_java;
833  symbol_table.add(index_symbol);
834  const auto &index_symexpr=index_symbol.symbol_expr();
835 
836  code_declt declare_index(index_symexpr);
837  clone_body.move_to_operands(declare_index);
838 
839  code_fort copy_loop;
840 
841  copy_loop.init()=
842  code_assignt(index_symexpr, from_integer(0, index_symexpr.type()));
843  copy_loop.cond()=
844  binary_relation_exprt(index_symexpr, ID_lt, old_length);
845 
846  side_effect_exprt inc(ID_assign);
847  inc.operands().resize(2);
848  inc.op0()=index_symexpr;
849  inc.op1()=plus_exprt(index_symexpr, from_integer(1, index_symexpr.type()));
850  copy_loop.iter()=inc;
851 
852  dereference_exprt old_cell(
853  plus_exprt(old_data, index_symexpr), old_data.type().subtype());
854  dereference_exprt new_cell(
855  plus_exprt(new_data, index_symexpr), new_data.type().subtype());
856  code_assignt copy_cell(new_cell, old_cell);
857  copy_loop.body()=copy_cell;
858 
859  // End for-loop
860  clone_body.move_to_operands(copy_loop);
861 
862  member_exprt new_base_class(
863  new_array, base_class_component.get_name(), base_class_component.type());
864  address_of_exprt retval(new_base_class);
865  code_returnt return_inst(retval);
866  clone_body.move_to_operands(return_inst);
867 
868  symbolt clone_symbol;
869  clone_symbol.name=clone_name;
870  clone_symbol.pretty_name=
871  id2string(symbol_type_identifier)+".clone:()";
872  clone_symbol.base_name="clone";
873  clone_symbol.type=clone_type;
874  clone_symbol.value=clone_body;
875  clone_symbol.mode=ID_java;
876  symbol_table.add(clone_symbol);
877  }
878 }
879 
880 bool java_bytecode_convert_classt::is_overlay_method(const methodt &method)
881 {
882  return java_bytecode_parse_treet::find_annotation(
883  method.annotations, ID_overlay_method)
884  .has_value();
885 }
886 
887 bool java_bytecode_convert_class(
888  const java_class_loadert::parse_tree_with_overlayst &parse_trees,
889  symbol_tablet &symbol_table,
890  message_handlert &message_handler,
891  size_t max_array_length,
892  method_bytecodet &method_bytecode,
893  java_string_library_preprocesst &string_preprocess,
894  const std::unordered_set<std::string> &no_load_classes)
895 {
896  java_bytecode_convert_classt java_bytecode_convert_class(
897  symbol_table,
898  message_handler,
899  max_array_length,
900  method_bytecode,
901  string_preprocess,
902  no_load_classes);
903 
904  try
905  {
906  java_bytecode_convert_class(parse_trees);
907  return false;
908  }
909 
910  catch(int)
911  {
912  }
913 
914  catch(const char *e)
915  {
916  java_bytecode_convert_class.error() << e << messaget::eom;
917  }
918 
919  catch(const std::string &e)
920  {
921  java_bytecode_convert_class.error() << e << messaget::eom;
922  }
923 
924  return true;
925 }
926 
939 static void find_and_replace_parameter(
940  java_generic_parametert &parameter,
941  const std::vector<java_generic_parametert> &replacement_parameters)
942 {
943  // get the name of the parameter, e.g., `T` from `java::Class::T`
944  const std::string &parameter_full_name =
945  id2string(parameter.type_variable_ref().get_identifier());
946  const std::string &parameter_name =
947  parameter_full_name.substr(parameter_full_name.rfind("::") + 2);
948 
949  // check if there is a replacement parameter with the same name
950  const auto replacement_parameter_p = std::find_if(
951  replacement_parameters.begin(),
952  replacement_parameters.end(),
953  [&parameter_name](const java_generic_parametert &replacement_param)
954  {
955  const std::string &replacement_parameter_full_name =
956  id2string(replacement_param.type_variable().get_identifier());
957  return parameter_name.compare(
958  replacement_parameter_full_name.substr(
959  replacement_parameter_full_name.rfind("::") + 2)) == 0;
960  });
961 
962  // if a replacement parameter was found, update the identifier
963  if(replacement_parameter_p != replacement_parameters.end())
964  {
965  const std::string &replacement_parameter_full_name =
966  id2string(replacement_parameter_p->type_variable().get_identifier());
967 
968  // the replacement parameter is a viable one, i.e., it comes from an outer
969  // class
970  PRECONDITION(
971  parameter_full_name.substr(0, parameter_full_name.rfind("::"))
972  .compare(
973  replacement_parameter_full_name.substr(
974  0, replacement_parameter_full_name.rfind("::"))) > 0);
975 
976  parameter.type_variable_ref().set_identifier(
977  replacement_parameter_full_name);
978  }
979 }
980 
988 static void find_and_replace_parameters(
989  typet &type,
990  const std::vector<java_generic_parametert> &replacement_parameters)
991 {
992  if(is_java_generic_parameter(type))
993  {
994  find_and_replace_parameter(
995  to_java_generic_parameter(type), replacement_parameters);
996  }
997  else if(is_java_generic_type(type))
998  {
999  java_generic_typet &generic_type = to_java_generic_type(type);
1000  std::vector<reference_typet> &arguments =
1001  generic_type.generic_type_arguments();
1002  for(auto &argument : arguments)
1003  {
1004  find_and_replace_parameters(argument, replacement_parameters);
1005  }
1006  }
1007  else if(is_java_generic_symbol_type(type))
1008  {
1009  java_generic_symbol_typet &generic_base = to_java_generic_symbol_type(type);
1010  std::vector<reference_typet> &gen_types = generic_base.generic_types();
1011  for(auto &gen_type : gen_types)
1012  {
1013  find_and_replace_parameters(gen_type, replacement_parameters);
1014  }
1015  }
1016 }
1017 
1021 void convert_annotations(
1022  const java_bytecode_parse_treet::annotationst &parsed_annotations,
1023  std::vector<java_annotationt> &java_annotations)
1024 {
1025  for(const auto &annotation : parsed_annotations)
1026  {
1027  java_annotations.emplace_back(annotation.type);
1028  std::vector<java_annotationt::valuet> &values =
1029  java_annotations.back().get_values();
1030  std::transform(
1031  annotation.element_value_pairs.begin(),
1032  annotation.element_value_pairs.end(),
1033  std::back_inserter(values),
1034  [](const decltype(annotation.element_value_pairs)::value_type &value) {
1035  return java_annotationt::valuet(value.element_name, value.value);
1036  });
1037  }
1038 }
1039 
1044 void convert_java_annotations(
1045  const std::vector<java_annotationt> &java_annotations,
1046  java_bytecode_parse_treet::annotationst &annotations)
1047 {
1048  for(const auto &java_annotation : java_annotations)
1049  {
1050  annotations.emplace_back(java_bytecode_parse_treet::annotationt());
1051  auto &annotation = annotations.back();
1052  annotation.type = java_annotation.get_type();
1053 
1054  std::transform(
1055  java_annotation.get_values().begin(),
1056  java_annotation.get_values().end(),
1057  std::back_inserter(annotation.element_value_pairs),
1058  [](const java_annotationt::valuet &value)
1059  -> java_bytecode_parse_treet::annotationt::element_value_pairt {
1060  return {value.get_name(), value.get_value()};
1061  });
1062  }
1063 }
1064 
1071 void mark_java_implicitly_generic_class_type(
1072  const irep_idt &class_name,
1073  symbol_tablet &symbol_table)
1074 {
1075  const std::string qualified_class_name = "java::" + id2string(class_name);
1076  PRECONDITION(symbol_table.has_symbol(qualified_class_name));
1077  symbolt &class_symbol = symbol_table.get_writeable_ref(qualified_class_name);
1078  java_class_typet &class_type = to_java_class_type(class_symbol.type);
1079 
1080  // the class must be an inner non-static class, i.e., have a field this$*
1081  // TODO this should be simplified once static inner classes are marked
1082  // during parsing
1083  bool no_this_field = std::none_of(
1084  class_type.components().begin(),
1085  class_type.components().end(),
1086  [](const struct_union_typet::componentt &component)
1087  {
1088  return id2string(component.get_name()).substr(0, 5) == "this$";
1089  });
1090  if(no_this_field)
1091  {
1092  return;
1093  }
1094 
1095  // create a vector of all generic type parameters of all outer classes, in
1096  // the order from the outer-most inwards
1097  std::vector<java_generic_parametert> implicit_generic_type_parameters;
1098  std::string::size_type outer_class_delimiter =
1099  qualified_class_name.rfind("$");
1100  while(outer_class_delimiter != std::string::npos)
1101  {
1102  std::string outer_class_name =
1103  qualified_class_name.substr(0, outer_class_delimiter);
1104  if(symbol_table.has_symbol(outer_class_name))
1105  {
1106  const symbolt &outer_class_symbol =
1107  symbol_table.lookup_ref(outer_class_name);
1108  const java_class_typet &outer_class_type =
1109  to_java_class_type(outer_class_symbol.type);
1110  if(is_java_generic_class_type(outer_class_type))
1111  {
1112  const auto &outer_generic_type_parameters =
1113  to_java_generic_class_type(outer_class_type).generic_types();
1114  implicit_generic_type_parameters.insert(
1115  implicit_generic_type_parameters.begin(),
1116  outer_generic_type_parameters.begin(),
1117  outer_generic_type_parameters.end());
1118  }
1119  outer_class_delimiter = outer_class_name.rfind("$");
1120  }
1121  else
1122  {
1123  throw missing_outer_class_symbol_exceptiont(
1124  outer_class_name, qualified_class_name);
1125  }
1126  }
1127 
1128  // if there are any implicit generic type parameters, mark the class
1129  // implicitly generic and update identifiers of type parameters used in fields
1130  if(!implicit_generic_type_parameters.empty())
1131  {
1132  class_symbol.type = java_implicitly_generic_class_typet(
1133  class_type, implicit_generic_type_parameters);
1134 
1135  for(auto &field : class_type.components())
1136  {
1137  find_and_replace_parameters(
1138  field.type(), implicit_generic_type_parameters);
1139  }
1140 
1141  for(auto &base : class_type.bases())
1142  {
1143  find_and_replace_parameters(
1144  base.type(), implicit_generic_type_parameters);
1145  }
1146  }
1147 }
The type of an expression.
Definition: type.h:22
java_bytecode_parse_treet::methodt methodt
const std::string & id2string(const irep_idt &d)
Definition: irep.h:43
java_bytecode_parse_treet::fieldt fieldt
java_bytecode_parse_treet::classt classt
void set_base_name(const irep_idt &base_name)
Definition: std_types.h:197
void set_name(const irep_idt &name)
Definition: std_types.h:187
std::vector< componentt > componentst
Definition: std_types.h:243
java_string_library_preprocesst & string_preprocess
const java_generic_parametert & to_java_generic_parameter(const typet &type)
Definition: java_types.h:288
const componentst & components() const
Definition: std_types.h:245
void set_final(bool is_final)
Definition: java_types.h:119
static optionalt< std::string > extract_generic_superclass_reference(const optionalt< std::string > &signature)
Auxiliary function to extract the generic superclass reference from the class signature.
typet & type()
Definition: expr.h:56
An exception that is raised for unsupported class signature.
Definition: java_types.h:528
Symbol table entry.This is a symbol in the symbol table, stored in an object of type symbol_tablet...
Definition: symbol.h:30
void generate_class_stub(const irep_idt &class_name, symbol_table_baset &symbol_table, message_handlert &message_handler, const struct_union_typet::componentst &componentst)
Definition: java_utils.cpp:66
static mstreamt & eom(mstreamt &m)
Definition: message.h:272
void set_access(const irep_idt &access)
Definition: java_types.h:109
std::unordered_set< std::string > no_load_classes
std::list< java_bytecode_parse_treet > parse_tree_with_overlayst
mstreamt & warning() const
Definition: message.h:307
size_t find_closing_delimiter(const std::string &src, size_t open_pos, char open_char, char close_char)
Finds the corresponding closing delimiter to the given opening delimiter.
Definition: java_utils.cpp:200
Expression Initialization.
Class to hold a class with generics, extends the java class type with a vector of java generic type p...
Definition: java_types.h:375
A reference into the symbol table.
Definition: std_types.h:110
bool is_known_string_type(irep_idt class_name)
Check whether a class name is known as a string type.
JAVA Bytecode Language Conversion.
nonstd::optional< T > optionalt
Definition: optional.h:35
API to expression classes.
The symbol table.
Definition: symbol_table.h:19
#define PRECONDITION(CONDITION)
Definition: invariant.h:230
void set_tag(const irep_idt &tag)
Definition: std_types.h:267
size_t find_closing_semi_colon_for_reference_type(const std::string src, size_t starting_point)
Finds the closing semi-colon ending a ClassTypeSignature that starts at starting_point.
Definition: java_types.cpp:390
java_bytecode_convert_classt(symbol_tablet &_symbol_table, message_handlert &_message_handler, size_t _max_array_length, method_bytecodet &method_bytecode, java_string_library_preprocesst &_string_preprocess, const std::unordered_set< std::string > &no_load_classes)
Type for a generic symbol, extends symbol_typet with a vector of java generic types.
Definition: java_types.h:587
bool check_field_exists(const fieldt &field, const irep_idt &qualified_fieldname, const struct_union_typet::componentst &fields) const
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
static optionalt< std::string > extract_generic_interface_reference(const optionalt< std::string > &signature, const std::string &interface_name)
Auxiliary function to extract the generic interface reference of an interface with the specified name...
message_handlert & get_message_handler()
Definition: message.h:153
java_bytecode_parse_treet::annotationt annotationt
void set_pretty_name(const irep_idt &name)
Definition: std_types.h:217
std::string to_string(const string_constraintt &expr)
Used for debug printing.
static void add_array_types(symbol_tablet &symbol_table)
Register in the symbol_table new symbols for the objects java::array[X] where X is byte...
void convert(const classt &c, const overlay_classest &overlay_classes)
Convert a class, adding symbols to the symbol table for its members.
const basest & bases() const
Definition: std_types.h:386
void add_string_type(const irep_idt &class_name, symbol_tablet &symbol_table)
Add to the symbol table type declaration for a String-like Java class.
std::vector< typet > java_generic_type_from_string(const std::string &class_name, const std::string &src)
Converts the content of a generic class type into a vector of Java types, that is each type variable ...
Definition: java_types.cpp:610
mstreamt & debug() const
Definition: message.h:332
JAVA Bytecode Language Conversion.
void operator()(const java_class_loadert::parse_tree_with_overlayst &parse_trees)
Converts all the class parse trees into a class symbol and adds it to the symbol table.
bool empty() const
Definition: dstring.h:61
const generic_typest & generic_types() const
Definition: java_types.h:385
void add_base(const typet &base)
Definition: std_types.h:396
void set(const irep_namet &name, const irep_idt &value)
Definition: irep.h:214
std::list< std::reference_wrapper< const classt > > overlay_classest
static bool is_overlay_method(const methodt &method)