cprover
java_bytecode_parser.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module:
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
9 #include "java_bytecode_parser.h"
10 
11 #include <algorithm>
12 #include <fstream>
13 #include <map>
14 #include <string>
15 
16 #include <util/arith_tools.h>
17 #include <util/ieee_float.h>
18 #include <util/parser.h>
19 #include <util/prefix.h>
20 #include <util/std_expr.h>
21 #include <util/string_constant.h>
22 #include <util/optional.h>
23 
25 #include "java_types.h"
26 #include "bytecode_info.h"
27 
28 #ifdef DEBUG
29 #include <iostream>
30 #endif
31 
33 {
34 public:
36  {
37  get_bytecodes();
38  }
39 
40  virtual bool parse();
41 
56 
58 
59  struct pool_entryt
60  {
66  pool_entryt():tag(0), ref1(0), ref2(0), number(0) { }
67  };
68 
69  typedef std::vector<pool_entryt> constant_poolt;
71 
72 protected:
73  class bytecodet
74  {
75  public:
77  char format;
78  };
79 
80  std::vector<bytecodet> bytecodes;
81 
83  {
84  if(index==0 || index>=constant_pool.size())
85  {
86  error() << "invalid constant pool index (" << index << ")" << eom;
87  error() << "constant pool size: " << constant_pool.size() << eom;
88  throw 0;
89  }
90 
91  return constant_pool[index];
92  }
93 
94  exprt &constant(u2 index)
95  {
96  return pool_entry(index).expr;
97  }
98 
99  const typet type_entry(u2 index)
100  {
101  return java_type_from_string(id2string(pool_entry(index).s));
102  }
103 
105  {
106  // pre-hash the mnemonics, so we do this only once
107  bytecodes.resize(256);
108  for(const bytecode_infot *p=bytecode_info; p->mnemonic!=nullptr; p++)
109  {
110  assert(p->opcode<bytecodes.size());
111  bytecodes[p->opcode].mnemonic=p->mnemonic;
112  bytecodes[p->opcode].format=p->format;
113  }
114  }
115 
116  void rClassFile();
117  void rconstant_pool();
118  void rinterfaces(classt &parsed_class);
119  void rfields(classt &parsed_class);
120  void rmethods(classt &parsed_class);
121  void rmethod(classt &parsed_class);
122  void rclass_attribute(classt &parsed_class);
126  void relement_value_pair(annotationt::element_value_pairt &);
127  void rmethod_attribute(methodt &method);
128  void rfield_attribute(fieldt &);
129  void rcode_attribute(methodt &method);
130  void read_verification_type_info(methodt::verification_type_infot &);
132  void get_class_refs();
133  void get_class_refs_rec(const typet &);
136  parse_method_handle(const class method_handle_infot &entry);
138 
139  void skip_bytes(std::size_t bytes)
140  {
141  for(std::size_t i=0; i<bytes; i++)
142  {
143  if(!*in)
144  {
145  error() << "unexpected end of bytecode file" << eom;
146  throw 0;
147  }
148  in->get();
149  }
150  }
151 
152  u8 read_bytes(size_t bytes)
153  {
154  u8 result=0;
155  for(size_t i=0; i<bytes; i++)
156  {
157  if(!*in)
158  {
159  error() << "unexpected end of bytecode file" << eom;
160  throw 0;
161  }
162  result<<=8;
163  result|=in->get();
164  }
165  return result;
166  }
167 
169  {
170  return (u1)read_bytes(1);
171  }
172 
173  inline u2 read_u2()
174  {
175  return (u2)read_bytes(2);
176  }
177 
179  {
180  return (u4)read_bytes(4);
181  }
182 
184  {
185  return read_bytes(8);
186  }
187 
189  classt &parsed_class,
190  size_t bootstrap_method_index,
191  u2_valuest u2_values) const;
192 };
193 
194 #define CONSTANT_Class 7
195 #define CONSTANT_Fieldref 9
196 #define CONSTANT_Methodref 10
197 #define CONSTANT_InterfaceMethodref 11
198 #define CONSTANT_String 8
199 #define CONSTANT_Integer 3
200 #define CONSTANT_Float 4
201 #define CONSTANT_Long 5
202 #define CONSTANT_Double 6
203 #define CONSTANT_NameAndType 12
204 #define CONSTANT_Utf8 1
205 #define CONSTANT_MethodHandle 15
206 #define CONSTANT_MethodType 16
207 #define CONSTANT_InvokeDynamic 18
208 
209 #define VTYPE_INFO_TOP 0
210 #define VTYPE_INFO_INTEGER 1
211 #define VTYPE_INFO_FLOAT 2
212 #define VTYPE_INFO_LONG 3
213 #define VTYPE_INFO_DOUBLE 4
214 #define VTYPE_INFO_ITEM_NULL 5
215 #define VTYPE_INFO_UNINIT_THIS 6
216 #define VTYPE_INFO_OBJECT 7
217 #define VTYPE_INFO_UNINIT 8
218 
220 {
221 public:
223  : tag(entry.tag)
224  {
225  }
226 
227  u1 get_tag() const
228  {
229  return tag;
230  }
231 
232  typedef std::function<java_bytecode_parsert::pool_entryt &(u2)>
235 
236 protected:
237  static std::string read_utf8_constant(const pool_entryt &entry)
238  {
239  INVARIANT(
240  entry.tag == CONSTANT_Utf8, "Name entry must be a constant UTF-8");
241  return id2string(entry.s);
242  }
243 
244 private:
246 };
247 
251 {
252 public:
253  explicit class_infot(const pool_entryt &entry) : structured_pool_entryt(entry)
254  {
255  PRECONDITION(entry.tag == CONSTANT_Class);
256  name_index = entry.ref1;
257  }
258 
259  std::string get_name(pool_entry_lookupt pool_entry) const
260  {
261  const pool_entryt &name_entry = pool_entry(name_index);
262  return read_utf8_constant(name_entry);
263  }
264 
265 private:
267 };
268 
272 {
273 public:
275  : structured_pool_entryt(entry)
276  {
278  name_index = entry.ref1;
279  descriptor_index = entry.ref2;
280  }
281 
282  std::string get_name(pool_entry_lookupt pool_entry) const
283  {
284  const pool_entryt &name_entry = pool_entry(name_index);
285  return read_utf8_constant(name_entry);
286  }
287 
288  std::string get_descriptor(pool_entry_lookupt pool_entry) const
289  {
290  const pool_entryt &descriptor_entry = pool_entry(descriptor_index);
291  return read_utf8_constant(descriptor_entry);
292  }
293 
294 private:
297 };
298 
300 {
301 public:
303  {
304  static std::set<u1> info_tags = {
306  PRECONDITION(info_tags.find(entry.tag) != info_tags.end());
307  class_index = entry.ref1;
308  name_and_type_index = entry.ref2;
309  }
310 
312  {
313  return class_index;
314  }
316  {
317  return name_and_type_index;
318  }
319 
321  {
322  const pool_entryt &name_and_type_entry = pool_entry(name_and_type_index);
323 
324  INVARIANT(
325  name_and_type_entry.tag == CONSTANT_NameAndType,
326  "name_and_typeindex did not correspond to a name_and_type in the "
327  "constant pool");
328 
329  return name_and_type_infot{name_and_type_entry};
330  }
331 
333  {
334  const pool_entryt &class_entry = pool_entry(class_index);
335 
336  return class_infot{class_entry};
337  }
338 
339 private:
342 };
343 
345 {
346 public:
351  {
352  REF_getField = 1,
353  REF_getStatic = 2,
354  REF_putField = 3,
355  REF_putStatic = 4,
356  REF_invokeVirtual = 5,
357  REF_invokeStatic = 6,
358  REF_invokeSpecial = 7,
361  };
362 
364  : structured_pool_entryt(entry)
365  {
367  PRECONDITION(entry.ref1 > 0 && entry.ref1 < 10); // Java 8 spec 4.4.8
368  reference_kind = static_cast<method_handle_kindt>(entry.ref1);
369  reference_index = entry.ref2;
370  }
371 
373  {
374  const base_ref_infot ref_entry{pool_entry(reference_index)};
375 
376  // validate the correctness of the constant pool entry
377  switch(reference_kind)
378  {
383  {
384  INVARIANT(ref_entry.get_tag() == CONSTANT_Fieldref, "4.4.2");
385  break;
386  }
389  {
390  INVARIANT(ref_entry.get_tag() == CONSTANT_Methodref, "4.4.2");
391  break;
392  }
395  {
396  INVARIANT(
397  ref_entry.get_tag() == CONSTANT_Methodref ||
398  ref_entry.get_tag() == CONSTANT_InterfaceMethodref,
399  "4.4.2");
400  break;
401  }
403  {
404  INVARIANT(ref_entry.get_tag() == CONSTANT_InterfaceMethodref, "");
405  break;
406  }
407  }
408 
409  return ref_entry;
410  }
411 
412 private:
415 };
416 
418 {
419  try
420  {
421  rClassFile();
422  }
423 
424  catch(const char *message)
425  {
426  error() << message << eom;
427  return true;
428  }
429 
430  catch(const std::string &message)
431  {
432  error() << message << eom;
433  return true;
434  }
435 
436  catch(...)
437  {
438  error() << "parsing error" << eom;
439  return true;
440  }
441 
442  return false;
443 }
444 
445 #define ACC_PUBLIC 0x0001
446 #define ACC_PRIVATE 0x0002
447 #define ACC_PROTECTED 0x0004
448 #define ACC_STATIC 0x0008
449 #define ACC_FINAL 0x0010
450 #define ACC_SYNCHRONIZED 0x0020
451 #define ACC_BRIDGE 0x0040
452 #define ACC_VARARGS 0x0080
453 #define ACC_NATIVE 0x0100
454 #define ACC_INTERFACE 0x0200
455 #define ACC_ABSTRACT 0x0400
456 #define ACC_STRICT 0x0800
457 #define ACC_SYNTHETIC 0x1000
458 #define ACC_ANNOTATION 0x2000
459 #define ACC_ENUM 0x4000
460 
461 #ifdef _MSC_VER
462 #define UNUSED
463 #else
464 #define UNUSED __attribute__((unused))
465 #endif
466 
468 {
470 
471  u4 magic=read_u4();
472  u2 UNUSED minor_version=read_u2();
473  u2 major_version=read_u2();
474 
475  if(magic!=0xCAFEBABE)
476  {
477  error() << "wrong magic" << eom;
478  throw 0;
479  }
480 
481  if(major_version<44)
482  {
483  error() << "unexpected major version" << eom;
484  throw 0;
485  }
486 
487  rconstant_pool();
488 
489  classt &parsed_class=parse_tree.parsed_class;
490 
491  u2 access_flags=read_u2();
492  u2 this_class=read_u2();
493  u2 super_class=read_u2();
494 
495  parsed_class.is_abstract=(access_flags&ACC_ABSTRACT)!=0;
496  parsed_class.is_enum=(access_flags&ACC_ENUM)!=0;
497  parsed_class.is_public=(access_flags&ACC_PUBLIC)!=0;
498  parsed_class.is_protected=(access_flags&ACC_PROTECTED)!=0;
499  parsed_class.is_private=(access_flags&ACC_PRIVATE)!=0;
500  parsed_class.is_final = (access_flags & ACC_FINAL) != 0;
501  parsed_class.is_interface = (access_flags & ACC_INTERFACE) != 0;
502  parsed_class.is_synthetic = (access_flags & ACC_SYNTHETIC) != 0;
503  parsed_class.is_annotation = (access_flags & ACC_ANNOTATION) != 0;
504  parsed_class.name=
505  constant(this_class).type().get(ID_C_base_name);
506 
507  if(super_class!=0)
508  parsed_class.extends=
509  constant(super_class).type().get(ID_C_base_name);
510 
511  rinterfaces(parsed_class);
512  rfields(parsed_class);
513  rmethods(parsed_class);
514 
515  // count elements of enum
516  if(parsed_class.is_enum)
517  for(fieldt &field : parse_tree.parsed_class.fields)
518  if(field.is_enum)
520 
521  u2 attributes_count=read_u2();
522 
523  for(std::size_t j=0; j<attributes_count; j++)
524  rclass_attribute(parsed_class);
525 
526  get_class_refs();
527 
529 }
530 
532 {
533  // Get the class references for the benefit of a dependency
534  // analysis.
535 
536  for(const auto &c : constant_pool)
537  {
538  switch(c.tag)
539  {
540  case CONSTANT_Class:
541  get_class_refs_rec(c.expr.type());
542  break;
543 
545  {
548  }
549  break;
550 
551  default: {}
552  }
553  }
554 
555  for(const auto &field : parse_tree.parsed_class.fields)
556  {
557  typet field_type;
558  if(field.signature.has_value())
559  {
561  field.descriptor,
562  field.signature,
564 
565  // add generic type args to class refs as dependencies, same below for
566  // method types and entries from the local variable type table
568  field_type, parse_tree.class_refs);
569  }
570  else
571  field_type=java_type_from_string(field.descriptor);
572 
573  get_class_refs_rec(field_type);
574  }
575 
576  for(const auto &method : parse_tree.parsed_class.methods)
577  {
578  typet method_type;
579  if(method.signature.has_value())
580  {
582  method.descriptor,
583  method.signature,
586  method_type, parse_tree.class_refs);
587  }
588  else
589  method_type=java_type_from_string(method.descriptor);
590 
591  get_class_refs_rec(method_type);
592  for(const auto &var : method.local_variable_table)
593  {
594  typet var_type;
595  if(var.signature.has_value())
596  {
598  var.descriptor,
599  var.signature,
602  var_type, parse_tree.class_refs);
603  }
604  else
605  var_type=java_type_from_string(var.descriptor);
606  get_class_refs_rec(var_type);
607  }
608  }
609 }
610 
612 {
613  if(src.id()==ID_code)
614  {
615  const code_typet &ct=to_code_type(src);
616  const typet &rt=ct.return_type();
617  get_class_refs_rec(rt);
618  for(const auto &p : ct.parameters())
619  get_class_refs_rec(p.type());
620  }
621  else if(src.id()==ID_symbol)
622  {
623  irep_idt name=src.get(ID_C_base_name);
624  if(has_prefix(id2string(name), "array["))
625  {
626  const typet &element_type=
627  static_cast<const typet &>(src.find(ID_C_element_type));
628  get_class_refs_rec(element_type);
629  }
630  else
631  parse_tree.class_refs.insert(name);
632  }
633  else if(src.id()==ID_struct)
634  {
635  const struct_typet &struct_type=to_struct_type(src);
636  for(const auto &c : struct_type.components())
637  get_class_refs_rec(c.type());
638  }
639  else if(src.id()==ID_pointer)
641 }
642 
644 {
645  u2 constant_pool_count=read_u2();
646  if(constant_pool_count==0)
647  {
648  error() << "invalid constant_pool_count" << eom;
649  throw 0;
650  }
651 
652  constant_pool.resize(constant_pool_count);
653 
654  for(constant_poolt::iterator
655  it=constant_pool.begin();
656  it!=constant_pool.end();
657  it++)
658  {
659  // the first entry isn't used
660  if(it==constant_pool.begin())
661  continue;
662 
663  it->tag=read_u1();
664 
665  switch(it->tag)
666  {
667  case CONSTANT_Class:
668  it->ref1=read_u2();
669  break;
670 
671  case CONSTANT_Fieldref:
672  case CONSTANT_Methodref:
676  it->ref1=read_u2();
677  it->ref2=read_u2();
678  break;
679 
680  case CONSTANT_String:
681  case CONSTANT_MethodType:
682  it->ref1=read_u2();
683  break;
684 
685  case CONSTANT_Integer:
686  case CONSTANT_Float:
687  it->number=read_u4();
688  break;
689 
690  case CONSTANT_Long:
691  case CONSTANT_Double:
692  it->number=read_u8();
693  // Eight-byte constants take up two entries
694  // in the constant_pool table, for annoying this programmer.
695  if(it==constant_pool.end())
696  {
697  error() << "invalid double entry" << eom;
698  throw 0;
699  }
700  it++;
701  it->tag=0;
702  break;
703 
704  case CONSTANT_Utf8:
705  {
706  u2 bytes=read_u2();
707  std::string s;
708  s.resize(bytes);
709  for(std::string::iterator s_it=s.begin(); s_it!=s.end(); s_it++)
710  *s_it=read_u1();
711  it->s=s; // hashes
712  }
713  break;
714 
716  it->ref1=read_u1();
717  it->ref2=read_u2();
718  break;
719 
720  default:
721  error() << "unknown constant pool entry (" << it->tag << ")"
722  << eom;
723  throw 0;
724  }
725  }
726 
727  // we do a bit of post-processing after we have them all
728  for(constant_poolt::iterator
729  it=constant_pool.begin();
730  it!=constant_pool.end();
731  it++)
732  {
733  // the first entry isn't used
734  if(it==constant_pool.begin())
735  continue;
736 
737  switch(it->tag)
738  {
739  case CONSTANT_Class:
740  {
741  const std::string &s=id2string(pool_entry(it->ref1).s);
742  it->expr=type_exprt(java_classname(s));
743  }
744  break;
745 
746  case CONSTANT_Fieldref:
747  {
748  const pool_entryt &nameandtype_entry=pool_entry(it->ref2);
749  const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
750  const pool_entryt &class_entry=pool_entry(it->ref1);
751  const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
752  typet type=type_entry(nameandtype_entry.ref2);
753 
754  symbol_typet class_symbol=
755  java_classname(id2string(class_name_entry.s));
756 
757  fieldref_exprt fieldref(
758  type, name_entry.s, class_symbol.get_identifier());
759 
760  it->expr=fieldref;
761  }
762  break;
763 
764  case CONSTANT_Methodref:
766  {
767  const pool_entryt &nameandtype_entry=pool_entry(it->ref2);
768  const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
769  const pool_entryt &class_entry=pool_entry(it->ref1);
770  const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
771  typet type=type_entry(nameandtype_entry.ref2);
772 
773  symbol_typet class_symbol=
774  java_classname(id2string(class_name_entry.s));
775 
776  irep_idt component_name=
777  id2string(name_entry.s)+
778  ":"+id2string(pool_entry(nameandtype_entry.ref2).s);
779 
780  irep_idt class_name=
781  class_symbol.get_identifier();
782 
783  irep_idt identifier=
784  id2string(class_name)+"."+id2string(component_name);
785 
786  exprt virtual_function(ID_virtual_function, type);
787  virtual_function.set(ID_component_name, component_name);
788  virtual_function.set(ID_C_class, class_name);
789  virtual_function.set(ID_C_base_name, name_entry.s);
790  virtual_function.set(ID_identifier, identifier);
791 
792  it->expr=virtual_function;
793  }
794  break;
795 
796  case CONSTANT_String:
797  {
798  // ldc turns these into references to java.lang.String
799  exprt string_literal(ID_java_string_literal);
800  string_literal.set(ID_value, pool_entry(it->ref1).s);
801  it->expr=string_literal;
802  }
803  break;
804 
805  case CONSTANT_Integer:
806  it->expr=from_integer(it->number, java_int_type());
807  break;
808 
809  case CONSTANT_Float:
810  {
812  value.unpack(it->number);
813  it->expr=value.to_expr();
814  }
815  break;
816 
817  case CONSTANT_Long:
818  it->expr=from_integer(it->number, java_long_type());
819  break;
820 
821  case CONSTANT_Double:
822  {
824  value.unpack(it->number);
825  it->expr=value.to_expr();
826  }
827  break;
828 
830  {
831  it->expr.id("nameandtype");
832  }
833  break;
834 
836  {
837  it->expr.id("methodhandle");
838  }
839  break;
840 
841  case CONSTANT_MethodType:
842  {
843  it->expr.id("methodtype");
844  }
845  break;
846 
848  {
849  it->expr.id("invokedynamic");
850  const pool_entryt &nameandtype_entry=pool_entry(it->ref2);
851  typet type=type_entry(nameandtype_entry.ref2);
852  type.set(ID_java_lambda_method_handle_index, it->ref1);
853  it->expr.type()=type;
854  }
855  break;
856 
857  default:{};
858  }
859  }
860 }
861 
863 {
864  u2 interfaces_count=read_u2();
865 
866  for(std::size_t i=0; i<interfaces_count; i++)
867  parsed_class.implements
868  .push_back(constant(read_u2()).type().get(ID_C_base_name));
869 }
870 
872 {
873  u2 fields_count=read_u2();
874 
875  for(std::size_t i=0; i<fields_count; i++)
876  {
877  fieldt &field=parsed_class.add_field();
878 
879  u2 access_flags=read_u2();
880  u2 name_index=read_u2();
881  u2 descriptor_index=read_u2();
882  u2 attributes_count=read_u2();
883 
884  field.name=pool_entry(name_index).s;
885  field.is_static=(access_flags&ACC_STATIC)!=0;
886  field.is_final=(access_flags&ACC_FINAL)!=0;
887  field.is_enum=(access_flags&ACC_ENUM)!=0;
888 
889  field.descriptor=id2string(pool_entry(descriptor_index).s);
890  field.is_public=(access_flags&ACC_PUBLIC)!=0;
891  field.is_protected=(access_flags&ACC_PROTECTED)!=0;
892  field.is_private=(access_flags&ACC_PRIVATE)!=0;
893  size_t flags=(field.is_public?1:0)+
894  (field.is_protected?1:0)+
895  (field.is_private?1:0);
896  DATA_INVARIANT(flags<=1, "at most one of public, protected, private");
897 
898  for(std::size_t j=0; j<attributes_count; j++)
899  rfield_attribute(field);
900  }
901 }
902 
903 #define T_BOOLEAN 4
904 #define T_CHAR 5
905 #define T_FLOAT 6
906 #define T_DOUBLE 7
907 #define T_BYTE 8
908 #define T_SHORT 9
909 #define T_INT 10
910 #define T_LONG 11
911 
913  methodt::instructionst &instructions)
914 {
915  u4 code_length=read_u4();
916 
917  u4 address;
918  size_t bytecode_index=0; // index of bytecode instruction
919 
920  for(address=0; address<code_length; address++)
921  {
922  bool wide_instruction=false;
923  u4 start_of_instruction=address;
924 
925  u1 bytecode=read_u1();
926 
927  if(bytecode==0xc4) // wide
928  {
929  wide_instruction=true;
930  address++;
931  bytecode=read_u1();
932  // The only valid instructions following a wide byte are
933  // [ifald]load, [ifald]store, ret and iinc
934  // All of these have either format of v, or V
935  INVARIANT(
936  bytecodes[bytecode].format == 'v' || bytecodes[bytecode].format == 'V',
937  "Unexpected wide instruction: " +
938  id2string(bytecodes[bytecode].mnemonic));
939  }
940 
941  instructions.push_back(instructiont());
942  instructiont &instruction=instructions.back();
943  instruction.statement=bytecodes[bytecode].mnemonic;
944  instruction.address=start_of_instruction;
945  instruction.source_location
946  .set_java_bytecode_index(std::to_string(bytecode_index));
947 
948  switch(bytecodes[bytecode].format)
949  {
950  case ' ': // no further bytes
951  break;
952 
953  case 'c': // a constant_pool index (one byte)
954  if(wide_instruction)
955  {
956  instruction.args.push_back(constant(read_u2()));
957  address+=2;
958  }
959  else
960  {
961  instruction.args.push_back(constant(read_u1()));
962  address+=1;
963  }
964  break;
965 
966  case 'C': // a constant_pool index (two bytes)
967  instruction.args.push_back(constant(read_u2()));
968  address+=2;
969  break;
970 
971  case 'b': // a signed byte
972  {
973  s1 c=read_u1();
974  instruction.args.push_back(from_integer(c, signedbv_typet(8)));
975  }
976  address+=1;
977  break;
978 
979  case 'o': // two byte branch offset, signed
980  {
981  s2 offset=read_u2();
982  // By converting the signed offset into an absolute address (by adding
983  // the current address) the number represented becomes unsigned.
984  instruction.args.push_back(
985  from_integer(address+offset, unsignedbv_typet(16)));
986  }
987  address+=2;
988  break;
989 
990  case 'O': // four byte branch offset, signed
991  {
992  s4 offset=read_u4();
993  // By converting the signed offset into an absolute address (by adding
994  // the current address) the number represented becomes unsigned.
995  instruction.args.push_back(
996  from_integer(address+offset, unsignedbv_typet(32)));
997  }
998  address+=4;
999  break;
1000 
1001  case 'v': // local variable index (one byte)
1002  {
1003  if(wide_instruction)
1004  {
1005  u2 v = read_u2();
1006  instruction.args.push_back(from_integer(v, unsignedbv_typet(16)));
1007  address += 2;
1008  }
1009  else
1010  {
1011  u1 v = read_u1();
1012  instruction.args.push_back(from_integer(v, unsignedbv_typet(8)));
1013  address += 1;
1014  }
1015  }
1016 
1017  break;
1018 
1019  case 'V':
1020  // local variable index (two bytes) plus two signed bytes
1021  if(wide_instruction)
1022  {
1023  u2 v=read_u2();
1024  instruction.args.push_back(from_integer(v, unsignedbv_typet(16)));
1025  s2 c=read_u2();
1026  instruction.args.push_back(from_integer(c, signedbv_typet(16)));
1027  address+=4;
1028  }
1029  else // local variable index (one byte) plus one signed byte
1030  {
1031  u1 v=read_u1();
1032  instruction.args.push_back(from_integer(v, unsignedbv_typet(8)));
1033  s1 c=read_u1();
1034  instruction.args.push_back(from_integer(c, signedbv_typet(8)));
1035  address+=2;
1036  }
1037  break;
1038 
1039  case 'I': // two byte constant_pool index plus two bytes
1040  {
1041  u2 c=read_u2();
1042  instruction.args.push_back(constant(c));
1043  u1 b1=read_u1();
1044  instruction.args.push_back(from_integer(b1, unsignedbv_typet(8)));
1045  u1 b2=read_u1();
1046  instruction.args.push_back(from_integer(b2, unsignedbv_typet(8)));
1047  }
1048  address+=4;
1049  break;
1050 
1051  case 'L': // lookupswitch
1052  {
1053  u4 base_offset=address;
1054 
1055  // first a pad to 32-bit align
1056  while(((address+1)&3)!=0) { read_u1(); address++; }
1057 
1058  // now default value
1059  s4 default_value=read_u4();
1060  // By converting the signed offset into an absolute address (by adding
1061  // the current address) the number represented becomes unsigned.
1062  instruction.args.push_back(
1063  from_integer(base_offset+default_value, unsignedbv_typet(32)));
1064  address+=4;
1065 
1066  // number of pairs
1067  u4 npairs=read_u4();
1068  address+=4;
1069 
1070  for(std::size_t i=0; i<npairs; i++)
1071  {
1072  s4 match=read_u4();
1073  s4 offset=read_u4();
1074  instruction.args.push_back(
1075  from_integer(match, signedbv_typet(32)));
1076  // By converting the signed offset into an absolute address (by adding
1077  // the current address) the number represented becomes unsigned.
1078  instruction.args.push_back(
1079  from_integer(base_offset+offset, unsignedbv_typet(32)));
1080  address+=8;
1081  }
1082  }
1083  break;
1084 
1085  case 'T': // tableswitch
1086  {
1087  size_t base_offset=address;
1088 
1089  // first a pad to 32-bit align
1090  while(((address+1)&3)!=0) { read_u1(); address++; }
1091 
1092  // now default value
1093  s4 default_value=read_u4();
1094  instruction.args.push_back(
1095  from_integer(base_offset+default_value, signedbv_typet(32)));
1096  address+=4;
1097 
1098  // now low value
1099  s4 low_value=read_u4();
1100  address+=4;
1101 
1102  // now high value
1103  s4 high_value=read_u4();
1104  address+=4;
1105 
1106  // there are high-low+1 offsets, and they are signed
1107  for(s4 i=low_value; i<=high_value; i++)
1108  {
1109  s4 offset=read_u4();
1110  instruction.args.push_back(from_integer(i, signedbv_typet(32)));
1111  // By converting the signed offset into an absolute address (by adding
1112  // the current address) the number represented becomes unsigned.
1113  instruction.args.push_back(
1114  from_integer(base_offset+offset, unsignedbv_typet(32)));
1115  address+=4;
1116  }
1117  }
1118  break;
1119 
1120  case 'm': // multianewarray: constant-pool index plus one unsigned byte
1121  {
1122  u2 c=read_u2(); // constant-pool index
1123  instruction.args.push_back(constant(c));
1124  u1 dimensions=read_u1(); // number of dimensions
1125  instruction.args.push_back(
1126  from_integer(dimensions, unsignedbv_typet(8)));
1127  address+=3;
1128  }
1129  break;
1130 
1131  case 't': // array subtype, one byte
1132  {
1133  typet t;
1134  switch(read_u1())
1135  {
1136  case T_BOOLEAN: t.id(ID_bool); break;
1137  case T_CHAR: t.id(ID_char); break;
1138  case T_FLOAT: t.id(ID_float); break;
1139  case T_DOUBLE: t.id(ID_double); break;
1140  case T_BYTE: t.id(ID_byte); break;
1141  case T_SHORT: t.id(ID_short); break;
1142  case T_INT: t.id(ID_int); break;
1143  case T_LONG: t.id(ID_long); break;
1144  default:{};
1145  }
1146  instruction.args.push_back(type_exprt(t));
1147  }
1148  address+=1;
1149  break;
1150 
1151  case 's': // a signed short
1152  {
1153  s2 s=read_u2();
1154  instruction.args.push_back(from_integer(s, signedbv_typet(16)));
1155  }
1156  address+=2;
1157  break;
1158 
1159  default:
1160  throw "unknown JVM bytecode instruction";
1161  }
1162  bytecode_index++;
1163  }
1164 
1165  if(address!=code_length)
1166  {
1167  error() << "bytecode length mismatch" << eom;
1168  throw 0;
1169  }
1170 }
1171 
1173 {
1174  u2 attribute_name_index=read_u2();
1175  u4 attribute_length=read_u4();
1176 
1177  irep_idt attribute_name=pool_entry(attribute_name_index).s;
1178 
1179  if(attribute_name=="Code")
1180  {
1181  u2 UNUSED max_stack=read_u2();
1182  u2 UNUSED max_locals=read_u2();
1183 
1184  rbytecode(method.instructions);
1185 
1186  u2 exception_table_length=read_u2();
1187  method.exception_table.resize(exception_table_length);
1188 
1189  for(std::size_t e=0; e<exception_table_length; e++)
1190  {
1191  u2 start_pc=read_u2();
1192  u2 end_pc=read_u2();
1193 
1194  // from the class file format spec ("4.7.3. The Code Attribute" for Java8)
1195  INVARIANT(
1196  start_pc < end_pc,
1197  "The start_pc must be less than the end_pc as this is the range the "
1198  "exception is active");
1199 
1200  u2 handler_pc=read_u2();
1201  u2 catch_type=read_u2();
1202  method.exception_table[e].start_pc=start_pc;
1203  method.exception_table[e].end_pc=end_pc;
1204  method.exception_table[e].handler_pc=handler_pc;
1205  if(catch_type!=0)
1206  method.exception_table[e].catch_type=
1207  to_symbol_type(pool_entry(catch_type).expr.type());
1208  }
1209 
1210  u2 attributes_count=read_u2();
1211 
1212  for(std::size_t j=0; j<attributes_count; j++)
1213  rcode_attribute(method);
1214 
1215  irep_idt line_number;
1216 
1217  // add missing line numbers
1218  for(methodt::instructionst::iterator
1219  it=method.instructions.begin();
1220  it!=method.instructions.end();
1221  it++)
1222  {
1223  if(!it->source_location.get_line().empty())
1224  line_number=it->source_location.get_line();
1225  else if(!line_number.empty())
1226  it->source_location.set_line(line_number);
1227  it->source_location
1228  .set_function(
1229  "java::"+id2string(parse_tree.parsed_class.name)+"."+
1230  id2string(method.name)+":"+method.descriptor);
1231  }
1232 
1233  // line number of method
1234  if(!method.instructions.empty())
1235  method.source_location.set_line(
1236  method.instructions.begin()->source_location.get_line());
1237  }
1238  else if(attribute_name=="Signature")
1239  {
1240  u2 signature_index=read_u2();
1241  method.signature=id2string(pool_entry(signature_index).s);
1242  }
1243  else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1244  attribute_name=="RuntimeVisibleAnnotations")
1245  {
1247  }
1248  else
1249  skip_bytes(attribute_length);
1250 }
1251 
1253 {
1254  u2 attribute_name_index=read_u2();
1255  u4 attribute_length=read_u4();
1256 
1257  irep_idt attribute_name=pool_entry(attribute_name_index).s;
1258 
1259  if(attribute_name=="Signature")
1260  {
1261  u2 signature_index=read_u2();
1262  field.signature=id2string(pool_entry(signature_index).s);
1263  }
1264  else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1265  attribute_name=="RuntimeVisibleAnnotations")
1266  {
1268  }
1269  else
1270  skip_bytes(attribute_length);
1271 }
1272 
1274 {
1275  u2 attribute_name_index=read_u2();
1276  u4 attribute_length=read_u4();
1277 
1278  irep_idt attribute_name=pool_entry(attribute_name_index).s;
1279 
1280  if(attribute_name=="LineNumberTable")
1281  {
1282  // address -> instructiont
1283  typedef std::map<unsigned,
1284  methodt::instructionst::iterator> instruction_mapt;
1285  instruction_mapt instruction_map;
1286 
1287  for(methodt::instructionst::iterator
1288  it=method.instructions.begin();
1289  it!=method.instructions.end();
1290  it++)
1291  {
1292  instruction_map[it->address]=it;
1293  }
1294 
1295  u2 line_number_table_length=read_u2();
1296 
1297  for(std::size_t i=0; i<line_number_table_length; i++)
1298  {
1299  u2 start_pc=read_u2();
1300  u2 line_number=read_u2();
1301 
1302  // annotate the bytecode program
1303  instruction_mapt::const_iterator it=
1304  instruction_map.find(start_pc);
1305 
1306  if(it!=instruction_map.end())
1307  it->second->source_location.set_line(line_number);
1308  }
1309  }
1310  else if(attribute_name=="LocalVariableTable")
1311  {
1312  u2 local_variable_table_length=read_u2();
1313 
1314  method.local_variable_table.resize(local_variable_table_length);
1315 
1316  for(std::size_t i=0; i<local_variable_table_length; i++)
1317  {
1318  u2 start_pc=read_u2();
1319  u2 length=read_u2();
1320  u2 name_index=read_u2();
1321  u2 descriptor_index=read_u2();
1322  u2 index=read_u2();
1323 
1324  method.local_variable_table[i].index=index;
1325  method.local_variable_table[i].name=pool_entry(name_index).s;
1326  method.local_variable_table[i].descriptor=
1327  id2string(pool_entry(descriptor_index).s);
1328  method.local_variable_table[i].start_pc=start_pc;
1329  method.local_variable_table[i].length=length;
1330  }
1331  }
1332  else if(attribute_name=="LocalVariableTypeTable")
1333  {
1335  }
1336  else if(attribute_name=="StackMapTable")
1337  {
1338  u2 stack_map_entries=read_u2();
1339 
1340  method.stack_map_table.resize(stack_map_entries);
1341 
1342  for(size_t i=0; i<stack_map_entries; i++)
1343  {
1344  u1 frame_type=read_u1();
1345  if(frame_type<=63)
1346  {
1348  method.stack_map_table[i].locals.resize(0);
1349  method.stack_map_table[i].stack.resize(0);
1350  }
1351  else if(64<=frame_type && frame_type<=127)
1352  {
1353  method.stack_map_table[i].type=
1355  method.stack_map_table[i].locals.resize(0);
1356  method.stack_map_table[i].stack.resize(1);
1357  methodt::verification_type_infot verification_type_info;
1358  read_verification_type_info(verification_type_info);
1359  method.stack_map_table[i].stack[0]=verification_type_info;
1360  }
1361  else if(frame_type==247)
1362  {
1363  method.stack_map_table[i].type=
1365  method.stack_map_table[i].locals.resize(0);
1366  method.stack_map_table[i].stack.resize(1);
1367  methodt::verification_type_infot verification_type_info;
1368  u2 offset_delta=read_u2();
1369  read_verification_type_info(verification_type_info);
1370  method.stack_map_table[i].stack[0]=verification_type_info;
1371  method.stack_map_table[i].offset_delta=offset_delta;
1372  }
1373  else if(248<=frame_type && frame_type<=250)
1374  {
1376  method.stack_map_table[i].locals.resize(0);
1377  method.stack_map_table[i].stack.resize(0);
1378  u2 offset_delta=read_u2();
1379  method.stack_map_table[i].offset_delta=offset_delta;
1380  }
1381  else if(frame_type==251)
1382  {
1383  method.stack_map_table[i].type
1385  method.stack_map_table[i].locals.resize(0);
1386  method.stack_map_table[i].stack.resize(0);
1387  u2 offset_delta=read_u2();
1388  method.stack_map_table[i].offset_delta=offset_delta;
1389  }
1390  else if(252<=frame_type && frame_type<=254)
1391  {
1392  size_t new_locals=(size_t) (frame_type-251);
1394  method.stack_map_table[i].locals.resize(new_locals);
1395  method.stack_map_table[i].stack.resize(0);
1396  u2 offset_delta=read_u2();
1397  method.stack_map_table[i].offset_delta=offset_delta;
1398  for(size_t k=0; k<new_locals; k++)
1399  {
1400  method.stack_map_table[i].locals
1401  .push_back(methodt::verification_type_infot());
1403  method.stack_map_table[i].locals.back();
1405  }
1406  }
1407  else if(frame_type==255)
1408  {
1410  u2 offset_delta=read_u2();
1411  method.stack_map_table[i].offset_delta=offset_delta;
1412  u2 number_locals=read_u2();
1413  method.stack_map_table[i].locals.resize(number_locals);
1414  for(size_t k=0; k<(size_t) number_locals; k++)
1415  {
1416  method.stack_map_table[i].locals
1417  .push_back(methodt::verification_type_infot());
1419  method.stack_map_table[i].locals.back();
1421  }
1422  u2 number_stack_items=read_u2();
1423  method.stack_map_table[i].stack.resize(number_stack_items);
1424  for(size_t k=0; k<(size_t) number_stack_items; k++)
1425  {
1426  method.stack_map_table[i].stack
1427  .push_back(methodt::verification_type_infot());
1429  method.stack_map_table[i].stack.back();
1431  }
1432  }
1433  else
1434  throw "error: unknown stack frame type encountered";
1435  }
1436  }
1437  else
1438  skip_bytes(attribute_length);
1439 }
1440 
1443 {
1444  u1 tag=read_u1();
1445  switch(tag)
1446  {
1447  case VTYPE_INFO_TOP:
1449  break;
1450  case VTYPE_INFO_INTEGER:
1452  break;
1453  case VTYPE_INFO_FLOAT:
1455  break;
1456  case VTYPE_INFO_LONG:
1458  break;
1459  case VTYPE_INFO_DOUBLE:
1461  break;
1462  case VTYPE_INFO_ITEM_NULL:
1464  break;
1467  break;
1468  case VTYPE_INFO_OBJECT:
1470  v.cpool_index=read_u2();
1471  break;
1472  case VTYPE_INFO_UNINIT:
1474  v.offset=read_u2();
1475  break;
1476  default:
1477  throw "error: unknown verification type info encountered";
1478  }
1479 }
1480 
1482  annotationst &annotations)
1483 {
1484  u2 num_annotations=read_u2();
1485 
1486  for(u2 number=0; number<num_annotations; number++)
1487  {
1488  annotationt annotation;
1489  rRuntimeAnnotation(annotation);
1490  annotations.push_back(annotation);
1491  }
1492 }
1493 
1495  annotationt &annotation)
1496 {
1497  u2 type_index=read_u2();
1498  annotation.type=type_entry(type_index);
1500 }
1501 
1503  annotationt::element_value_pairst &element_value_pairs)
1504 {
1505  u2 num_element_value_pairs=read_u2();
1506  element_value_pairs.resize(num_element_value_pairs);
1507 
1508  for(auto &element_value_pair : element_value_pairs)
1509  {
1510  u2 element_name_index=read_u2();
1511  element_value_pair.element_name=pool_entry(element_name_index).s;
1512 
1513  relement_value_pair(element_value_pair);
1514  }
1515 }
1516 
1521  annotationt::element_value_pairt &element_value_pair)
1522 {
1523  u1 tag=read_u1();
1524 
1525  switch(tag)
1526  {
1527  case 'e':
1528  {
1529  UNUSED u2 type_name_index=read_u2();
1530  UNUSED u2 const_name_index=read_u2();
1531  // todo: enum
1532  }
1533  break;
1534 
1535  case 'c':
1536  {
1537  u2 class_info_index = read_u2();
1538  element_value_pair.value = symbol_exprt(pool_entry(class_info_index).s);
1539  }
1540  break;
1541 
1542  case '@':
1543  {
1544  // another annotation, recursively
1545  annotationt annotation;
1546  rRuntimeAnnotation(annotation);
1547  }
1548  break;
1549 
1550  case '[':
1551  {
1552  u2 num_values=read_u2();
1553  for(std::size_t i=0; i<num_values; i++)
1554  {
1555  annotationt::element_value_pairt element_value;
1556  relement_value_pair(element_value); // recursive call
1557  }
1558  }
1559  break;
1560 
1561  case 's':
1562  {
1563  u2 const_value_index=read_u2();
1564  element_value_pair.value=string_constantt(
1565  pool_entry(const_value_index).s);
1566  }
1567  break;
1568 
1569  default:
1570  {
1571  u2 const_value_index=read_u2();
1572  element_value_pair.value=constant(const_value_index);
1573  }
1574 
1575  break;
1576  }
1577 }
1578 
1580 {
1581  u2 attribute_name_index=read_u2();
1582  u4 attribute_length=read_u4();
1583 
1584  irep_idt attribute_name=pool_entry(attribute_name_index).s;
1585 
1586  if(attribute_name=="SourceFile")
1587  {
1588  u2 sourcefile_index=read_u2();
1589  irep_idt sourcefile_name;
1590 
1591  std::string fqn(id2string(parsed_class.name));
1592  size_t last_index=fqn.find_last_of(".");
1593  if(last_index==std::string::npos)
1594  sourcefile_name=pool_entry(sourcefile_index).s;
1595  else
1596  {
1597  std::string package_name=fqn.substr(0, last_index+1);
1598  std::replace(package_name.begin(), package_name.end(), '.', '/');
1599  const std::string &full_file_name=
1600  package_name+id2string(pool_entry(sourcefile_index).s);
1601  sourcefile_name=full_file_name;
1602  }
1603 
1604  for(methodst::iterator m_it=parsed_class.methods.begin();
1605  m_it!=parsed_class.methods.end();
1606  m_it++)
1607  {
1608  m_it->source_location.set_file(sourcefile_name);
1609  for(instructionst::iterator i_it=m_it->instructions.begin();
1610  i_it!=m_it->instructions.end();
1611  i_it++)
1612  {
1613  if(!i_it->source_location.get_line().empty())
1614  i_it->source_location.set_file(sourcefile_name);
1615  }
1616  }
1617  }
1618  else if(attribute_name=="Signature")
1619  {
1620  u2 signature_index=read_u2();
1621  parsed_class.signature=id2string(pool_entry(signature_index).s);
1623  parsed_class.signature.value(),
1625  }
1626  else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1627  attribute_name=="RuntimeVisibleAnnotations")
1628  {
1630  }
1631  else if(attribute_name == "BootstrapMethods")
1632  {
1633  // for this attribute
1634  // cf. https://docs.oracle.com/javase/specs/jvms/se8/html/jvms-4.html#jvms-4.7.23
1635  INVARIANT(
1636  !parsed_class.attribute_bootstrapmethods_read,
1637  "only one BootstrapMethods argument is allowed in a class file");
1638 
1639  // mark as read in parsed class
1640  parsed_class.attribute_bootstrapmethods_read = true;
1641  read_bootstrapmethods_entry(parsed_class);
1642  }
1643  else
1644  skip_bytes(attribute_length);
1645 }
1646 
1648 {
1649  u2 methods_count=read_u2();
1650 
1651  for(std::size_t j=0; j<methods_count; j++)
1652  rmethod(parsed_class);
1653 }
1654 
1655 #define ACC_PUBLIC 0x0001
1656 #define ACC_PRIVATE 0x0002
1657 #define ACC_PROTECTED 0x0004
1658 #define ACC_STATIC 0x0008
1659 #define ACC_FINAL 0x0010
1660 #define ACC_SUPER 0x0020
1661 #define ACC_VOLATILE 0x0040
1662 #define ACC_TRANSIENT 0x0080
1663 #define ACC_INTERFACE 0x0200
1664 #define ACC_ABSTRACT 0x0400
1665 #define ACC_SYNTHETIC 0x1000
1666 #define ACC_ANNOTATION 0x2000
1667 #define ACC_ENUM 0x4000
1668 
1670 {
1671  methodt &method=parsed_class.add_method();
1672 
1673  u2 access_flags=read_u2();
1674  u2 name_index=read_u2();
1675  u2 descriptor_index=read_u2();
1676 
1677  method.is_final=(access_flags&ACC_FINAL)!=0;
1678  method.is_static=(access_flags&ACC_STATIC)!=0;
1679  method.is_abstract=(access_flags&ACC_ABSTRACT)!=0;
1680  method.is_public=(access_flags&ACC_PUBLIC)!=0;
1681  method.is_protected=(access_flags&ACC_PROTECTED)!=0;
1682  method.is_private=(access_flags&ACC_PRIVATE)!=0;
1683  method.is_synchronized=(access_flags&ACC_SYNCHRONIZED)!=0;
1684  method.is_native=(access_flags&ACC_NATIVE)!=0;
1685  method.name=pool_entry(name_index).s;
1686  method.base_name=pool_entry(name_index).s;
1687  method.descriptor=id2string(pool_entry(descriptor_index).s);
1688 
1689  size_t flags=(method.is_public?1:0)+
1690  (method.is_protected?1:0)+
1691  (method.is_private?1:0);
1692  DATA_INVARIANT(flags<=1, "at most one of public, protected, private");
1693  u2 attributes_count=read_u2();
1694 
1695  for(std::size_t j=0; j<attributes_count; j++)
1696  rmethod_attribute(method);
1697 }
1698 
1701 {
1702  java_bytecode_parsert java_bytecode_parser;
1703  java_bytecode_parser.in=&istream;
1704  java_bytecode_parser.set_message_handler(message_handler);
1705 
1706  bool parser_result=java_bytecode_parser.parse();
1707 
1708  if(parser_result)
1709  {
1710  return {};
1711  }
1712 
1713  return std::move(java_bytecode_parser.parse_tree);
1714 }
1715 
1718 {
1719  std::ifstream in(file, std::ios::binary);
1720 
1721  if(!in)
1722  {
1723  messaget message(message_handler);
1724  message.error() << "failed to open input file `"
1725  << file << '\'' << messaget::eom;
1726  return {};
1727  }
1728 
1730 }
1731 
1736 {
1737  u2 local_variable_type_table_length=read_u2();
1738 
1739  INVARIANT(
1740  local_variable_type_table_length<=method.local_variable_table.size(),
1741  "Local variable type table cannot have more elements "
1742  "than the local variable table.");
1743  for(std::size_t i=0; i<local_variable_type_table_length; i++)
1744  {
1745  u2 start_pc=read_u2();
1746  u2 length=read_u2();
1747  u2 name_index=read_u2();
1748  u2 signature_index=read_u2();
1749  u2 index=read_u2();
1750 
1751  bool found=false;
1752  for(auto &lvar : method.local_variable_table)
1753  {
1754  // compare to entry in LVT
1755  if(lvar.index==index &&
1756  lvar.name==pool_entry(name_index).s &&
1757  lvar.start_pc==start_pc &&
1758  lvar.length==length)
1759  {
1760  found=true;
1761  lvar.signature=id2string(pool_entry(signature_index).s);
1762  break;
1763  }
1764  }
1765  INVARIANT(
1766  found,
1767  "Entry in LocalVariableTypeTable must be present in LVT");
1768  }
1769 }
1770 
1778 {
1779  const std::function<pool_entryt &(u2)> pool_entry_lambda =
1780  [this](u2 index) -> pool_entryt & { return pool_entry(index); };
1781 
1782  const base_ref_infot &ref_entry = entry.get_reference(pool_entry_lambda);
1783 
1784  const class_infot &class_entry = ref_entry.get_class(pool_entry_lambda);
1785  const name_and_type_infot &name_and_type =
1786  ref_entry.get_name_and_type(pool_entry_lambda);
1787 
1788  std::string class_name = class_entry.get_name(pool_entry_lambda);
1789  // replace '.' for '$' (inner classes)
1790  std::replace(class_name.begin(), class_name.end(), '.', '$');
1791  // replace '/' for '.' (package)
1792  std::replace(class_name.begin(), class_name.end(), '/', '.');
1793  const std::string method_ref =
1794  class_name + "." + name_and_type.get_name(pool_entry_lambda) + ':' +
1795  name_and_type.get_descriptor(pool_entry_lambda);
1796 
1797  lambda_method_handlet lambda_method_handle;
1798 
1799  if(has_prefix(name_and_type.get_name(pool_entry_lambda), "lambda$"))
1800  {
1801  // names seem to be lambda$POSTFIX$NUM
1802  // where POSTFIX is FUN for a function name in which the lambda is define
1803  // "static" when it is a static member of the class
1804  // "new" when it is a class variable, instantiated in <init>
1805  lambda_method_handle.lambda_method_name =
1806  name_and_type.get_name(pool_entry_lambda);
1807  lambda_method_handle.lambda_method_ref = method_ref;
1808  lambda_method_handle.handle_type =
1810 
1811  return lambda_method_handle;
1812  }
1813 
1814  return {};
1815 }
1816 
1821 {
1822  u2 num_bootstrap_methods = read_u2();
1823  for(size_t bootstrap_method_index = 0;
1824  bootstrap_method_index < num_bootstrap_methods;
1825  ++bootstrap_method_index)
1826  {
1827  u2 bootstrap_methodhandle_ref = read_u2();
1828  const pool_entryt &entry = pool_entry(bootstrap_methodhandle_ref);
1829 
1830  method_handle_infot method_handle{entry};
1831 
1832  u2 num_bootstrap_arguments = read_u2();
1833  debug() << "INFO: parse BootstrapMethod handle " << num_bootstrap_arguments
1834  << " #args" << eom;
1835 
1836  // read u2 values of entry into vector
1837  u2_valuest u2_values(num_bootstrap_arguments);
1838  for(size_t i = 0; i < num_bootstrap_arguments; i++)
1839  u2_values[i] = read_u2();
1840 
1841  // try parsing bootstrap method handle
1842  // each entry contains a MethodHandle structure
1843  // u2 tag
1844  // u2 reference kind which must be in the range from 1 to 9
1845  // u2 reference index into the constant pool
1846  //
1847  // reference kinds use the following
1848  // 1 to 4 must point to a CONSTANT_Fieldref structure
1849  // 5 or 8 must point to a CONSTANT_Methodref structure
1850  // 6 or 7 must point to a CONSTANT_Methodref or
1851  // CONSTANT_InterfaceMethodref structure, if the class file version
1852  // number is 52.0 or above, to a CONSTANT_Methodref only in the case
1853  // of less than 52.0
1854  // 9 must point to a CONSTANT_InterfaceMethodref structure
1855 
1856  // the index must point to a CONSTANT_String
1857  // CONSTANT_Class
1858  // CONSTANT_Integer
1859  // CONSTANT_Long
1860  // CONSTANT_Float
1861  // CONSTANT_Double
1862  // CONSTANT_MethodHandle
1863  // CONSTANT_MethodType
1864 
1865  // We read the three arguments here to see whether they correspond to
1866  // our hypotheses for this being a lambda function entry.
1867 
1868  // Need at least 3 arguments, the interface type, the method hanlde
1869  // and the method_type, otherwise it doesn't look like a call that we
1870  // understand
1871  if(num_bootstrap_arguments < 3)
1872  {
1874  parsed_class, bootstrap_method_index, std::move(u2_values));
1875  debug()
1876  << "format of BootstrapMethods entry not recognized: too few arguments"
1877  << eom;
1878  continue;
1879  }
1880 
1881  u2 interface_type_index = u2_values[0];
1882  u2 method_handle_index = u2_values[1];
1883  u2 method_type_index = u2_values[2];
1884 
1885  // The additional arguments for the altmetafactory call are skipped,
1886  // as they are currently not used. We verify though that they are of
1887  // CONSTANT_Integer type, cases where this does not hold will be
1888  // analyzed further.
1889  bool recognized = true;
1890  for(size_t i = 3; i < num_bootstrap_arguments; i++)
1891  {
1892  u2 skipped_argument = u2_values[i];
1893  recognized &= pool_entry(skipped_argument).tag == CONSTANT_Integer;
1894  }
1895 
1896  if(!recognized)
1897  {
1898  debug() << "format of BootstrapMethods entry not recognized: extra "
1899  "arguments of wrong type"
1900  << eom;
1902  parsed_class, bootstrap_method_index, std::move(u2_values));
1903  continue;
1904  }
1905 
1906  const pool_entryt &interface_type_argument =
1907  pool_entry(interface_type_index);
1908  const pool_entryt &method_handle_argument = pool_entry(method_handle_index);
1909  const pool_entryt &method_type_argument = pool_entry(method_type_index);
1910 
1911  if(
1912  interface_type_argument.tag != CONSTANT_MethodType ||
1913  method_handle_argument.tag != CONSTANT_MethodHandle ||
1914  method_type_argument.tag != CONSTANT_MethodType)
1915  {
1916  debug() << "format of BootstrapMethods entry not recognized: arguments "
1917  "wrong type"
1918  << eom;
1920  parsed_class, bootstrap_method_index, std::move(u2_values));
1921  continue;
1922  }
1923 
1924  debug() << "INFO: parse lambda handle" << eom;
1925  optionalt<lambda_method_handlet> lambda_method_handle =
1926  parse_method_handle(method_handle_infot{method_handle_argument});
1927 
1928  if(!lambda_method_handle.has_value())
1929  {
1930  debug() << "format of BootstrapMethods entry not recognized: method "
1931  "handle not recognised"
1932  << eom;
1934  parsed_class, bootstrap_method_index, std::move(u2_values));
1935  continue;
1936  }
1937 
1938  // If parse_method_handle can't parse the lambda method, it should return {}
1939  POSTCONDITION(
1940  lambda_method_handle->handle_type != method_handle_typet::UNKNOWN_HANDLE);
1941 
1942  lambda_method_handle->interface_type =
1943  pool_entry(interface_type_argument.ref1).s;
1944  lambda_method_handle->method_type = pool_entry(method_type_argument.ref1).s;
1945  lambda_method_handle->u2_values = std::move(u2_values);
1946  debug() << "lambda function reference "
1947  << id2string(lambda_method_handle->lambda_method_name)
1948  << " in class \"" << parsed_class.name << "\""
1949  << "\n interface type is "
1950  << id2string(pool_entry(interface_type_argument.ref1).s)
1951  << "\n method type is "
1952  << id2string(pool_entry(method_type_argument.ref1).s) << eom;
1953  parsed_class.add_method_handle(
1954  bootstrap_method_index, *lambda_method_handle);
1955  }
1956 }
1957 
1964  java_bytecode_parsert::classt &parsed_class,
1965  size_t bootstrap_method_index,
1966  java_bytecode_parsert::u2_valuest u2_values) const
1967 {
1968  const lambda_method_handlet lambda_method_handle =
1970  parsed_class.add_method_handle(bootstrap_method_index, lambda_method_handle);
1971 }
#define VTYPE_INFO_OBJECT
The type of an expression.
Definition: type.h:22
static lambda_method_handlet create_unknown_handle(const u2_valuest params)
java_bytecode_parse_treet::classt::lambda_method_handlet lambda_method_handlet
Fixed-width bit-vector with unsigned binary interpretation.
Definition: std_types.h:1220
void rinterfaces(classt &parsed_class)
void store_unknown_method_handle(classt &parsed_class, size_t bootstrap_method_index, u2_valuest u2_values) const
Creates an unknown method handle and puts it into the parsed_class.
Base type of functions.
Definition: std_types.h:764
const std::string & id2string(const irep_idt &d)
Definition: irep.h:43
void set_java_bytecode_index(const irep_idt &index)
#define ACC_NATIVE
#define VTYPE_INFO_LONG
void get_dependencies_from_generic_parameters(const std::string &signature, std::set< irep_idt > &refs)
Collect information about generic type parameters from a given signature.
Definition: java_types.cpp:805
java_bytecode_parse_treet::classt::u2_valuest u2_valuest
void rRuntimeAnnotation_attribute(annotationst &)
uint64_t u8
Definition: bytecode_info.h:58
#define CONSTANT_Methodref
constant_exprt to_expr() const
Definition: ieee_float.cpp:686
#define CONSTANT_MethodType
pool_entryt & pool_entry(u2 index)
void rmethod(classt &parsed_class)
std::istream * in
Definition: parser.h:26
method_handle_kindt
Correspond to the different valid values for field reference_kind From Java 8 spec 4...
#define ACC_PROTECTED
const code_typet & to_code_type(const typet &type)
Cast a generic typet to a code_typet.
Definition: std_types.h:987
typet java_int_type()
Definition: java_types.cpp:32
java_bytecode_parse_treet::annotationst annotationst
void rmethods(classt &parsed_class)
#define CONSTANT_InterfaceMethodref
struct bytecode_infot const bytecode_info[]
const symbol_typet & to_symbol_type(const typet &type)
Cast a generic typet to a symbol_typet.
Definition: std_types.h:139
An expression denoting a type.
Definition: std_expr.h:4410
java_bytecode_parse_treet parse_tree
#define ACC_PRIVATE
#define VTYPE_INFO_DOUBLE
#define CONSTANT_Integer
#define UNUSED
const componentst & components() const
Definition: std_types.h:245
#define CONSTANT_Fieldref
Represents the argument of an instruction that uses a CONSTANT_Fieldref This is used for example as a...
typet java_long_type()
Definition: java_types.cpp:42
typet & type()
Definition: expr.h:56
#define CONSTANT_Float
Corresponds to the CONSTANT_Class_info Structure Described in Java 8 specification 4...
class_infot(const pool_entryt &entry)
#define VTYPE_INFO_ITEM_NULL
#define CONSTANT_Class
static mstreamt & eom(mstreamt &m)
Definition: message.h:272
Structure type.
Definition: std_types.h:297
uint32_t u4
Definition: bytecode_info.h:57
#define POSTCONDITION(CONDITION)
Definition: invariant.h:237
#define T_FLOAT
std::string get_name(pool_entry_lookupt pool_entry) const
Definition: parser.h:23
#define ACC_ABSTRACT
#define INVARIANT(CONDITION, REASON)
Definition: invariant.h:205
#define T_BYTE
static ieee_float_spect double_precision()
Definition: ieee_float.h:80
class_infot get_class(pool_entry_lookupt pool_entry) const
void relement_value_pairs(annotationt::element_value_pairst &)
#define CONSTANT_MethodHandle
Parser utilities.
#define CONSTANT_Long
java_bytecode_parsert::pool_entryt pool_entryt
std::vector< annotationt > annotationst
const irep_idt & id() const
Definition: irep.h:189
typet java_type_from_string(const std::string &src, const std::string &class_name_prefix)
Transforms a string representation of a Java type into an internal type representation thereof...
Definition: java_types.cpp:428
java_bytecode_parse_treet::instructiont instructiont
#define ACC_FINAL
void unpack(const mp_integer &i)
Definition: ieee_float.cpp:312
A reference into the symbol table.
Definition: std_types.h:110
std::vector< pool_entryt > constant_poolt
name_and_type_infot get_name_and_type(pool_entry_lookupt pool_entry) const
#define CONSTANT_Double
#define VTYPE_INFO_UNINIT
#define CONSTANT_NameAndType
Fixed-width bit-vector with two&#39;s complement interpretation.
Definition: std_types.h:1262
void parse_local_variable_type_table(methodt &method)
Parses the local variable type table of a method.
#define CONSTANT_Utf8
nonstd::optional< T > optionalt
Definition: optional.h:35
java_bytecode_parse_treet::classt classt
API to expression classes.
const irep_idt & get(const irep_namet &name) const
Definition: irep.cpp:213
mstreamt & error() const
Definition: message.h:302
void set_line(const irep_idt &line)
static ieee_float_spect single_precision()
Definition: ieee_float.h:74
std::vector< instructiont > instructionst
name_and_type_infot(java_bytecode_parsert::pool_entryt entry)
base_ref_infot get_reference(pool_entry_lookupt pool_entry) const
#define PRECONDITION(CONDITION)
Definition: invariant.h:230
#define VTYPE_INFO_UNINIT_THIS
#define T_DOUBLE
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
#define ACC_SYNCHRONIZED
#define ACC_SYNTHETIC
uint16_t u2
Definition: bytecode_info.h:56
#define VTYPE_INFO_TOP
base_ref_infot(pool_entryt entry)
virtual void set_message_handler(message_handlert &_message_handler)
Definition: message.h:148
void rbytecode(methodt::instructionst &)
int32_t s4
Definition: bytecode_info.h:61
#define T_LONG
void skip_bytes(std::size_t bytes)
#define CONSTANT_String
const struct_typet & to_struct_type(const typet &type)
Cast a generic typet to a struct_typet.
Definition: std_types.h:318
optionalt< class java_bytecode_parse_treet > java_bytecode_parse(std::istream &istream, message_handlert &message_handler)
void rclass_attribute(classt &parsed_class)
void rmethod_attribute(methodt &method)
#define T_INT
void get_class_refs_rec(const typet &)
#define VTYPE_INFO_FLOAT
#define ACC_ENUM
typet java_type_from_string_with_exception(const std::string &descriptor, const optionalt< std::string > &signature, const std::string &class_name)
Definition: java_types.h:538
void relement_value_pair(annotationt::element_value_pairt &)
Corresponds to the element_value structure Described in Java 8 specification 4.7.16.1 https://docs.oracle.com/javase/specs/jvms/se8/html/jvms-4.html#jvms-4.7.16.1.
std::string get_name(pool_entry_lookupt pool_entry) const
#define T_BOOLEAN
java_bytecode_parse_treet::classt::fieldst fieldst
method_handle_kindt reference_kind
std::vector< element_value_pairt > element_value_pairst
void rfields(classt &parsed_class)
#define ACC_STATIC
std::vector< bytecodet > bytecodes
java_bytecode_parse_treet::annotationt annotationt
mstreamt & result() const
Definition: message.h:312
const char * mnemonic
Definition: bytecode_info.h:46
#define ACC_INTERFACE
uint8_t u1
Definition: bytecode_info.h:55
std::function< java_bytecode_parsert::pool_entryt &(u2)> pool_entry_lookupt
structured_pool_entryt(java_bytecode_parsert::pool_entryt entry)
Base class for all expressions.
Definition: expr.h:42
#define VTYPE_INFO_INTEGER
java_bytecode_parse_treet::methodt::instructionst instructionst
const parameterst & parameters() const
Definition: std_types.h:905
void read_bootstrapmethods_entry(classt &)
Read all entries of the BootstrapMethods attribute of a class file.
#define ACC_PUBLIC
java_bytecode_parse_treet::fieldt fieldt
#define T_SHORT
void read_verification_type_info(methodt::verification_type_infot &)
u2 get_name_and_type_index() const
std::string to_string(const string_constraintt &expr)
Used for debug printing.
#define T_CHAR
method_handle_infot(java_bytecode_parsert::pool_entryt entry)
java_bytecode_parse_treet::methodt methodt
const typet type_entry(u2 index)
#define ACC_ANNOTATION
void rcode_attribute(methodt &method)
Corresponds to the CONSTANT_NameAndType_info Structure Described in Java 8 specification 4...
Expression to hold a symbol (variable)
Definition: std_expr.h:90
void add_method_handle(size_t bootstrap_index, lambda_method_handlet handle)
int8_t s1
Definition: bytecode_info.h:59
goto_programt coverage_criteriont message_handlert & message_handler
Definition: cover.cpp:66
int16_t s2
Definition: bytecode_info.h:60
mstreamt & debug() const
Definition: message.h:332
void rRuntimeAnnotation(annotationt &)
const typet & subtype() const
Definition: type.h:33
#define DATA_INVARIANT(CONDITION, REASON)
Definition: invariant.h:257
java_bytecode_parse_treet::classt::methodst methodst
symbol_typet java_classname(const std::string &id)
Definition: java_types.cpp:668
std::string get_descriptor(pool_entry_lookupt pool_entry) const
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
bool empty() const
Definition: dstring.h:61
java_bytecode_parse_treet::classt::method_handle_typet method_handle_typet
const irept & find(const irep_namet &name) const
Definition: irep.cpp:285
#define CONSTANT_InvokeDynamic
const typet & return_type() const
Definition: std_types.h:895
static std::string read_utf8_constant(const pool_entryt &entry)
const irep_idt & get_identifier() const
Definition: std_types.h:123
void set(const irep_namet &name, const irep_idt &value)
Definition: irep.h:214
optionalt< lambda_method_handlet > parse_method_handle(const class method_handle_infot &entry)
Read method handle pointed to from constant pool entry at index, return type of method handle and nam...
static format_containert< T > format(const T &o)
Definition: format.h:35
Definition: kdev_t.h:19