cprover
compile.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Compile and link source and object files.
4 
5 Author: CM Wintersteiger
6 
7 Date: June 2006
8 
9 \*******************************************************************/
10 
13 
14 #include "compile.h"
15 
16 #include <cstring>
17 #include <fstream>
18 #include <iostream>
19 
20 #include <util/cmdline.h>
21 #include <util/config.h>
22 #include <util/file_util.h>
23 #include <util/get_base_name.h>
24 #include <util/suffix.h>
25 #include <util/tempdir.h>
26 #include <util/unicode.h>
27 
28 #include <ansi-c/ansi_c_language.h>
30 
35 
36 #include <langapi/mode.h>
37 
39 
40 #define DOTGRAPHSETTINGS "color=black;" \
41  "orientation=portrait;" \
42  "fontsize=20;"\
43  "compound=true;"\
44  "size=\"30,40\";"\
45  "ratio=compress;"
46 
47 // the following are for chdir
48 
49 #if defined(__linux__) || \
50  defined(__FreeBSD_kernel__) || \
51  defined(__GNU__) || \
52  defined(__unix__) || \
53  defined(__CYGWIN__) || \
54  defined(__MACH__)
55 #include <unistd.h>
56 #endif
57 
58 #ifdef _WIN32
59 #include <direct.h>
60 #include <windows.h>
61 #define chdir _chdir
62 #define popen _popen
63 #define pclose _pclose
64 #endif
65 
70 {
72 
74 
75  // Parse command line for source and object file names
76  for(std::size_t i=0; i<_cmdline.args.size(); i++)
78  return true;
79 
80  for(std::list<std::string>::const_iterator it = libraries.begin();
81  it!=libraries.end();
82  it++)
83  {
84  if(!find_library(*it))
85  // GCC is going to complain if this doesn't exist
86  debug() << "Library not found: " << *it << " (ignoring)" << eom;
87  }
88 
89  statistics() << "No. of source files: " << source_files.size() << eom;
90  statistics() << "No. of object files: " << object_files.size() << eom;
91 
92  // Work through the given source files
93 
94  if(source_files.empty() && object_files.empty())
95  {
96  error() << "no input files" << eom;
97  return true;
98  }
99 
100  if(mode==LINK_LIBRARY && !source_files.empty())
101  {
102  error() << "cannot link source files" << eom;
103  return true;
104  }
105 
106  if(mode==PREPROCESS_ONLY && !object_files.empty())
107  {
108  error() << "cannot preprocess object files" << eom;
109  return true;
110  }
111 
112  const unsigned warnings_before=
114 
115  if(!source_files.empty())
116  if(compile())
117  return true;
118 
119  if(mode==LINK_LIBRARY ||
120  mode==COMPILE_LINK ||
122  {
123  if(link())
124  return true;
125  }
126 
127  return
130  warnings_before;
131 }
132 
133 enum class file_typet
134 {
136  UNKNOWN,
137  SOURCE_FILE,
139  THIN_ARCHIVE,
140  GOTO_BINARY,
141  ELF_OBJECT
142 };
143 
144 static file_typet detect_file_type(const std::string &file_name)
145 {
146  // first of all, try to open the file
147  std::ifstream in(file_name);
148  if(!in)
150 
151  const std::string::size_type r = file_name.rfind('.');
152 
153  const std::string ext =
154  r == std::string::npos ? "" : file_name.substr(r + 1, file_name.length());
155 
156  if(
157  ext == "c" || ext == "cc" || ext == "cp" || ext == "cpp" || ext == "CPP" ||
158  ext == "c++" || ext == "C" || ext == "i" || ext == "ii" || ext == "class" ||
159  ext == "jar" || ext == "jsil")
160  {
162  }
163 
164  char hdr[8];
165  in.get(hdr, 8);
166  if((ext == "a" || ext == "o") && strncmp(hdr, "!<thin>", 8) == 0)
168 
169  if(ext == "a")
171 
172  if(is_goto_binary(file_name))
174 
175  if(hdr[0] == 0x7f && memcmp(hdr + 1, "ELF", 3) == 0)
176  return file_typet::ELF_OBJECT;
177 
178  return file_typet::UNKNOWN;
179 }
180 
183 bool compilet::add_input_file(const std::string &file_name)
184 {
185  switch(detect_file_type(file_name))
186  {
188  warning() << "failed to open file `" << file_name
189  << "': " << std::strerror(errno) << eom;
190  return warning_is_fatal; // generously ignore unless -Werror
191 
192  case file_typet::UNKNOWN:
193  // unknown extension, not a goto binary, will silently ignore
194  debug() << "unknown file type in `" << file_name << "'" << eom;
195  return false;
196 
198  // ELF file without goto-cc section, silently ignore
199  debug() << "ELF object without goto-cc section: `" << file_name << "'"
200  << eom;
201  return false;
202 
204  source_files.push_back(file_name);
205  return false;
206 
208  return add_files_from_archive(file_name, false);
209 
211  return add_files_from_archive(file_name, true);
212 
214  object_files.push_back(file_name);
215  return false;
216  }
217 
218  UNREACHABLE;
219 }
220 
224  const std::string &file_name,
225  bool thin_archive)
226 {
227  std::stringstream cmd;
228  FILE *stream;
229 
230  std::string tstr = working_directory;
231 
232  if(!thin_archive)
233  {
234  tstr = get_temporary_directory("goto-cc.XXXXXX");
235 
236  if(tstr=="")
237  {
238  error() << "Cannot create temporary directory" << eom;
239  return true;
240  }
241 
242  tmp_dirs.push_back(tstr);
243  if(chdir(tmp_dirs.back().c_str())!=0)
244  {
245  error() << "Cannot switch to temporary directory" << eom;
246  return true;
247  }
248 
249  // unpack now
250  cmd << "ar x " << concat_dir_file(working_directory, file_name);
251 
252  stream=popen(cmd.str().c_str(), "r");
253  pclose(stream);
254 
255  cmd.clear();
256  cmd.str("");
257  }
258 
259  // add the files from "ar t"
260  cmd << "ar t " << concat_dir_file(working_directory, file_name);
261 
262  stream = popen(cmd.str().c_str(), "r");
263 
264  if(stream != nullptr)
265  {
266  std::string line;
267  int ch; // fgetc returns an int, not char
268  while((ch = fgetc(stream)) != EOF)
269  {
270  if(ch != '\n')
271  {
272  line += static_cast<char>(ch);
273  }
274  else
275  {
276  std::string t = concat_dir_file(tstr, line);
277 
278  if(is_goto_binary(t))
279  object_files.push_back(t);
280  else
281  debug() << "Object file is not a goto binary: " << line << eom;
282 
283  line = "";
284  }
285  }
286 
287  pclose(stream);
288  }
289 
290  if(!thin_archive && chdir(working_directory.c_str()) != 0)
291  error() << "Could not change back to working directory" << eom;
292 
293  return false;
294 }
295 
299 bool compilet::find_library(const std::string &name)
300 {
301  std::string tmp;
302 
303  for(std::list<std::string>::const_iterator
304  it=library_paths.begin();
305  it!=library_paths.end();
306  it++)
307  {
308  #ifdef _WIN32
309  tmp = *it + "\\lib";
310  #else
311  tmp = *it + "/lib";
312  #endif
313 
314  std::ifstream in(tmp+name+".a");
315 
316  if(in.is_open())
317  return !add_input_file(tmp+name+".a");
318  else
319  {
320  std::string libname=tmp+name+".so";
321 
322  switch(detect_file_type(libname))
323  {
325  return !add_input_file(libname);
326 
328  warning() << "Warning: Cannot read ELF library " << libname << eom;
329  return warning_is_fatal;
330 
331  default:
332  break;
333  }
334  }
335  }
336 
337  return false;
338 }
339 
343 {
344  // "compile" hitherto uncompiled functions
345  statistics() << "Compiling functions" << eom;
347 
348  // parse object files
349  for(const auto &file_name : object_files)
350  {
351  if(read_object_and_link(file_name, symbol_table,
353  return true;
354  }
355 
356  // produce entry point?
357 
359  {
360  // new symbols may have been added to a previously linked file
361  // make sure a new entry point is created that contains all
362  // static initializers
364 
367 
369  return true;
370 
371  // entry_point may (should) add some more functions.
373  }
374 
377  return true;
378 
380 }
381 
386 {
387  while(!source_files.empty())
388  {
389  std::string file_name=source_files.front();
390  source_files.pop_front();
391 
392  // Visual Studio always prints the name of the file it's doing
393  if(echo_file_name)
394  status() << file_name << eom;
395 
396  bool r=parse_source(file_name); // don't break the program!
397 
398  if(r)
399  {
400  const std::string &debug_outfile=
401  cmdline.get_value("print-rejected-preprocessed-source");
402  if(!debug_outfile.empty())
403  {
404  std::ifstream in(file_name, std::ios::binary);
405  std::ofstream out(debug_outfile, std::ios::binary);
406  out << in.rdbuf();
407  warning() << "Failed sources in " << debug_outfile << eom;
408  }
409 
410  return true; // parser/typecheck error
411  }
412 
414  {
415  // output an object file for every source file
416 
417  // "compile" functions
419 
420  std::string cfn;
421 
422  if(output_file_object=="")
423  cfn=get_base_name(file_name, true)+"."+object_file_extension;
424  else
425  cfn=output_file_object;
426 
428  return true;
429 
431  return true;
432 
433  symbol_table.clear(); // clean symbol table for next source file.
435  }
436  }
437 
438  return false;
439 }
440 
443 bool compilet::parse(const std::string &file_name)
444 {
445  if(file_name=="-")
446  return parse_stdin();
447 
448  #ifdef _MSC_VER
449  std::ifstream infile(widen(file_name));
450  #else
451  std::ifstream infile(file_name);
452  #endif
453 
454  if(!infile)
455  {
456  error() << "failed to open input file `" << file_name << "'" << eom;
457  return true;
458  }
459 
460  std::unique_ptr<languaget> languagep;
461 
462  // Using '-x', the type of a file can be overridden;
463  // otherwise, it's guessed from the extension.
464 
465  if(override_language!="")
466  {
467  if(override_language=="c++" || override_language=="c++-header")
468  languagep = get_language_from_mode(ID_cpp);
469  else
470  languagep = get_language_from_mode(ID_C);
471  }
472  else
473  languagep=get_language_from_filename(file_name);
474 
475  if(languagep==nullptr)
476  {
477  error() << "failed to figure out type of file `" << file_name << "'" << eom;
478  return true;
479  }
480 
482 
483  language_filet &lf=language_files.add_file(file_name);
484  lf.language=std::move(languagep);
485 
486  if(mode==PREPROCESS_ONLY)
487  {
488  statistics() << "Preprocessing: " << file_name << eom;
489 
490  std::ostream *os = &std::cout;
491  std::ofstream ofs;
492 
493  if(cmdline.isset('o'))
494  {
495  ofs.open(cmdline.get_value('o'));
496  os = &ofs;
497 
498  if(!ofs.is_open())
499  {
500  error() << "failed to open output file `"
501  << cmdline.get_value('o') << "'" << eom;
502  return true;
503  }
504  }
505 
506  lf.language->preprocess(infile, file_name, *os);
507  }
508  else
509  {
510  statistics() << "Parsing: " << file_name << eom;
511 
512  if(lf.language->parse(infile, file_name))
513  {
515  error() << "PARSING ERROR" << eom;
516  return true;
517  }
518  }
519 
520  lf.get_modules();
521  return false;
522 }
523 
527 {
528  ansi_c_languaget language;
529 
531 
532  statistics() << "Parsing: (stdin)" << eom;
533 
534  if(mode==PREPROCESS_ONLY)
535  {
536  std::ostream *os = &std::cout;
537  std::ofstream ofs;
538 
539  if(cmdline.isset('o'))
540  {
541  ofs.open(cmdline.get_value('o'));
542  os = &ofs;
543 
544  if(!ofs.is_open())
545  {
546  error() << "failed to open output file `"
547  << cmdline.get_value('o') << "'" << eom;
548  return true;
549  }
550  }
551 
552  language.preprocess(std::cin, "", *os);
553  }
554  else
555  {
556  if(language.parse(std::cin, ""))
557  {
559  error() << "PARSING ERROR" << eom;
560  return true;
561  }
562  }
563 
564  return false;
565 }
566 
572  const std::string &file_name,
573  const symbol_tablet &lsymbol_table,
574  goto_functionst &functions)
575 {
576  return write_bin_object_file(file_name, lsymbol_table, functions);
577 }
578 
584  const std::string &file_name,
585  const symbol_tablet &lsymbol_table,
586  goto_functionst &functions)
587 {
588  statistics() << "Writing binary format object `"
589  << file_name << "'" << eom;
590 
591  // symbols
592  statistics() << "Symbols in table: "
593  << lsymbol_table.symbols.size() << eom;
594 
595  std::ofstream outfile(file_name, std::ios::binary);
596 
597  if(!outfile.is_open())
598  {
599  error() << "Error opening file `" << file_name << "'" << eom;
600  return true;
601  }
602 
603  if(write_goto_binary(outfile, lsymbol_table, functions))
604  return true;
605 
606  unsigned cnt=function_body_count(functions);
607 
608  statistics() << "Functions: " << functions.function_map.size()
609  << "; " << cnt << " have a body." << eom;
610 
611  outfile.close();
612  wrote_object=true;
613 
614  return false;
615 }
616 
619 bool compilet::parse_source(const std::string &file_name)
620 {
621  if(parse(file_name))
622  return true;
623 
624  if(typecheck()) // we just want to typecheck this one file here
625  return true;
626 
627  if((has_suffix(file_name, ".class") ||
628  has_suffix(file_name, ".jar")) &&
629  final())
630  return true;
631 
632  // so we remove it from the list afterwards
633  language_files.remove_file(file_name);
634  return false;
635 }
636 
639 compilet::compilet(cmdlinet &_cmdline, ui_message_handlert &mh, bool Werror):
640  language_uit(_cmdline, mh),
641  ns(symbol_table),
642  cmdline(_cmdline),
643  warning_is_fatal(Werror)
644 {
646  echo_file_name=false;
647  wrote_object=false;
649 }
650 
654 {
655  // clean up temp dirs
656 
657  for(std::list<std::string>::const_iterator it = tmp_dirs.begin();
658  it!=tmp_dirs.end();
659  it++)
660  delete_directory(*it);
661 }
662 
663 unsigned compilet::function_body_count(const goto_functionst &functions) const
664 {
665  int fbs=0;
666 
667  for(goto_functionst::function_mapt::const_iterator it=
668  functions.function_map.begin();
669  it != functions.function_map.end();
670  it++)
671  if(it->second.body_available())
672  fbs++;
673 
674  return fbs;
675 }
676 
678 {
679  config.ansi_c.defines.push_back("__GOTO_CC_VERSION__=" CBMC_VERSION);
680 }
681 
683 {
685 
686  // the compilation may add symbols!
687 
689 
690  while(before!=symbol_table.symbols.size())
691  {
692  before=symbol_table.symbols.size();
693 
694  typedef std::set<irep_idt> symbols_sett;
695  symbols_sett symbols;
696 
697  for(const auto &named_symbol : symbol_table.symbols)
698  symbols.insert(named_symbol.first);
699 
700  // the symbol table iterators aren't stable
701  for(symbols_sett::const_iterator
702  it=symbols.begin();
703  it!=symbols.end();
704  ++it)
705  {
706  symbol_tablet::symbolst::const_iterator s_it=
707  symbol_table.symbols.find(*it);
708  assert(s_it!=symbol_table.symbols.end());
709 
710  if(s_it->second.type.id()==ID_code &&
711  !s_it->second.is_macro &&
712  !s_it->second.is_type &&
713  s_it->second.value.id()!="compiled" &&
714  s_it->second.value.is_not_nil())
715  {
716  debug() << "Compiling " << s_it->first << eom;
717  converter.convert_function(s_it->first, dest.function_map[s_it->first]);
718  symbol_table.get_writeable_ref(*it).value=exprt("compiled");
719  }
720  }
721  }
722 }
723 
725 {
726  for(const auto &pair : symbol_table.symbols)
727  {
728  const irep_idt &name=pair.second.name;
729  const typet &new_type=pair.second.type;
730  if(!(has_prefix(id2string(name), CPROVER_PREFIX) && new_type.id()==ID_code))
731  continue;
732 
733  bool inserted;
734  std::map<irep_idt, symbolt>::iterator old;
735  std::tie(old, inserted)=written_macros.insert({name, pair.second});
736 
737  if(!inserted && old->second.type!=new_type)
738  {
739  error() << "Incompatible CPROVER macro symbol types:" << eom
740  << old->second.type.pretty() << "(at " << old->second.location
741  << ")" << eom << "and" << eom << new_type.pretty()
742  << "(at " << pair.second.location << ")" << eom;
743  return true;
744  }
745  }
746  return false;
747 }
std::string concat_dir_file(const std::string &directory, const std::string &file_name)
Definition: file_util.cpp:134
The type of an expression.
Definition: type.h:22
symbol_tablet symbol_table
Definition: language_ui.h:25
bool add_input_file(const std::string &)
puts input file names into a list and does preprocessing for libraries.
Definition: compile.cpp:183
struct configt::ansi_ct ansi_c
std::string output_file_executable
Definition: compile.h:48
compilet(cmdlinet &_cmdline, ui_message_handlert &mh, bool Werror)
constructor
Definition: compile.cpp:639
static int8_t r
Definition: irep_hash.h:59
const std::string & id2string(const irep_idt &d)
Definition: irep.h:43
Globally accessible architectural configuration.
Definition: config.h:24
virtual bool parse()
Definition: language_ui.cpp:38
virtual void clear() override
Definition: symbol_table.h:97
static file_typet detect_file_type(const std::string &file_name)
Definition: compile.cpp:144
std::wstring widen(const char *s)
Definition: unicode.cpp:56
std::string pretty(unsigned indent=0, unsigned max_indent=0) const
Definition: irep.cpp:641
#define CPROVER_PREFIX
std::unique_ptr< languaget > get_language_from_mode(const irep_idt &mode)
Get the language corresponding to the given mode.
Definition: mode.cpp:50
uit get_ui()
Definition: language_ui.h:48
std::list< std::string > tmp_dirs
Definition: compile.h:44
cmdlinet & cmdline
Definition: compile.h:94
std::list< std::string > libraries
Definition: compile.h:43
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
std::list< std::string > defines
Definition: config.h:112
enum compilet::@2 mode
bool is_goto_binary(const std::string &filename)
Read Goto Programs.
std::list< std::string > library_paths
Definition: compile.h:40
language_filest language_files
Definition: language_ui.h:24
language_filet & add_file(const std::string &filename)
Definition: language_file.h:76
exprt value
Initial value of symbol.
Definition: symbol.h:37
std::string get_value(char option) const
Definition: cmdline.cpp:45
virtual bool typecheck()
Definition: language_ui.cpp:92
function_mapt function_map
const cmdlinet & _cmdline
Definition: language_ui.h:54
unsignedbv_typet size_type()
Definition: c_types.cpp:58
bool doit()
reads and source and object files, compiles and links them into goto program objects.
Definition: compile.cpp:69
void convert_function(const irep_idt &identifier, goto_functionst::goto_functiont &result)
std::unique_ptr< languaget > get_language_from_filename(const std::string &filename)
Get the language corresponding to the registered file name extensions.
Definition: mode.cpp:101
bool find_library(const std::string &)
tries to find a library object file that matches the given library name.
Definition: compile.cpp:299
static mstreamt & eom(mstreamt &m)
Definition: message.h:272
bool parse(std::istream &instream, const std::string &path) override
configt config
Definition: config.cpp:23
std::string get_base_name(const std::string &in, bool strip_suffix)
cleans a filename from path and extension
file_typet
Definition: compile.cpp:133
bool write_bin_object_file(const std::string &, const symbol_tablet &, goto_functionst &)
writes the goto functions in the function table to a binary format object file.
Definition: compile.cpp:583
std::string get_current_working_directory()
Definition: file_util.cpp:45
bool parse_stdin()
parses a source file (low-level parsing)
Definition: compile.cpp:526
mstreamt & warning() const
Definition: message.h:307
std::unique_ptr< languaget > language
Definition: language_file.h:46
const irep_idt & id() const
Definition: irep.h:189
argst args
Definition: cmdline.h:37
virtual bool isset(char option) const
Definition: cmdline.cpp:27
bool compile()
parses source files and writes object files, or keeps the symbols in the symbol_table depending on th...
Definition: compile.cpp:385
bool write_goto_binary(std::ostream &out, const goto_modelt &goto_model, int version)
Writes a goto program to disc.
bool preprocess(std::istream &instream, const std::string &path, std::ostream &outstream) override
ANSI-C preprocessing.
bool warning_is_fatal
Definition: compile.h:95
bool parse_source(const std::string &)
parses a source file
Definition: compile.cpp:619
#define INITIALIZE_FUNCTION
goto_functionst compiled_functions
Definition: compile.h:27
void delete_directory(const std::string &path)
deletes all files in &#39;path&#39; and then the directory itself
Definition: file_util.cpp:95
The symbol table.
Definition: symbol_table.h:19
mstreamt & error() const
Definition: message.h:302
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
virtual void set_message_handler(message_handlert &_message_handler)
Definition: message.h:148
bool ansi_c_entry_point(symbol_tablet &symbol_table, message_handlert &message_handler)
bool wrote_object
Definition: compile.h:111
bool echo_file_name
Definition: compile.h:28
const symbolst & symbols
std::string working_directory
Definition: compile.h:29
unsigned function_body_count(const goto_functionst &) const
Definition: compile.cpp:663
bool write_object_file(const std::string &, const symbol_tablet &, goto_functionst &)
writes the goto functions in the function table to a binary format object file.
Definition: compile.cpp:571
std::map< irep_idt, symbolt > written_macros
Definition: compile.h:104
Compile and link source and object files.
bool add_files_from_archive(const std::string &file_name, bool thin_archive)
extracts goto binaries from AR archive and add them as input files.
Definition: compile.cpp:223
message_handlert & get_message_handler()
Definition: message.h:153
Goto Programs with Functions.
static irep_idt entry_point()
std::string get_temporary_directory(const std::string &name_template)
Definition: tempdir.cpp:32
mstreamt & status() const
Definition: message.h:317
void convert_symbols(goto_functionst &dest)
Definition: compile.cpp:682
bool remove(const irep_idt &name)
Remove a symbol from the symbol table.
Base class for all expressions.
Definition: expr.h:42
#define UNREACHABLE
Definition: invariant.h:250
Program Transformation.
std::string object_file_extension
Definition: compile.h:47
std::list< std::string > source_files
Definition: compile.h:41
bool has_suffix(const std::string &s, const std::string &suffix)
Definition: suffix.h:15
std::list< std::string > object_files
Definition: compile.h:42
mstreamt & debug() const
Definition: message.h:332
void remove_file(const std::string &filename)
Definition: language_file.h:82
bool link()
parses object files and links them
Definition: compile.cpp:342
void add_compiler_specific_defines(class configt &config) const
Definition: compile.cpp:677
bool read_object_and_link(const std::string &file_name, goto_modelt &dest, message_handlert &message_handler)
reads an object file
std::string override_language
Definition: compile.h:30
Write GOTO binaries.
bool add_written_cprover_symbols(const symbol_tablet &symbol_table)
Definition: compile.cpp:724
std::string output_file_object
Definition: compile.h:48
mstreamt & statistics() const
Definition: message.h:322
~compilet()
cleans up temporary files
Definition: compile.cpp:653
unsigned get_message_count(unsigned level) const
Definition: message.h:68