cprover
c_preprocess.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module:
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
9 #include "c_preprocess.h"
10 
11 #include <util/c_types.h>
12 #include <util/config.h>
13 #include <util/message.h>
14 #include <util/prefix.h>
15 #include <util/run.h>
16 #include <util/suffix.h>
17 #include <util/tempfile.h>
18 
19 #ifdef _MSC_VER
20 # include <util/unicode.h>
21 #endif
22 
23 #include <fstream>
24 
25 static void error_parse_line(
26  const std::string &line,
27  bool warning_only,
28  messaget &message)
29 {
30  std::string error_msg=line;
31  source_locationt saved_error_location;
32 
33  if(has_prefix(line, "file "))
34  {
35  const char *tptr=line.c_str();
36  int state=0;
37  std::string file, line_no, column, _error_msg, function;
38 
39  tptr+=5;
40 
41  char previous=0;
42 
43  while(*tptr!=0)
44  {
45  if(has_prefix(tptr, " line ") && state != 4)
46  {
47  state=1;
48  tptr+=6;
49  continue;
50  }
51  else if(has_prefix(tptr, " column ") && state != 4)
52  {
53  state=2;
54  tptr+=8;
55  continue;
56  }
57  else if(has_prefix(tptr, " function ") && state != 4)
58  {
59  state=3;
60  tptr+=10;
61  continue;
62  }
63  else if(*tptr==':' && state!=4)
64  {
65  if(tptr[1]==' ' && previous!=':')
66  {
67  state=4;
68  tptr++;
69  while(*tptr==' ') tptr++;
70  continue;
71  }
72  }
73 
74  if(state==0) // file
75  file+=*tptr;
76  else if(state==1) // line number
77  line_no+=*tptr;
78  else if(state==2) // column
79  column+=*tptr;
80  else if(state==3) // function
81  function+=*tptr;
82  else if(state==4) // error message
83  _error_msg+=*tptr;
84 
85  previous=*tptr;
86 
87  tptr++;
88  }
89 
90  if(state==4)
91  {
92  saved_error_location.set_file(file);
93  saved_error_location.set_function(function);
94  saved_error_location.set_line(line_no);
95  saved_error_location.set_column(column);
96  error_msg=_error_msg;
97  }
98  }
99  else if(has_prefix(line, "In file included from "))
100  {
101  }
102  else
103  {
104  const char *tptr=line.c_str();
105  int state=0;
106  std::string file, line_no;
107 
108  while(*tptr!=0)
109  {
110  if(state==0)
111  {
112  if(*tptr==':')
113  state++;
114  else
115  file+=*tptr;
116  }
117  else if(state==1)
118  {
119  if(*tptr==':')
120  state++;
121  else if(isdigit(*tptr))
122  line_no+=*tptr;
123  else
124  state=3;
125  }
126 
127  tptr++;
128  }
129 
130  if(state==2)
131  {
132  saved_error_location.set_file(file);
133  saved_error_location.set_function(irep_idt());
134  saved_error_location.set_line(line_no);
135  saved_error_location.set_column(irep_idt());
136  }
137  }
138 
140  warning_only ? message.warning() : message.error();
141  m.source_location=saved_error_location;
142  m << error_msg << messaget::eom;
143 }
144 
145 static void error_parse(
146  std::istream &errors,
147  bool warning_only,
148  messaget &message)
149 {
150  std::string line;
151 
152  while(std::getline(errors, line))
153  error_parse_line(line, warning_only, message);
154 }
155 
158  std::istream &instream,
159  std::ostream &outstream,
160  message_handlert &message_handler)
161 {
162  temporary_filet tmp_file("tmp.stdin", ".c");
163 
164  std::ofstream tmp(tmp_file());
165 
166  if(!tmp)
167  {
168  messaget message(message_handler);
169  message.error() << "failed to open temporary file" << messaget::eom;
170  return true; // error
171  }
172 
173  tmp << instream.rdbuf(); // copy
174 
175  tmp.close(); // flush
176 
177  bool result=c_preprocess(tmp_file(), outstream, message_handler);
178 
179  return result;
180 }
181 
183 static bool is_dot_i_file(const std::string &path)
184 {
185  return has_suffix(path, ".i") || has_suffix(path, ".ii");
186 }
187 
190  const std::string &, std::ostream &, message_handlert &);
191 bool c_preprocess_arm(
192  const std::string &, std::ostream &, message_handlert &);
194  const std::string &,
195  std::ostream &,
198 bool c_preprocess_none(
199  const std::string &, std::ostream &, message_handlert &);
201  const std::string &, std::ostream &, message_handlert &);
202 
204  const std::string &path,
205  std::ostream &outstream,
206  message_handlert &message_handler)
207 {
208  switch(config.ansi_c.preprocessor)
209  {
211  return c_preprocess_codewarrior(path, outstream, message_handler);
212 
214  return
216  path, outstream, message_handler, config.ansi_c.preprocessor);
217 
219  return
221  path, outstream, message_handler, config.ansi_c.preprocessor);
222 
224  return c_preprocess_visual_studio(path, outstream, message_handler);
225 
227  return c_preprocess_arm(path, outstream, message_handler);
228 
230  return c_preprocess_none(path, outstream, message_handler);
231  }
232 
233  // not reached
234  return true;
235 }
236 
239  const std::string &file,
240  std::ostream &outstream,
241  message_handlert &message_handler)
242 {
243  // check extension
244  if(is_dot_i_file(file))
245  return c_preprocess_none(file, outstream, message_handler);
246 
247  messaget message(message_handler);
248 
249  // use Visual Studio's CL
250 
251  temporary_filet stderr_file("tmp.stderr", "");
252  temporary_filet command_file_name("tmp.cl-cmd", "");
253 
254  {
255  std::ofstream command_file(command_file_name());
256 
257  // This marks the command file as UTF-8, which Visual Studio
258  // understands.
259  command_file << char(0xef) << char(0xbb) << char(0xbf);
260 
261  command_file << "/nologo" << '\n';
262  command_file << "/E" << '\n';
263 
264  // This option will make CL produce utf-8 output, as
265  // opposed to 8-bit with some code page.
266  // It only works on Visual Studio 2015 or newer.
267  command_file << "/source-charset:utf-8" << '\n';
268 
269  command_file << "/D__CPROVER__" << "\n";
270  command_file << "/D__WORDSIZE=" << config.ansi_c.pointer_width << "\n";
271 
273  {
274  command_file << "\"/D__PTRDIFF_TYPE__=long long int\"" << "\n";
275  // yes, both _WIN32 and _WIN64 get defined
276  command_file << "/D_WIN64" << "\n";
277  }
278  else if(config.ansi_c.int_width == 16 && config.ansi_c.pointer_width == 32)
279  {
280  // 16-bit LP32 is an artificial architecture we simulate when using --16
283  "Pointer difference expected to be long int typed");
284  command_file << "/D__PTRDIFF_TYPE__=long" << '\n';
285  }
286  else
287  {
290  "Pointer difference expected to be int typed");
291  command_file << "/D__PTRDIFF_TYPE__=int" << "\n";
292  }
293 
295  command_file << "/J" << "\n"; // This causes _CHAR_UNSIGNED to be defined
296 
297  for(const auto &define : config.ansi_c.defines)
298  command_file << "/D" << shell_quote(define) << "\n";
299 
300  for(const auto &include_path : config.ansi_c.include_paths)
301  command_file << "/I" << shell_quote(include_path) << "\n";
302 
303  for(const auto &include_file : config.ansi_c.include_files)
304  command_file << "/FI" << shell_quote(include_file) << "\n";
305 
306  // Finally, the file to be preprocessed
307  // (this is already in UTF-8).
308  command_file << shell_quote(file) << "\n";
309  }
310 
311  // _popen isn't very reliable on WIN32
312  // that's why we use run()
313  int result =
314  run("cl", {"cl", "@" + command_file_name()}, "", outstream, stderr_file());
315 
316  // errors/warnings
317  std::ifstream stderr_stream(stderr_file());
318  error_parse(stderr_stream, result==0, message);
319 
320  if(result!=0)
321  {
322  message.error() << "CL Preprocessing failed" << messaget::eom;
323  return true;
324  }
325 
326  return false;
327 }
328 
331  std::istream &instream,
332  std::ostream &outstream)
333 {
334  // CodeWarrior prepends some header to the file,
335  // marked with '#' signs.
336  // We skip over it.
337  //
338  // CodeWarrior has an ugly way of marking lines, e.g.:
339  //
340  // /* #line 1 "__ppc_eabi_init.cpp" /* stack depth 0 */
341  //
342  // We remove the initial '/* ' prefix
343 
344  std::string line;
345 
346  while(instream)
347  {
348  std::getline(instream, line);
349 
350  if(line.size()>=2 &&
351  line[0]=='#' && (line[1]=='#' || line[1]==' ' || line[1]=='\t'))
352  {
353  // skip the line!
354  }
355  else if(line.size()>=3 &&
356  line[0]=='/' && line[1]=='*' && line[2]==' ')
357  {
358  outstream << line.c_str()+3 << "\n"; // strip the '/* '
359  }
360  else
361  outstream << line << "\n";
362  }
363 }
364 
367  const std::string &file,
368  std::ostream &outstream,
369  message_handlert &message_handler)
370 {
371  // check extension
372  if(is_dot_i_file(file))
373  return c_preprocess_none(file, outstream, message_handler);
374 
375  // preprocessing
376  messaget message(message_handler);
377 
378  temporary_filet stderr_file("tmp.stderr", "");
379 
380  std::vector<std::string> command = {
381  "mwcceppc", "-E", "-P", "-D__CPROVER__", "-ppopt", "line", "-ppopt full"};
382 
383  for(const auto &define : config.ansi_c.defines)
384  command.push_back(" -D" + define);
385 
386  for(const auto &include_path : config.ansi_c.include_paths)
387  command.push_back(" -I" + include_path);
388 
389  for(const auto &include_file : config.ansi_c.include_files)
390  {
391  command.push_back(" -include");
392  command.push_back(include_file);
393  }
394 
395  for(const auto &opt : config.ansi_c.preprocessor_options)
396  command.push_back(opt);
397 
398  temporary_filet tmpi("tmp.cl", "");
399  command.push_back(file);
400  command.push_back("-o");
401  command.push_back(tmpi());
402 
403  int result = run(command[0], command, "", "", stderr_file());
404 
405  std::ifstream stream_i(tmpi());
406 
407  if(stream_i)
408  {
409  postprocess_codewarrior(stream_i, outstream);
410 
411  stream_i.close();
412  }
413  else
414  {
415  message.error() << "Preprocessing failed (fopen failed)"
416  << messaget::eom;
417  return true;
418  }
419 
420  // errors/warnings
421  std::ifstream stderr_stream(stderr_file());
422  error_parse(stderr_stream, result==0, message);
423 
424  if(result!=0)
425  {
426  message.error() << "Preprocessing failed" << messaget::eom;
427  return true;
428  }
429 
430  return false;
431 }
432 
435  const std::string &file,
436  std::ostream &outstream,
437  message_handlert &message_handler,
438  configt::ansi_ct::preprocessort preprocessor)
439 {
440  // check extension
441  if(is_dot_i_file(file))
442  return c_preprocess_none(file, outstream, message_handler);
443 
444  // preprocessing
445  messaget message(message_handler);
446 
447  temporary_filet stderr_file("tmp.stderr", "");
448 
449  std::vector<std::string> argv;
450 
452  argv.push_back("clang");
453  else
454  argv.push_back("gcc");
455 
456  argv.push_back("-E");
457  argv.push_back("-D__CPROVER__");
458 
459  const irep_idt &arch = config.ansi_c.arch;
460 
461  if(config.ansi_c.pointer_width == 16)
462  {
463  if(arch == "i386" || arch == "x86_64" || arch == "x32")
464  argv.push_back("-m16");
465  else if(has_prefix(id2string(arch), "mips"))
466  argv.push_back("-mips16");
467  }
468  else if(config.ansi_c.pointer_width == 32)
469  {
470  if(arch == "i386" || arch == "x86_64")
471  argv.push_back("-m32");
472  else if(arch == "x32")
473  argv.push_back("-mx32");
474  else if(has_prefix(id2string(arch), "mips"))
475  argv.push_back("-mabi=32");
476  else if(arch == "powerpc" || arch == "ppc64" || arch == "ppc64le")
477  argv.push_back("-m32");
478  else if(arch == "s390" || arch == "s390x")
479  argv.push_back("-m31"); // yes, 31, not 32!
480  else if(arch == "sparc" || arch == "sparc64")
481  argv.push_back("-m32");
482  }
483  else if(config.ansi_c.pointer_width == 64)
484  {
485  if(arch == "i386" || arch == "x86_64" || arch == "x32")
486  argv.push_back("-m64");
487  else if(has_prefix(id2string(arch), "mips"))
488  argv.push_back("-mabi=64");
489  else if(arch == "powerpc" || arch == "ppc64" || arch == "ppc64le")
490  argv.push_back("-m64");
491  else if(arch == "s390" || arch == "s390x")
492  argv.push_back("-m64");
493  else if(arch == "sparc" || arch == "sparc64")
494  argv.push_back("-m64");
495  }
496 
497  // The width of wchar_t depends on the OS!
499  argv.push_back("-fshort-wchar");
500 
502  argv.push_back("-funsigned-char");
503 
505  argv.push_back("-nostdinc");
506 
507  // Set the standard
508  if(
509  has_suffix(file, ".cpp") || has_suffix(file, ".CPP") ||
510 #ifndef _WIN32
511  has_suffix(file, ".C") ||
512 #endif
513  has_suffix(file, ".c++") || has_suffix(file, ".C++") ||
514  has_suffix(file, ".cp") || has_suffix(file, ".CP") ||
515  has_suffix(file, ".cc") || has_suffix(file, ".cxx"))
516  {
517  switch(config.cpp.cpp_standard)
518  {
520 #if defined(__OpenBSD__)
521  if(preprocessor == configt::ansi_ct::preprocessort::CLANG)
522  argv.push_back("-std=c++98");
523  else
524 #endif
525  argv.push_back("-std=gnu++98");
526  break;
527 
529 #if defined(__OpenBSD__)
530  if(preprocessor == configt::ansi_ct::preprocessort::CLANG)
531  argv.push_back("-std=c++03");
532  else
533 #endif
534  argv.push_back("-std=gnu++03");
535  break;
536 
538 #if defined(__OpenBSD__)
539  if(preprocessor == configt::ansi_ct::preprocessort::CLANG)
540  argv.push_back("-std=c++11");
541  else
542 #endif
543  argv.push_back("-std=gnu++11");
544  break;
545 
547 #if defined(__OpenBSD__)
548  if(preprocessor == configt::ansi_ct::preprocessort::CLANG)
549  argv.push_back("-std=c++14");
550  else
551 #endif
552  argv.push_back("-std=gnu++14");
553  break;
554  }
555  }
556  else
557  {
558  switch(config.ansi_c.c_standard)
559  {
561 #if defined(__OpenBSD__)
562  if(preprocessor == configt::ansi_ct::preprocessort::CLANG)
563  argv.push_back("-std=c89");
564  else
565 #endif
566  argv.push_back("-std=gnu89");
567  break;
568 
570 #if defined(__OpenBSD__)
571  if(preprocessor == configt::ansi_ct::preprocessort::CLANG)
572  argv.push_back("-std=c99");
573  else
574 #endif
575  argv.push_back("-std=gnu99");
576  break;
577 
579 #if defined(__OpenBSD__)
580  if(preprocessor == configt::ansi_ct::preprocessort::CLANG)
581  argv.push_back("-std=c11");
582  else
583 #endif
584  argv.push_back("-std=gnu11");
585  break;
586  }
587  }
588 
589  for(const auto &define : config.ansi_c.defines)
590  argv.push_back("-D" + define);
591 
592  for(const auto &include_path : config.ansi_c.include_paths)
593  argv.push_back("-I" + include_path);
594 
595  for(const auto &include_file : config.ansi_c.include_files)
596  {
597  argv.push_back("-include");
598  argv.push_back(include_file);
599  }
600 
601  for(const auto &opt : config.ansi_c.preprocessor_options)
602  argv.push_back(opt);
603 
604  int result;
605 
606  #if 0
607  // the following forces the mode
608  switch(config.ansi_c.mode)
609  {
610  case configt::ansi_ct::flavourt::GCC_C: command+=" -x c"; break;
611  case configt::ansi_ct::flavourt::GCC_CPP: command+=" -x c++"; break;
612  default:
613  {
614  }
615  }
616  #endif
617 
618  // the file that is to be preprocessed
619  argv.push_back(file);
620 
621  // execute clang or gcc
622  result = run(argv[0], argv, "", outstream, stderr_file());
623 
624  // errors/warnings
625  std::ifstream stderr_stream(stderr_file());
626  error_parse(stderr_stream, result==0, message);
627 
628  if(result!=0)
629  {
630  message.error() << "GCC preprocessing failed" << messaget::eom;
631  return true;
632  }
633 
634  return false;
635 }
636 
639  const std::string &file,
640  std::ostream &outstream,
641  message_handlert &message_handler)
642 {
643  // check extension
644  if(is_dot_i_file(file))
645  return c_preprocess_none(file, outstream, message_handler);
646 
647  // preprocessing using armcc
648  messaget message(message_handler);
649 
650  temporary_filet stderr_file("tmp.stderr", "");
651 
652  std::vector<std::string> argv;
653 
654  argv.push_back("armcc");
655  argv.push_back("-E");
656  argv.push_back("-D__CPROVER__");
657 
659  argv.push_back("--bigend");
660  else
661  argv.push_back("--littleend");
662 
664  argv.push_back("--unsigned_chars");
665  else
666  argv.push_back("--signed_chars");
667 
668  // Set the standard
669  switch(config.ansi_c.c_standard)
670  {
672  argv.push_back("--c90");
673  break;
674 
677  argv.push_back("--c99");
678  break;
679  }
680 
681  for(const auto &define : config.ansi_c.defines)
682  argv.push_back("-D" + define);
683 
684  for(const auto &include_path : config.ansi_c.include_paths)
685  argv.push_back("-I" + include_path);
686 
687  // the file that is to be preprocessed
688  argv.push_back(file);
689 
690  int result;
691 
692  // execute armcc
693  result = run(argv[0], argv, "", outstream, stderr_file());
694 
695  // errors/warnings
696  std::ifstream stderr_stream(stderr_file());
697  error_parse(stderr_stream, result==0, message);
698 
699  if(result!=0)
700  {
701  message.error() << "ARMCC preprocessing failed" << messaget::eom;
702  return true;
703  }
704 
705  return false;
706 }
707 
710  const std::string &file,
711  std::ostream &outstream,
712  message_handlert &message_handler)
713 {
714  #ifdef _MSC_VER
715  std::ifstream infile(widen(file));
716  #else
717  std::ifstream infile(file);
718  #endif
719 
720  if(!infile)
721  {
722  messaget message(message_handler);
723  message.error() << "failed to open '" << file << "'" << messaget::eom;
724  return true;
725  }
726 
728  {
729  // special treatment for "/* #line"
730  postprocess_codewarrior(infile, outstream);
731  }
732  else
733  {
734  char ch;
735 
736  while(infile.read(&ch, 1))
737  outstream << ch;
738  }
739 
740  return false;
741 }
742 
744 const char c_test_program[]=
745  "#include <stdlib.h>\n"
746  "\n"
747  "int main() { }\n";
748 
749 bool test_c_preprocessor(message_handlert &message_handler)
750 {
751  std::ostringstream out;
752  std::istringstream in(c_test_program);
753 
754  return c_preprocess(in, out, message_handler);
755 }
bool c_preprocess_visual_studio(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
bool c_preprocess_arm(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
bool c_preprocess_none(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
static void error_parse(std::istream &errors, bool warning_only, messaget &message)
const char c_test_program[]
tests ANSI-C preprocessing
bool c_preprocess_codewarrior(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
static void error_parse_line(const std::string &line, bool warning_only, messaget &message)
void postprocess_codewarrior(std::istream &instream, std::ostream &outstream)
post-processing specifically for CodeWarrior
bool c_preprocess(std::istream &instream, std::ostream &outstream, message_handlert &message_handler)
ANSI-C preprocessing.
bool test_c_preprocessor(message_handlert &message_handler)
bool c_preprocess_gcc_clang(const std::string &, std::ostream &, message_handlert &, configt::ansi_ct::preprocessort)
ANSI-C preprocessing.
static bool is_dot_i_file(const std::string &path)
ANSI-C preprocessing.
signedbv_typet signed_long_int_type()
Definition: c_types.cpp:80
signedbv_typet signed_int_type()
Definition: c_types.cpp:30
signedbv_typet pointer_diff_type()
Definition: c_types.cpp:228
signedbv_typet signed_long_long_int_type()
Definition: c_types.cpp:87
struct configt::cppt cpp
struct configt::ansi_ct ansi_c
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
source_locationt source_location
Definition: message.h:247
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:155
mstreamt & error() const
Definition: message.h:399
mstreamt & warning() const
Definition: message.h:404
static eomt eom
Definition: message.h:297
void set_column(const irep_idt &column)
void set_file(const irep_idt &file)
void set_line(const irep_idt &line)
void set_function(const irep_idt &function)
configt config
Definition: config.cpp:25
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
dstringt irep_idt
Definition: irep.h:37
const std::string & id2string(const irep_idt &d)
Definition: irep.h:49
int run(const std::string &what, const std::vector< std::string > &argv)
Definition: run.cpp:48
std::string shell_quote(const std::string &src)
quote a string for bash and CMD
Definition: run.cpp:451
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:510
std::list< std::string > include_paths
Definition: config.h:204
enum configt::ansi_ct::c_standardt c_standard
endiannesst endianness
Definition: config.h:156
std::size_t pointer_width
Definition: config.h:116
std::list< std::string > include_files
Definition: config.h:205
irep_idt arch
Definition: config.h:164
std::list< std::string > preprocessor_options
Definition: config.h:203
std::list< std::string > defines
Definition: config.h:201
std::size_t wchar_t_width
Definition: config.h:120
preprocessort preprocessor
Definition: config.h:199
bool char_is_unsigned
Definition: config.h:123
std::size_t short_int_width
Definition: config.h:114
flavourt mode
Definition: config.h:195
std::size_t int_width
Definition: config.h:110
enum configt::cppt::cpp_standardt cpp_standard
Definition: kdev_t.h:19
bool has_suffix(const std::string &s, const std::string &suffix)
Definition: suffix.h:17
std::wstring widen(const char *s)
Definition: unicode.cpp:48