cprover
unified_diff.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Unified diff (using LCSS) of goto functions
4 
5 Author: Michael Tautschnig
6 
7 Date: April 2016
8 
9 \*******************************************************************/
10 
13 
14 #include "unified_diff.h"
15 
16 #include <algorithm>
17 
19 
21  const goto_modelt &model_old,
22  const goto_modelt &model_new)
23  : old_goto_functions(model_old.goto_functions),
24  ns_old(model_old.symbol_table),
25  new_goto_functions(model_new.goto_functions),
26  ns_new(model_new.symbol_table)
27 {
28 }
29 
31 unified_difft::get_diff(const irep_idt &function) const
32 {
33  differences_mapt::const_iterator entry = differences_map_.find(function);
34  if(entry == differences_map_.end())
35  return {};
36 
37  goto_functionst::function_mapt::const_iterator old_fit =
38  old_goto_functions.function_map.find(function);
39  goto_functionst::function_mapt::const_iterator new_fit =
40  new_goto_functions.function_map.find(function);
41 
42  goto_programt empty;
43 
44  const goto_programt &old_goto_program =
45  old_fit == old_goto_functions.function_map.end() ? empty
46  : old_fit->second.body;
47  const goto_programt &new_goto_program =
48  new_fit == new_goto_functions.function_map.end() ? empty
49  : new_fit->second.body;
50 
51  return get_diff(old_goto_program, new_goto_program, entry->second);
52 }
53 
55  const goto_programt &old_goto_program,
56  const goto_programt &new_goto_program,
57  const differencest &differences)
58 {
59  goto_programt::instructionst::const_iterator old_it =
60  old_goto_program.instructions.begin();
61  goto_programt::instructionst::const_iterator new_it =
62  new_goto_program.instructions.begin();
63 
64  goto_program_difft dest;
65 
66  for(differencest::const_reverse_iterator rit = differences.rbegin();
67  rit != differences.rend();
68  ++rit)
69  {
70  switch(*rit)
71  {
72  case differencet::SAME:
73  dest.push_back(std::make_pair(new_it, differencet::SAME));
74  INVARIANT(
75  old_it != old_goto_program.instructions.end(),
76  "old iterator reached the final goto instruction");
77  ++old_it;
78  INVARIANT(
79  new_it != new_goto_program.instructions.end(),
80  "new iterator reached the final goto instruction");
81  ++new_it;
82  break;
84  dest.push_back(std::make_pair(old_it, differencet::DELETED));
85  INVARIANT(
86  old_it != old_goto_program.instructions.end(),
87  "old iterator reached the final goto instruction");
88  ++old_it;
89  break;
90  case differencet::NEW:
91  dest.push_back(std::make_pair(new_it, differencet::NEW));
92  INVARIANT(
93  new_it != new_goto_program.instructions.end(),
94  "new iterator reached the final goto instruction");
95  ++new_it;
96  break;
97  }
98  }
99 
100  return dest;
101 }
102 
104  const irep_idt &identifier,
105  const goto_programt &old_goto_program,
106  const goto_programt &new_goto_program,
107  const differencest &differences,
108  std::ostream &os) const
109 {
110  goto_program_difft diff =
111  get_diff(old_goto_program, new_goto_program, differences);
112 
113  bool has_diff = false;
114  for(const auto &d : diff)
115  {
116  if(d.second != differencet::SAME)
117  {
118  has_diff = true;
119  break;
120  }
121  }
122  if(!has_diff)
123  return;
124 
125  os << "/** " << identifier << " **/\n";
126 
127  for(const auto &d : diff)
128  {
129  switch(d.second)
130  {
131  case differencet::SAME:
132  os << ' ';
133  new_goto_program.output_instruction(ns_new, identifier, os, *d.first);
134  break;
136  os << '-';
137  old_goto_program.output_instruction(ns_old, identifier, os, *d.first);
138  break;
139  case differencet::NEW:
140  os << '+';
141  new_goto_program.output_instruction(ns_new, identifier, os, *d.first);
142  break;
143  }
144  }
145 }
146 
148  const irep_idt &identifier,
149  const goto_programt &old_goto_program,
150  const goto_programt &new_goto_program)
151 {
152  std::size_t old_count = old_goto_program.instructions.size();
153  std::size_t new_count = new_goto_program.instructions.size();
154 
155  differencest differences;
156  differences.reserve(old_count + new_count);
157 
158  // skip common prefix
159  goto_programt::instructionst::const_iterator old_it =
160  old_goto_program.instructions.begin();
161  goto_programt::instructionst::const_iterator new_it =
162  new_goto_program.instructions.begin();
163 
164  for(; old_it != old_goto_program.instructions.end() &&
165  new_it != new_goto_program.instructions.end();
166  ++old_it, ++new_it)
167  {
168  if(!instructions_equal(*old_it, *new_it))
169  break;
170 
171  --old_count;
172  --new_count;
173  }
174  // old_it, new_it are now iterators to the first instructions that
175  // differ
176 
177  // skip common suffix
178  goto_programt::instructionst::const_iterator old_rit =
179  old_goto_program.instructions.end();
180  goto_programt::instructionst::const_iterator new_rit =
181  new_goto_program.instructions.end();
182 
183  while(old_rit != old_it && new_rit != new_it)
184  {
185  --old_rit;
186  --new_rit;
187 
188  if(!instructions_equal(*old_rit, *new_rit))
189  {
190  ++old_rit;
191  ++new_rit;
192  break;
193  }
194 
195  --old_count;
196  --new_count;
197  differences.push_back(differencet::SAME);
198  }
199  // old_rit, new_rit are now iterators to the first instructions of
200  // the common tail
201 
202  if(old_count == 0 && new_count == 0)
203  return differences;
204 
205  // apply longest common subsequence (LCSS)
206  typedef std::vector<std::vector<std::size_t>> lcss_matrixt;
207  lcss_matrixt lcss_matrix(
208  old_count + 1, std::vector<size_t>(new_count + 1, 0));
209 
210  // fill the matrix
211  std::size_t i = 1, j = 1;
212  for(goto_programt::instructionst::const_iterator old_it2 = old_it;
213  old_it2 != old_rit;
214  ++old_it2)
215  {
216  j = 1;
217  for(goto_programt::instructionst::const_iterator new_it2 = new_it;
218  new_it2 != new_rit;
219  ++new_it2)
220  {
221  if(instructions_equal(*old_it2, *new_it2))
222  lcss_matrix[i][j] += lcss_matrix[i - 1][j - 1] + 1;
223  else
224  lcss_matrix[i][j] =
225  std::max(lcss_matrix[i][j - 1], lcss_matrix[i - 1][j]);
226 
227  ++j;
228  }
229  ++i;
230  }
231 
232 #if 0
233  std::cerr << "old_count=" << old_count << '\n';
234  std::cerr << "new_count=" << new_count << '\n';
235  for(i=0; i<=old_count; ++i)
236  {
237  for(j=0; j<=new_count; ++j)
238  {
239  std::cerr << " ";
240  if(lcss_matrix[i][j]<10)
241  std::cerr << " ";
242  std::cerr << lcss_matrix[i][j];
243  }
244  std::cerr << '\n';
245  }
246 #endif
247 
248  // backtracking
249  // note that the order in case of multiple possible matches of
250  // the if-clauses is important to provide a convenient ordering
251  // of - and + lines (- goes before +)
252  i = old_count;
253  j = new_count;
254  --old_rit;
255  --new_rit;
256 
257  while(i > 0 || j > 0)
258  {
259  if(i == 0)
260  {
261  differences.push_back(differencet::NEW);
262  --j;
263  if(new_goto_program.instructions.begin()!=new_rit)
264  --new_rit;
265  }
266  else if(j == 0)
267  {
268  differences.push_back(differencet::DELETED);
269  --i;
270  if(old_goto_program.instructions.begin()!=old_rit)
271  --old_rit;
272  }
273  else if(instructions_equal(*old_rit, *new_rit))
274  {
275  differences.push_back(differencet::SAME);
276  --i;
277  if(old_goto_program.instructions.begin()!=old_rit)
278  --old_rit;
279  --j;
280  if(new_goto_program.instructions.begin()!=new_rit)
281  --new_rit;
282  }
283  else if(lcss_matrix[i][j - 1] < lcss_matrix[i][j])
284  {
285  differences.push_back(differencet::DELETED);
286  --i;
287  if(old_goto_program.instructions.begin()!=old_rit)
288  --old_rit;
289  }
290  else
291  {
292  differences.push_back(differencet::NEW);
293  --j;
294  if(new_goto_program.instructions.begin()!=new_rit)
295  --new_rit;
296  }
297  }
298 
299  // add common prefix (if any)
300  for(; old_it != old_goto_program.instructions.begin(); --old_it)
301  differences.push_back(differencet::SAME);
302 
303  return differences;
304 }
305 
307  const irep_idt &identifier,
308  const goto_programt &old_goto_program,
309  const goto_programt &new_goto_program)
310 {
311  differencest &differences = differences_map_[identifier];
312  differences.clear();
313 
314  if(
315  old_goto_program.instructions.empty() ||
316  new_goto_program.instructions.empty())
317  {
318  if(new_goto_program.instructions.empty())
319  differences.resize(
320  old_goto_program.instructions.size(), differencet::DELETED);
321  else
322  differences.resize(
323  new_goto_program.instructions.size(), differencet::NEW);
324  }
325  else
326  differences=lcss(identifier, old_goto_program, new_goto_program);
327 }
328 
330 {
331  typedef std::map<irep_idt, goto_functionst::function_mapt::const_iterator>
332  function_mapt;
333 
334  function_mapt old_funcs, new_funcs;
335 
337  old_funcs.insert(std::make_pair(it->first, it));
339  new_funcs.insert(std::make_pair(it->first, it));
340 
341  goto_programt empty;
342 
343  function_mapt::const_iterator ito = old_funcs.begin();
344  for(function_mapt::const_iterator itn = new_funcs.begin();
345  itn != new_funcs.end();
346  ++itn)
347  {
348  for(; ito != old_funcs.end() && ito->first < itn->first; ++ito)
349  unified_diff(ito->first, ito->second->second.body, empty);
350 
351  if(ito == old_funcs.end() || itn->first < ito->first)
352  unified_diff(itn->first, empty, itn->second->second.body);
353  else
354  {
355  INVARIANT(
356  ito->first == itn->first, "old and new function names do not match");
357  unified_diff(
358  itn->first, ito->second->second.body, itn->second->second.body);
359  ++ito;
360  }
361  }
362  for(; ito != old_funcs.end(); ++ito)
363  unified_diff(ito->first, ito->second->second.body, empty);
364 
365  return !differences_map_.empty();
366 }
367 
368 void unified_difft::output(std::ostream &os) const
369 {
370  goto_programt empty;
371 
372  for(const std::pair<irep_idt, differencest> &p : differences_map_)
373  {
374  const irep_idt &function = p.first;
375 
376  goto_functionst::function_mapt::const_iterator old_fit =
377  old_goto_functions.function_map.find(function);
378  goto_functionst::function_mapt::const_iterator new_fit =
379  new_goto_functions.function_map.find(function);
380 
381  const goto_programt &old_goto_program =
382  old_fit == old_goto_functions.function_map.end() ? empty
383  : old_fit->second.body;
384  const goto_programt &new_goto_program =
385  new_fit == new_goto_functions.function_map.end() ? empty
386  : new_fit->second.body;
387 
388  output_diff(function, old_goto_program, new_goto_program, p.second, os);
389  }
390 }
391 
393  const goto_programt::instructiont &ins1,
394  const goto_programt::instructiont &ins2)
395 {
396  return ins1.code == ins2.code && ins1.function == ins2.function &&
397  ins1.type == ins2.type && ins1.guard == ins2.guard &&
398  ins1.targets.size() == ins2.targets.size() &&
399  (ins1.targets.empty() ||
400  instructions_equal(*ins1.get_target(), *ins2.get_target()));
401 }
402 
404 {
405  return differences_map_;
406 }
exprt guard
Guard for gotos, assume, assert.
Definition: goto_program.h:188
irep_idt function
The function this instruction belongs to.
Definition: goto_program.h:179
const differences_mapt & differences_map() const
goto_program_instruction_typet type
What kind of instruction?
Definition: goto_program.h:185
const goto_functionst & old_goto_functions
Definition: unified_diff.h:51
static bool instructions_equal(const goto_programt::instructiont &ins1, const goto_programt::instructiont &ins2)
function_mapt function_map
targett get_target() const
Returns the first (and only) successor for the usual case of a single target.
Definition: goto_program.h:202
std::map< irep_idt, differencest > differences_mapt
Definition: unified_diff.h:57
goto_program_difft get_diff(const irep_idt &function) const
#define INVARIANT(CONDITION, REASON)
Definition: invariant.h:205
targetst targets
The list of successor instructions.
Definition: goto_program.h:198
This class represents an instruction in the GOTO intermediate representation.
Definition: goto_program.h:173
void output_diff(const irep_idt &identifier, const goto_programt &old_goto_program, const goto_programt &new_goto_program, const differencest &differences, std::ostream &os) const
Symbol Table + CFG.
unified_difft(const goto_modelt &model_old, const goto_modelt &model_new)
std::vector< differencet > differencest
Definition: unified_diff.h:56
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:403
const namespacet ns_old
Definition: unified_diff.h:52
differences_mapt differences_map_
Definition: unified_diff.h:88
Unified diff (using LCSS) of goto functions.
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:70
static differencest lcss(const irep_idt &identifier, const goto_programt &old_goto_program, const goto_programt &new_goto_program)
void output(std::ostream &os) const
void unified_diff(const irep_idt &identifier, const goto_programt &old_goto_program, const goto_programt &new_goto_program)
std::ostream & output_instruction(const namespacet &ns, const irep_idt &identifier, std::ostream &out, const instructionst::value_type &it) const
Output a single instruction.
std::list< std::pair< goto_programt::const_targett, differencet > > goto_program_difft
Definition: unified_diff.h:47
#define forall_goto_functions(it, functions)
const goto_functionst & new_goto_functions
Definition: unified_diff.h:53
const namespacet ns_new
Definition: unified_diff.h:54