RegExp.hpp

Go to the documentation of this file.
00001 #ifndef wali_graph__REGEXP_H_
00002 #define wali_graph__REGEXP_H_
00003 
00004 #include <iostream>
00005 #include <list>
00006 #include <vector>
00007 #include <set>
00008 #include <map>
00009 
00010 #include "wali/SemElem.hpp"
00011 #include "wali/ref_ptr.hpp"
00012 #include "wali/HashMap.hpp"
00013 
00014 #include "wali/graph/GraphCommon.hpp"
00015 
00016 #ifdef _WIN32
00017 #include <time.h>
00018 #endif
00019 
00020 using namespace std;
00021 
00022 /* RegExp usage:
00023  *
00024  * (1) The calls to RegExp::startSatProcess() and RegExp::stopSatProcess() must alternate.
00025  * (2) RegExp::startSatProcess() must be called once before RegExps can be used.
00026  * (3) The period between startSatProcess() and stopSatProcess() is available to carry out a saturation process
00027  * (4) RegExp::stopSatProcess() resets the set of updatable nodes and fixes old updatable nodes to act as
00028  *     constants forever. Old RegExps can still be reevaluated if they were not evaluated before
00029  *     stopSatProcess was called, but once evaluated there would be no need to evaluate them again.
00030  * (5) RegExp created in different startSatProcess()-stopSatProcess periods can be mixed together with the understanding
00031  *     that RegExps are always used as constant and are never mutated (its hopefully enforced by the compiler ---
00032  *     keep the constructors private)
00033  * (6) To achieve the above, we use the static value currentSatProcess (an unsigned int) and non-static
00034  *     value satProcess (also an unsigned int)
00035  * (7) One can also change the semiring being used by calling startSatProcess() but then it should not be
00036  *     mixed with old RegExps.
00037  */
00038 
00039 namespace wali {
00040 
00041     namespace graph {
00042 
00043         enum reg_exp_type {Constant, Updatable, Extend, Combine, Star};
00044 
00045         class RegExp;
00046         typedef ref_ptr<RegExp> reg_exp_t;
00047         typedef long unsigned int node_no_t;
00048 
00049         struct cmp_reg_exp {
00050             bool operator() (reg_exp_t r1, reg_exp_t r2) const {
00051                 return (r1.get_ptr() < r2.get_ptr());
00052             }
00053         };
00054 
00055         struct reg_exp_key_t {
00056             reg_exp_type type;
00057             reg_exp_t c1;
00058             reg_exp_t c2;
00059             reg_exp_key_t() {
00060                 c1 = 0;
00061                 c2 = 0;
00062                 type = Constant;
00063             }
00064             reg_exp_key_t(reg_exp_type t, reg_exp_t _c1, reg_exp_t _c2 = 0) {
00065                 type = t;
00066                 c1 = _c1;
00067                 c2 = _c2;
00068             }
00069             bool operator() (reg_exp_key_t r1, reg_exp_key_t r2) const {
00070                 return (r1.type == r2.type && r1.c1.get_ptr() == r2.c1.get_ptr() &&
00071                         r1.c2.get_ptr() == r2.c2.get_ptr());
00072             }
00073         };
00074 
00075         struct hash_reg_exp_key {
00076             size_t operator() (const reg_exp_key_t &k) const {
00077                 return ((size_t)k.type ^ (size_t)k.c1.get_ptr() ^ (size_t)k.c2.get_ptr());
00078             }
00079         };
00080 
00081         struct hash_sem_elem {
00082             size_t operator() (const sem_elem_t &se) const {
00083                 return (size_t)se.get_ptr();
00084             }
00085         };
00086 
00087         struct sem_elem_equal {
00088             size_t operator() (sem_elem_t s1, sem_elem_t s2) const {
00089                 return (s1.get_ptr() == s2.get_ptr());
00090                 //return (s1->equal(s2));
00091             }
00092         };
00093 
00094         struct sem_elem_less {
00095             size_t operator() (sem_elem_t s1, sem_elem_t s2) const {
00096                 return (s1.get_ptr() < s2.get_ptr());
00097                 //return (s1->equal(s2));
00098             }
00099         };
00100 
00101         typedef pair<long int, long int> out_node_height_t;
00102         typedef map<long int, out_node_height_t> out_node_stat_t;
00103 
00104         struct RegExpStats {
00105             int nstar;
00106             int nextend;
00107             int ncombine;
00108             int hashmap_hits;
00109             int hashmap_misses;
00110             long int height;
00111             long int out_nodes;
00112             long int lnd;
00113             RegExpStats() {
00114                 nstar = ncombine = nextend = 0;
00115                 hashmap_hits = 0;
00116                 hashmap_misses = 0;
00117                 height = lnd = out_nodes = 0;
00118             }
00119         };
00120 
00121         ostream &operator << (ostream &out, const RegExpStats &s);
00122 
00123         typedef map<reg_exp_t, reg_exp_t, cmp_reg_exp> reg_exp_cache_t;
00124 
00125         typedef map<unsigned int, sem_elem_t> delta_map_t;
00126 
00127 
00128         typedef wali::HashMap<reg_exp_key_t, reg_exp_t, hash_reg_exp_key, reg_exp_key_t> reg_exp_hash_t;
00129         typedef wali::HashMap<sem_elem_t, reg_exp_t, hash_sem_elem, sem_elem_equal> const_reg_exp_hash_t;
00130 
00131         class RegExpSatProcess {
00132           public:
00133           unsigned int update_count;
00134           RegExpSatProcess() : update_count(1) { }
00135         };
00136 
00137 
00138         class RegExp {
00139             public:
00140                 unsigned int count; // for reference counting
00141             private:
00142                 reg_exp_type type;
00143                 sem_elem_t value;
00144 #ifdef DWPDS
00145                 delta_map_t delta;
00146 #endif
00147                 node_no_t updatable_node_no;
00148                 list<reg_exp_t> children;
00149                 unsigned int last_change;
00150                 unsigned int last_seen;
00151 
00152                 set<long int> outnodes; // set of out-nodes contained in this RegExp
00153                 out_node_stat_t outnode_height;
00154                 int samechange,differentchange,lastchange;
00155 
00156                 map<sem_elem_t, sem_elem_t, sem_elem_less> eval_map; // value under certain contexts
00157                 static bool saturation_complete;
00158                 static bool executing_poststar;
00159                 static bool initialized;
00160                 static bool top_down_eval;
00161                 int nevals; // for gathering stats: no of times eval was called on this regexp
00162 
00163                 static vector<reg_exp_t> updatable_nodes;
00164 
00165                 static vector<RegExpSatProcess> satProcesses;
00166                 static long unsigned int currentSatProcess;
00167                 long unsigned int satProcess;
00168 
00169                 static bool extend_backwards;
00170                 static reg_exp_hash_t reg_exp_hash;
00171                 static const_reg_exp_hash_t const_reg_exp_hash;
00172                 static RegExpStats stats;
00173                 static reg_exp_t reg_exp_zero, reg_exp_one;
00174 
00175                 // For debugging
00176                 bool uptodate;
00177 
00178                 RegExp(node_no_t nno, sem_elem_t se) {
00179                     type = Updatable;
00180                     value = se;
00181                     updatable_node_no = nno;
00182                     count = 0;
00183                     last_change = 1;
00184                     last_seen = 1;
00185                     uptodate = false;
00186                     nevals = 0;
00187                     samechange = differentchange = 0;
00188                     lastchange=-1;
00189                     satProcess = currentSatProcess;
00190                 }
00191                 RegExp(reg_exp_type t, const reg_exp_t r1, const reg_exp_t r2 = 0) {
00192                     count = 0;
00193                     nevals = 0;
00194                     if(t == Extend || t == Combine) {
00195                         type = t;
00196                         assert(r1.get_ptr() != 0 && r2.get_ptr() != 0);
00197                         value = r1->value->zero();
00198                         children.push_back(r1);
00199                         children.push_back(r2);
00200                     } else if(t == Star) {
00201                         type = Star;
00202                         assert(r1.get_ptr() != 0);
00203                         value = r1->value->zero();
00204                         children.push_back(r1);
00205                     } else {
00206                         assert(0);
00207                     }
00208                     last_change = 0;
00209                     last_seen = 0;
00210                     uptodate = false;
00211                     samechange = differentchange = 0;
00212                     lastchange=-1;
00213                     satProcess = currentSatProcess;
00214                 }
00215                 RegExp(sem_elem_t se) {
00216                     type = Constant;
00217                     value = se;
00218                     updatable_node_no = 0; // default value
00219                     count = 0;
00220                     last_change = 1;
00221                     last_seen = 0;
00222                     uptodate = false;
00223                     nevals = 0;
00224                     samechange = differentchange = 0;
00225                     lastchange=-1;
00226                     satProcess = currentSatProcess;
00227                 }
00228 
00229             public:
00230 
00231                 static void extendDirectionBackwards(bool b) {
00232                     extend_backwards = b;
00233                 }
00234 
00235                 static void saturationComplete() {
00236                     saturation_complete = true;
00237                 }
00238 
00239                 static void executingPoststar(bool f) {
00240                     executing_poststar = f;
00241                 }
00242 
00243                 static void topDownEval(bool f) {
00244                     top_down_eval = f;
00245                 }
00246 
00247                 ostream &print(ostream &out);
00248                 static reg_exp_t star(reg_exp_t r);
00249                 static reg_exp_t extend(reg_exp_t r1, reg_exp_t r2);
00250                 static reg_exp_t combine(reg_exp_t r1, reg_exp_t r2);
00251                 static reg_exp_t combine(list<reg_exp_t> &ls);
00252                 static reg_exp_t constant(sem_elem_t se);
00253                 static void startSatProcess(const sem_elem_t se);
00254                 static void stopSatProcess();
00255 
00256                 static reg_exp_t updatable(node_no_t nno, sem_elem_t se);
00257                 static reg_exp_t compress(reg_exp_t r, reg_exp_cache_t &cache);
00258                 static reg_exp_t minimize_height(reg_exp_t r, reg_exp_cache_t &cache);
00259                 static size_t getNextUpdatableNumber() {
00260                     return updatable_nodes.size();
00261                 }
00262                 static void update(node_no_t nno, sem_elem_t se);
00263                 int updatableNumber();
00264 #ifdef DWPDS
00265                 sem_elem_t get_delta(unsigned int ls);
00266 #endif
00267                 static ostream &print_stats(ostream & out) {
00268                     out << stats;
00269                     return out;
00270                 }
00271                 static int out_node_height(set<RegExp *> reg_equations);
00272                 sem_elem_t get_weight();
00273 
00274                 int get_nevals() {
00275                     return nevals;
00276                 }
00277 
00278                 static RegExpStats get_stats() {
00279                     return stats;
00280                 }
00281 
00282                 bool isZero() {
00283                     return (type == Constant && value->equal(value->zero()));
00284                 }
00285 
00286                 bool isOne() {
00287                     return (type == Constant && value->equal(value->one()));
00288                 }
00289 
00290                 bool isCyclic();
00291                 sem_elem_t reevaluate();
00292 
00293             private:
00294 
00295                 void evaluate();
00296                 void evaluate_iteratively();
00297                 sem_elem_t evaluate(sem_elem_t w);
00298                 sem_elem_t evaluateRev(sem_elem_t w);
00299                 sem_elem_t reevaluateIter();
00300                 static reg_exp_t compressExtend(reg_exp_t r1, reg_exp_t r2);
00301                 static reg_exp_t compressCombine(reg_exp_t r1, reg_exp_t r2);
00302 
00303                 bool dfs(set<RegExp *> &, set<RegExp *> &);
00304                 int calculate_height(set<RegExp *> &visited, out_node_stat_t &stat_map);
00305         };
00306 
00307     } // namespace graph
00308 } // namespace wali
00309 
00310 #endif // wali_graph__REGEXP_H_