Up: Getting Started with Shore Previous: Using Indexes

Subsections

Appendix: Program Sources

stree.sdl

module stree {
    // interfaces defined here
    interface SearchTree;   // the top-level construct
    interface Word;         // a binary search tree node -- represents one word
    interface Document;     // a document stored in the repository
    interface Cite;         // a reference to a line in a document

    // A binary search tree of Word objects
    interface SearchTree {
      private:
        attribute ref<Word> root;   // the root of the tree

        // Update the entry matching WORD to add a citation.
        // Add a new Word if necessary.
        void insert(in lref<char> word, in ref<Cite> cite);

      public:
        // Constructor:  make an empty tree
        void initialize();

        // Insert a new Document into the repository.  The argument is a
        // pathname to be interpreted in the Unix name space as the name of a
        // Unix file containing the raw data.  A new Document object with
        // the same base name is created in the current Shore directory,
        // filled with a copy of the file's context, and indexed by all of
        // its words.
        void insert_file(in lref<char> src);

        // Retrieve the Word object matching the argument.
        // Return NULL if not found.
        ref<Word> find(in lref<char> word) const;
    };

    // There is one Word object for each distinct word appearing in any
    // document in the repository.
    interface Word {
    private:
        attribute string value;
        attribute ref<Word> left, right;
    public:
        relationship set<Cite> cited_by inverse cites;
        // Constructor: empty occurrences list
        void initialize(in lref<char> word);

        // How many occurrences?
        long count() const;

        // Get ith occurrence (returns NULL if not that many)
        ref<Cite> occurrence(in long i) const;

        // The following methods are meant to be used only by SearchTree.

        // Find decendant matching WORD creating one if necessary
        ref<Word> find_or_add(in lref<char> word);

        // Find only, return NULL on not found
        ref<Word> find(in lref<char> word) const;

        // Debugging dump
        void print(in long verbose) const;

        // Add an occurence
        void occurs_on(in ref<Cite> cite);
    };

    // A Cite object represents a citation.  There is one Cite object for each
    // line of each document in the repository.  There is thus a many-many
    // relationship from Cite to Word and a many-one relationship from Cite
    // to Document.
    interface Cite {
    private:
        attribute long offset;
    public:
        relationship ref<Document> doc inverse cited_by;
        relationship set<Word> cites inverse cited_by;
        // Constructor
        void initialize(in ref<Document> d, in long o);

        // Print the referenced line
        void print(in long vflag) const;

        // Destructor
        void finalize();
    };

    // A Document is a chunk of text that looks like a Unix file.
    // We also record the file name under which it was created.
    // (The need to record the name may go away when Shore adds a way to find
    // the pathname of a registered object given a Ref to it.)
    interface Document {
    private:
        attribute text body;
        attribute string name;
        attribute long cur_len;
    public:
        relationship set<Cite> cited_by inverse doc;
        // Constructor:  The body is empty.
        void initialize(in lref<char> base_name, in long len);

        // Add some text to the end of the body.
        void append(in lref<char> str);

        // Read-only access to the file name.
        lref<char> get_name() const;

        // Current length of text
        long size() const;

        // Print a line starting at OFFSET
        void print_line(in long offset) const;

        // Destructor
        void finalize();
    };

}

main.C

/*
 * ShoreConfig.h is needed only by applications
 * that distinguish platforms.  (Stree does not,
 * but we include this for documentation purposes.)
 */
#include <ShoreConfig.h>

#include <iostream.h>
#include <fstream.h>
#include <std.h>
#include "stree.h"

Ref<SearchTree> repository;
Ref<Pool> nodes;        // Place to create new anonymous objects

const char *DEMO_DIR = "stree";

char *argv0;
int verbose;
extern "C" int optind;

enum OPERATION {
    OP_NONE, OP_ADD, OP_LIST, OP_DEL, OP_POOL_LIST, OP_CLEAR
} operation = OP_NONE;

static void add_files(int argc, char *const*argc);
static void list_files(char *str);
static void delete_files(int argc, char *const*argc);
static void pool_list();
static void clear_all();

void usage() {
    cerr << "usage:" << endl;
    cerr << "\t" << argv0 << " -a[V] fname [fname ...]" << endl;
    cerr << "\t" << argv0 << " -l[V] word" << endl;
    cerr << "\t" << argv0 << " -d fname [fname ...]" << endl;
    cerr << "\t" << argv0 << " -p" << endl;
    cerr << "\t" << argv0 << " -c" << endl;
    cerr << "\t" << "the -V option turns on verbose mode" << endl;
    exit(1);
}

int main(int argc, char *argv[]) 
{
    argv0 = argv[0];
    shrc rc;

    // initialize connection to server
    SH_DO(Shore::init(argc, argv, 0, getenv("STREE_RC")));

    // get command-line options
    int c;
    while ((c = getopt(argc,argv,"aldpcV")) != EOF) switch(c) {
        case 'a': operation = OP_ADD; break;
        case 'l': operation = OP_LIST; break;
        case 'd': operation = OP_DEL; break;
        case 'p': operation = OP_POOL_LIST; break;
        case 'c': operation = OP_CLEAR; break;
        case 'V': verbose++; break;
        default: usage();
    }

    if (operation == OP_NONE)
        usage();

    // Start a transaction for initialization
    SH_BEGIN_TRANSACTION(rc);
    if (rc)
        rc.fatal(); // this terminates the program with extreme prejudice

    // Check that our demo directory exists
    rc = Shore::chdir(DEMO_DIR);
    if (rc != RCOK) {
        if (rc != RC(SH_NotFound))
            SH_ABORT_TRANSACTION(rc);

        // Not found.  Must be the first time through.
        // Create the directory
        SH_DO(Shore::mkdir(DEMO_DIR, 0755));
        SH_DO(Shore::chdir(DEMO_DIR));

        // Make a new SearchTree object ...
        repository = new("repository", 0644) SearchTree;
        repository.update()->initialize();

        // ... and a pool for allocating Nodes.
        SH_DO(nodes.create_pool("pool", 0644, nodes));
    } else { // not first time

        // Get the repository root from the database ...
        SH_DO(Ref<SearchTree>::lookup("repository",repository));

        // ... and the pool for creating nodes
        SH_DO(nodes.lookup("pool", nodes));
    }

    SH_DO(SH_COMMIT_TRANSACTION);

    switch (operation) {
        case OP_ADD:
            add_files(argc-optind, argv+optind);
            break;
        case OP_LIST:
            if (optind != argc-1)
                usage();
            list_files(argv[optind]);
            break;
        case OP_DEL:
            delete_files(argc-optind, argv+optind);
            break;
        case OP_POOL_LIST:
            pool_list();
            break;
        case OP_CLEAR:
            clear_all();
            break;
        default: break;
    }

    return 0;
} // main

// Add all the named files to the repository
static void add_files(int argc, char *const*argv) {
    shrc rc;

    SH_BEGIN_TRANSACTION(rc);
    if (rc)
        rc.fatal();
    for (int i=0; i<argc; i++)
        repository.update()->insert_file(argv[i]);
    if (verbose)
        cout << "about to commit" << endl;
    SH_DO(SH_COMMIT_TRANSACTION);
    if (verbose)
        cout << "committed" << endl;
} // add_files

// List all uses of a word
static void list_files(char *str) {
    shrc rc;
    int occurrences=0;

    SH_BEGIN_TRANSACTION(rc);
    if (rc)
        rc.fatal();
    Ref<Word> w = repository->find(str);
    if (verbose)
        cout << "========== " << str << endl;
    if (w && w->count() > 0) {
        Ref<Cite> c;
        for (int i=0; c = w->occurrence(i); i++) {
            occurrences++;
            c->print(verbose);
        }
    } else if (verbose) {
        cout << "**** Not found" << endl;
        occurrences = -1;
    }
    if(occurrences >= 0 && verbose) {
        cout << "**** " << occurrences << " citation"
            << (char *)(occurrences==1?"":"s") << endl;
    }
    SH_DO(SH_COMMIT_TRANSACTION);
} // list_files

// Removed the named files from the repository
static void delete_files(int argc, char *const*argv) {
    shrc rc;

    SH_BEGIN_TRANSACTION(rc);
    if (rc)
        rc.fatal();

    for (int i=0; i<argc; i++) {
        Ref<Document> d;
        SH_DO(d.lookup(argv[i],d));
        d.update()->finalize();
        SH_DO(Shore::unlink(argv[i]));
    }
    if (verbose)
        cout << "about to commit" << endl;
    SH_DO(SH_COMMIT_TRANSACTION);
    if (verbose)
        cout << "committed" << endl;
} // delete_files

static void pool_list() {
    shrc rc;

    SH_BEGIN_TRANSACTION(rc);
    if (rc)
        rc.fatal();

    Ref<any> ref;
    Ref<Word> w;
    Ref<Cite> c;
    {
        PoolScan scan("pool");
        if (scan != RCOK)
            SH_ABORT_TRANSACTION(scan.rc());

        while (scan.next(ref, true) == RCOK) {
            if (w = TYPE_OBJECT(Word).isa(ref)) {
                w->print(1);
            }
            else if (c = TYPE_OBJECT(Cite).isa(ref)) {
                c->print(2);
            }
            else cout << " Unknown type of object" << endl;
        }
    }
    SH_DO(SH_COMMIT_TRANSACTION);
} // pool_list

static void clear_all() {
    shrc rc;

    SH_BEGIN_TRANSACTION(rc);
    if (rc)
        rc.fatal();

    rc = Shore::unlink("repository");
    if (rc)
        cout << rc << endl;

    SH_DO(nodes.lookup("pool", nodes));
    SH_DO(nodes.destroy_contents());
    rc = Shore::unlink("pool");
    if (rc)
        cout << rc << endl;

    rc = Shore::chdir("..");
    if (rc)
        cout << rc << endl;

    SH_DO(Shore::rmdir(DEMO_DIR));

    SH_DO(SH_COMMIT_TRANSACTION);
} // clear_all

tree.C

// Member functions of the SearchTree class
#include <iostream.h>
#include <string.h>
#include <ctype.h>
#include "stree.h"
#include <sys/types.h>
#include <sys/stat.h>

extern Ref<Pool> nodes;
static int getword(const char *&p, char *res, int size);

extern int verbose; // defined in main.C

void SearchTree::initialize() {
    root = NULL;
}

void SearchTree::insert(char *s, Ref<Cite> c) {
    Ref<Word> w;
    if (root) {
        w = root.update()->find_or_add(s);
    }
    else {
        root = new(nodes) Word;
        root.update()->initialize(s);
        w = root;
    }
    w.update()->occurs_on(c);
}

void SearchTree::insert_file(char *fname) {
    shrc rc;

    if (verbose)
        cout << "Indexing file " << fname << endl;

    // Open input file
    ifstream in(fname);
    if (!in) {
        perror(fname);
        SH_ABORT_TRANSACTION(rc);
    }
    // do a unix stat to get the total size of the file.
    struct stat in_stat;
    if (stat(fname,&in_stat)) {
        perror(fname);
        SH_ABORT_TRANSACTION(rc);
    }

    // Create target document

    // Strip leading path from file name;
    char *base_name = strrchr(fname, '/');
    if (base_name)
        base_name++;
    else
        base_name = fname;

    Ref<Document> doc;
    rc = doc.new_persistent(base_name, 0644, doc);
    if (rc) {
        perror(base_name);
        SH_ABORT_TRANSACTION(rc);
    }
    doc.update()->initialize(base_name,in_stat.st_size);

    // for each line of the document ...
    char linebuf[1024];
    while (in.getline(linebuf, sizeof linebuf -1)) {
        long off = doc->size();

        // copy the line to the body of the document
        doc.update()->append(linebuf);
        doc.update()->append("\n");

        // allocate a new Cite object for this line
        Ref<Cite> cite = new (nodes) Cite;
        cite.update()->initialize(doc, off);

        // for each word on the line ...
        char word[100];
        const char *p = linebuf;
        while (getword(p, word, sizeof word)) {
            // link the citation to the word
            insert(word, cite);
        }
    }
}

Ref<Word> SearchTree::find(char *str) const {
    if (root)
        return root->find(str);
    return NULL;
}

// Copy a word of at most SIZE characters (including terminating null)
// in to the buffer starting at RES.  Start searching at location P.
// Words are delimited by white space.  The result is translated to lower
// case, with all non-letters eliminated.
// P is updated to point to the first character not copied.
// The result is 1 if a word is found, 0 if '\0' is encountered first.
static int getword(const char *&p, char *res, int size) {
    for (;; ) {
        // skip leading white space
        while (isspace(*p))
            p++;

        // check for eoln
        if (*p == 0)
            return 0;

        // gather non-space characters, translating to lower case and
        // ignoring non-alpha characters
        int len;
        for (len = 0; len < size-1 && *p && !isspace(*p); p++) {
            if (isupper(*p))
                res[len++] = tolower(*p);
            else if (islower(*p))
                res[len++] = *p;
        }
        if (len > 0) {
            res[len] = 0;
            return 1;
        }
        // otherwise, word was all digits and punctuation, so try again.
    }
}

word.C

// Member functions of the Word class
#include <iostream.h>
#include <string.h>
#include "stree.h"

extern Ref<Pool> nodes;

void Word::initialize(char *word) {
    value = word;
    left = NULL;
    right = NULL;
}

long Word::count() const {
    return cited_by.get_size();
}

Ref<Cite> Word::occurrence(long i) const {
    return cited_by.get_elt(i);
}

Ref<Word> Word::find_or_add(char *s) {
    int i = strcmp(s,value);
    if (i == 0)
        return this;
    if (i < 0) {
        if (left) return left.update()->find_or_add(s);
        else {
            left = new(nodes) Word;
            left.update()->initialize(s);
            return left;
        }
    }
    else {
        if (right) return right.update()->find_or_add(s);
        else {
            right = new(nodes) Word;
            right.update()->initialize(s);
            return right;
        }
    }
}

Ref<Word> Word::find(char *s) const {
    int i = strcmp(s,value);
    if (i == 0) return this;
    if (i < 0) return left ? left->find(s) : (Ref<Word>)NULL;
    return right ? right->find(s) : (Ref<Word>)NULL;
}

void Word::occurs_on(Ref<Cite> cite) {
    cited_by.add(cite);
}

void Word::print(long verbose) const {
    if (verbose) {
        int s = cited_by.get_size();
        cout << "Word '" << (char *)value
            << "' occurs on " << s << " line" << (s==1 ? "" : "s") << endl;
    }
    else cout << (char *)value;
}

cite.C

// Member functions of the Cite class
#include <iostream.h>
#include "stree.h"

void Cite::initialize(Ref<Document> d, long o) {
    doc = d;
    offset = o;
}

void Cite::print(long v) const {
    switch (v) {
        default:
        case 0: // just the file name
            cout << doc->get_name() << endl;
            break;
        case 1: // the file name and the corresponding line
            cout << doc->get_name() << ": ";
            doc->print_line(offset);
            break;
        case 2: // debugging version
            cout << "Cite, offset " << offset
                << " in file " << doc->get_name()
                << " cites";
            {
                int count = cites.get_size();
                for (int i = 0; i < count; i++) {
                    Ref<Word> w = cites.get_elt(i);
                    cout << " ";
                    w->print(0);
                }
                cout << endl;
            }
            break;
    }
}

void Cite::finalize() {
    while (cites.delete_one()) {}
}

document.C

// Member functions of the Document class
#include <iostream.h>
#include <string.h>
#include "stree.h"

void Document::initialize(char *base_name, long ilen) {
    body = 0;
    name = base_name;
    // set a char at the end of the body to initialze the
    // string space.
    body.set(ilen-1,0);
    // initialize cur_len.
    cur_len = 0;
}

void Document::append(char *str) {
    // body.set(str, body.length(), ::strlen(str));
    int str_size = ::strlen(str);
    body.set(str, cur_len, str_size);
    cur_len += str_size;
}

char *Document::get_name() const {
    return name;
}

long Document::size() const {
    // return body.strlen();
    return cur_len;
}

void Document::print_line(long offset) const {
    char buf[100];

    body.get(buf, offset, sizeof buf);
    buf[sizeof buf - 1] = 0;
    char *p = strchr(buf, '\n');
    if (p) *++p = 0;
    cout << buf;
}

void Document::finalize() {
    Ref<Cite> p;
    while (p = cited_by.delete_one()) {
        p.update()->finalize();
        SH_DO(p.destroy());
    }
}

stree_defs.C

#define MODULE_CODE
#include "stree.h"

swc

#!/bin/sh
# --------------------------------------------------------------- #
# -- Copyright (c) 1994, 1995 Computer Sciences Department,    -- #
# -- University of Wisconsin-Madison, subject to the terms     -- #
# -- and conditions given in the file COPYRIGHT.  All Rights   -- #
# -- Reserved.                                                 -- #
# --------------------------------------------------------------- #
# $Header: /p/shore/shore_cvs/src/examples/stree/swc,v 1.3 1995/04/26 11:03:06 solomon Exp $

mountpoint=/shoremnt
program=stree

if test $1x = -ix
then
    program=doc_index
    shift
fi

if test $# -ne 1
then
    echo "usage: $0 [-i] keyword"
    exit 1
fi

prog_path=`pwd`/$program

cd $mountpoint/$program
wc `$prog_path -l $1 | sort -u`

sedit

#!/bin/sh
# --------------------------------------------------------------- #
# -- Copyright (c) 1994, 1995 Computer Sciences Department,    -- #
# -- University of Wisconsin-Madison, subject to the terms     -- #
# -- and conditions given in the file COPYRIGHT.  All Rights   -- #
# -- Reserved.                                                 -- #
# --------------------------------------------------------------- #
# $Header: /p/shore/shore_cvs/src/examples/stree/sedit,v 1.3 1995/04/26 11:03:03 solomon Exp $

mountpoint=/shoremnt
program=stree

if test $1x = -ix
then
    program=doc_index
    shift
fi

if test -t 0
then
    edit=${EDITOR:-emacs}
else
    edit="echo EDIT"
fi

if test $# -ne 1
then
    echo "usage: $0 [-i] keyword"
    exit 1
fi

prog_path=`pwd`/$program

files="`$prog_path -l $1`" || exit 1
if [ -n "$files" ]
then
    cd $mountpoint/$program
    $edit $files
else
    echo $1 not found
fi

docIndex

// Member functions of the DocIndex class
#include <iostream.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "doc_index.h"

extern Ref<Pool> nodes;
static int getword(const char *&p, char *res, int size);

extern int verbose; // defined in main.C

void DocIndex::initialize() {
    SH_DO(ind.init(UniqueBTree));
}

void DocIndex::insert(char *s, Ref<Cite> c) {
    Ref<Word> w;
    bool found;

    SH_DO(ind.find(s,w,found));
    if (!found) {
        w = new(nodes) Word;
        w.update()->initialize(s);
        SH_DO(ind.insert(s,w));
    }
    w.update()->occurs_on(c);
}

void DocIndex::insert_file(char *fname) {
    shrc rc;

    if (verbose)
        cout << "Indexing file " << fname << endl;

    // Open input file
    ifstream in(fname);
    if (!in) {
        perror(fname);
        SH_ABORT_TRANSACTION(rc);
    }
    // do a unix stat to get the total size of the file.
    struct stat in_stat;
    if (stat(fname,&in_stat)) {
        perror(fname);
        SH_ABORT_TRANSACTION(rc);
    }

    // Create target document

    // Strip leading path from file name;
    char *base_name = strrchr(fname, '/');
    if (base_name)
        base_name++;
    else
        base_name = fname;

    Ref<Document> doc;
    rc = doc.new_persistent(base_name, 0644, doc);
    if (rc) {
        perror(base_name);
        SH_ABORT_TRANSACTION(rc);
    }
    doc.update()->initialize(base_name,in_stat.st_size);

    // for each line of the document ...
    char linebuf[1024];
    while (in.getline(linebuf, sizeof linebuf -1)) {
        long off = doc->size();

        // copy the line to the body of the document
        doc.update()->append(linebuf);
        doc.update()->append("\n");

        // allocate a new Cite object for this line
        Ref<Cite> cite = new (nodes) Cite;
        cite.update()->initialize(doc, off);

        // for each word on the line ...
        char word[100];
        const char *p = linebuf;
        while (getword(p, word, sizeof word)) {
            // link the citation to the word
            insert(word, cite);
        }
    }
}

Ref<Word> DocIndex::find(char *str) const {
    Ref<Word> w;
    bool found;

    SH_DO(ind.find(str,w,found));
    if (found)
        return w;
    return NULL;
}

// Copy a word of at most SIZE characters (including terminating null)
// in to the buffer starting at RES.  Start searching at location P.
// Words are delimited by white space.  The result is translated to lower
// case, with all non-letters eliminated.
// P is updated to point to the first character not copied.
// The result is 1 if a word is found, 0 if '\0' is encountered first.
static int getword(const char *&p, char *res, int size) {
    for (;; ) {
        // skip leading white space
        while (isspace(*p))
            p++;

        // check for eoln
        if (*p == 0)
            return 0;

        // gather non-space characters, translating to lower case and
        // ignoring non-alpha characters
        int len;
        for (len = 0; len < size-1 && *p && !isspace(*p); p++) {
            if (isupper(*p))
                res[len++] = tolower(*p);
            else if (islower(*p))
                res[len++] = *p;
        }
        if (len > 0) {
            res[len] = 0;
            return 1;
        }
        // otherwise, word was all digits and punctuation, so try again.
    }
}

void DocIndex::delete_word(sdl_string w) {
    int count;
    SH_DO(ind.remove(w,count));
    if (verbose)
        cout << "deleted " << count << (count==1 ? " copy" : " copies")
            << " of word '" << (char *)w << "' from the index" << endl;
}

void DocIndex::print() const {
    // index_iter<typeof(ind)> iterator(ind);
    IndexScanIter<sdl_string,Ref<Word> > iterator(this->ind);
    SH_DO(iterator.next());
    while (!iterator.eof) {
        cout << "key: '" << iterator.cur_key << "' value: ";
        iterator.cur_val->print(1);
        SH_DO(iterator.next());
    }
}

This page was generated from LaTeX sources
10/27/1997