module stree {
// interfaces defined here
interface SearchTree; // the top-level construct
interface Word; // a binary search tree node -- represents one word
interface Document; // a document stored in the repository
interface Cite; // a reference to a line in a document
// A binary search tree of Word objects
interface SearchTree {
private:
attribute ref<Word> root; // the root of the tree
// Update the entry matching WORD to add a citation.
// Add a new Word if necessary.
void insert(in lref<char> word, in ref<Cite> cite);
public:
// Constructor: make an empty tree
void initialize();
// Insert a new Document into the repository. The argument is a
// pathname to be interpreted in the Unix name space as the name of a
// Unix file containing the raw data. A new Document object with
// the same base name is created in the current Shore directory,
// filled with a copy of the file's context, and indexed by all of
// its words.
void insert_file(in lref<char> src);
// Retrieve the Word object matching the argument.
// Return NULL if not found.
ref<Word> find(in lref<char> word) const;
};
// There is one Word object for each distinct word appearing in any
// document in the repository.
interface Word {
private:
attribute string value;
attribute ref<Word> left, right;
public:
relationship set<Cite> cited_by inverse cites;
// Constructor: empty occurrences list
void initialize(in lref<char> word);
// How many occurrences?
long count() const;
// Get ith occurrence (returns NULL if not that many)
ref<Cite> occurrence(in long i) const;
// The following methods are meant to be used only by SearchTree.
// Find decendant matching WORD creating one if necessary
ref<Word> find_or_add(in lref<char> word);
// Find only, return NULL on not found
ref<Word> find(in lref<char> word) const;
// Debugging dump
void print(in long verbose) const;
// Add an occurence
void occurs_on(in ref<Cite> cite);
};
// A Cite object represents a citation. There is one Cite object for each
// line of each document in the repository. There is thus a many-many
// relationship from Cite to Word and a many-one relationship from Cite
// to Document.
interface Cite {
private:
attribute long offset;
public:
relationship ref<Document> doc inverse cited_by;
relationship set<Word> cites inverse cited_by;
// Constructor
void initialize(in ref<Document> d, in long o);
// Print the referenced line
void print(in long vflag) const;
// Destructor
void finalize();
};
// A Document is a chunk of text that looks like a Unix file.
// We also record the file name under which it was created.
// (The need to record the name may go away when Shore adds a way to find
// the pathname of a registered object given a Ref to it.)
interface Document {
private:
attribute text body;
attribute string name;
attribute long cur_len;
public:
relationship set<Cite> cited_by inverse doc;
// Constructor: The body is empty.
void initialize(in lref<char> base_name, in long len);
// Add some text to the end of the body.
void append(in lref<char> str);
// Read-only access to the file name.
lref<char> get_name() const;
// Current length of text
long size() const;
// Print a line starting at OFFSET
void print_line(in long offset) const;
// Destructor
void finalize();
};
}
/*
* ShoreConfig.h is needed only by applications
* that distinguish platforms. (Stree does not,
* but we include this for documentation purposes.)
*/
#include <ShoreConfig.h>
#include <iostream.h>
#include <fstream.h>
#include <std.h>
#include "stree.h"
Ref<SearchTree> repository;
Ref<Pool> nodes; // Place to create new anonymous objects
const char *DEMO_DIR = "stree";
char *argv0;
int verbose;
extern "C" int optind;
enum OPERATION {
OP_NONE, OP_ADD, OP_LIST, OP_DEL, OP_POOL_LIST, OP_CLEAR
} operation = OP_NONE;
static void add_files(int argc, char *const*argc);
static void list_files(char *str);
static void delete_files(int argc, char *const*argc);
static void pool_list();
static void clear_all();
void usage() {
cerr << "usage:" << endl;
cerr << "\t" << argv0 << " -a[V] fname [fname ...]" << endl;
cerr << "\t" << argv0 << " -l[V] word" << endl;
cerr << "\t" << argv0 << " -d fname [fname ...]" << endl;
cerr << "\t" << argv0 << " -p" << endl;
cerr << "\t" << argv0 << " -c" << endl;
cerr << "\t" << "the -V option turns on verbose mode" << endl;
exit(1);
}
int main(int argc, char *argv[])
{
argv0 = argv[0];
shrc rc;
// initialize connection to server
SH_DO(Shore::init(argc, argv, 0, getenv("STREE_RC")));
// get command-line options
int c;
while ((c = getopt(argc,argv,"aldpcV")) != EOF) switch(c) {
case 'a': operation = OP_ADD; break;
case 'l': operation = OP_LIST; break;
case 'd': operation = OP_DEL; break;
case 'p': operation = OP_POOL_LIST; break;
case 'c': operation = OP_CLEAR; break;
case 'V': verbose++; break;
default: usage();
}
if (operation == OP_NONE)
usage();
// Start a transaction for initialization
SH_BEGIN_TRANSACTION(rc);
if (rc)
rc.fatal(); // this terminates the program with extreme prejudice
// Check that our demo directory exists
rc = Shore::chdir(DEMO_DIR);
if (rc != RCOK) {
if (rc != RC(SH_NotFound))
SH_ABORT_TRANSACTION(rc);
// Not found. Must be the first time through.
// Create the directory
SH_DO(Shore::mkdir(DEMO_DIR, 0755));
SH_DO(Shore::chdir(DEMO_DIR));
// Make a new SearchTree object ...
repository = new("repository", 0644) SearchTree;
repository.update()->initialize();
// ... and a pool for allocating Nodes.
SH_DO(nodes.create_pool("pool", 0644, nodes));
} else { // not first time
// Get the repository root from the database ...
SH_DO(Ref<SearchTree>::lookup("repository",repository));
// ... and the pool for creating nodes
SH_DO(nodes.lookup("pool", nodes));
}
SH_DO(SH_COMMIT_TRANSACTION);
switch (operation) {
case OP_ADD:
add_files(argc-optind, argv+optind);
break;
case OP_LIST:
if (optind != argc-1)
usage();
list_files(argv[optind]);
break;
case OP_DEL:
delete_files(argc-optind, argv+optind);
break;
case OP_POOL_LIST:
pool_list();
break;
case OP_CLEAR:
clear_all();
break;
default: break;
}
return 0;
} // main
// Add all the named files to the repository
static void add_files(int argc, char *const*argv) {
shrc rc;
SH_BEGIN_TRANSACTION(rc);
if (rc)
rc.fatal();
for (int i=0; i<argc; i++)
repository.update()->insert_file(argv[i]);
if (verbose)
cout << "about to commit" << endl;
SH_DO(SH_COMMIT_TRANSACTION);
if (verbose)
cout << "committed" << endl;
} // add_files
// List all uses of a word
static void list_files(char *str) {
shrc rc;
int occurrences=0;
SH_BEGIN_TRANSACTION(rc);
if (rc)
rc.fatal();
Ref<Word> w = repository->find(str);
if (verbose)
cout << "========== " << str << endl;
if (w && w->count() > 0) {
Ref<Cite> c;
for (int i=0; c = w->occurrence(i); i++) {
occurrences++;
c->print(verbose);
}
} else if (verbose) {
cout << "**** Not found" << endl;
occurrences = -1;
}
if(occurrences >= 0 && verbose) {
cout << "**** " << occurrences << " citation"
<< (char *)(occurrences==1?"":"s") << endl;
}
SH_DO(SH_COMMIT_TRANSACTION);
} // list_files
// Removed the named files from the repository
static void delete_files(int argc, char *const*argv) {
shrc rc;
SH_BEGIN_TRANSACTION(rc);
if (rc)
rc.fatal();
for (int i=0; i<argc; i++) {
Ref<Document> d;
SH_DO(d.lookup(argv[i],d));
d.update()->finalize();
SH_DO(Shore::unlink(argv[i]));
}
if (verbose)
cout << "about to commit" << endl;
SH_DO(SH_COMMIT_TRANSACTION);
if (verbose)
cout << "committed" << endl;
} // delete_files
static void pool_list() {
shrc rc;
SH_BEGIN_TRANSACTION(rc);
if (rc)
rc.fatal();
Ref<any> ref;
Ref<Word> w;
Ref<Cite> c;
{
PoolScan scan("pool");
if (scan != RCOK)
SH_ABORT_TRANSACTION(scan.rc());
while (scan.next(ref, true) == RCOK) {
if (w = TYPE_OBJECT(Word).isa(ref)) {
w->print(1);
}
else if (c = TYPE_OBJECT(Cite).isa(ref)) {
c->print(2);
}
else cout << " Unknown type of object" << endl;
}
}
SH_DO(SH_COMMIT_TRANSACTION);
} // pool_list
static void clear_all() {
shrc rc;
SH_BEGIN_TRANSACTION(rc);
if (rc)
rc.fatal();
rc = Shore::unlink("repository");
if (rc)
cout << rc << endl;
SH_DO(nodes.lookup("pool", nodes));
SH_DO(nodes.destroy_contents());
rc = Shore::unlink("pool");
if (rc)
cout << rc << endl;
rc = Shore::chdir("..");
if (rc)
cout << rc << endl;
SH_DO(Shore::rmdir(DEMO_DIR));
SH_DO(SH_COMMIT_TRANSACTION);
} // clear_all
// Member functions of the SearchTree class
#include <iostream.h>
#include <string.h>
#include <ctype.h>
#include "stree.h"
#include <sys/types.h>
#include <sys/stat.h>
extern Ref<Pool> nodes;
static int getword(const char *&p, char *res, int size);
extern int verbose; // defined in main.C
void SearchTree::initialize() {
root = NULL;
}
void SearchTree::insert(char *s, Ref<Cite> c) {
Ref<Word> w;
if (root) {
w = root.update()->find_or_add(s);
}
else {
root = new(nodes) Word;
root.update()->initialize(s);
w = root;
}
w.update()->occurs_on(c);
}
void SearchTree::insert_file(char *fname) {
shrc rc;
if (verbose)
cout << "Indexing file " << fname << endl;
// Open input file
ifstream in(fname);
if (!in) {
perror(fname);
SH_ABORT_TRANSACTION(rc);
}
// do a unix stat to get the total size of the file.
struct stat in_stat;
if (stat(fname,&in_stat)) {
perror(fname);
SH_ABORT_TRANSACTION(rc);
}
// Create target document
// Strip leading path from file name;
char *base_name = strrchr(fname, '/');
if (base_name)
base_name++;
else
base_name = fname;
Ref<Document> doc;
rc = doc.new_persistent(base_name, 0644, doc);
if (rc) {
perror(base_name);
SH_ABORT_TRANSACTION(rc);
}
doc.update()->initialize(base_name,in_stat.st_size);
// for each line of the document ...
char linebuf[1024];
while (in.getline(linebuf, sizeof linebuf -1)) {
long off = doc->size();
// copy the line to the body of the document
doc.update()->append(linebuf);
doc.update()->append("\n");
// allocate a new Cite object for this line
Ref<Cite> cite = new (nodes) Cite;
cite.update()->initialize(doc, off);
// for each word on the line ...
char word[100];
const char *p = linebuf;
while (getword(p, word, sizeof word)) {
// link the citation to the word
insert(word, cite);
}
}
}
Ref<Word> SearchTree::find(char *str) const {
if (root)
return root->find(str);
return NULL;
}
// Copy a word of at most SIZE characters (including terminating null)
// in to the buffer starting at RES. Start searching at location P.
// Words are delimited by white space. The result is translated to lower
// case, with all non-letters eliminated.
// P is updated to point to the first character not copied.
// The result is 1 if a word is found, 0 if '\0' is encountered first.
static int getword(const char *&p, char *res, int size) {
for (;; ) {
// skip leading white space
while (isspace(*p))
p++;
// check for eoln
if (*p == 0)
return 0;
// gather non-space characters, translating to lower case and
// ignoring non-alpha characters
int len;
for (len = 0; len < size-1 && *p && !isspace(*p); p++) {
if (isupper(*p))
res[len++] = tolower(*p);
else if (islower(*p))
res[len++] = *p;
}
if (len > 0) {
res[len] = 0;
return 1;
}
// otherwise, word was all digits and punctuation, so try again.
}
}
// Member functions of the Word class #include <iostream.h> #include <string.h> #include "stree.h" extern Ref<Pool> nodes;
void Word::initialize(char *word) {
value = word;
left = NULL;
right = NULL;
}
long Word::count() const {
return cited_by.get_size();
}
Ref<Cite> Word::occurrence(long i) const {
return cited_by.get_elt(i);
}
Ref<Word> Word::find_or_add(char *s) {
int i = strcmp(s,value);
if (i == 0)
return this;
if (i < 0) {
if (left) return left.update()->find_or_add(s);
else {
left = new(nodes) Word;
left.update()->initialize(s);
return left;
}
}
else {
if (right) return right.update()->find_or_add(s);
else {
right = new(nodes) Word;
right.update()->initialize(s);
return right;
}
}
}
Ref<Word> Word::find(char *s) const {
int i = strcmp(s,value);
if (i == 0) return this;
if (i < 0) return left ? left->find(s) : (Ref<Word>)NULL;
return right ? right->find(s) : (Ref<Word>)NULL;
}
void Word::occurs_on(Ref<Cite> cite) {
cited_by.add(cite);
}
void Word::print(long verbose) const {
if (verbose) {
int s = cited_by.get_size();
cout << "Word '" << (char *)value
<< "' occurs on " << s << " line" << (s==1 ? "" : "s") << endl;
}
else cout << (char *)value;
}
// Member functions of the Cite class
#include <iostream.h>
#include "stree.h"
void Cite::initialize(Ref<Document> d, long o) {
doc = d;
offset = o;
}
void Cite::print(long v) const {
switch (v) {
default:
case 0: // just the file name
cout << doc->get_name() << endl;
break;
case 1: // the file name and the corresponding line
cout << doc->get_name() << ": ";
doc->print_line(offset);
break;
case 2: // debugging version
cout << "Cite, offset " << offset
<< " in file " << doc->get_name()
<< " cites";
{
int count = cites.get_size();
for (int i = 0; i < count; i++) {
Ref<Word> w = cites.get_elt(i);
cout << " ";
w->print(0);
}
cout << endl;
}
break;
}
}
void Cite::finalize() {
while (cites.delete_one()) {}
}
// Member functions of the Document class
#include <iostream.h>
#include <string.h>
#include "stree.h"
void Document::initialize(char *base_name, long ilen) {
body = 0;
name = base_name;
// set a char at the end of the body to initialze the
// string space.
body.set(ilen-1,0);
// initialize cur_len.
cur_len = 0;
}
void Document::append(char *str) {
// body.set(str, body.length(), ::strlen(str));
int str_size = ::strlen(str);
body.set(str, cur_len, str_size);
cur_len += str_size;
}
char *Document::get_name() const {
return name;
}
long Document::size() const {
// return body.strlen();
return cur_len;
}
void Document::print_line(long offset) const {
char buf[100];
body.get(buf, offset, sizeof buf);
buf[sizeof buf - 1] = 0;
char *p = strchr(buf, '\n');
if (p) *++p = 0;
cout << buf;
}
void Document::finalize() {
Ref<Cite> p;
while (p = cited_by.delete_one()) {
p.update()->finalize();
SH_DO(p.destroy());
}
}
#define MODULE_CODE #include "stree.h"
#!/bin/sh
# --------------------------------------------------------------- #
# -- Copyright (c) 1994, 1995 Computer Sciences Department, -- #
# -- University of Wisconsin-Madison, subject to the terms -- #
# -- and conditions given in the file COPYRIGHT. All Rights -- #
# -- Reserved. -- #
# --------------------------------------------------------------- #
# $Header: /p/shore/shore_cvs/src/examples/stree/swc,v 1.3 1995/04/26 11:03:06 solomon Exp $
mountpoint=/shoremnt
program=stree
if test $1x = -ix
then
program=doc_index
shift
fi
if test $# -ne 1
then
echo "usage: $0 [-i] keyword"
exit 1
fi
prog_path=`pwd`/$program
cd $mountpoint/$program
wc `$prog_path -l $1 | sort -u`
#!/bin/sh
# --------------------------------------------------------------- #
# -- Copyright (c) 1994, 1995 Computer Sciences Department, -- #
# -- University of Wisconsin-Madison, subject to the terms -- #
# -- and conditions given in the file COPYRIGHT. All Rights -- #
# -- Reserved. -- #
# --------------------------------------------------------------- #
# $Header: /p/shore/shore_cvs/src/examples/stree/sedit,v 1.3 1995/04/26 11:03:03 solomon Exp $
mountpoint=/shoremnt
program=stree
if test $1x = -ix
then
program=doc_index
shift
fi
if test -t 0
then
edit=${EDITOR:-emacs}
else
edit="echo EDIT"
fi
if test $# -ne 1
then
echo "usage: $0 [-i] keyword"
exit 1
fi
prog_path=`pwd`/$program
files="`$prog_path -l $1`" || exit 1
if [ -n "$files" ]
then
cd $mountpoint/$program
$edit $files
else
echo $1 not found
fi
// Member functions of the DocIndex class
#include <iostream.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "doc_index.h"
extern Ref<Pool> nodes;
static int getword(const char *&p, char *res, int size);
extern int verbose; // defined in main.C
void DocIndex::initialize() {
SH_DO(ind.init(UniqueBTree));
}
void DocIndex::insert(char *s, Ref<Cite> c) {
Ref<Word> w;
bool found;
SH_DO(ind.find(s,w,found));
if (!found) {
w = new(nodes) Word;
w.update()->initialize(s);
SH_DO(ind.insert(s,w));
}
w.update()->occurs_on(c);
}
void DocIndex::insert_file(char *fname) {
shrc rc;
if (verbose)
cout << "Indexing file " << fname << endl;
// Open input file
ifstream in(fname);
if (!in) {
perror(fname);
SH_ABORT_TRANSACTION(rc);
}
// do a unix stat to get the total size of the file.
struct stat in_stat;
if (stat(fname,&in_stat)) {
perror(fname);
SH_ABORT_TRANSACTION(rc);
}
// Create target document
// Strip leading path from file name;
char *base_name = strrchr(fname, '/');
if (base_name)
base_name++;
else
base_name = fname;
Ref<Document> doc;
rc = doc.new_persistent(base_name, 0644, doc);
if (rc) {
perror(base_name);
SH_ABORT_TRANSACTION(rc);
}
doc.update()->initialize(base_name,in_stat.st_size);
// for each line of the document ...
char linebuf[1024];
while (in.getline(linebuf, sizeof linebuf -1)) {
long off = doc->size();
// copy the line to the body of the document
doc.update()->append(linebuf);
doc.update()->append("\n");
// allocate a new Cite object for this line
Ref<Cite> cite = new (nodes) Cite;
cite.update()->initialize(doc, off);
// for each word on the line ...
char word[100];
const char *p = linebuf;
while (getword(p, word, sizeof word)) {
// link the citation to the word
insert(word, cite);
}
}
}
Ref<Word> DocIndex::find(char *str) const {
Ref<Word> w;
bool found;
SH_DO(ind.find(str,w,found));
if (found)
return w;
return NULL;
}
// Copy a word of at most SIZE characters (including terminating null)
// in to the buffer starting at RES. Start searching at location P.
// Words are delimited by white space. The result is translated to lower
// case, with all non-letters eliminated.
// P is updated to point to the first character not copied.
// The result is 1 if a word is found, 0 if '\0' is encountered first.
static int getword(const char *&p, char *res, int size) {
for (;; ) {
// skip leading white space
while (isspace(*p))
p++;
// check for eoln
if (*p == 0)
return 0;
// gather non-space characters, translating to lower case and
// ignoring non-alpha characters
int len;
for (len = 0; len < size-1 && *p && !isspace(*p); p++) {
if (isupper(*p))
res[len++] = tolower(*p);
else if (islower(*p))
res[len++] = *p;
}
if (len > 0) {
res[len] = 0;
return 1;
}
// otherwise, word was all digits and punctuation, so try again.
}
}
void DocIndex::delete_word(sdl_string w) {
int count;
SH_DO(ind.remove(w,count));
if (verbose)
cout << "deleted " << count << (count==1 ? " copy" : " copies")
<< " of word '" << (char *)w << "' from the index" << endl;
}
void DocIndex::print() const {
// index_iter<typeof(ind)> iterator(ind);
IndexScanIter<sdl_string,Ref<Word> > iterator(this->ind);
SH_DO(iterator.next());
while (!iterator.eof) {
cout << "key: '" << iterator.cur_key << "' value: ";
iterator.cur_val->print(1);
SH_DO(iterator.next());
}
}