/* ################################################################################ String indexing module First we create our non text-array variables, and subtract their sizes from the available memory. The remaining memory is then used to create a character array to hold the current section of the file The idea here is, firstly, to minimise disk accessing, which is costly, time-wise. To this end, the primary idea is to avoid reading the same section of the file more than once. Therefore we must find a way of identifying all occurances of a string in our read section before moving on to the next. The simplest and most efficient thing to do is to compare each char in the read data to the first letter of the query string, continuing on and comparing the rest of the query string to susequent characters if a match is detected. However, since the function does not necessarily read the entire file in one go, we must be careful to create an algorithm that will not be disrupted by the re-filling of the character array from the next section of the file. Assumptions: *The passed-in pointers do not count against the memory limit (just like the things they pint to) as they are generated and held outside the module ################################################################################ */ #include #include #include //#include using namespace std; /* ################################################################################ We place these functions within a class, where we use pre-defined variables (ie: not defined within the functions), so that we may more easily manage our memory useage by way of checking the sizeof() of the class. We could simply create a global function, but then we would have to subtract multiple sizeof()s from the memory total, increasing code size ################################################################################ */ class StringSpotter { public: void getIndices(string*, string*, list*, unsigned int*); //the requested function private: unsigned int arraySize; //tracks our memory useage char* memBlockStart; //c-string to hold the data we read from the file int currChar; //counter for character array ifstream inFile; //file reader int qstrCounter; //counter for when we have a primary-match and //need to test against all characters in the //query string long charsTot; //used to get our file index for a perfect match //to the query string void nextSection(); //private utility function to avoid a small amount //of redundant code bool fail; //for testing the external pointers }; void StringSpotter::getIndices( string* fName, //name of the file to open string* qString, //the query string list* indexListPtr, //pointer to our list of indices unsigned int* memSize) //size of our memory allocation { fail = false; if(fName == NULL) { //whichever error code you use goes here fail = true; } if(qString == NULL) { //ditto} fail = true; } if(indexListPtr == NULL) { //ditto} fail = true; } if(memSize == NULL) { //ditto fail = true; } else { arraySize = *memSize - sizeof(*this); //amount of the allowed memory we //have left after taking this //object's footprint into account try { memBlockStart = new char[arraySize]; //making our array } catch(bad_alloc&) { //error code here } } try { inFile.open(fName->c_str(), ifstream::in); //get our file } catch(ios_base::failure&) { //error code here; } charsTot = 0; if(inFile.good() && !fail) //if there's no problem with the file or pointers { nextSection(); //as implemented below, sets array //counter to zero and reads in more //of the file while(inFile.gcount() > 0) { // while there are still letters being //read (we haven't hit the eof) while(currChar < inFile.gcount()) { //while we've got characters left //to deal with in the current file section if(qString->at(0) == memBlockStart[currChar]) { //if we get a match to the first letter in the query string qstrCounter = 0;//our step-thtrough counter for the query string while(qString->at(qstrCounter) == memBlockStart[currChar]) { //while the current letter in the query string //matches the character we're looking at in the array if(qstrCounter == qString->length()-1) { //if we've matched right up to the end of the query string indexListPtr->push_back(charsTot-qString->length()+1); //push back our index qstrCounter = 0; //start checking using the //first char in the query again } else { qstrCounter++; //move one char along the query string } currChar++; //up one character in the array charsTot++; //up one character in total if(currChar >= inFile.gcount()) //if we've gone beyond our array space { nextSection(); //reset array counter to zero //and load the next file section } } } else { charsTot++; //if no match, move along currChar++; //ditto } } nextSection(); //load the next section when we come to the end of our array } } else { //Put your preferred error code here } inFile.close(); //close our file delete[] memBlockStart; //FREE THE ARRAY '07! } void StringSpotter::nextSection() { try { inFile.readsome(memBlockStart, arraySize); //read a block from the file } catch(ios_base::failure&) { //your error code here } currChar = 0; //array counter reset to 0 }