#include <climits>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>

#ifdef _WIN32
#include "windows.h"
#endif

using namespace std;

/********************************************************************
 * Created by Ophir Gottlieb
 *
 * Interactive console utility that rewrites a dense space-, tab- or
 * comma-delimited data file (classifier first, attributes after it)
 * as "label 1:value 2:value ..." rows for SVMlight.
 ********************************************************************/

int AttributeSize();
char DelimiterType();
string OpenFiles(ifstream &inFile, ofstream &outFile);
void SpaceDelimiter(ifstream &inFile, ofstream &outFile, int size, bool convert);
void CommaDelimiter(ifstream &inFile, ofstream &outFile, int size, bool convert);
bool Convert();
char Verify();
void PrintSummary(string files, char type, int size, bool convert);
void Run();

/* File-local helpers shared by the prompts and the two converters. */
static string ReadLineOrExit();
static char ReadCharAnswer();
static string Trim(const string &text);
static bool IsNumericZero(const string &token);
static void WriteLabel(ostream &out, const string &label, bool convert);
static void ReportProgress(int row);

int main()
{
    Run();
    return 0;
}

/********************************************************************
 * Function: Run
 * ----------------------
 * Drives the program: opens the files, collects the user's choices,
 * optionally echoes them back, runs the matching converter and
 * reports whether the output was written successfully.
 ********************************************************************/
void Run()
{
    ifstream inFile;
    ofstream outFile;

    const string files = OpenFiles(inFile, outFile);
    /* One extra column for the classifier that precedes the attributes. */
    const int size = AttributeSize() + 1;
    const bool convert = Convert();
    const char type = DelimiterType();
    const char verify = Verify();

    if (verify == 'y' || verify == 'Y')
        PrintSummary(files, type, size, convert);

    /* Each case must end in break; otherwise every later converter would
       also run on the same streams. */
    switch (type) {
    case 'S':
    case 's':
        SpaceDelimiter(inFile, outFile, size, convert);
        break;
    case 'C':
    case 'c':
        CommaDelimiter(inFile, outFile, size, convert);
        break;
    }

    inFile.close();
    outFile.close();

    /* A failed write or a failed close leaves the stream in a fail state. */
    if (outFile.fail())
        cerr << "Warning: an error occurred while writing the output file." << endl;
    else
        cout << "File formatting complete." << endl;

    system("pause");
}

/********************************************************************
 * Function: ReadLineOrExit
 * ----------------------
 * Reads one line from standard input. If standard input is exhausted
 * or broken no prompt can ever be answered, so the program is ended
 * with a failure status instead of re-prompting forever.
 ********************************************************************/
static string ReadLineOrExit()
{
    string line;
    if (!getline(cin, line)) {
        cerr << endl << "Unexpected end of input; aborting." << endl;
        exit(EXIT_FAILURE);
    }
    return line;
}

/********************************************************************
 * Function: ReadCharAnswer
 * ----------------------
 * Returns the first non-blank character the user types, consuming
 * the rest of that line. Blank lines are skipped, so the call waits
 * until something is actually entered.
 ********************************************************************/
static char ReadCharAnswer()
{
    while (true) {
        const string line = ReadLineOrExit();
        const string::size_type pos = line.find_first_not_of(" \t\r\n");
        if (pos != string::npos)
            return line[pos];
    }
}

/********************************************************************
 * Function: Trim
 * ----------------------
 * Returns text without leading and trailing spaces, tabs, carriage
 * returns and newlines (the '\r' case covers files with CRLF line
 * endings).
 ********************************************************************/
static string Trim(const string &text)
{
    static const char kBlank[] = " \t\r\n";
    const string::size_type first = text.find_first_not_of(kBlank);
    if (first == string::npos)
        return string();
    const string::size_type last = text.find_last_not_of(kBlank);
    return text.substr(first, last - first + 1);
}

/********************************************************************
 * Function: IsNumericZero
 * ----------------------
 * True when the whole token parses as a number whose value is zero
 * ("0", "0.0", "-0", ...). Used so both converters apply the same
 * test when turning 0 classifications into -1.
 ********************************************************************/
static bool IsNumericZero(const string &token)
{
    const char *begin = token.c_str();
    char *end = 0;
    const double value = strtod(begin, &end);
    return end != begin && *end == '\0' && value == 0.0;
}

/********************************************************************
 * Function: WriteLabel
 * ----------------------
 * Writes the classifier that starts an output row. When convert is
 * set and the label is numerically zero, -1 is written instead;
 * otherwise the label text is copied unchanged.
 ********************************************************************/
static void WriteLabel(ostream &out, const string &label, bool convert)
{
    if (convert && IsNumericZero(label))
        out << -1;
    else
        out << label;
}

/********************************************************************
 * Function: ReportProgress
 * ----------------------
 * Prints a progress line on the console after every 100th row.
 ********************************************************************/
static void ReportProgress(int row)
{
    if (row % 100 == 0)
        cout << "Reading Row: " << row << ".." << endl;
}

/********************************************************************
 * Function: OpenFiles
 * ----------------------
 * Asks for the output file name, then repeatedly prompts until the
 * user names an input file that can be opened for reading. The
 * output file is created only after the input has been opened and
 * only when the two names differ, so the input is not truncated by
 * mistake (the check is textual: two different spellings of the
 * same path are not detected). Both streams are bound before
 * returning.
 *
 * Returns: "Output: <name> Input: <name>" for the selection summary.
 ********************************************************************/
string OpenFiles(ifstream &inFile, ofstream &outFile)
{
    cout << "This program will take either space, tab or comma delimited" << endl
         << "files and put them into the required formatting for SVMlight." << endl;

    while (true) {
        cout << endl << "Enter a file name for the new formatted text file: ";
        const string outfilename = ReadLineOrExit();

        string infilename;
        while (true) {
            cout << "Enter the name of the input data file: ";
            infilename = ReadLineOrExit();
            inFile.open(infilename.c_str());
            if (!inFile.fail())
                break;
            inFile.clear();
            cout << "Unable to open file named " << infilename << endl
                 << "Please try again." << endl;
        }

        if (infilename == outfilename) {
            cout << "The output file must be different from the input file." << endl;
        } else {
            outFile.open(outfilename.c_str());
            if (!outFile.fail())
                return "Output: " + outfilename + " Input: " + infilename;
            outFile.clear();
            cout << "Unable to create file named " << outfilename << endl
                 << "Please try again." << endl;
        }

        /* Start over with both names; release the input opened above. */
        inFile.close();
        inFile.clear();
    }
}

/********************************************************************
 * Function: AttributeSize
 * ----------------------
 * Repeatedly prompts until the user enters a whole number of at
 * least 1 for the number of attributes in the input file. The upper
 * limit leaves room for the caller to add the classifier column.
 ********************************************************************/
int AttributeSize()
{
    while (true) {
        cout << "How many attributes (excluding the classifier) are in the input file? ";
        const string answer = Trim(ReadLineOrExit());
        char *end = 0;
        const long value = strtol(answer.c_str(), &end, 10);
        if (!answer.empty() && *end == '\0' && value >= 1 && value < INT_MAX)
            return static_cast<int>(value);
        cout << "Please enter a whole number of at least 1." << endl;
    }
}

/********************************************************************
 * Function: DelimiterType
 * ----------------------
 * Repeatedly prompts until the user gives a valid delimiter choice.
 * Returns 's'/'S' or 'c'/'C' as typed; 't'/'T' (tab) is accepted
 * and reported as 's' because tabs and spaces are handled alike.
 ********************************************************************/
char DelimiterType()
{
    while (true) {
        cout << endl << "How is the current input file delimited: " << endl
             << "s: Space or Tab delimited" << endl
             << "c: Comma delimited" << endl;
        const char type = ReadCharAnswer();
        if (type == 't' || type == 'T')
            return 's';
        if (type == 's' || type == 'S' || type == 'c' || type == 'C')
            return type;
        cout << "Incorrect entry, please choose s, t or c" << endl;
    }
}

/********************************************************************
 * Function: Convert
 * ----------------------
 * Asks the user whether classifications of 0 should be converted
 * to -1. Returns true only for an answer starting with 'y' or 'Y'.
 ********************************************************************/
bool Convert()
{
    cout << "Would you like to convert the 0 classifications to -1? (y/n): ";
    const char answer = ReadCharAnswer();
    return answer == 'y' || answer == 'Y';
}

/********************************************************************
 * Function: PrintSummary
 * ----------------------
 * Repeats the user's selections prior to conversion, then pauses.
 * size is the row width including the classifier, so size - 1 is
 * shown as the attribute count.
 ********************************************************************/
void PrintSummary(string files, char type, int size, bool convert)
{
    const char *delimiter = (type == 'c' || type == 'C') ? "Comma" : "Space or Tab";
    const char *change = convert ? "Yes" : "No";

    cout << endl << "Here is a summary of your Selections: " << endl;
    cout << "******************************************************************" << endl;
    cout << files << endl;
    cout << "Input delimiter: " << delimiter << endl;
    cout << "Number of Attributes (not including response): " << size - 1 << endl;
    cout << "Converting 0 values to -1 for classification: " << change << endl;
    cout << "******************************************************************" << endl;
    system("pause");
}

/********************************************************************
 * Function: Verify
 * ----------------------
 * Asks the user whether the selection summary should be shown and
 * returns the first character of the answer.
 ********************************************************************/
char Verify()
{
    cout << "Do you want to verify your selections? (y/n)";
    return ReadCharAnswer();
}

/********************************************************************
 * Function: SpaceDelimiter
 * ----------------------
 * Formats a space (or tab) delimited file. The input is read as a
 * stream of whitespace-separated tokens; every size tokens form one
 * output row: the classifier, then "index:value" pairs numbered
 * from 1. Tokens are copied as text so no numeric precision is lost.
 *
 * size:    tokens per row, classifier included (must be >= 2).
 * convert: write -1 for classifiers that are numerically zero.
 ********************************************************************/
void SpaceDelimiter(ifstream &inFile, ofstream &outFile, int size, bool convert)
{
    if (size < 2) {
        cerr << "Each row needs a classifier and at least one attribute." << endl;
        return;
    }

    string token;
    int column = 1;   /* 1-based position inside the current row */
    int rows = 0;

    /* Looping on the extraction itself stops cleanly at end of file,
       including when the file ends in trailing whitespace. */
    while (inFile >> token) {
        if (column == 1)
            WriteLabel(outFile, token, convert);
        else
            outFile << ' ' << column - 1 << ':' << token;

        if (column == size) {
            outFile << '\n';
            column = 1;
            ReportProgress(++rows);
        } else {
            ++column;
        }
    }

    if (column != 1) {
        /* The token count was not a multiple of size. */
        outFile << '\n';
        cerr << "Warning: the last row has fewer values than expected." << endl;
    }
}

/********************************************************************
 * Function: CommaDelimiter
 * ----------------------
 * Formats a comma delimited file. Each non-blank input line is one
 * row; it is split on every comma (quoted fields are not
 * supported), surrounding blanks are trimmed from each field, and
 * the row is written as the classifier followed by "index:value"
 * pairs numbered from 1.
 *
 * size:    expected fields per row, classifier included. Rows with
 *          a different count are still written as found and are
 *          counted in a warning printed at the end.
 * convert: write -1 for classifiers that are numerically zero.
 ********************************************************************/
void CommaDelimiter(ifstream &inFile, ofstream &outFile, int size, bool convert)
{
    string line;
    int rows = 0;
    int mismatched = 0;

    while (getline(inFile, line)) {
        if (Trim(line).empty())
            continue;   /* blank line, e.g. a trailing newline at end of file */

        int column = 1;
        string::size_type start = 0;
        while (true) {
            const string::size_type comma = line.find(',', start);
            const string::size_type length =
                (comma == string::npos) ? string::npos : comma - start;
            const string field = Trim(line.substr(start, length));

            if (column == 1)
                WriteLabel(outFile, field, convert);
            else
                outFile << ' ' << column - 1 << ':' << field;

            if (comma == string::npos)
                break;
            start = comma + 1;
            ++column;
        }
        outFile << '\n';

        if (column != size)
            ++mismatched;
        ReportProgress(++rows);
    }

    if (mismatched > 0)
        cerr << "Warning: " << mismatched << " row(s) did not contain the expected "
             << size << " values." << endl;
}