/*
 * Platform shims and low-level file helpers for the folder-dump tool.
 *
 * Non-WIN32 builds emulate HANDLE / DWORD / GetTickCount / CloseHandle on top of POSIX calls.
 *
 * OpenFile(f, write)  - returns INVALID_HANDLE_VALUE on failure. The POSIX branch only adds
 *                       O_CREAT when opening for writing, so a missing input file is reported
 *                       as an error instead of being silently created empty.
 * Seek / Tell         - 64-bit file positioning.
 * Read / ReadAt / Write - return the number of bytes transferred, 0 on failure. The POSIX branch
 *                       no longer stores a -1 result in an unsigned count; this matches the WIN32
 *                       branch, where the count stays 0 when the call fails.
 * GetSize / Eof       - file length, and "position >= length".
 * LgiFormatSize       - formats Size into the caller supplied buffer Str as bytes / K / M / G.
 *                       Integer arguments are cast to match their "%i" conversions.
 *
 * NOTE(review): the bare "#include" directives below lost their header names in extraction;
 * restore them from the original file before building.
 */
#include "stdio.h" #include "stdlib.h" #include "assert.h" #include "string.h" #include "ctype.h" #include "GSegmentTree.h" #include "GArray.h" #include "ScribeDefs.h" #include "GContainers.h" #include "GString.h" #include "GFile.h" #define MaxDirSize 50000 int Args = 0; char **Arg = 0; #ifndef WIN32 #include #include #include #include #include #include typedef int HANDLE; typedef long LONG; typedef uint DWORD; typedef long long int64; typedef unsigned long long uint64; #define INVALID_HANDLE_VALUE -1 uint64 GetTickCount() { timeval tv; gettimeofday(&tv, 0); return ((uint64)tv.tv_sec * 1000) + (tv.tv_usec / 1000); } void CloseHandle(HANDLE h) { close(h); } void _lgi_assert(bool b, char *test, char *file, int line) { if (!b) { printf("%s:%i - Assert: %s\n", file, line, test); exit(-1); } } LgiFunc char *LgiNewUtf16To8(char16 *In, int InLen) { LgiAssert(0); return 0; } #endif HANDLE OpenFile(char *f, bool write) { HANDLE h = INVALID_HANDLE_VALUE; #ifdef WIN32 h = CreateFile( f, write ? GENERIC_WRITE : GENERIC_READ, 0, 0, write ? CREATE_ALWAYS : OPEN_EXISTING, 0, 0); #else int Flags = write ? 
O_RDWR : O_RDONLY; struct stat s; if (write && lstat(f, &s)) { Flags |= O_CREAT; } h = open(f, Flags | O_LARGEFILE, S_IRUSR | S_IWUSR); if (h == INVALID_HANDLE_VALUE) { printf("%s:%i - OpenFile(%s) failed with %i\n", __FILE__, __LINE__, f, errno); } #endif return h; } uint64 Seek(HANDLE f, int64 p, int w = SEEK_SET) { LONG h = p >> 32; #ifdef WIN32 DWORD l = SetFilePointer(f, p & 0xffffffff, &h, w); return l | ((uint64) h << 32); #else return lseek64(f, p, w); #endif } uint64 Tell(HANDLE f) { LONG h = 0; #ifdef WIN32 DWORD l = SetFilePointer(f, 0, &h, SEEK_CUR); return l | ((uint64) h << 32); #else return lseek64(f, 0, SEEK_CUR); #endif } uint64 Read(void *p, uint64 s, HANDLE f) { DWORD r = 0; #ifdef WIN32 ReadFile(f, p, s, &r, 0); #else int64 n = read(f, p, s); if (n > 0) r = (DWORD)n; #endif return r; } uint64 ReadAt(uint64 pos, void *ptr, uint64 size, HANDLE file) { DWORD r = 0; uint64 cur = Tell(file); Seek(file, pos); #ifdef WIN32 ReadFile(file, ptr, size, &r, 0); #else int64 n = read(file, ptr, size); if (n > 0) r = (DWORD)n; #endif Seek(file, cur); return r; } uint64 Write(void *p, uint64 s, HANDLE f) { DWORD w = 0; #ifdef WIN32 WriteFile(f, p, s, &w, 0); #else int64 n = write(f, p, s); if (n > 0) w = (DWORD)n; #endif return w; } uint64 GetSize(HANDLE f) { uint64 s = 0; #ifdef WIN32 DWORD l, h; if ((l = GetFileSize(f, &h)) != 0xffffffff) { s = l | ((uint64)h<<32); } #else int64 Here = lseek64(f, 0, SEEK_CUR); s = lseek64(f, 0, SEEK_END); lseek64(f, Here, SEEK_SET); #endif return s; } bool Eof(HANDLE f) { return Tell(f) >= GetSize(f); } void LgiFormatSize(char *Str, uint64 Size) { int K = 1024; int M = K * K; int G = K * M; if (Size < K) { sprintf(Str, "%i bytes", (int)Size); } else if (Size < 10 * K) { sprintf(Str, "%.2f K", (double) (int64)Size / K); } else if (Size < M) { sprintf(Str, "%i K", (int)(Size / K)); } else if (Size < G) { sprintf(Str, "%.2f M", (double) (int64)Size / M); } else { sprintf(Str, "%.2f G", (double) (int64)Size / G); } } //////////////////////////////////////////////////////////////////////////////// // Utils bool GetOption(char *Opt, char 
*Value = 0) { for (int i=1; i= sizeof(StorageHeader)) { Seek(f, Pos); StorageItemHeader Node; Read(&Node, sizeof(Node), f); if (Node.Magic == STORAGE2_ITEM_MAGIC) { if (!Seek(f, Node.DataLoc)) { printf("Dumping node (at %i) data (at %i):\n", Pos, Node.DataLoc); int Magic = ReadInt32(f); if (Magic == Node.Type) { int Fields = ReadInt32(f); printf("%i fields:\n\n", Fields); for (int i=0; i= sizeof(StorageHeader)) { Seek(f, Pos); StorageItemHeader Node; Read(&Node, sizeof(Node), f); if (Node.Magic != STORAGE2_ITEM_MAGIC) { printf("Error: node magic number wrong (at %i)\n", Pos); } else { Stats->Nodes++; Status = true; switch (Node.Type) { case MAGIC_MAIL: Stats->Email++; break; case MAGIC_FOLDER: case MAGIC_FOLDER_2: Stats->Folder++; break; case MAGIC_CONTACT: Stats->Contact++; break; } printf("Node at %i: Type=%X(%s) Data=%i Dir=%i\n", Pos, Node.Type, ItemTypeName((ScribeItemTypes)Node.Type), Node.DataSize, Node.DirCount); if (Node.DirCount > 0) { for (int i=0; i Children; int ActualFlds; char *AttachmentName; int ParentGrouping; int DataLen; char *Data; Node(int l, StorageItemHeader *h) { Loc = l; Header = *h; Fld = 0; Owner = 0; NewLoc = 0; // Spam = 0; DataLen = 0; Data = 0; AttachmentName = 0; ActualFlds = -1; ParentGrouping = -1; } Node(char *Name, int Type) { // Spam = 0; Loc = 0; Data = 0; DataLen = 0; NewLoc = 0; memset(&Header, 0, sizeof(Header)); Owner = 0; Header.Type = MAGIC_FOLDER_2; AttachmentName = 0; Fld = NEW(Folder); if (Fld) { Fld->Type = Type; Fld->Name = NewStr(Name); if (Fld->Name) { int NameLen = strlen(Fld->Name); DataLen = sizeof(uint32) + // magic sizeof(uint32) + // # of items sizeof(uint16) + // ItemType sizeof(uint32) + sizeof(uint32) + sizeof(uint16) + // Name sizeof(uint32) + NameLen; Data = NEW(char[DataLen]); if (Data) { uint32 *p = (uint32*)Data; *p++ = MAGIC_FOLDER_2; *p++ = 2; // Item type *((uint16*&)p)++ = FIELD_FOLDER_TYPE; *p++ = sizeof(Fld->Type); *p++ = Fld->Type; // Name *((uint16*&)p)++ = FIELD_FOLDER_NAME; *p++ = NameLen; 
/*
 * Node members, continued: end of the Node(Name, Type) constructor, the destructor, the
 * Type() / Size() / IsFolder() accessors, GetAttachmentName(), SetOwner(), TypeName(),
 * the ReadVar() overloads and the first part of ReadFolder().
 *
 * Size()              - DataLen when an in-memory Data buffer exists, otherwise Header.DataSize.
 * GetAttachmentName() - reads a 32-bit length at Header.DataLoc + 12; when 0 < Len < 256 it
 *                       reads that many bytes into a new NUL-terminated AttachmentName. The
 *                       file position is restored afterwards.
 *                       NOTE(review): the saved position is held in a uint32, so positions
 *                       past 4 GB would be truncated.
 * SetOwner(n)         - only acts when both this node's and n's Type() are non-zero; records n
 *                       as Owner and appends this node to n's Children. Asserts if an Owner is
 *                       already set, or if this is a folder and n is not.
 * ReadVar(f, u16/u32) - reads a fixed-size integer; true when all bytes were read.
 * ReadVar(f, string)  - reads a 32-bit length followed by that many bytes into a new buffer of
 *                       Size+1 bytes and NUL-terminates it.
 *                       NOTE(review): Size comes straight from the file and is not range checked.
 * ReadFolder(f)       - seeks to Header.DataLoc and parses either the MAGIC_FOLDER layout or the
 *                       field-tagged MAGIC_FOLDER_2 layout (continues on the next line).
 */
memcpy(p, Fld->Name, NameLen); } } } } ~Node() { DeleteObj(Fld); DeleteArray(Data); DeleteArray(AttachmentName); } int Type() { return Header.Type; } int Size() { return Data ? DataLen : Header.DataSize; } bool IsFolder() { return ( Header.Type == MAGIC_FOLDER OR Header.Type == MAGIC_FOLDER_2 ); } void GetAttachmentName(HANDLE f) { uint32 Old = Tell(f); Seek(f, Header.DataLoc + 12); uint32 Len = 0; if (Read(&Len, sizeof(Len), f) == sizeof(Len)) { if (Len > 0 && Len < 256) { if (AttachmentName = new char[Len+1]) { Read(AttachmentName, Len, f); AttachmentName[Len] = 0; } } } Seek(f, Old); } void SetOwner(Node *n) { if (Type() AND n->Type()) { if (IsFolder()) { assert(n->IsFolder()); } if (Owner) { assert(0); } Owner = n; if (Owner) { Owner->Children[Owner->Children.Length()] = this; } } } char *TypeName() { return ItemTypeName((ScribeItemTypes) Type()); } bool ReadVar(HANDLE f, uint16 &i) { return Read(&i, 2, f) == 2; } bool ReadVar(HANDLE f, uint32 &i) { return Read(&i, 4, f) == 4; } bool ReadVar(HANDLE f, char *&s) { uint32 Size; if (ReadVar(f, Size)) { s = NEW(char[Size+1]); if (s) { if (Read(s, Size, f) == Size) { s[Size] = 0; return true; } DeleteArray(s); } } return false; } bool ReadFolder(HANDLE f) { bool Status = false; if (Header.DataLoc AND Seek(f, Header.DataLoc) == Header.DataLoc) { uint32 Magic; if (ReadVar(f, Magic)) { if (Magic == MAGIC_FOLDER) { if (NOT Fld) Fld = NEW(Folder); if (Fld) { ReadVar(f, Fld->Type); uint32 u; ReadVar(f, u); ReadVar(f, Fld->Name); } } else if (Magic == MAGIC_FOLDER_2) { uint32 Count; if (ReadVar(f, Count)) { int i = 0; while ( Tell(f) < Header.DataLoc + Header.DataSize AND i < Count ) { uint16 Id; if (ReadVar(f, Id)) { switch (Id) { case OLD_FIELD_FOLDER_TYPE: case FIELD_FOLDER_TYPE: { uint32 Size; if (ReadVar(f, Size)) { assert(Size == 4); if (NOT Fld) Fld = NEW(Folder); if (Fld) { ReadVar(f, Fld->Type); } } break; } case OLD_FIELD_FOLDER_NAME: case FIELD_FOLDER_NAME: { char *s = 0; if (ReadVar(f, s)) { if (NOT Fld) Fld = 
/*
 * End of Node::ReadFolder(), the NodeTree container, and the start of VerifyField().
 *
 * ReadFolder(): Status becomes true only when a folder-name field is read in the
 * MAGIC_FOLDER_2 layout; unknown field ids are skipped by seeking over their 32-bit size.
 * NOTE(review): the MAGIC_FOLDER branch on the previous line reads the type and name but never
 * sets Status, so such folders are reported as failed by the caller - confirm this is intended.
 *
 * NodeTree: an unbalanced binary search tree of values, used by Scan() to detect data locations
 * it has already seen. Nodes are carved out of arrays of Block (1024) NodeStore entries that are
 * kept in Blocks and freed together in the destructor.
 *   Insert(Value) - returns true when the value was added, false when an equal value is already
 *                   present (or no storage is available).
 *   Sizeof()      - bytes reserved by all allocated blocks.
 * NOTE(review): the template parameter list after "template" and the element type of "List"
 * were lost in extraction. A new block is started when Used >= Block-1, so the last slot of
 * each block is never used.
 *
 * VerifyField(Type, Field): true when Field is one of the ids listed for that object Type.
 */
NEW(Folder); if (Fld) { Fld->Name = s; Status = true; } } break; } default: { uint32 Size; if (ReadVar(f, Size)) { Seek(f, Size, SEEK_CUR); } break; } } } i++; } } } } } /* if (Fld AND stricmp(Fld->Name, "Spam") == 0) { Spam = true; } */ return Status; } }; template class NodeTree { class NodeStore { public: NodeStore *Left, *Right; T Value; bool Insert(NodeStore *i) { if (i->Value < Value) { if (Left) { return Left->Insert(i); } else { Left = i; return true; } } else if (i->Value > Value) { if (Right) { return Right->Insert(i); } else { Right = i; return true; } } else { return false; } } }; int Used; int Nodes; int Block; NodeStore *Current; List Blocks; NodeStore *Root; public: NodeTree() { Used = 0; Nodes = 0; Current = 0; Root = 0; Block = 1024; } ~NodeTree() { Blocks.DeleteArrays(); } int GetUsed() { return Used; } int Sizeof() { return Blocks.Length() * (sizeof(NodeStore) * Block); } bool Insert(T Value) { if (NOT Current OR Used >= Block-1) { Current = NEW(NodeStore[Block]); Blocks.Insert(Current); Used = 0; } if (Current) { NodeStore *New = Current + Used; New->Left = 0; New->Right = 0; New->Value = Value; if (Root) { if (Root->Insert(New)) { Used++; return true; } } else { Root = New; Used = 1; return true; } } return false; } }; bool VerifyField(uint32 Type, int Field) { switch (Type) { case MAGIC_MAIL: { switch (Field) { case FIELD_FROM: case FIELD_REPLY: case FIELD_DATE_RECEIVED: case FIELD_DATE_SENT: case FIELD_SUBJECT: case FIELD_MESSAGE_ID: case FIELD_INTERNET_HEADER: case FIELD_FLAGS: case FIELD_PRIORITY: case FIELD_CHARSET: case FIELD_MARK_COLOUR: case FIELD_ALTERNATE_HTML: case FIELD_LABEL: case FIELD_CODE_PAGE: case FIELD_REFERENCES: case FIELD_SERVER_UID: case FIELD_HTML_CHARSET: case FIELD_ACCOUNT_ID: case FIELD_TEXT: case FIELD_TO: case FIELD_CC: case FIELD_BCC: case FIELD_ADDRESSED_TO: return true; } break; } case MAGIC_CONTACT: { switch (Field) { case FIELD_TITLE: case FIELD_FIRST_NAME: case FIELD_LAST_NAME: case FIELD_EMAIL_ADDR: case 
/*
 * Remainder of VerifyField() (contact and attachment field ids; anything not listed returns
 * false) followed by the first half of VerifyObject().
 *
 * VerifyObject(f, Type, Pos, Size, RewriteCount, ActualFlds, AttachmentName)
 *   Checks whether the bytes at file offset Pos look like a serialised object of the given
 *   Type and, if so, reports its length in Size. The caller's file position is saved in Old
 *   and restored before returning.
 *
 *   MAGIC_MAIL / MAGIC_CONTACT: after the 4-byte magic a 32-bit field count is read and must be
 *   between 1 and 999. Each field is a 16-bit id accepted by VerifyField(), a 32-bit size with
 *   0 <= s < (1 << 20), then s bytes of payload which are skipped. Size grows by 6 + s per
 *   accepted field and ActualFlds counts them. Parsing stops at the first invalid id or size,
 *   after seeking back to the start of the rejected field.
 *
 *   NOTE(review): the local CurSize is computed but never used, and Debug is hard-wired to 0 so
 *   the diagnostic printf calls are inactive.
 */
FIELD_NICK: case FIELD_SPOUSE: case FIELD_NOTE: case FIELD_UID: case FIELD_TIMEZONE: case FIELD_HOME_STREET: case FIELD_HOME_SUBURB: case FIELD_HOME_POSTCODE: case FIELD_HOME_STATE: case FIELD_HOME_COUNTRY: case FIELD_HOME_PHONE: case FIELD_HOME_MOBILE: case FIELD_HOME_IM: case FIELD_HOME_FAX: case FIELD_HOME_WEBPAGE: case FIELD_WORK_STREET: case FIELD_WORK_SUBURB: case FIELD_WORK_POSTCODE: case FIELD_WORK_STATE: case FIELD_WORK_COUNTRY: case FIELD_WORK_PHONE: case FIELD_WORK_MOBILE: case FIELD_WORK_IM: case FIELD_WORK_FAX: case FIELD_COMPANY: case FIELD_WORK_WEBPAGE: case FIELD_CONTACT_CUST_FLD1: case FIELD_CONTACT_CUST_VAL1: case FIELD_CONTACT_CUST_FLD2: case FIELD_CONTACT_CUST_VAL2: case FIELD_CONTACT_CUST_FLD3: case FIELD_CONTACT_CUST_VAL3: case FIELD_CONTACT_CUST_FLD4: case FIELD_CONTACT_CUST_VAL4: case FIELD_PLUGIN_ASSOC: case FIELD_ALT_EMAIL: return true; } break; } case MAGIC_ATTACHMENT: { switch (Field) { case FIELD_MIME_TYPE: case FIELD_CONTENT_ID: return true; } } } return false; } bool VerifyObject(HANDLE f, uint32 Type, int64 Pos, int64 &Size, bool &RewriteCount, int &ActualFlds, char *&AttachmentName) { bool Status = false; bool Debug = 0; switch (Type) { case MAGIC_MAIL: case MAGIC_CONTACT: { int64 Old = Tell(f); Seek(f, Pos + 4, SEEK_SET); Size = 4; // The magic number int32 Fields = 0; ActualFlds = 0; if (Read(&Fields, sizeof(Fields), f) == sizeof(Fields) AND Fields > 0 AND Fields < 1000) { Size += sizeof(Fields); // Add the field size while (ActualFlds < Fields && !Eof(f)) { uint16 Id = 0; uint32 CurSize = Tell(f) - Pos; if (Read(&Id, sizeof(Id), f) == sizeof(Id)) { if (VerifyField(Type, Id)) { int32 s; if (Read(&s, sizeof(s), f) == sizeof(s) AND s >= 0 AND s < (1 << 20)) { // Valid field... Size += 6 + s; Seek(f, s, SEEK_CUR); Status = true; ActualFlds++; } else { // Not a valid size... Seek(f, Tell(f) - sizeof(s) - sizeof(Id)); if (Debug) printf("Invalid Mail Field Size=%u (Pos=%i)\n", s, (int)Tell(f)); break; } } else { // Not a valid field... 
/*
 * VerifyObject(), continued.
 *
 * Mail / contact case: when at least one field was accepted, RewriteCount is set if the number
 * of fields actually parsed (ActualFlds) differs from the count stored in the object, and a
 * message is printed if the accumulated Size disagrees with the distance the file position
 * moved from Pos.
 *
 * Attachment case: after the 4-byte magic three 32-bit values are read; the source comments
 * label them content, size and name length. They are accepted when i[0] <= 4,
 * i[1] <= (40 << 20) and i[2] <= 255, after which the name is read into a newly allocated,
 * NUL-terminated AttachmentName and Size covers the three values, the name and the data.
 * NOTE(review): the ">= 0" halves of those range checks are always true because i[] is
 * unsigned. Any previous value of AttachmentName is overwritten without being freed, so
 * callers are expected to pass it in as 0.
 */
Seek(f, Tell(f) - sizeof(Id)); if (Debug) printf("Invalid Mail Field=%u (Pos=%i)\n", Id, (int)Tell(f)); break; } } else { // printf("%s:%i - Read failed.\n", __FILE__, __LINE__); break; } } } if (Status) { RewriteCount = ActualFlds != Fields; int64 FileDiff = Tell(f) - Pos; if (FileDiff != Size) { printf("%s:%i - Size error in validate (%i != %i).\n", __FILE__, __LINE__, (int)Size, (int)FileDiff); } /* printf("Validate %s: %i bytes (Rewrite=%i Fields=%i of %i @ %i)\n", ItemTypeName((ScribeItemTypes)Type), (int)Size, RewriteCount, ActualFlds, Fields, (int)Pos); */ } Seek(f, Old, SEEK_SET); break; } case MAGIC_ATTACHMENT: { int64 Old = Tell(f); Seek(f, Pos + 4, SEEK_SET); Size = 4; // The magic number uint32 i[3]; if (Read(i, sizeof(i), f) == sizeof(i)) { if (i[0] >= 0 AND // content i[0] <= 4 AND i[1] >= 0 AND // size i[1] <= (40<<20) AND i[2] >= 0 AND // name len i[2] <= 255) { // Seems ok... Size += sizeof(i) + i[2] + i[1]; Status = true; AttachmentName = NEW(char[i[2]+1]); if (AttachmentName) { Read(AttachmentName, i[2], f); AttachmentName[i[2]] = 0; } else { Seek(f, i[2], SEEK_CUR); } // Seek over data... 
/*
 * End of VerifyObject() (attachment trailer fields), Scan(), FindNode() and the start of
 * PrintTree().
 *
 * VerifyObject, attachment trailer: after the attachment data, zero or more fields accepted by
 * VerifyField() with a 32-bit size below 10000 are skipped and added to Size.
 * Fix: the "Attach size error" message passed the 64-bit Size to a "%i" conversion; it is now
 * cast to int like the equivalent message in the mail/contact case.
 *
 * Scan(f, Nodes): reads the file in 8 MB chunks after the StorageHeader, looking for item
 * headers and for bare object magic numbers. Candidate headers are range checked (data and
 * directory locations inside the file, sizes/counts below fixed limits), de-duplicated by data
 * location with a NodeTree, and cross-checked against the magic number stored at DataLoc.
 * Detached objects that pass VerifyObject() get a synthetic header with Loc 0. Counters for
 * each rejection reason are printed at the end.
 * Fix: the once-a-second progress line printed total minutes in its mm fields; each is now
 * taken modulo 60 so the h:mm:ss values are consistent.
 *
 * FindNode(Loc, n): binary search of n (which must be ordered by Loc) returning the index of
 * the node at Loc, or -1.
 *
 * NOTE(review): several loop headers in this span lost the text between '<' and '>' during
 * extraction and are kept verbatim.
 */
// printf("Attachment '%s' is @ %u, length %u\n", AttachmentName, (uint32)Tell(f), (uint32)i[1]); Seek(f, i[1], SEEK_CUR); while (true) { uint16 Id; if (Read(&Id, sizeof(Id), f) == sizeof(Id)) { if (VerifyField(Type, Id)) { uint32 Bytes; if (Read(&Bytes, sizeof(Bytes), f) == sizeof(Bytes)) { if (Bytes >= 0 AND Bytes < 10000) { Size += 6 + Bytes; Seek(f, Bytes, SEEK_CUR); } else { Seek(f, Tell(f) - sizeof(Bytes)); break; } } else { break; } } else { Seek(f, Tell(f) - sizeof(Id)); break; } } else break; } int FileDiff = Tell(f) - Pos; if (Size != FileDiff) { printf("Attach size error %i != %i\n", (int)Size, FileDiff); } // printf("Attachment ok, '%s' %i bytes\n", Name, i[1]); } else { // printf("Attachment sanity check failed: %i,%i,%i\n", i[0], i[1], i[2]); } } else { printf("%s:%i - Read failed.\n", __FILE__, __LINE__); } Seek(f, Old, SEEK_SET); break; } } return Status; } void Scan(HANDLE f, GArray &Nodes) { // Skip header... uint64 Offset = sizeof(StorageHeader); Seek(f, 0, SEEK_END); uint64 FileSize = Tell(f); Seek(f, Offset, SEEK_SET); int Len = 8 << 20; char *Buf = NEW(char[Len]); if (Buf) { uint64 Used = 0; int Start = GetTickCount(); int Time = Start; NodeTree t; // Counts int DupeDataLoc = 0; int InvalidType = 0; int NoDataOrDir = 0; int BadDataPtr = 0; int BadDirPtr = 0; int BadDataMagic = 0; int GoodData = 0; while (NOT Eof(f)) { Offset = Tell(f) - Used; if (GetTickCount() > Time + 1000) { char o[64], treesize[64]; Time = GetTickCount(); double Elasped = ((double)(Time - Start)) / 1000; double Rate = ((double)(int64)Offset) / Elasped; double TimeLeft = (double)(int64)(FileSize - Offset) / Rate; double Total = TimeLeft + Elasped; LgiFormatSize(o, Offset); LgiFormatSize(treesize, t.Sizeof()); printf( "\r%s Nodes=%i Dupes=%i TreeSize=%s " "Elasped=%i:%02.2i:%02.2i " "Remaining=%i:%02.2i:%02.2i " "Total=%i:%02.2i:%02.2i" "", o, Nodes.Length(), DupeDataLoc, treesize, ((int)Elasped)/3600, (((int)Elasped)/60)%60, ((int)Elasped)%60, ((int)TimeLeft)/3600, 
(((int)TimeLeft)/60)%60, ((int)TimeLeft)%60, ((int)Total)/3600, (((int)Total)/60)%60, ((int)Total)%60); } int r = Read(Buf + Used, Len - Used, f); Used += r; if (Used <= 0) break; else { int i; for (i=0; iType)) == 0) { InvalidType++; } else if (h->DataLoc == 0 AND h->DirLoc == 0) { NoDataOrDir++; } else { // Type is valid int ObjectLoc = Offset + i; if ( h->DataLoc == 0 OR ( h->DataLoc >= 64 AND h->DataLoc < FileSize AND h->DataSize >= 0 AND h->DataSize < (40 << 20) ) ) { if ( h->DirLoc == 0 OR ( h->DirLoc >= 64 AND h->DirLoc < FileSize AND h->DirCount >= 0 AND h->DirCount < MaxDirSize AND h->DirAlloc >= 0 AND h->DirAlloc < MaxDirSize ) ) { // Valid node if (NOT h->DataLoc OR t.Insert(h->DataLoc)) { bool DataOk = h->DataLoc == 0; if (h->DataLoc) { uint32 m; ReadAt(h->DataLoc, &m, sizeof(m), f); if (m == MAGIC_FOLDER OR m == MAGIC_FOLDER_2) { DataOk = h->Type == MAGIC_FOLDER OR h->Type == MAGIC_FOLDER_2; } else { DataOk = m == h->Type; } if (NOT DataOk) { // printf(" BadDataMagic: %08.8x (Type=%s Header=%i Data=%i)\n", m, Type, ObjectLoc, h->DataLoc); } } if (DataOk) { Node *n = NEW(Node(ObjectLoc, h)); if (n) { Nodes.Add(n); } } else { BadDataMagic++; } } else if (h->DataLoc) { DupeDataLoc++; } } else { BadDirPtr++; /* printf("\t%s @ %i\n", Type, ObjectLoc); printf("\tInvalid Directory Loc/Count/Alloc @ Loc %u. DirLoc=%u DirCount=%u DirAlloc=%u\n", ObjectLoc, h->DirLoc, h->DirCount, h->DirAlloc); */ } } else { BadDataPtr++; /* printf("\t%s @ %i\n", Type, ObjectLoc); printf("\tInvalid DataSeg Loc/Size @ Loc %u. 
DataLoc=%u DataSize=%u\n", ObjectLoc, h->DataLoc, h->DataSize); */ } } break; } case MAGIC_MAIL: case MAGIC_FOLDER: case MAGIC_FOLDER_2: case MAGIC_CONTACT: case MAGIC_ATTACHMENT: { // Might be a detached object int64 Size = 0; int64 DataLoc = Offset + i; bool RewriteCount = false; int ActualFlds = 0; char *AttachmentName = 0; if (VerifyObject(f, i32, DataLoc, Size, RewriteCount, ActualFlds, AttachmentName)) { if (t.Insert(DataLoc)) { GoodData++; StorageItemHeader h; h.Magic = STORAGE2_ITEM_MAGIC; h.Type = i32; h.DataLoc = DataLoc; h.DataSize = Size; h.ParentLoc = 0; h.DirLoc = 0; h.DirCount = 0; h.DirAlloc = 0; Node *n = NEW(Node(0, &h)); if (n) { n->AttachmentName = AttachmentName; AttachmentName = 0; if (RewriteCount) { n->ActualFlds = ActualFlds; } Nodes.Add(n); } } else if (i32 == MAGIC_ATTACHMENT) { for (int n=0; nLoc == DataLoc) { Nodes[n]->AttachmentName = AttachmentName; AttachmentName = 0; break; } } } DeleteArray(AttachmentName); } break; } } } if (i < Used) { memmove(Buf, Buf + i, Used - i); Used = Used - i; } } } printf( " DupeDataLoc = %i\n" " InvalidType = %i\n" " NoDataOrDir = %i\n" " BadDirPtr = %i\n" " BadDataPtr = %i\n" " BadDataMagic = %i\n" " -------------------\n" " GoodData = %i\n", DupeDataLoc, InvalidType, NoDataOrDir, BadDirPtr, BadDataPtr, BadDataMagic, GoodData); DeleteArray(Buf); } } int FindNode(int Loc, GArray &n) { if (n.Length() == 0) { return -1; } else if (n.Length() == 1) { return n[0]->Loc == Loc ? 0 : -1; } int Low = 0, High = n.Length()-1; while (true) { if (n[Low]->Loc == Loc) { return Low; } if (n[High]->Loc == Loc) { return High; } int i = (High + Low) >> 1; assert(i != Low); assert(i != High); Node *Cur = n[i]; if (Cur->Loc == Loc) { return i; } else if (Cur->Loc > Loc) { // Choose lower half High = i; } else { // Choose upper half Low = i; } if (High - Low < 2) { return -1; } } } void PrintTree(GStream &f, Node *n, int Depth = 0) { if (n AND n->IsFolder()) { int i; for (i=0; iFld ? n->Fld->Name : 0, n->Loc, n->Data ? 
n->DataLen : n->Header.DataSize, n->Children.Length()); for (i=0; iChildren.Length(); i++) { Node *c = n->Children[i]; if (c->IsFolder()) { PrintTree(f, c, Depth+1); } } } } void ExportData(HANDLE Out, HANDLE In, Node *n, StorageItemHeader *h) { if (Out AND In AND n AND h) { uint32 Pos = Tell(Out); if (Pos == h->DataLoc) { if (n->Data) { Write(n->Data, n->DataLen, Out); } else { // printf("Data write starting at %i for %i bytes ", ftell(Out), n->Header.DataSize); Seek(In, n->Header.DataLoc); char Buf[2048]; for (uint32 i=0; iHeader.DataSize;) { int Len = min(n->Header.DataSize - i, sizeof(Buf)); int r = Read(Buf, Len, In); if (r == Len) { if (i == 0 AND n->ActualFlds >= 0) { int *p = (int*) Buf; p[1] = n->ActualFlds; } int w = Write(Buf, r, Out); if (w == r) { i += w; } else { assert(0); } } else { assert(0); break; } } // printf("finishing at %i\n", ftell(Out)); } } else { printf("Export data error: Pos=%i != DataLoc=%i\n", Pos, h->DataLoc); assert(0); } } } DWORD ExportTime = 0; int ExportDone = 0; struct FolderCounts { int NodeCount; int DataSize; int Mail; int Contacts; int Folders; }; void CountNodes(Node *n, FolderCounts &Counts) { for (int i=0; iChildren.Length(); i++) { CountNodes(n->Children[i], Counts); } Counts.NodeCount++; Counts.DataSize += sizeof(StorageItemHeader); Counts.DataSize += n->Header.DataSize; switch (n->Type()) { case MAGIC_MAIL: Counts.Mail++; break; case MAGIC_CONTACT: Counts.Contacts++; break; case MAGIC_FOLDER: case MAGIC_FOLDER_2: Counts.Folders++; break; } } void ExportNode(HANDLE Out, HANDLE In, Node *n, StorageItemHeader *h, FolderCounts &Counts) { if (Out AND In AND n) { if (GetTickCount() - ExportTime > 1000) { int64 Size = Tell(Out); printf("\r\t\t%I64imb, %i of %i, %i%%", Size>>20, ExportDone, Counts.NodeCount, ExportDone*100/Counts.NodeCount); ExportTime = GetTickCount(); } h->Magic = STORAGE2_ITEM_MAGIC; h->Type = n->Type(); assert(h->Type); // Write directory... StorageItemHeader *Dir = n->Children.Length() ? 
/*
 * Remainder of ExportNode(), the GNodeSeg helper and the opening of Export().
 *
 * ExportNode(): when the node has children, a zeroed directory of one StorageItemHeader per
 * child is written first to reserve its space; each child is then exported recursively into
 * its directory slot (its NewLoc is the slot's file offset), and finally the completed
 * directory is written back over the placeholder and the file position returns to the end.
 * Afterwards, if the node has file data or an in-memory buffer, h->DataLoc / h->DataSize are
 * set and the payload is written with ExportData().
 *
 * GNodeSeg: a GSegment covering a node's data range (Header.DataLoc, Header.DataSize); used
 * later in Export() to drop children that refer to the same data.
 *
 * Export(f, Nodes, InFile): rebuilds a folder tree from the scanned nodes and writes it to a
 * new file; returns the size of that file. This line covers setting up the orphan holding
 * folders and loading every folder node with ReadFolder(); folders that fail to load have
 * their Header.Type cleared (continues on the next line).
 *
 * NOTE(review): loop headers and container element types that lost text between '<' and '>'
 * in extraction are left as found.
 */
NEW(StorageItemHeader[n->Children.Length()]) : 0; if (Dir) { // Write a blank directory h->DirLoc = Tell(Out); h->DirCount = h->DirAlloc = n->Children.Length(); uint32 DirSize = h->DirCount * sizeof(StorageItemHeader); memset(Dir, 0, DirSize); Write(Dir, DirSize, Out); // Write children uint32 CurSize = 0; for (int i=0; iChildren.Length(); i++) { Node *c = n->Children[i]; c->NewLoc = h->DirLoc + (i * sizeof(StorageItemHeader)); Dir[i].ParentLoc = n->NewLoc; // CurSize = Tell(Out); // printf("Write %s %i -> %i, %i bytes @ %imb\n", c->TypeName(), c->Loc, c->NewLoc, c->Size(), CurSize>>20); ExportNode(Out, In, c, Dir + i, Counts); } // Write the directory Seek(Out, h->DirLoc); Write(Dir, DirSize, Out); Seek(Out, 0, SEEK_END); } if (n->Header.DataLoc OR n->Data != 0) { h->DataLoc = Tell(Out); h->DataSize = n->Data ? n->DataLen : n->Header.DataSize; // Write data... ExportData(Out, In, n, h); ExportDone++; } DeleteArray(Dir); } } class GNodeSeg : public GSegment { public: Node *n; GNodeSeg(Node *node) { n = node; Start = n->Header.DataLoc; Length = n->Header.DataSize; } }; int64 Export(HANDLE f, GArray &Nodes, char *InFile) { int64 ExportSize = 0; // Build list of folders first... GArray Folders; Node OrphanEmail("Orphan Email", MAGIC_MAIL); Node OrphanAttachments("Orphan Attachments", MAGIC_MAIL); Node OrphanContacts("Orphan Contacts", MAGIC_CONTACT); Node OrphanCalendars("Orphan Calendar", MAGIC_CALENDAR); GArray ParentGroups; printf("\tScanning for folders...\n"); int i; for (i=0; iIsFolder()) { Folders.Add(n); if (n->ReadFolder(f)) { printf("\t\t%i: Folder '%s' with %i children (%s containing %s)\n", n->Loc, n->Fld->Name, n->Header.DirCount, ItemTypeName((ScribeItemTypes) n->Type()), ItemTypeName((ScribeItemTypes) n->Fld->Type)); } else { printf("\t\t%i: Failed folder load. 
/*
 * Export(), continued: folder fallback, sort check and parent-pointer repair.
 *
 * - If no folder node was found, a synthetic "Mailbox" folder is created to act as the root.
 * - The node list must be in strictly increasing Loc order (nodes with Loc 0 are ignored by
 *   the check) because FindNode() binary-searches it; if not, Export() asserts and returns 0.
 * - For every node that has a directory, the directory's first entry is located with
 *   FindNode(); consecutive nodes whose Loc matches the expected slot offsets are treated as
 *   its children and their Header.ParentLoc is corrected to point at the node. A directory
 *   whose first entry cannot be found is cleared from the header.
 *
 * NOTE(review): the "int asd=0;" statement is a leftover no-op, apparently a breakpoint
 * anchor. Loop headers that lost text between '<' and '>' in extraction are left as found.
 */
(%s)\n", n->Loc, ItemTypeName((ScribeItemTypes) n->Type())); n->Header.Type = 0; } } } printf("\t%i folders found.\n", Folders.Length()); if (Folders.Length() == 0) { // Create mailbox to hang stuff off Folders[0] = NEW(Node("Mailbox", MAGIC_NONE)); } // Check that node list is sorted bool Sorted = true; int CurLoc = 0; for (i=0; iLoc) { int Here = Nodes[i]->Loc; if (Here > CurLoc) { CurLoc = Here; } else { Sorted = false; break; } } } printf("\tNodes sorted: %i\n", Sorted); if (NOT Sorted) { LgiAssert(!"Not sorted!!"); return 0; } // Fix up as many parent pointers as we can printf("\tCorrecting tree errors... (%i nodes)\n", Nodes.Length()); int ParentPtrCorrect = 0; int DirsRemoved = 0; int NodesChecked = 0; for (i=0; iIsFolder()) { int asd=0; } if (n->Loc > 0 AND n->Header.DirLoc) { int Pos = n->Header.DirLoc; int Dir = FindNode(Pos, Nodes); if (Dir >= 0) { int j; for (j=0; jHeader.DirCount; j++) { Node *c = (Dir + j) < Nodes.Length() ? Nodes[Dir + j] : 0; if (c) { if (Pos != c->Loc) { break; } if (n->Loc != c->Header.ParentLoc) { c->Header.ParentLoc = n->Loc; ParentPtrCorrect++; } NodesChecked++; } Pos += sizeof(StorageItemHeader); } if (j < n->Header.DirCount) { printf("Error didn't scan all of %s\n", n->TypeName()); } } else { // Dir doesn't exist n->Header.DirLoc = 0; n->Header.DirCount = 0; n->Header.DirAlloc = 0; DirsRemoved++; } } } printf("\t\tNodes Checked: %i, Parent ptrs fixed: %i, Missing directories removed: %i\n", NodesChecked, ParentPtrCorrect, DirsRemoved); // Assign all non-folder nodes to a folder printf("\tAssigning nodes to folders...\n"); int Orphans = 0; int Owned = 0; int Unassigned = 0; Node *FirstFolder = Folders.Length() > 0 ? 
/*
 * Export(), continued: giving each node an owner.
 *
 * - Nodes with Loc == 0 (detached objects found by magic number) are skipped here and picked
 *   up by the orphan handling further on.
 * - Attachments look their parent up in the full node list by Header.ParentLoc.
 * - Everything else looks its parent up in the folder list; if the parent folder is missing,
 *   nodes are grouped by their ParentLoc value so items that shared a folder stay together
 *   (continues on the next line).
 *
 * NOTE(review): the attachment lookup tests "Index > 0" while the folder lookup tests
 * "Index >= 0". FindNode() returns -1 for "not found", so index 0 is never accepted as an
 * attachment's parent - confirm whether that is deliberate before changing it.
 * FirstFolder is assigned but not referenced in the visible code.
 * The large commented-out block is the disabled spam-folder membership check.
 */
Folders[0] : 0; for (i=0; iType()) { if (n->Loc == 0) { // Doesn't have a valid directory entry so do nothing and // let the node be grafted onto an orphan folder } else if (n->Type() == MAGIC_ATTACHMENT) { int Index = FindNode(n->Header.ParentLoc, Nodes); if (Index > 0) { n->SetOwner(Nodes[Index]); } } else { int Index = FindNode(n->Header.ParentLoc, Folders); if (Index >= 0) { /* Node *f = Folders[Index]; if (f->Spam) { int Dir = FindNode(f->Header.DirLoc, Nodes); if (Dir >= 0) { bool Has = false; for (int k=0; kHeader.DirCount; k++) { Node *c = Nodes[Dir+k]; if (c AND c->Loc == f->Header.DirLoc + (k * sizeof(c->Header)) ) { if (c->Loc == n->Loc) { Has = true; break; } } else { break; } } if (Has) { // printf("%s is in spam folder\n", n->TypeName()); } else { // printf("%s isn't in spam folder, loc=%i\n", n->TypeName(), n->Loc); n->Header.Type = 0; } } } */ n->SetOwner(Folders[Index]); } else { // Parent missing, group according to Parent ptr value in an attempt to keep // email from the same folder in the same place. 
/*
 * Export(), continued: parent groups and orphan handling.
 *
 * - Parent groups: a node whose parent folder is missing is placed in a synthetic sub-folder
 *   named after its ParentLoc value. The sub-folder is created on first use, hung under the
 *   orphan folder matching the node's type (mail, contact or calendar) and remembered in
 *   ParentGroups.
 * - Nodes that still have no owner but do have data are classified by re-reading the magic
 *   number at Header.DataLoc: folders go under the first folder, mail / contacts / calendar
 *   entries go under their orphan folder, and an attachment with a readable name is wrapped in
 *   a newly built mail "shell" whose subject is the attachment name. Anything else is counted
 *   as unassigned.
 *
 * NOTE(review): the result of NEW(char[Shell->DataLen]) is written through without a null
 * check. The shell body is assembled with unaligned int/short stores, which assumes a target
 * that tolerates them.
 */
Node *pg = 0; for (int k=0; kParentGrouping == n->Header.ParentLoc) { pg = p; break; } } if (!pg) { Node *Type = 0; // Create parent group switch (n->Header.Type) { case MAGIC_MAIL: Type = &OrphanEmail; break; case MAGIC_CONTACT: Type = &OrphanContacts; break; case MAGIC_CALENDAR: Type = &OrphanCalendars; break; } if (Type) { // Create sub-folder for pg char FolderName[256]; sprintf(FolderName, "%i", n->Header.ParentLoc); if (pg = new Node(FolderName, n->Header.Type)) { pg->ParentGrouping = n->Header.ParentLoc; pg->SetOwner(Type); ParentGroups.Add(pg); } } } if (pg) { n->SetOwner(pg); } } } if (n->Owner) { // printf("\t\t%s @ %i Ok\n", ItemTypeName((ScribeItemTypes) n->Header.Type), n->Loc); Owned++; } else if (n->Header.DataLoc) { // printf("\t\t%s @ %i doesn't have a owner (Parent=%i)\n", // ItemTypeName((ScribeItemTypes) n->Header.Type), n->Loc, n->Header.ParentLoc); int Magic = 0; Seek(f, n->Header.DataLoc); Read(&Magic, sizeof(Magic), f); if (ItemTypeName((ScribeItemTypes) Magic)) { switch (n->Type()) { case MAGIC_FOLDER: case MAGIC_FOLDER_2: { if (Folders.Length() > 0) { n->SetOwner(Folders[0]); } break; } case MAGIC_MAIL: { n->SetOwner(&OrphanEmail); break; } case MAGIC_CONTACT: { n->SetOwner(&OrphanContacts); break; } case MAGIC_CALENDAR: { n->SetOwner(&OrphanCalendars); break; } case MAGIC_ATTACHMENT: { if (!n->AttachmentName) { n->GetAttachmentName(f); } if (n->AttachmentName) { // Create an email shell to hold the orphaned attachment StorageItemHeader h; h.Magic = STORAGE2_ITEM_MAGIC; h.Type = MAGIC_MAIL; h.DataLoc = 0; h.DataSize = 0; h.ParentLoc = 0; h.DirLoc = 0; h.DirCount = 0; h.DirAlloc = 0; Node *Shell = NEW(Node(0, &h)); if (Shell) { Shell->DataLen = 4 + 4 + (2 + 4 + strlen(n->AttachmentName)) + (2 + 4 + 4); Shell->Data = NEW(char[Shell->DataLen]); int *p = (int*) Shell->Data; *p++ = MAGIC_MAIL; *p++ = 2; short *s = (short*)p; *s++ = FIELD_SUBJECT; p = (int*)s; int NameLen = strlen(n->AttachmentName); *p++ = NameLen; memcpy(p, n->AttachmentName, 
/*
 * Export(), continued: finishing the attachment shell, culling duplicate children and opening
 * the output file.
 *
 * - The attachment shell's second field is FIELD_FLAGS = MAIL_READ | MAIL_ATTACHMENTS; the
 *   attachment is owned by the shell and the shell by the "Orphan Attachments" folder.
 * - Duplicate culling: for each folder, children are inserted into a GSegmentTree keyed by
 *   their data range. An insert fails when another child already covers that range, so
 *   rebuilding the child list from the tree's index drops nodes pointing at the same data.
 * - The export file is "export.mail2" in the same directory as InFile.
 *
 * Fix: the output path is now built with bounded copies into a MAX_PATH buffer, and when
 * InFile contains no directory separator the path becomes just "export.mail2". Previously the
 * path was strcpy'd into a 256-byte buffer and, with no separator present, was left equal to
 * InFile - so the input file itself was opened for writing (and truncated on LINUX builds).
 *
 * NOTE(review): loop headers that lost text between '<' and '>' in extraction are left as found.
 */
NameLen); s = (short*)((char*)p + NameLen); *s++ = FIELD_FLAGS; p = (int*)s; *p++ = 4; *p++ = MAIL_READ | MAIL_ATTACHMENTS; n->SetOwner(Shell); Shell->SetOwner(&OrphanAttachments); } } else { Unassigned++; } break; } default: { /* printf("\t\tOrphaned %s @ %i not assigned (parent=%i).\n", ItemTypeName((ScribeItemTypes) n->Type()), n->Loc, n->Header.ParentLoc); */ Unassigned++; break; } } } else { Unassigned++; } Orphans++; } } } printf("\t\tOwned: %i, Orphans: %i of which %i are unassigned, %i parent groups.\n", Owned, Orphans, Unassigned, ParentGroups.Length()); // Cull duplicates from folders printf("\tCulling dupelicate nodes from folders...\n"); int CulledNodes = 0; for (i=0; iIsFolder()) { // Make a tree of all the nodes, discarding nodes that point to the same // data GSegmentTree Segs; for (int s=0; sChildren.Length(); s++) { GNodeSeg *Ns = NEW(GNodeSeg(Folder->Children[s])); if (Ns) { // This will fail if a node is already marking that // section of the file as it's data segment Segs.Insert(Ns); } } // Now write the fixed list back into the folder's child list GNodeSeg **Index = (GNodeSeg **) Segs.CreateIndex(); if (Index) { int Old = Folder->Children.Length(); Folder->Children.Length(0); for (int c=0; cChildren[c] = Index[c]->n; } CulledNodes += Old - Folder->Children.Length(); DeleteArray(Index); } } } printf("\t\tCulled %i nodes.\n", CulledNodes); // Export nodes to new folder file printf("\tWriting export folders...\n"); char ExportFile[MAX_PATH]; strsafecpy(ExportFile, InFile, sizeof(ExportFile)); char *ExDir = strrchr(ExportFile, DIR_CHAR); ExDir = ExDir ? ExDir + 1 : ExportFile; strsafecpy(ExDir, "export.mail2", sizeof(ExportFile) - (ExDir - ExportFile)); HANDLE Export = OpenFile(ExportFile, true); if (Export != INVALID_HANDLE_VALUE) { #ifdef LINUX ftruncate64(Export, 0); #endif Seek(Export, 0, SEEK_SET); // Write the header. StorageHeader Header; memset(&Header, 0, sizeof(Header)); Header.Magic = STORAGE2_MAGIC; if (Write(&Header, sizeof(Header), Export)) { // Write out the nodes... 
/*
 * End of Export() and the start of Dump().
 *
 * Export(), final stage: each non-empty orphan folder is attached to the root mailbox
 * (Folders[0]). A provisional root StorageItemHeader is written, the tree is counted with
 * CountNodes() for the progress display, ExportNode() writes the whole tree, and the finished
 * root header is then written again at Mailbox->NewLoc. The function returns the offset
 * reported by seeking to the end of the export file, or 0 if the file could not be opened.
 * NOTE(review): the "if (0)" block that prints the folder tree to a hard-coded path is
 * disabled debugging code.
 *
 * Dump(f, Nodes, InFile): writes a text description of every scanned node to
 * "folder-nodes.txt" next to InFile, followed by mail / contact / folder totals. It returns
 * false when InFile has no directory separator or the output file cannot be opened.
 *
 * NOTE(review): the loop header in Dump() lost text between '<' and '>' in extraction and is
 * left as found.
 */
Node *Mailbox = Folders[0]; if (OrphanEmail.Children.Length()) { printf(" Attaching %i orphan email...\n", OrphanEmail.Children.Length()); OrphanEmail.SetOwner(Mailbox); } if (OrphanAttachments.Children.Length()) { printf(" Attaching %i orphan attachments...\n", OrphanAttachments.Children.Length()); OrphanAttachments.SetOwner(Mailbox); } if (OrphanContacts.Children.Length()) { printf(" Attaching %i orphan contacts...\n", OrphanContacts.Children.Length()); OrphanContacts.SetOwner(Mailbox); } if (OrphanCalendars.Children.Length()) { printf(" Attaching %i orphan calendar events...\n", OrphanCalendars.Children.Length()); OrphanCalendars.SetOwner(Mailbox); } if (0) { printf("\tFolder tree:\n"); GFile f; if (f.Open("G:\\Mail\\Matt\\tree.txt", O_WRITE)) { f.SetSize(0); PrintTree(f, Folders[0], 2); } } Mailbox->NewLoc = Tell(Export); StorageItemHeader Root; memset(&Root, 0, sizeof(Root)); Root.Magic = STORAGE2_ITEM_MAGIC; Root.Type = MAGIC_FOLDER_2; Root.DirCount = Root.DirAlloc = Mailbox->Children.Length(); Root.ParentLoc = 0; Write(&Root, sizeof(Root), Export); FolderCounts Counts; ZeroObj(Counts); CountNodes(Folders[0], Counts); printf("\t\tExport: %i mb, %i nodes, %i mail, %i contacts, %i folders\n", Counts.DataSize >> 20, Counts.NodeCount, Counts.Mail, Counts.Contacts, Counts.Folders); ExportNode(Export, f, Folders[0], &Root, Counts); printf("\n"); Seek(Export, Mailbox->NewLoc, SEEK_SET); Write(&Root, sizeof(Root), Export); } ExportSize = Seek(Export, 0, SEEK_END); CloseHandle(Export); } else { printf("%s:%i - Couldn't open '%s' for export.\n", __FILE__, __LINE__, ExportFile); } return ExportSize; } bool Dump(HANDLE f, GArray &Nodes, char *InFile) { char Out[MAX_PATH]; strsafecpy(Out, InFile, sizeof(Out)); char *d = strrchr(Out, DIR_CHAR); if (!d) return false; strcpy(d + 1, "folder-nodes.txt"); GFile o; if (!o.Open(Out, O_WRITE)) return false; int Mail = 0; int Contacts = 0; int Folders = 0; o.SetSize(0); for (int i=0; iLoc, 
/*
 * End of Dump() (per-node details and the type totals) and main().
 *
 * main(): prints the banner, checks the on-disk structure sizes (StorageHeader must be 64
 * bytes and StorageItemHeader 32 bytes), opens Arg[1] read-only and dispatches on the options:
 *   t             - DumpTree()
 *   s             - Scan(), then either "a" (Export to export.mail2) or "d" (Dump to
 *                   folder-nodes.txt)
 *   n with value  - DumpNodeData() at that offset; the offset must be at least 64
 * More than two command-line arguments are required. If the arguments are insufficient, the
 * file cannot be opened, or no recognised option is given, Status stays -1 and the usage text
 * is printed. The process exit code is Status.
 *
 * NOTE(review): the usage text's first line appears to have lost its argument placeholders
 * (text between '<' and '>') in extraction. NodeOffset is a fixed 256-byte buffer filled by
 * GetOption(), whose body is not visible here.
 */
ItemTypeName((ScribeItemTypes)n->Header.Type), n->Header.DataLoc, n->Header.DataSize, n->Header.ParentLoc); o.Print(" Dir=%i, %i items, %i alloc\n", n->Header.DirLoc, n->Header.DirCount, n->Header.DirAlloc); o.Print(" Owner=%p\n", n->Owner); o.Print("\n"); switch (n->Type()) { case MAGIC_MAIL: Mail++; break; case MAGIC_CONTACT: Contacts++; break; case MAGIC_FOLDER: case MAGIC_FOLDER_2: Folders++; break; } } o.Print("\n\nMail: %i\nContacts: %i\nFolders: %i\n", Mail, Contacts, Folders); return true; } int main(int _Args, char **_Arg) { printf("Scribe Folder Dumper v0.30\n"); #ifdef LINUX setvbuf(stdout,(char *)NULL,_IONBF,0); // print mesgs immediately. #endif int Status = -1; Args = _Args; Arg = _Arg; assert(sizeof(StorageHeader) == 64); assert(sizeof(StorageItemHeader) == 32); if (Args > 2) { HANDLE f = OpenFile(Arg[1], false); if (f != INVALID_HANDLE_VALUE) { Status = 0; char NodeOffset[256]; if (GetOption("t")) { DumpTree(f); } else if (GetOption("s")) { GArray Nodes(100000); // NodeTree Store; printf("Starting scan...\n"); Scan(f, Nodes); printf("Scan complete: %i possible nodes found...\n", Nodes.Length()); if (GetOption("a")) { printf("Starting export of all nodes...\n"); int64 ExportSize = Export(f, Nodes, Arg[1]); printf("Export finished with %i byte file.\n", (int)ExportSize); } else if (GetOption("d")) { printf("Starting dump of notes to file...\n"); Dump(f, Nodes, Arg[1]); } } else if (GetOption("n", NodeOffset)) { int Node = atoi(NodeOffset); if (Node >= 64) { DumpNodeData(f, Node); } else { printf("Error: Invalid node address.\n"); } } else { Status = -1; } CloseHandle(f); } } if (Status == -1) { printf( "Usage: folder-dump \n" "\n" " Input Options:\n" " -t Dump the entire tree (use when tree structure is intact).\n" " -s Scan file for objects (in case the tree is broken).\n" "\n" " Output Options:\n" " -n Output node at the given offset.\n" " -a Export all scanned objects to 'export.mail2'\n" " -d Write node information to 'folder-nodes.txt'.\n" "\n" 
"To do a full scan and dump to a new folder file:\n" " folder-dump -s -a\n"); // "\t-s\t\tDump all by magic number scanning (finds orphaned nodes)\n" } return Status; }