[PATCH] osutil: use getdirentriesattr for HFS+ (DO NOT COMMIT)
Sean Farley
sean.michael.farley at gmail.com
Sat Sep 20 09:04:16 CDT 2014
# HG changeset patch
# User Sean Farley <sean.michael.farley at gmail.com>
# Date 1411220690 25200
# Sat Sep 20 06:44:50 2014 -0700
# Node ID a816acfcb4ae39280905073a06dee60a2635a793
# Parent ca854cd4a26a8770fbc22b4d7ee1ac6823b682a5
osutil: use getdirentriesattr for HFS+ (DO NOT COMMIT)
This is a proof of concept patch that uses getdirentriesattr to do a batch call
of fileson OS X in the hopes of speeding up 'hg status'. Unfortunately, the
tests show that this adds no speed at all.
I hope I am doing something really obviously slow. If not, then I hope this
patch serves as a warning to others thinking of using getdirentriesattr.
diff --git a/mercurial/osutil.c b/mercurial/osutil.c
--- a/mercurial/osutil.c
+++ b/mercurial/osutil.c
@@ -17,10 +17,11 @@
#ifdef _WIN32
#include <windows.h>
#include <io.h>
#else
#include <dirent.h>
+#include <sys/attr.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#endif
@@ -284,32 +285,65 @@ static PyObject *makestat(const struct s
if (stat)
memcpy(&((struct listdir_stat *)stat)->st, st, sizeof(*st));
return stat;
}
+typedef struct attrbuff {
+ u_int32_t length;
+ attrreference_t name;
+ struct timespec modTime;
+#if __LITTLE_ENDIAN__
+ mode_t accessMask;
+ short padding;
+#else
+ short padding;
+ mode_t accessMask;
+#endif
+ off_t fileLogicalSize;
+} __attribute__ ((packed)) attrbuff;
+
static PyObject *_listdir(char *path, int pathlen, int keepstat, char *skip)
{
PyObject *list, *elem, *stat, *ret = NULL;
char fullpath[PATH_MAX + 10];
int kind, err;
struct stat st;
- struct dirent *ent;
+
+ unsigned long index;
+ unsigned short done = 0;
+ /* get 50 files at a time */
+ unsigned int batch = 50;
+ unsigned int count;
+ attrbuff *thisEntry;
+ char permAttributes[batch * (sizeof(attrbuff) + 64)];
+ /* the next two variables are junk; we don't use them */
+ unsigned int junkBaseP;
+ unsigned int newState;
+
+
+ /* initialize the struct that tells getdirentries which attributes to
+ retrieve; these must match the attrbuff or else memory will be overwritten
+ at the wrong place and you'll end up with garbage */
+ struct attrlist attrList = { 0 };
+ memset(&attrList, 0, sizeof(attrList));
+ attrList.bitmapcount = ATTR_BIT_MAP_COUNT;
+ attrList.commonattr = ATTR_CMN_NAME | ATTR_CMN_MODTIME | ATTR_CMN_ACCESSMASK;
+ attrList.fileattr = ATTR_FILE_TOTALSIZE;
+
DIR *dir;
-#ifdef AT_SYMLINK_NOFOLLOW
int dfd = -1;
-#endif
if (pathlen >= PATH_MAX) {
errno = ENAMETOOLONG;
PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
goto error_value;
}
strncpy(fullpath, path, PATH_MAX);
fullpath[pathlen] = '/';
+ dfd = open(path, O_RDONLY, 0);
#ifdef AT_SYMLINK_NOFOLLOW
- dfd = open(path, O_RDONLY);
if (dfd == -1) {
PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
goto error_value;
}
dir = fdopendir(dfd);
@@ -323,70 +357,89 @@ static PyObject *_listdir(char *path, in
list = PyList_New(0);
if (!list)
goto error_list;
- while ((ent = readdir(dir))) {
- if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
+ do {
+ count = batch;
+ err = getdirentriesattr(dfd, &attrList, &permAttributes, sizeof(permAttributes), &count, &junkBaseP, &newState, 0);
+ if (err < 0) {
+ printf("error with getdirentriesattr for '%s'. %d/%s\n", path, err, strerror(err));
+ goto error_dir;
+ } else {
+ done = err;
+ }
+ thisEntry = (attrbuff *) permAttributes;
+
+ for (index = 0; index < count; index++) {
+
+ char *nameStart = ((char *) &thisEntry->name) + thisEntry->name.attr_dataoffset;
+ if (!strcmp(nameStart, ".") || !strcmp(nameStart, ".."))
continue;
- kind = entkind(ent);
- if (kind == -1 || keepstat) {
+ if (keepstat) {
#ifdef AT_SYMLINK_NOFOLLOW
err = fstatat(dfd, ent->d_name, &st,
AT_SYMLINK_NOFOLLOW);
#else
- strncpy(fullpath + pathlen + 1, ent->d_name,
+ strncpy(fullpath + pathlen + 1, nameStart,
PATH_MAX - pathlen);
fullpath[PATH_MAX] = 0;
- err = lstat(fullpath, &st);
+ /* Copy the getdirentries info into st so that the code churn is low */
+ st.st_mode = thisEntry->accessMask;
+ st.st_mtime = thisEntry->modTime.tv_sec;
+ st.st_size = thisEntry->fileLogicalSize;
#endif
if (err == -1) {
/* race with file deletion? */
if (errno == ENOENT)
continue;
- strncpy(fullpath + pathlen + 1, ent->d_name,
+ strncpy(fullpath + pathlen + 1, nameStart,
PATH_MAX - pathlen);
fullpath[PATH_MAX] = 0;
PyErr_SetFromErrnoWithFilename(PyExc_OSError,
fullpath);
goto error;
}
kind = st.st_mode & S_IFMT;
}
/* quit early? */
- if (skip && kind == S_IFDIR && !strcmp(ent->d_name, skip)) {
+ if (skip && kind == S_IFDIR && !strcmp(nameStart, skip)) {
ret = PyList_New(0);
goto error;
}
if (keepstat) {
stat = makestat(&st);
if (!stat)
goto error;
- elem = Py_BuildValue("siN", ent->d_name, kind, stat);
+ elem = Py_BuildValue("siN", nameStart, kind, stat);
} else
- elem = Py_BuildValue("si", ent->d_name, kind);
+ elem = Py_BuildValue("si", nameStart, kind);
if (!elem)
goto error;
PyList_Append(list, elem);
Py_DECREF(elem);
+
+ thisEntry = (attrbuff*)((char*)thisEntry + thisEntry->length);
+
}
+ } while ( err == 0 && ! done );
ret = list;
Py_INCREF(ret);
error:
Py_DECREF(list);
error_list:
closedir(dir);
error_dir:
-#ifdef AT_SYMLINK_NOFOLLOW
+
close(dfd);
-#endif
+
error_value:
return ret;
}
static PyObject *statfiles(PyObject *self, PyObject *args)
More information about the Mercurial-devel
mailing list