/*
* $Id: python.c 567 2007-06-24 01:20:20Z elliotth $
*
* Copyright (c) 2000-2003, Darren Hiebert
*
* This source code is released for free distribution under the terms of the
* GNU General Public License.
*
* This module contains functions for generating tags for Python language
* files.
*/
/*
* INCLUDE FILES
*/
#include "general.h" /* must always come first */
#include <string.h>
#include "entry.h"
#include "options.h"
#include "read.h"
#include "routines.h"
#include "vstring.h"
/*
* DATA DEFINITIONS
*/
typedef enum {
K_CLASS, K_FUNCTION, K_MEMBER
} pythonKind;
static kindOption PythonKinds[] = {
{TRUE, 'c', "class", "classes"},
{TRUE, 'f', "function", "functions"},
{TRUE, 'm', "member", "class members"}
};
typedef struct NestingLevel NestingLevel;
typedef struct NestingLevels NestingLevels;
struct NestingLevel
{
int indentation;
vString *name;
boolean is_class;
};
struct NestingLevels
{
NestingLevel *levels;
int n;
int allocated;
};
/*
* FUNCTION DEFINITIONS
*/
#define vStringLast(vs) ((vs)->buffer[(vs)->length - 1])
static boolean isIdentifierFirstCharacter (int c)
{
return (boolean) (isalpha (c) || c == '_');
}
static boolean isIdentifierCharacter (int c)
{
return (boolean) (isalnum (c) || c == '_');
}
/* Given a string with the contents of a line directly after the "def" keyword,
* extract all relevant information and create a tag.
*/
static void makeFunctionTag (vString *const function,
vString *const parent, int is_class_parent)
{
tagEntryInfo tag;
initTagEntry (&tag, vStringValue (function));
tag.kindName = "function";
tag.kind = 'f';
if (vStringLength (parent) > 0)
{
if (is_class_parent)
{
tag.kindName = "member";
tag.kind = 'm';
tag.extensionFields.scope [0] = "class";
tag.extensionFields.scope [1] = vStringValue (parent);
}
else
{
tag.extensionFields.scope [0] = "function";
tag.extensionFields.scope [1] = vStringValue (parent);
}
}
/* If a function starts with __, we mark it as file scope.
* FIXME: What is the proper way to signal such attributes?
* TODO: What does functions/classes starting with _ and __ mean in python?
*/
if (strncmp (vStringValue (function), "__", 2) == 0 &&
strcmp (vStringValue (function), "__init__") != 0)
{
tag.extensionFields.access = "private";
tag.isFileScope = TRUE;
}
else
{
tag.extensionFields.access = "public";
}
makeTagEntry (&tag);
}
/* Given a string with the contents of the line directly after the "class"
* keyword, extract all necessary information and create a tag.
*/
static void makeClassTag (vString *const class, vString *const inheritance,
vString *const parent, int is_class_parent)
{
tagEntryInfo tag;
initTagEntry (&tag, vStringValue (class));
tag.kindName = "class";
tag.kind = 'c';
if (vStringLength (parent) > 0)
{
if (is_class_parent)
{
tag.extensionFields.scope [0] = "class";
tag.extensionFields.scope [1] = vStringValue (parent);
}
else
{
tag.extensionFields.scope [0] = "function";
tag.extensionFields.scope [1] = vStringValue (parent);
}
}
tag.extensionFields.inheritance = vStringValue (inheritance);
makeTagEntry (&tag);
}
/* Skip a single or double quoted string. */
static const char *skipString (const char *cp)
{
const char *start = cp;
int escaped = 0;
for (cp++; *cp; cp++)
{
if (escaped)
escaped--;
else if (*cp == '\\')
escaped++;
else if (*cp == *start)
return cp + 1;
}
return cp;
}
/* Skip everything up to an identifier start. */
static const char *skipEverything (const char *cp)
{
for (; *cp; cp++)
{
if (isIdentifierFirstCharacter ((int) *cp))
return cp;
if (*cp == '"' || *cp == '\'')
{
cp = skipString(cp);
}
}
return cp;
}
/* Skip an identifier. */
static const char *skipIdentifier (const char *cp)
{
while (isIdentifierCharacter ((int) *cp))
cp++;
return cp;
}
static const char *findDefinitionOrClass (const char *cp)
{
while (*cp)
{
cp = skipEverything (cp);
if (!strncmp(cp, "def", 3) || !strncmp(cp, "class", 5))
{
return cp;
}
cp = skipIdentifier (cp);
}
return NULL;
}
static const char *skipSpace (const char *cp)
{
while (isspace ((int) *cp))
++cp;
return cp;
}
/* Starting at ''cp'', parse an identifier into ''identifier''. */
static const char *parseIdentifier (const char *cp, vString *const identifier)
{
vStringClear (identifier);
while (isIdentifierCharacter ((int) *cp))
{
vStringPut (identifier, (int) *cp);
++cp;
}
vStringTerminate (identifier);
return cp;
}
static void parseClass (const char *cp, vString *const class,
vString *const parent, int is_class_parent)
{
vString *const inheritance = vStringNew ();
vStringClear (inheritance);
cp = parseIdentifier (cp, class);
cp = skipSpace (cp);
if (*cp == '(')
{
++cp;
while (*cp != ')')
{
if (*cp == '\0')
{
/* Closing parenthesis can be in follow up line. */
cp = (const char *) fileReadLine ();
if (!cp) break;
vStringPut (inheritance, ' ');
continue;
}
vStringPut (inheritance, *cp);
++cp;
}
vStringTerminate (inheritance);
}
makeClassTag (class, inheritance, parent, is_class_parent);
vStringDelete (inheritance);
}
static void parseFunction (const char *cp, vString *const def,
vString *const parent, int is_class_parent)
{
cp = parseIdentifier (cp, def);
makeFunctionTag (def, parent, is_class_parent);
}
/* Get the combined name of a nested symbol. Classes are separated with ".",
* functions with "/". For example this code:
* class MyClass:
* def myFunction:
* def SubFunction:
* class SubClass:
* def Method:
* pass
* Would produce this string:
* MyClass.MyFunction/SubFunction/SubClass.Method
*/
static boolean constructParentString(NestingLevels *nls, int indent,
vString *result)
{
int i;
NestingLevel *prev = NULL;
int is_class = FALSE;
vStringClear (result);
for (i = 0; i < nls->n; i++)
{
NestingLevel *nl = nls->levels + i;
if (indent <= nl->indentation)
break;
if (prev)
{
if (prev->is_class)
vStringCatS(result, ".");
else
vStringCatS(result, "/");
}
vStringCat(result, nl->name);
is_class = nl->is_class;
prev = nl;
}
return is_class;
}
static NestingLevels *newNestingLevels(void)
{
NestingLevels *nls = xCalloc (1, NestingLevels);
return nls;
}
static void freeNestingLevels(NestingLevels *nls)
{
int i;
for (i = 0; i < nls->allocated; i++)
vStringDelete(nls->levels[i].name);
if (nls->levels) eFree(nls->levels);
eFree(nls);
}
/* TODO: This is totally out of place in python.c, but strlist.h is not usable.
* Maybe should just move these three functions to a separate file, even if no
* other parser uses them.
*/
static void addNestingLevel(NestingLevels *nls, int indentation,
vString *name, boolean is_class)
{
int i;
NestingLevel *nl = NULL;
for (i = 0; i < nls->n; i++)
{
nl = nls->levels + i;
if (indentation <= nl->indentation) break;
}
if (i == nls->n)
{
if (i >= nls->allocated)
{
nls->allocated++;
nls->levels = xRealloc(nls->levels,
nls->allocated, NestingLevel);
nls->levels[i].name = vStringNew();
}
nl = nls->levels + i;
}
nls->n = i + 1;
vStringCopy(nl->name, name);
nl->indentation = indentation;
nl->is_class = is_class;
}
static void findPythonTags (void)
{
vString *const continuation = vStringNew ();
vString *const name = vStringNew ();
vString *const parent = vStringNew();
NestingLevels *const nesting_levels = newNestingLevels();
const char *line;
int line_skip = 0;
boolean longStringLiteral = FALSE;
while ((line = (const char *) fileReadLine ()) != NULL)
{
const char *cp = line;
char *longstring;
const char *keyword;
int indent;
cp = skipSpace (cp);
if (*cp == '#' || *cp == '\0') /* skip comment or blank line */
continue;
/* Deal with line continuation. */
if (!line_skip) vStringClear(continuation);
vStringCatS(continuation, line);
vStringStripTrailing(continuation);
if (vStringLast(continuation) == '\\')
{
vStringChop(continuation);
vStringCatS(continuation, " ");
line_skip = 1;
continue;
}
cp = line = vStringValue(continuation);
cp = skipSpace (cp);
indent = cp - line;
line_skip = 0;
/* Deal with multiline string ending. */
if (longStringLiteral)
{
/* Note: We do ignore anything in the same line after a multiline
* string for now.
*/
if (strstr (cp, "\"\"\""))
longStringLiteral = FALSE;
continue;
}
/* Deal with multiline string start. */
if ((longstring = strstr (cp, "\"\"\"")) != NULL)
{
/* Note: For our purposes, the line just ends at the first long
* string.
*/
*longstring = '\0';
longstring += 3;
longStringLiteral = TRUE;
while ((longstring = strstr (longstring, "\"\"\"")) != NULL)
{
longstring += 3;
longStringLiteral = !longStringLiteral;
}
}
/* Deal with def and class keywords. */
keyword = findDefinitionOrClass (cp);
if (keyword)
{
boolean found = FALSE;
boolean is_class = FALSE;
if (!strncmp (keyword, "def", 3))
{
cp = skipSpace (keyword + 3);
found = TRUE;
}
else if (!strncmp (keyword, "class", 5))
{
cp = skipSpace (keyword + 5);
found = TRUE;
is_class = TRUE;
}
if (found)
{
boolean is_parent_class;
is_parent_class =
constructParentString(nesting_levels, indent, parent);
if (is_class)
parseClass (cp, name, parent, is_parent_class);
else
parseFunction(cp, name, parent, is_parent_class);
addNestingLevel(nesting_levels, indent, name, is_class);
}
}
}
/* Clean up all memory we allocated. */
vStringDelete (parent);
vStringDelete (name);
vStringDelete (continuation);
freeNestingLevels (nesting_levels);
}
extern parserDefinition *PythonParser (void)
{
static const char *const extensions[] = { "py", "pyx", "pxd", "scons", NULL };
parserDefinition *def = parserNew ("Python");
def->kinds = PythonKinds;
def->kindCount = KIND_COUNT (PythonKinds);
def->extensions = extensions;
def->parser = findPythonTags;
return def;
}
/* vi:set tabstop=4 shiftwidth=4: */
|