Docs »
foundations.strings
Source code for foundations.strings

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
**strings.py**

**Platform:**
	Windows, Linux, Mac Os X.

**Description:**
	Defines various string manipulation objects.

**Others:**

"""

#**********************************************************************************************************************
#***	Future imports.
#**********************************************************************************************************************
from __future__ import unicode_literals

#**********************************************************************************************************************
#***	External imports.
#**********************************************************************************************************************
import os
import platform
import posixpath
import random
import re

#**********************************************************************************************************************
#***	Internal imports.
#**********************************************************************************************************************
import foundations.common
import foundations.verbose

#**********************************************************************************************************************
#***	Module attributes.
#**********************************************************************************************************************
# Authorship, licensing and release status metadata of the module.
__author__ = "Thomas Mansencal"
__copyright__ = "Copyright (C) 2008 - 2014 - Thomas Mansencal"
__license__ = "GPL V3.0 - http://www.gnu.org/licenses/"
__maintainer__ = "Thomas Mansencal"
# NOTE(review): the value below looks like an e-mail obfuscation placeholder, not a real address - confirm and restore.
__email__ = "[email protected]"
__status__ = "Production"

# Names exported by a star import of this module.
__all__ = ["LOGGER",
			"ASCII_CHARACTERS",
			"toString",
			"getNiceName",
			"getVersionRank",
			"getSplitextBasename",
			"getCommonAncestor",
			"getCommonPathsAncestor",
			"getWords",
			"filterWords",
			"replace",
			"removeStrip",
			"toForwardSlashes",
			"toBackwardSlashes",
			"toPosixPath",
			"getNormalizedPath",
			"getRandomSequence",
			"isEmail",
			"isWebsite"]

# Logger object returned by foundations.verbose.installLogger(), used by the definitions below for debug messages.
LOGGER = foundations.verbose.installLogger()

# Alphabet sampled by getRandomSequence: ASCII lower / upper case letters, digits and the underscore.
ASCII_CHARACTERS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"

#**********************************************************************************************************************
#***	Module classes and definitions.
#**********************************************************************************************************************
# Alias: toString is the very same callable as foundations.verbose.toUnicode.
toString = foundations.verbose.toUnicode

def getNiceName(name):
	"""
	Converts a camel case string to a nice string: **currentLogText** -> **Current Log Text**.

	Every resulting word goes through :meth:`str.title`, so the characters following the first cased
	character of a word are lower cased (an acronym such as **HTTP** comes out as **Http**).

	Usage::

		>>> getNiceName("getMeANiceName")
		u'Get Me A Nice Name'
		>>> getNiceName("__getMeANiceName")
		u'__Get Me A Nice Name'

	:param name: Current string to be nicified.
	:type name: unicode
	:return: Nicified string.
	:rtype: unicode
	"""

	# First pass: put a space in front of every capitalized word ("getMeANiceName" -> "get Me A Nice Name").
	spaced = re.sub(r"(.)([A-Z][a-z]+)", r"\1 \2", name)
	# Second pass: split the remaining lower case / digit to upper case boundaries ("getHTTP" -> "get HTTP").
	spaced = re.sub(r"([a-z0-9])([A-Z])", r"\1 \2", spaced)
	return " ".join(word.title() for word in spaced.split())

def getVersionRank(version):
	"""
	Converts a version string to its rank.

	The version is split on **.**, **-** and **,**; empty tokens are discarded. The tokens are then handed to
	:func:`foundations.common.unpackDefault` with *length=4* and *default=0* (per the examples below this pads
	short versions with zeros) and each token is weighted by a decreasing power of 1000.

	Usage::

		>>> getVersionRank("4.2.8")
		4002008000000
		>>> getVersionRank("4.0")
		4000000000000
		>>> getVersionRank("4.2.8").__class__
		<type 'int'>

	:param version: Current version to calculate rank.
	:type version: unicode
	:return: Rank.
	:rtype: int
	:raise ValueError: If a token is not an integer literal.
	"""

	# A concrete list (rather than a lazy filter object) keeps the behavior identical on Python 2 and 3.
	tokens = [token for token in re.split(r"[.,-]", version) if token]
	tokens = list(foundations.common.unpackDefault(tokens, length=4, default=0))
	count = len(tokens)
	# The leftmost token is the most significant one: weight 1000 ** count, down to 1000 ** 1 for the last token.
	rank = sum(1000 ** (count - index) * int(token) for index, token in enumerate(tokens))
	LOGGER.debug("> Rank: '{0}'.".format(rank))
	return rank

def getSplitextBasename(path):
	"""
	Gets the basename of a path without its extension.

	Usage::

		>>> getSplitextBasename("/Users/JohnDoe/Documents/Test.txt")
		u'Test'

	:param path: Path to extract the basename without extension.
	:type path: unicode
	:return: Splitext basename.
	:rtype: unicode
	"""

	# Normalizing first drops any trailing separator, so the last path component is always the one returned.
	name = os.path.basename(os.path.normpath(path))
	# os.path.splitext yields a (root, extension) pair, only the root is of interest.
	basename = foundations.common.getFirstItem(os.path.splitext(name))
	LOGGER.debug("> Splitext basename: '{0}'.".format(basename))
	return basename

def getCommonAncestor(*args):
	"""
	Gets common ancestor of given iterables, that is their longest shared leading slice.

	The items of the iterables are compared through sets and thus must be hashable.

	Usage::

		>>> getCommonAncestor(("1", "2", "3"), ("1", "2", "0"), ("1", "2", "3", "4"))
		(u'1', u'2')
		>>> getCommonAncestor("azerty", "azetty", "azello")
		u'aze'

	:param \\*args: Iterables to retrieve common ancestor from.
	:type \\*args: [iterable]
	:return: Common ancestor.
	:rtype: iterable
	:raise ValueError: If no iterable is given.
	"""

	# Walk the iterables in lockstep and stop at the first position where they disagree. Iterating directly
	# (instead of indexing / truth testing the results of map and filter) also works with Python 3 lazy iterators.
	for index, items in enumerate(zip(*args)):
		if len(set(items)) > 1:
			ancestor = foundations.common.getFirstItem(args)[:index]
			break
	else:
		# No divergence over the zipped range: the shortest iterable is a prefix of all the others
		# and a prefix always compares lowest, hence min() returns it.
		ancestor = min(args)
	LOGGER.debug("> Common Ancestor: '{0}'".format(ancestor))
	return ancestor

def getCommonPathsAncestor(*args):
	"""
	Gets common paths ancestor of given paths.

	The paths are split on :data:`os.sep`, so they are expected to use the separator of the running platform.

	Usage::

		>>> getCommonPathsAncestor("/Users/JohnDoe/Documents", "/Users/JohnDoe/Documents/Test.txt")
		u'/Users/JohnDoe/Documents'

	:param \\*args: Paths to retrieve common ancestor from.
	:type \\*args: [unicode]
	:return: Common path ancestor.
	:rtype: unicode
	"""

	# Compare the paths component by component rather than character by character.
	components = [path.split(os.sep) for path in args]
	pathAncestor = os.sep.join(getCommonAncestor(*components))
	LOGGER.debug("> Common Paths Ancestor: '{0}'".format(pathAncestor))
	return pathAncestor

def getWords(data):
	"""
	Extracts the words from given string.

	A word is a maximal run of characters matched by the **\\\\w** regex class, everything else is a separator.

	Usage::

		>>> getWords("Users are: John Doe, Jane Doe, Z6PO.")
		[u'Users', u'are', u'John', u'Doe', u'Jane', u'Doe', u'Z6PO']

	:param data: Data to extract words from.
	:type data: unicode
	:return: Words.
	:rtype: list
	"""

	words = re.findall(r"\w+", data)
	LOGGER.debug("> Words: '{0}'".format(", ".join(words)))
	return words

def filterWords(words, filtersIn=None, filtersOut=None, flags=0):
	"""
	Filters the words using the given filters.

	A word is kept when it matches at least one of the filters in (if any are given) and none of the
	filters out (if any are given). Filters are applied with :func:`re.search`.

	Usage::

		>>> filterWords(["Users", "are", "John", "Doe", "Jane", "Doe", "Z6PO"], filtersIn=("John", "Doe"))
		[u'John', u'Doe', u'Doe']
		>>> filterWords(["Users", "are", "John", "Doe", "Jane", "Doe", "Z6PO"], filtersIn=("\\w*r",))
		[u'Users', u'are']
		>>> filterWords(["Users", "are", "John", "Doe", "Jane", "Doe", "Z6PO"], filtersOut=("\\w*o",))
		[u'Users', u'are', u'Jane', u'Z6PO']

	:param words: Words to filter.
	:type words: tuple or list
	:param filtersIn: Regex filters in list.
	:type filtersIn: tuple or list
	:param filtersOut: Regex filters out list.
	:type filtersOut: tuple or list
	:param flags: Regex flags.
	:type flags: int
	:return: Filtered words.
	:rtype: list
	"""

	filteredWords = []
	for word in words:
		if filtersIn:
			matched = False
			# "pattern" rather than "filter" so the builtin of the same name is not shadowed.
			for pattern in filtersIn:
				if re.search(pattern, word, flags):
					matched = True
					break
				LOGGER.debug("> '{0}' word skipped, filter in '{1}' not matched!".format(word, pattern))
			if not matched:
				continue

		if filtersOut:
			matched = False
			for pattern in filtersOut:
				if re.search(pattern, word, flags):
					LOGGER.debug("> '{0}' word skipped, filter out '{1}' matched!".format(word, pattern))
					matched = True
					break
			if matched:
				continue
		filteredWords.append(word)
	LOGGER.debug("> Filtered words: '{0}'".format(", ".join(filteredWords)))
	return filteredWords

def replace(string, data):
	"""
	Replaces the data occurrences in the string.

	Replacements are applied one after the other in the iteration order of the mapping, each one operating
	on the result of the previous one: a replacement value containing another key may itself get replaced.

	Usage::

		>>> replace("Users are: John Doe, Jane Doe, Z6PO.", {"John" : "Luke", "Jane" : "Anakin", "Doe" : "Skywalker",
		... "Z6PO" : "R2D2"})
		u'Users are: Luke Skywalker, Anakin Skywalker, R2D2.'

	:param string: String to manipulate.
	:type string: unicode
	:param data: Replacement occurrences.
	:type data: dict
	:return: Manipulated string.
	:rtype: unicode
	"""

	# items() exists on both Python 2 and Python 3 dictionaries whereas iteritems() is Python 2 only.
	for old, new in data.items():
		string = string.replace(old, new)
	return string

def removeStrip(string, pattern):
	"""
	Removes the pattern occurrences in the string and strips the result.

	The pattern is a plain substring, not a regex; every occurrence is removed before the leading and
	trailing whitespace is stripped.

	Usage::

		>>> removeStrip("John Doe", "John")
		u'Doe'

	:param string: String to manipulate.
	:type string: unicode
	:param pattern: Pattern to remove.
	:type pattern: unicode
	:return: Manipulated string.
	:rtype: unicode
	"""

	stripped = string.replace(pattern, "")
	return stripped.strip()

def toForwardSlashes(data):
	"""
	Converts backward slashes to forward slashes.

	Usage::

		>>> toForwardSlashes("To\\Forward\\Slashes")
		u'To/Forward/Slashes'

	:param data: Data to convert.
	:type data: unicode
	:return: Converted path.
	:rtype: unicode
	"""

	converted = data.replace("\\", "/")
	# The debug message reports the converted value, not the input.
	LOGGER.debug("> Data: '{0}' to forward slashes.".format(converted))
	return converted

def toBackwardSlashes(data):
	"""
	Converts forward slashes to backward slashes.

	Usage::

		>>> toBackwardSlashes("/Users/JohnDoe/Documents")
		u'\\Users\\JohnDoe\\Documents'

	:param data: Data to convert.
	:type data: unicode
	:return: Converted path.
	:rtype: unicode
	"""

	converted = data.replace("/", "\\")
	# The debug message reports the converted value, not the input.
	LOGGER.debug("> Data: '{0}' to backward slashes.".format(converted))
	return converted

def toPosixPath(path):
	"""
	Converts Windows path to Posix path while stripping drives letters and network server slashes.

	The path is first normalized with the :mod:`os.path` flavor of the running platform, hence a drive
	letter followed by a forward slash is only recognized where that normalization turns it into a backward slash.

	Usage::

		>>> toPosixPath("c:\\Users\\JohnDoe\\Documents")
		u'/Users/JohnDoe/Documents'

	:param path: Windows path.
	:type path: unicode
	:return: Path converted to Posix path.
	:rtype: unicode
	"""

	normalized = os.path.normpath(path)
	# Replace a drive prefix ("c:\") or a double backward slash (network server prefix) by a single forward slash.
	stripped = re.sub(r"[a-zA-Z]:\\|\\\\", "/", normalized)
	posixPath = posixpath.normpath(toForwardSlashes(stripped))
	LOGGER.debug("> Stripped converted to Posix path: '{0}'.".format(posixPath))
	return posixPath

def getNormalizedPath(path):
	"""
	Normalizes a path, escaping slashes if needed on Windows.

	On Windows every backward slash of the normalized path is doubled; on other platforms the path is
	only normalized.

	Usage (on Windows)::

		>>> getNormalizedPath("C:\\Users/johnDoe\\Documents")
		u'C:\\Users\\johnDoe\\Documents'

	:param path: Path to normalize.
	:type path: unicode
	:return: Normalized path.
	:rtype: unicode
	"""

	path = os.path.normpath(path)
	# Both names are checked as in the original implementation: platform.system() is compared to "Windows" and "Microsoft".
	if platform.system() in ("Windows", "Microsoft"):
		path = path.replace("\\", "\\\\")
	LOGGER.debug("> Path: '{0}', normalized path.".format(path))
	return path

def getRandomSequence(length=8):
	"""
	Returns a random sequence of characters picked from :data:`ASCII_CHARACTERS`.

	The sequence is drawn with the :mod:`random` module and therefore is not suitable for security
	sensitive purposes (passwords, tokens).

	Usage::

		>>> getRandomSequence()
		u'N_mYO7g5'

	:param length: Length of the sequence.
	:type length: int
	:return: Random sequence.
	:rtype: unicode
	"""

	return "".join(random.choice(ASCII_CHARACTERS) for _ in range(length))

def isEmail(data):
	"""
	Checks if given data string is an email.

	The whole string has to look like an address: trailing characters after the domain are rejected.
	This is a pragmatic syntax check, not a full RFC 5322 validation.

	Usage::

		>>> isEmail("john.doe@domain.com")
		True
		>>> isEmail("john.doe:domain.com")
		False

	:param data: Data to check.
	:type data: unicode
	:return: Is email.
	:rtype: bool
	"""

	# re.match only anchors the start of the string: "\Z" is required so that trailing junk is not accepted.
	# Hyphens are allowed in the domain and the top level domain length is not capped (".museum", ".photos").
	if re.match(r"[\w.%+-]+@[\w.-]+\.[a-zA-Z]{2,}\Z", data):
		LOGGER.debug("> '{0}' is matched as email.".format(data))
		return True
	else:
		LOGGER.debug("> '{0}' is not matched as email.".format(data))
		return False

def isWebsite(url):
	"""
	Checks if given url string is a website.

	The url has to start with an **http**, **https** or **ftp** scheme followed by at least one host
	character; whatever follows the host is not inspected.

	Usage::

		>>> isWebsite("http://www.domain.com")
		True
		>>> isWebsite("domain.com")
		False

	:param url: Url to check.
	:type url: unicode
	:return: Is website.
	:rtype: bool
	"""

	matched = re.match(r"(http|ftp|https)://([\w\-\.]+)/?", url) is not None
	if matched:
		LOGGER.debug("> '{0}' is matched as website.".format(url))
	else:
		LOGGER.debug("> '{0}' is not matched as website.".format(url))
	return matched