#!/usr/bin/env python

# Script current version: 2.6 (07.07.2015)
#
# Compatible Python2 version:
# - 2.4.3 (CentOS-5.1-i386)
# - 2.5.4 (freeBSD-6.2-i386)
# - 2.6.6 (CentOS-6.6-x86_64)
# - 2.7.9 (Windows-7-6.1.7601-SP1-AMD64)

import os, subprocess, sys          # system modules
import mimetypes, re                # for file type detecting
import time                         # for time execution measuring support
import platform                     # for OS platform detecting
import optparse                     # for script execution tunning
import traceback                    # for logging tracebackes of exceptions
import string
import random

################################################################################
#                              SCRIPT CHECK RESULTS:
#
# Legend:
#   [v/v/v] - File analysis works /
#           - OS analysis works /
#           - OS analysis compared with 'psutil' /
#
#   [2.6][v/v/?] CentOS-5.1-i386
#   [1.3][v/v/?] CentOS-5.1-x86_64
#   [1.5][v/v/?] Debian 5.0 x32
#   [1.5][v/v/?] Debian 6.0 x64
#   [1.5][v/v/?] Debian 6.0 x32
#   [1.5][v/v/?] RedHat el5 x32
#   [1.5][v/v/?] RedHat el5 x64
#   [1.5][v/v/?] SuSE 12.1 x32
#   [1.5][v/v/?] SuSE 12.1 x64
#   [1.5][v/v/?] Ubuntu 8.04 x64
#   [1.5][v/v/?] Ubuntu 10.04 x32
#   [1.5][v/v/?] Ubuntu 10.04 x64
#   [1.5][v/x/?] FreeBSD 6.2 x32
#   [2.0][v/v/?(1.5v)] Windows 7 x64
#   [1.5][v/v/?] Windows 2008 x32
#   [1.5][v/v/?] Windows 2012 x64
#   [1.5][v/v/?] Windows 2003 x32
#   [1.5][?/?/?] Windows 2008 R2 x64 (Unable to install python)
#
# TODO: estimate the time spent on OS analysis
# TODO: run the OS estimation several times and report the arithmetic mean
# TODO: auto-zipping
# TODO: remote deploy and sending the reports back
# TODO: compare psutil results with this script's own OS resource usage calculation [under StressTest]
# TODO: check different OSes: Win, Linux
# TODO: [if needed] implement other methods of OS usage calculation for Linux and Windows
# TODO: check real Win/Linux hostings in 'education mode', add new file extensions to the known list if found
# TODO: [how?] add network capacity calculation
# TODO: [need?] add APS resources calculation
# TODO: [how?] DB size calculation
# TODO: investigate performance of script blocks. Check execution time with small BIN/TXT sets
# TODO: implement the Linux HDD test as in Windows: f.open() + f.write() + f.read() and compare with dd results (use the Win method - it works correctly in Linux)
#
################################################################################
#                                <CONFIG:>                                     #
################################################################################
TIME                            = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())                 # script launch time (UTC, taken from time.gmtime())
MIN_MAJOR_PYTHON_VERSION        = 2                                                                 # minimal compatible Python version = 2.4.3
MIN_MINOR_PYTHON_VERSION        = 4
MIN_MICRO_PYTHON_VERSION        = 3
FILE_TYPE_UNK                   = 0                                                                 # unknown filetype
FILE_TYPE_BIN                   = 1                                                                 # binary filetype
FILE_TYPE_TXT                   = 2                                                                 # text filetype
FILE_TYPE_DEF                   = FILE_TYPE_TXT                                                     # default filetype
DETECT_NON_ROOT_FOLDERS_ONLY    = False
# DETECT_NON_ROOT_FOLDERS_ONLY    = True                                                             # True for filtering webspaces folder by folder owner in SCAN_DIR
PLESK_PANELS                    = True                                                              # affect on additional folders (subfolder ./system for Plesk) file scanning
# DETAILED_REPORTS_FOLDER_NAME    = "reports.%s.%s.%s" % (platform.node(), platform.system(), TIME)   # folder name with detailed reports by each webspace, ex: /var/www/vhosts/vhosts-reports/
DETAILED_REPORTS_FOLDER_NAME    = "hosting_analysis_report"                                         # folder (created inside the scanned root) that receives all reports
SUM_TXT_REPORT_FILENAME         = "summary.txt"
SUM_XML_REPORT_FILENAME         = "summary.xml"
WEBSPACES_LOG_FILENAME          = "files.events.txt"
OS_LOG_FILENAME                 = "os.events.txt"
HDD_ANALYSIS_TMP_FILENAME       = "_hddtest.dat"
EOL                             = "\n"                                                              # End of Line
ABS_ROOT_DIR                    = None                                                              # absolute root directory for scanning; assigned later by Logger

# Known extensions, used as a fallback when the MIME type does not classify a file.
# Entries are compared with os.path.splitext(path)[1].lower(), so every entry
# must be lower-case and must start with a dot.
# set([...]) instead of a set literal keeps the script loadable on Python 2.4.
BIN_FILE_EXTENSIONS = set([
	'.bin', '.exe', '.dat', '.mvc', '.ico', '.gif', '.png', '.doc', '.docx', '.pdf',
	'.djvu', '.rar', '.zip', '.xls', '.xlsx', '.ppt', '.pptx', '.mdb', '.accdb', '.cda',
	'.wav', '.wma', '.mp3', '.avi', '.mpg', '.mpeg', '.mdv', '.flv', '.swf', '.divx',
	'.wmv', '.vob', '.bmp', '.jpg', '.jpeg', '.tiff', '.iso', '.mdf', '.mds', '.nrg',
	'.gz', '.ttf', '.mo', '.xap', '.woff', '.mp4', '.m4v', '.psd',
])
TXT_FILE_EXTENSIONS = set([
	'.bat', '.txt', '.log', '.webstat', '.processed', '.cgi', '.pl', '.shtml', '.asp',
	'.cfm', '.php', '.css', '.htm', '.html', '.py', '.fcgi', '.stat', '.bak', '.conf',
	'.rtf', '.json', '.ini', '.sql', '.po', '.scss', '.tmpl', '.less', '.text', '.inf',
])

################################################################################
#                      Handle script execution options                         #
################################################################################

optionParser = optparse.OptionParser()

def _addSwitchOption(shortName, longName, dest, helpText):
	# Register an on/off flag: False unless the flag is given on the command line.
	optionParser.add_option(
		shortName,
		longName,
		action="store_true",
		default=False,
		dest=dest,
		help=helpText
	)

def _addValueOption(shortName, longName, dest, default, helpText):
	# Register an option that stores the value following the flag.
	optionParser.add_option(
		shortName,
		longName,
		action="store",
		default=default,
		dest=dest,
		help=helpText
	)

# Options are registered in the order they should appear in the --help output.
_addSwitchOption(
	"-c", "--skip-content", "noContentAnalysis",
	"Skip file content analysis"
)
_addSwitchOption(
	"-e", '--skip-environment', "noEnvironmentAnalysis",
	"Skip OS environment resources usage analysis"
)
# Root directory, example: '/var/www/vhosts/'.
# Default False means "not set": the script's own folder is used instead.
_addValueOption(
	"-d", '--directory', "directory", False,
	"Specify root directory with webspaces (subfolders), example: '/var/www/vhosts/', \"C:/vhosts\""
)
# Examples: '/var/www/vhosts/rsync-report.txt', 'rsync-report.txt', "C:/www/webspacesanalysis/rsync-win.txt".
# Default None: do not use an input file for OS network speed calculation.
_addValueOption(
	"-n", '--os-network-via-rsync-file', "rsyncReportFile", None,
	"Specify 'rsync' filename with network speed results for parsing and getting into report, examples: '/var/www/vhosts/rsync-report.txt', \"C:/www/webspacesanalysis/rsync-win.txt\""
)
_addSwitchOption(
	"-f", '--skip-unknown-reporting', "noUnknownFilesDeepHandling",
	"Skip accumulating and reporting of unknown file extensions and unknown MIME types [switch off education mode]. This option makes script faster if there are many unknown files"
)
_addSwitchOption(
	"-p", '--use-psutil', "usePsutilModule",
	"Try to use 'psutil' module for CPU and RAM analysis. The module gives truthfully results, but needs painful installation."
)
# TODO
_addSwitchOption(
	"-r", '--rough-analysis', "roughAnalysis",
	"TODO: Ignore 'unknown' filetype and take file into account as 'default' filetype from config ['text' or 'binary']"
)

# Parses sys.argv; positional leftovers end up in scriptArguments
scriptOptions, scriptArguments = optionParser.parse_args()

# Publish the parsed options as module-level settings
SCAN_DIR = scriptOptions.directory
RSYNC_INPUT_DATA = scriptOptions.rsyncReportFile
USE_PSUTIL = scriptOptions.usePsutilModule
ROUGH_ANALYSIS = scriptOptions.roughAnalysis
SKIP_CONTENT_ANALYSIS = scriptOptions.noContentAnalysis
SKIP_ENVIRONMENT_ANALYSIS = scriptOptions.noEnvironmentAnalysis
SKIP_UNKNOWN_FILES_DEEP_HANDLING = scriptOptions.noUnknownFilesDeepHandling

################################################################################
#                                                                              #
#                                    Logger                                    #
#                                                                              #
################################################################################

class Logger:

	def __init__(self):
		global SUM_TXT_REPORT_FILENAME, OS_LOG_FILENAME, WEBSPACES_LOG_FILENAME
		# Check and create folder for reports
		self.__checkAndPrepareFolder()
		self.fileSummaryReport = os.path.join(self.reportsDir, SUM_TXT_REPORT_FILENAME) # Summary report-file, /var/www/vhosts/reports.nsu-dpotapov.Windows.2015-04-23-10-59-27/summary.txt
		self.osEventsLog = os.path.join(self.reportsDir, OS_LOG_FILENAME)               # OS events log-file, /var/www/vhosts/reports.nsu-dpotapov.Windows.2015-04-23-10-59-27/os.events.txt
		self.fileEventsLog = os.path.join(self.reportsDir, WEBSPACES_LOG_FILENAME)      # Webspace file analysis events log-file, /var/www/vhosts/reports.nsu-dpotapov.Windows.2015-04-23-10-59-27/files.events.txt
		# Create all main empty log files
		self.createCustomReport('', self.fileSummaryReport)
		self.createCustomReport('', self.osEventsLog)
		self.createCustomReport('', self.fileEventsLog)
		# Get file flows with logs
		self.flowOs = open(self.osEventsLog, "a")
		self.flowWebspaces = open(self.fileEventsLog, "a")

	def __checkAndPrepareFolder(self):
		global SCAN_DIR                         # Get. Example: False or '/var/www/vhosts/'
		global DETAILED_REPORTS_FOLDER_NAME     # Get. Example: 'reports.nsu-dpotapov.Windows.2015-04-23-10-59-27'
		global ABS_ROOT_DIR                     # Set. Example: '/var/www/vhosts/'
		# Detect ABS_ROOT_DIR - Root directory for scanning
		if not SCAN_DIR:
			print "[INFO] SCAN_DIR is not set, set script current folder as SCAN_DIR"
			ABS_ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
		elif not os.path.exists(SCAN_DIR):
			sys.exit('SCAN_DIR "' + SCAN_DIR + '" not exists')
		elif not os.path.isdir(SCAN_DIR):
			sys.exit('SCAN_DIR "' + SCAN_DIR + '" is not a directory')
		else:
			print "[INFO] SCAN_DIR successfully detected"
			ABS_ROOT_DIR = os.path.abspath(SCAN_DIR)
		print "[INFO] Using '%s' folder for vhosts analysis" % str(ABS_ROOT_DIR)
		# fixme: change reportsDir from "{VHOSTS_DIR}/reports" to "{OS_TMP_FOLDER}/reports"
		self.reportsDir = os.path.join(ABS_ROOT_DIR, DETAILED_REPORTS_FOLDER_NAME)  # /var/www/vhosts/reports.nsu-dpotapov.Windows.2015-04-23-10-59-27/
		if not os.path.exists(self.reportsDir):
			os.makedirs(self.reportsDir)
		else:
			# clear previous report files
			self.deleteAllFilesInFolder(self.reportsDir)

	def deleteAllFilesInFolder(self, folder):
		try:
			try:
				for root, dirs, files in os.walk(folder):
					for name in files:
						os.remove(os.path.join(root, name))
				msg = "[INFO] Already existed folder '%s' was successfully cleaned" % folder
			except Exception, e:
				msg = "[WARNING] Unable to clean folder '%s', error: %s" % (folder, convertExceptionToText(e))
		finally:
			print msg

	def printWhereSummaryTextReportIs(self):
		print "* SUMMARY report [TEXT] saved on %s" % str(self.fileSummaryReport)
	def printWhereSummaryXmlReportIs(self):
		print "* SUMMARY report [XML] saved on %s" % self.getCustomReportFullPath(SUM_XML_REPORT_FILENAME)
	def getCustomReportFullPath(self, fileName):
		return str(os.path.join(self.reportsDir, fileName))
	def saveTextToSummaryReport(self, text):
		self.__write(self.fileSummaryReport, text)
	def saveTextToOsReport(self, text):
		self.flowOs.write(text)
	def saveTextToWebspaceReport(self, text):
		self.flowWebspaces.write(text)
	def createCustomReport(self, text, fileName):
		file = os.path.join(self.reportsDir, fileName)
		f = open(file, 'w')
		try:
			f.write(text)
		finally:
			f.close()
	def __write(self, fileName, text):
		f = open(fileName, "a")
		try:
			f.write(text)
		finally:
			f.close()
	def closeFileFlows(self):
		self.flowOs.close()
		self.flowWebspaces.close()

################################################################################
#                                                                              #
#                            Webspace Abstract                                 #
#                                                                              #
################################################################################

class WebspaceAbstract:
	"""File statistics (counters, sizes, unknown types) with percentage helpers."""

	def __init__(self):
		"""Start with all counters and sizes at zero and empty 'unknown' sets."""
		# Counters of files
		self.allFilesCount = 0                          # all files
		self.binFilesCount = 0                          # binary files
		self.txtFilesCount = 0                          # text files
		self.unkFilesCount = 0                          # unknown files
		self.errFilesCount = 0                          # files which was not calculated and raised exceptions
		self.linksCount = 0                             # symbolic links
		# Sizes [bytes]
		self.allFilesSize = 0                           # all files
		self.binFilesSize = 0                           # binary files
		self.txtFilesSize = 0                           # text files
		self.unkFilesSize = 0                           # unknown files
		# Analyse time [seconds]
		self.analyseTime = 0
		# What could not be classified
		self.unknownMimes = set()                       # 'audio/x-ms-wma', 'audio/x-pn-realaudio', 'audio/mpeg', etc
		self.unknownExtensions = set()                  # '.10','.11','.passwd','.0', etc

	def __hasFiles(self):
		"""Tell whether at least one file was counted."""
		return self.allFilesCount != 0

	def __hasSize(self):
		"""Tell whether the total size is not zero."""
		return self.allFilesSize != 0

	# Share of each file type in the total COUNT of files, in percent (0 when there are no files)

	def getPercentOfBinFiles(self):
		if not self.__hasFiles():
			return 0
		return self.binFilesCount * 100 / self.allFilesCount

	def getPercentOfTxtFiles(self):
		if not self.__hasFiles():
			return 0
		return self.txtFilesCount * 100 / self.allFilesCount

	def getPercentOfUnkFiles(self):
		if not self.__hasFiles():
			return 0
		return self.unkFilesCount * 100 / self.allFilesCount

	# Share of each file type in the total SIZE of files, in percent (0 when the total size is zero)

	def getPercentOfBinFilesSize(self):
		if not self.__hasSize():
			return 0
		return self.binFilesSize * 100 / self.allFilesSize

	def getPercentOfTxtFilesSize(self):
		if not self.__hasSize():
			return 0
		return self.txtFilesSize * 100 / self.allFilesSize

	def getPercentOfUnkFilesSize(self):
		if not self.__hasSize():
			return 0
		return self.unkFilesSize * 100 / self.allFilesSize

################################################################################
#                                                                              #
#                                  Webspace                                    #
#                                                                              #
################################################################################

class Webspace(WebspaceAbstract):

	def __init__(self, rootDir, webspaceName):
		self.rootDir = rootDir                          # /var/www/vhosts/
		self.webspaceName = webspaceName                # a10-52-42-65.qa.plesk.ru
		self.dir = os.path.join(rootDir, webspaceName)  # /var/www/vhosts/a10-52-42-65.qa.plesk.ru/
		WebspaceAbstract.__init__(self)

	def getXmlReport(self):
		return '<webspace><name>%s</name><dir>%s</dir><bin-files-counter>%d</bin-files-counter>\
<txt-files-counter>%d</txt-files-counter><unk-files-counter>%d</unk-files-counter>\
<all-files-counter>%d</all-files-counter><bin-files-size unit="bytes">%d</bin-files-size>\
<txt-files-size unit="bytes">%d</txt-files-size><unk-files-size unit="bytes">%d</unk-files-size>\
<all-files-size unit="bytes">%d</all-files-size><symlinks>%d</symlinks><unprocessed>%d</unprocessed></webspace>' % (
			self.getName(),self.getFullDir(),self.binFilesCount,self.txtFilesCount,self.unkFilesCount,
			self.allFilesCount,self.binFilesSize,self.txtFilesSize,self.unkFilesSize,
			self.allFilesSize,self.linksCount,self.errFilesCount)

	def getFullDir(self): return self.dir
	def getRootDir(self): return self.rootDir
	def getName(self): return self.webspaceName
	def getTimeExecution(self): return self.analyseTime
	def getAllFilesTotalSize(self): return getBytesWithSuffix(self.allFilesSize)
	def getAllFilesCounter(self): return self.allFilesCount

	def __getFiletypeByMime(self, mime_type):
		if not mime_type:
			return FILE_TYPE_UNK
		elif re.match("^(image|application|video|audio)(.*)", mime_type):
			return FILE_TYPE_BIN
		elif re.match("^(text)(.*)", mime_type):
			return FILE_TYPE_TXT
		else:
			if not SKIP_UNKNOWN_FILES_DEEP_HANDLING:
				self.unknownMimes.add(mime_type)
			return FILE_TYPE_UNK

	def __getFileType(self, extension = None):
		if not extension:
			return FILE_TYPE_UNK
		elif extension in BIN_FILE_EXTENSIONS:
			return FILE_TYPE_BIN
		elif extension in TXT_FILE_EXTENSIONS:
			return FILE_TYPE_TXT
		else:
			return FILE_TYPE_UNK

	def __addFile(self, file):
		# ALL FILES
		self.allFilesCount = self.allFilesCount + 1
		# File handling
		fileSize = os.path.getsize(file)
		fileExt = os.path.splitext(file)[1].lower() # '.exe', '', etc
		mimeType = mimetypes.guess_type(file)[0] # None or string 'image/vnd.microsoft.icon'
		#TODO: skip 0 sized files !?

		fileType = self.__getFiletypeByMime(mimeType)
		if fileType == FILE_TYPE_UNK:
			fileType = self.__getFileType(fileExt)

		self.allFilesSize = self.allFilesSize + fileSize
		# BINARY
		if fileType == FILE_TYPE_BIN:
			self.binFilesCount = self.binFilesCount + 1
			self.binFilesSize = self.binFilesSize + fileSize
		# TEXT
		elif fileType == FILE_TYPE_TXT:
			self.txtFilesCount = self.txtFilesCount + 1
			self.txtFilesSize = self.txtFilesSize + fileSize
		# UNKNOWN
		else:
			if not SKIP_UNKNOWN_FILES_DEEP_HANDLING:
				self.unknownExtensions.add(fileExt)
			self.unkFilesCount = self.unkFilesCount + 1
			self.unkFilesSize = self.unkFilesSize + fileSize

	def analyseContent(self):
		timeStart = time.time()
		self.__calculateFolder(self.getFullDir())
		if PLESK_PANELS == True:
			self.__calculateFolder(os.path.join(self.getRootDir(), 'system', self.getName()))
		analyseTimeDuration = time.time() - timeStart
		if analyseTimeDuration > 0:
			self.analyseTime = analyseTimeDuration

	def __calculateFolder(self, folder):
		if os.path.exists(folder):
			for root, dirs, files in os.walk(folder, topdown=False):
				for name in files:
					fileName = os.path.join(root, name)
					if os.path.islink(fileName):
						self.linksCount = self.linksCount + 1
					else:
						try:
							self.__addFile(fileName)
						except Exception, e:
							self.errFilesCount = self.errFilesCount + 1
							LOGGER.saveTextToWebspaceReport(convertExceptionToText(e))

	def __getReport(self, full = False):
		report = "*****************************************************" + EOL
		report += "Webspace: %s" % self.dir + EOL
		report += "*****************************************************" + EOL
		report += 'BIN files counter: %d (%d%%)' % (self.binFilesCount , self.getPercentOfBinFiles()) + EOL
		report += "TXT files counter: %d (%d%%)" % (self.txtFilesCount, self.getPercentOfTxtFiles()) + EOL
		report += "UNK files counter: %d (%d%%)" % (self.unkFilesCount, self.getPercentOfUnkFiles()) + EOL
		report += "ALL files counter: %d" % self.allFilesCount + EOL
		report += EOL
		report += "BIN files size: %s (%d%%)" % (getBytesWithSuffix(self.binFilesSize), self.getPercentOfBinFilesSize()) + EOL
		report += "TXT files size: %s (%d%%)" % (getBytesWithSuffix(self.txtFilesSize), self.getPercentOfTxtFilesSize()) + EOL
		report += "UNK files size: %s (%d%%)" % (getBytesWithSuffix(self.unkFilesSize), self.getPercentOfUnkFilesSize()) + EOL
		report += "ALL files size: %s" % getBytesWithSuffix(self.allFilesSize) + EOL
		report += EOL
		report += "Symbolic links counter: %d" % self.linksCount + EOL
		report += "Counter of files with handling errors: %d" % self.errFilesCount + EOL
		if full:
			report += "----------------------------" + EOL
			report += "Unknown Mime types: " + EOL
			report += EOL.join(self.unknownMimes) + EOL
			report += "----------------------------" + EOL
			report += "Unknown extensions: " + EOL
			report += EOL.join(self.unknownExtensions) + EOL
		return report

	def reportToFileInText(self):
		fileName = "%s.txt" % self.getName()
		LOGGER.createCustomReport(self.__getReport(True), fileName)
		print " * Report [TEXT] for '%s': %s" % (self.getName(), LOGGER.getCustomReportFullPath(fileName))

################################################################################
#                                                                              #
#                             Webspace handler                                 #
#                                                                              #
################################################################################

class WebspaceHandler(WebspaceAbstract):
	"""Accumulates the statistics of all added webspaces and renders the summary report."""

	def __init__(self):
		"""Start with zeroed totals and no webspaces."""
		WebspaceAbstract.__init__(self)
		self.webspaces = []

	def addWebspace(self, webspace):
		"""Remember 'webspace' and add its counters, sizes and time to the totals."""
		self.webspaces.append(webspace)
		for field in (
			'allFilesCount', 'allFilesSize',
			'binFilesCount', 'binFilesSize',
			'txtFilesCount', 'txtFilesSize',
			'unkFilesCount', 'unkFilesSize',
			'analyseTime', 'linksCount', 'errFilesCount',
		):
			setattr(self, field, getattr(self, field) + getattr(webspace, field))
		if SKIP_UNKNOWN_FILES_DEEP_HANDLING:
			return
		self.unknownMimes.update(webspace.unknownMimes)
		self.unknownExtensions.update(webspace.unknownExtensions)

	def getSummaryReport(self, full = False):
		"""Return the summary text report; full=True appends the unknown MIME types and extensions."""
		stars = "*****************************************************"
		dashes = "-----------------------------------------------------"
		hasWebspaces = self.__hasWebspaces()

		# Every entry is one output line; an empty entry is a blank line
		lines = [
			stars,
			"SUMMARY FILE REPORT OF %d WEBSPACES:" % (self.__getAmountOfWebspaces()),
			stars,
		]

		if hasWebspaces:
			lines.append('')
			for webspace in self.webspaces:
				lines.append(" - %s [%s] (%s) (%d files)" % (
					webspace.getFullDir(),
					getTimeDurationForHuman(webspace.getTimeExecution()),
					webspace.getAllFilesTotalSize(),
					webspace.getAllFilesCounter()
				))
		lines.append('')
		lines.append("Total time execution: %s" % (getTimeDurationForHuman(self.analyseTime)))
		lines.append(dashes)

		if hasWebspaces:
			lines.append('BIN files counter: %d (%d%%)' % (self.binFilesCount , self.getPercentOfBinFiles()))
			lines.append("TXT files counter: %d (%d%%)" % (self.txtFilesCount, self.getPercentOfTxtFiles()))
			lines.append("UNK files counter: %d (%d%%)" % (self.unkFilesCount, self.getPercentOfUnkFiles()))
			lines.append("ALL files counter: %d" % self.allFilesCount)
			lines.append('')
			lines.append("BIN files size: %s (%d%%)" % (getBytesWithSuffix(self.binFilesSize), self.getPercentOfBinFilesSize()))
			lines.append("TXT files size: %s (%d%%)" % (getBytesWithSuffix(self.txtFilesSize), self.getPercentOfTxtFilesSize()))
			lines.append("UNK files size: %s (%d%%)" % (getBytesWithSuffix(self.unkFilesSize), self.getPercentOfUnkFilesSize()))
			lines.append("ALL files size: %s" % getBytesWithSuffix(self.allFilesSize))
			lines.append('')
			lines.append("Symbolic links counter: %d" % self.linksCount)
			lines.append("Counter of files with handling errors: %d" % self.errFilesCount)
			lines.append(dashes)
			if full:
				lines.append("UNKNOWN MIME TYPES: ")
				lines.append(EOL.join(self.unknownMimes))
				lines.append(dashes)
				lines.append("UNKNOWN EXTENSIONS: ")
				lines.append(EOL.join(self.unknownExtensions))
		else:
			lines.append('NO ANY WEBSPACES. NOTHING TO REPORT.')
		# Each line, including the last one, is terminated by EOL
		return EOL.join(lines) + EOL

	def __hasWebspaces(self):
		"""Tell whether at least one webspace was added."""
		return bool(self.webspaces)

	def __getAmountOfWebspaces(self):
		"""Return the number of added webspaces."""
		return len(self.webspaces)

################################################################################
#                                                                              #
#                                   Reporter                                   #
#                                                                              #
################################################################################

class Reporter:

	def __init__(self):
		self.summaryReport = ''                         # Summary report text
		self.xmlReport = '<?xml version="1.0"?><data>'  # Summary XML report | no module 'json' on freeBSD OS

	def run(self):
		global ABS_ROOT_DIR
		# file processing
		if not SKIP_CONTENT_ANALYSIS:
			self.xmlReport += '<files>'
			webspaceHandler = WebspaceHandler()
			print "Analysing webspaces files..."
			for name in os.listdir(ABS_ROOT_DIR):
				object = os.path.join(ABS_ROOT_DIR, name)
				# skip not folders
				if not os.path.isdir(object):
					continue
				if name == DETAILED_REPORTS_FOLDER_NAME:
					continue
				# handle folders
				if DETECT_NON_ROOT_FOLDERS_ONLY == False or (DETECT_NON_ROOT_FOLDERS_ONLY == True and os.stat(object).st_uid != 0):
					webspace = Webspace(ABS_ROOT_DIR, name)
					# analyse and report each webspace
					webspace.analyseContent()
					webspace.reportToFileInText()
					self.xmlReport += webspace.getXmlReport()
					webspaceHandler.addWebspace(webspace)
			print "Analysing webspaces files completed." + EOL
			print webspaceHandler.getSummaryReport()
			self.xmlReport += '</files>'
			self.summaryReport += webspaceHandler.getSummaryReport(True)
		# system environment processing
		if not SKIP_ENVIRONMENT_ANALYSIS:
			self.xmlReport += '<os>'
			system = System()
			system.analyseSystem()
			self.xmlReport += system.getXmlReport()
			self.summaryReport += system.getSysInfo()
			self.summaryReport += system.getReport()
			print system.getSysInfo()
			print system.getReport()
			self.xmlReport += '</os>'
		# print summary text report
		if self.summaryReport:
			LOGGER.saveTextToSummaryReport(self.summaryReport)
			LOGGER.printWhereSummaryTextReportIs()
		# print summary XML report
		self.xmlReport += '</data>'
		LOGGER.createCustomReport(self.xmlReport, SUM_XML_REPORT_FILENAME)
		LOGGER.printWhereSummaryXmlReportIs()

################################################################################
#                                                                              #
#                                     Python                                   #
#                                                                              #
################################################################################

class Python:

	def checkVersion(self):
		print EOL + "Checking Python version compatibility..."
		print "Minimal compatible Python version: %d.%d.%d" % (MIN_MAJOR_PYTHON_VERSION, MIN_MINOR_PYTHON_VERSION, MIN_MICRO_PYTHON_VERSION)
		try:
			self.__detectPython()
			print "Detected Python version: %d.%d.%d" % (self.versionMajor, self.versionMinor, self.versionMicro)
			if self.__isPythonVersionCompatible():
				print "[SUCCESS] Python version - ok"
			else:
				print "[WARNING] Python version is possible incompatible"
		except:
			print "[ERROR] Unable to detect and check Python version"
		print

	def __detectPython(self):
		self.versionMajor = sys.version_info[0]
		self.versionMinor = sys.version_info[1]
		self.versionMicro = sys.version_info[2]

	def __isPythonVersionCompatible(self):
		if (self.versionMajor < MIN_MAJOR_PYTHON_VERSION):
			return False
		elif (self.versionMajor > MIN_MAJOR_PYTHON_VERSION):
			return True
		elif (self.versionMinor < MIN_MINOR_PYTHON_VERSION):
			return False
		elif (self.versionMinor > MIN_MINOR_PYTHON_VERSION):
			return True
		elif (self.versionMicro < MIN_MICRO_PYTHON_VERSION):
			return False
		else:
			return True

################################################################################
#                                                                              #
#                                     System                                   #
#                                                                              #
################################################################################

class System:

	def __init__(self):
		"""Reset all measurement results and record static facts about the interpreter and host."""
		self.__cleanData()

		# Interpreter and host description
		self.pythonVersion = sys.version_info           # version tuple, e.g. (2, 7, 3, 'final', 0)
		osName = platform.system()
		self.system = osName.lower()                    # 'linux', 'windows', 'sunos', 'darwin', 'freebsd', etc
		self.machine = platform.machine()               # 'x86_64', 'i686', 'sun4u', 'ppc64', 'ia64', 'i386', 'AMD64', etc
		self.platform = platform.platform()             # 'Linux-2.6.18-274.12.1.el5-x86_64-with-redhat-5.7-Final', 'Windows-2008ServerR2-6.1.7601-SP1', etc

		# Scratch file written and read back by the HDD speed measurements
		tmpFilePath = os.path.join(ABS_ROOT_DIR, HDD_ANALYSIS_TMP_FILENAME)
		self.hddTestTmpFile = tmpFilePath

		# Text reported in place of a value that could not be measured
		self.unknown = '-1'
		# NOTE(review): no reader of this flag is visible in this section - confirm it is still used
		self.usePsutil = False

	def __cleanData(self):
		self.cpuUsage = None         # CPU Usage in percents [None|integer]
		self.ramUsage = None         # RAM Usage in percents [None|float]
		self.hddWriteSpeed = None    # HDD write speed (bytes/second) [None|float]
		self.hddReadSpeed = None     # HDD read speed (bytes/second) [None|float]
		self.networkSpeed = None     # Network speed (bytes/second) [None|float]
		self.cpuJobTime = 0          # TODO: Amount of seconds spent on CPU analysis
		self.ramJobTime = 0          # TODO: Amount of seconds spent on RAM analysis
		self.hddReadJobTime = 0      # TODO: Amount of seconds spent on HDD reading analysis
		self.hddWriteJobTime = 0     # TODO: Amount of seconds spent on HDD writing analysis

	def getXmlReport(self):
		return '<cpu-usage unit="%%">%s</cpu-usage><ram-usage unit="%%">%s</ram-usage>\
<hdd-write unit="bytes/sec">%s</hdd-write><hdd-read unit="bytes/sec">\
%s</hdd-read><network unit="bytes/sec">%s</network>' % (
			self.getCpuUsage(),self.getRamUsage(),self.gethddWriteSpeed(),
			self.gethddReadSpeed(),self.getNetworkSpeed())

	def isWindows(self):
		if re.match("^(.*)(windows|microsoft)(.*)", self.system):
			LOGGER.saveTextToOsReport('Using Windows OS analysis')
			print "[INFO] Using 'Windows' platform methods for OS analysis"
			return True
		else:
			return False

	def isLinux(self):
		if re.match("^(.*)(linux)(.*)", self.system):
			LOGGER.saveTextToOsReport('Using Linux OS analysis')
			print "[INFO] Using 'Linux' platform methods for OS analysis"
			return True
		else:
			return False

	def isUnix(self):
		if re.match("^(.*)(freebsd)(.*)", self.system):
			LOGGER.saveTextToOsReport('Using Unix OS analysis')
			print "[INFO] Using 'Unix' platform methods for OS analysis"
			return True
		else:
			return False

	def printSysInfo(self):
		print self.getSysInfo()

	def getSysInfo(self):
		"""Return a framed, multi-line description of the interpreter and the host."""
		frame = "*****************************************************"
		# Leading and trailing empty items produce the blank line before and after the block
		lines = [
			"",
			frame,
			"SYSTEM INFO:",
			frame,
			"Python: %s" % str(self.pythonVersion),
			"System: %s" % self.system,
			"Machine: %s" % self.machine,
			"Platform: %s" % self.platform,
			frame,
			"",
		]
		return EOL.join(lines) + EOL

	def analyseSystem(self):
		"""Reset previous results and measure network, CPU, RAM and HDD figures for the detected platform.

		Every measurement is attempted independently: when one raises, its warning is
		printed, the exception text is saved to the OS report and the remaining
		measurements still run. Results are stored on the instance (see the getters).
		"""
		self.__cleanData()
		print EOL + "System resources analysing started..."

		# Console warnings, one per kind of measurement
		cpuWarning = "[WARNING] Unable to calculate CPU usage"
		ramWarning = "[WARNING] Unable to calculate RAM usage"
		hddWarning = "[WARNING] Unable to calculate HDD read/write speed"
		netWarning = "[WARNING] Unable to calculate NETWORK speed"

		# --- ALL PLATFORMS --- #
		# NETWORK: only read when an input file was configured
		if RSYNC_INPUT_DATA:
			try:
				self.readNetworkSpeedFromTextfile()
			except Exception, e:
				print netWarning
				LOGGER.saveTextToOsReport(convertExceptionToText(e))
		# --- WINDOWS --- #
		# NOTE: the is*() probes log and print on a positive match, so each is called at most once
		if self.isWindows():
			# CPU
			try:
				self.calculateWindowsCpuUsage()
			except Exception, e:
				print cpuWarning
				LOGGER.saveTextToOsReport(convertExceptionToText(e))
			# RAM: WMIC first, the kernel32 based method only as a fallback when WMIC failed
			try:
				self.calculateWindowsRamUsage()
			except Exception, e:
				print ramWarning
				LOGGER.saveTextToOsReport(convertExceptionToText(e))
				try:
					self.calculateWindowsRamUsage2()
				except Exception, e:
					print ramWarning
					LOGGER.saveTextToOsReport(convertExceptionToText(e))
			# HDD
			try:
				self.calculateHddSpeedViaPythonIO()
			except Exception, e:
				print hddWarning
				LOGGER.saveTextToOsReport(convertExceptionToText(e))
		# --- LINUX --- #
		# FreeBSD (isUnix) shares the Linux measurement methods
		elif self.isLinux() or self.isUnix():
			# CPU
			try:
				self.calculateLinuxCpuUsage()
			except Exception, e:
				print cpuWarning
				LOGGER.saveTextToOsReport(convertExceptionToText(e))
			# RAM
			try:
				self.calculateLinuxRamUsage2()
			except Exception, e:
				print ramWarning
				LOGGER.saveTextToOsReport(convertExceptionToText(e))
			# HDD
			try:
				self.calculateHddSpeedViaPythonIO()
			except Exception, e:
				print hddWarning
				LOGGER.saveTextToOsReport(convertExceptionToText(e))
		else:
			LOGGER.saveTextToOsReport('[WARNING] Unable to detect OS platform')
			print "[WARNING] Unable to detect OS platform. Skipping OS resources usage analysis."

		# The system description is appended to the OS report whatever the outcome was
		LOGGER.saveTextToOsReport(self.getSysInfo())
		print "System resources analysing completed." + EOL

	def getCpuUsage(self):
		if self.cpuUsage is None:
			return self.unknown
		else:
			return str(self.cpuUsage)
	def getRamUsage(self):
		if self.ramUsage is None:
			return self.unknown
		else:
			return str(self.ramUsage)
	def gethddWriteSpeed(self):
		if self.hddWriteSpeed is None:
			return self.unknown
		else:
			return str(self.hddWriteSpeed)
	def gethddReadSpeed(self):
		if self.hddReadSpeed is None:
			return self.unknown
		else:
			return str(self.hddReadSpeed)
	def getNetworkSpeed(self):
		if self.networkSpeed is None:
			return self.unknown
		else:
			return str(self.networkSpeed)

	def getReport(self):
		"""Return the human readable resources usage report.

		Percent values get a '%' sign, speeds are scaled by getBytesWithSuffix() and
		every value that was not measured is shown as self.unknown.
		"""
		frame = "*****************************************************"
		pieces = [frame, EOL, "SYSTEM RESOURCES USAGE REPORT:", EOL, frame, EOL]

		pieces.append("CPU Usage: ")
		if self.cpuUsage is None:
			pieces.append(self.unknown)
		else:
			pieces.append("%s%%" % self.getCpuUsage())
		pieces.append(EOL)

		pieces.append("RAM Usage: ")
		if self.ramUsage is None:
			pieces.append(self.unknown)
		else:
			pieces.append("%s%%" % self.getRamUsage())
		pieces.append(EOL)

		pieces.append("HDD write speed: ")
		if self.hddWriteSpeed is None:
			pieces.append(self.unknown)
		else:
			pieces.append("%s/s" % getBytesWithSuffix(self.hddWriteSpeed))
		pieces.append(EOL)

		pieces.append("HDD read speed: ")
		if self.hddReadSpeed is None:
			pieces.append(self.unknown)
		else:
			pieces.append("%s/s (could be affected by file cache)" % getBytesWithSuffix(self.hddReadSpeed))
		pieces.append(EOL)

		pieces.append("NETWORK speed: ")
		if self.networkSpeed is None:
			pieces.append(self.unknown)
		else:
			pieces.append("%s/s" % getBytesWithSuffix(self.networkSpeed))
		pieces.append(EOL)

		pieces.append("-----------------------------------------------------")
		pieces.append(EOL)
		return "".join(pieces)

	def watchReport(self, counter = 100):
		for i in range(0, counter):
			self.analyseSystem()
			print self.getReport()

	################################################################################
	#                                 CPU                                          #
	################################################################################
	def calculateLinuxCpuUsage(self, interval = 0.1, counter = 6, amountOfMeasurements = 10):
		"""Estimate the CPU usage (percent) from /proc/stat deltas and store it in self.cpuUsage.

		interval             - seconds slept between the two /proc/stat readings of one sample
		counter              - number of samples averaged into one measurement
		amountOfMeasurements - number of measurements averaged into the final value

		A sample whose counters did not advance (zero total delta) is discarded, and a
		measurement without any usable sample is left out of the final average.
		Raises ValueError when no usable sample was collected at all.
		"""
		LOGGER.saveTextToOsReport(EOL + "****** Linux CPU calculation started ******" + EOL)
		sumResult = 0
		usableMeasurements = 0
		for j in range(0, amountOfMeasurements):
			LOGGER.saveTextToOsReport("------ Begin measurement #%d ------" % j + EOL)
			result = 0
			badMeasurementsCounter = 0
			for sampleNumber in range(0, counter):
				x = self.__getCpuTime()
				LOGGER.saveTextToOsReport("x = %s" % str(x) + EOL)
				time.sleep(interval)
				y = self.__getCpuTime()
				LOGGER.saveTextToOsReport("y = %s" % str(y) + EOL)
				# Time spent in every state during the interval; the last state is 'idle'
				delta = [after - before for before, after in zip(x, y)]
				a = delta[-1]*100.00
				b = sum(delta)
				LOGGER.saveTextToOsReport("a = %s" % str(a) + EOL)
				LOGGER.saveTextToOsReport("b = %s" % str(b) + EOL)
				if b != 0:
					result += 100-(a/b)
				else:
					badMeasurementsCounter += 1
				LOGGER.saveTextToOsReport("result = %d" % result + EOL)
				LOGGER.saveTextToOsReport("badMeasurementsCounter = %d" % badMeasurementsCounter + EOL)
			# Average over the usable samples only; the bad ones contributed nothing to 'result'
			usableSamples = counter - badMeasurementsCounter
			if usableSamples > 0:
				sumResult += round(result / usableSamples, 1)
				usableMeasurements += 1
			LOGGER.saveTextToOsReport("------ Finish measurement #%d ------" % j + EOL)
		LOGGER.saveTextToOsReport("****** Linux CPU calculation finished ******" + EOL)
		if usableMeasurements == 0:
			raise ValueError("no usable /proc/stat sample was collected")
		self.cpuUsage = sumResult / usableMeasurements

	def __getCpuTime(self):
		statFile = file("/proc/stat", "r")
		timeList = statFile.readline().split(" ")[2:6]
		statFile.close()
		for i in range(len(timeList)):
			timeList[i] = int(timeList[i])
		return timeList

	def calculateWindowsCpuUsage(self, amountOfMeasurements = 10):
		"""Average the 'LoadPercentage' reported by WMIC and store it in self.cpuUsage.

		amountOfMeasurements - how many times WMIC is queried

		Only queries that returned a load value take part in the average, so a failed
		WMIC call is no longer counted as 0% load. WMIC prints one value per processor
		(socket); the values of one query are averaged.
		Raises ValueError when no query returned a load value.
		"""
		sumResult = 0
		validMeasurements = 0
		for measurement in range(0, amountOfMeasurements):
			process = os.popen("WMIC CPU GET LoadPercentage 2>&1")
			try:
				output = process.read().strip().split()
			finally:
				process.close()
			# Expected output: the 'LoadPercentage' header followed by at least one number
			if len(output) < 2 or output[0] != 'LoadPercentage':
				continue
			try:
				loads = [int(value) for value in output[1:]]
			except ValueError:
				# an error message instead of numbers: skip this query
				continue
			sumResult += sum(loads) / len(loads)
			validMeasurements += 1
		if validMeasurements == 0:
			raise ValueError("WMIC did not report any CPU load value")
		self.cpuUsage = sumResult / validMeasurements

	################################################################################
	#                                 RAM                                          #
	################################################################################
	# through command-line utility 'free'
	def calculateLinuxRamUsage2(self):
		"""Compute the RAM usage (percent) from the output of 'free' and store it in self.ramUsage.

		Both layouts of 'free' are supported:
		  - old: total used free shared buffers cached   (used includes buffers and cache)
		  - new: total used free shared buff/cache available   (used already excludes them)
		self.ramUsage is left untouched when the 'Mem:' line is not recognised.
		Raises IndexError/ValueError when the output is too short or not numeric.
		"""
		process = os.popen('free')
		try:
			output = process.readlines()
		finally:
			process.close()
		header = output[0].split()
		memoryReport = output[1].strip().split()
		memoryType = memoryReport.pop(0) # 'Mem:'

		if memoryType == 'Mem:' and len(memoryReport) > 5:
			total = int(memoryReport[0])
			used = int(memoryReport[1])
			if 'buff/cache' in header:
				# new layout: columns 4 and 5 are buff/cache and available, nothing to subtract
				usedMemory = used
			else:
				buffers = int(memoryReport[4])
				cache = int(memoryReport[5])
				### - SHARED ?
				#shared = int(memoryReport[3])
				usedMemory = used - buffers - cache
			if total > 0:
				# 100.0 forces a true division; the integer one dropped the fraction
				self.ramUsage = usedMemory * 100.0 / total

	# through wmic component
	def calculateWindowsRamUsage(self):
		"""Compute the RAM usage (percent) from two WMIC queries and store it in self.ramUsage.

		Raises ValueError when WMIC returns nothing, returns something other than the
		expected 'Name=value' pair, or reports a non-positive total memory size.
		"""
		memory = {}
		# both values are reported in kilobytes
		for valueName in ('TotalVisibleMemorySize', 'FreePhysicalMemory'):
			process = os.popen("wmic os get %s /format:list 2>&1" % valueName)
			try:
				tokens = process.read().strip().split()
			finally:
				process.close()
			if not tokens:
				raise ValueError("WMIC returned no output for '%s'" % valueName)
			pair = tokens[0].split('=')
			if len(pair) != 2 or pair[0] != valueName:
				raise ValueError("Unexpected WMIC output for '%s': %s" % (valueName, tokens[0]))
			memory[valueName] = int(pair[1])

		totalPhysicalMemory = memory['TotalVisibleMemorySize']
		freePhysicalMemory = memory['FreePhysicalMemory']
		if totalPhysicalMemory <= 0:
			raise ValueError("WMIC reported a non-positive total memory size")
		usedPhysicalMemory = totalPhysicalMemory - freePhysicalMemory
		# 100.0 forces a true division; the integer one dropped the fraction
		self.ramUsage = round(usedPhysicalMemory * 100.0 / totalPhysicalMemory, 1)

	# through kernel32 library
	def calculateWindowsRamUsage2(self):
		"""Read the memory load (percent) through kernel32 and store it in self.ramUsage.

		GlobalMemoryStatusEx is used instead of GlobalMemoryStatus: the MEMORYSTATUS
		structure holds pointer-sized fields, so a structure declared with 32-bit
		c_ulong fields is too small on 64-bit Windows, and the old call reports wrong
		figures on machines with more than 4 GB of memory.
		Raises ImportError when ctypes is not available and WindowsError when the call fails.
		"""
		import ctypes # for resource usage calculation in Windows environment
		kernel32 = ctypes.windll.kernel32
		c_ulong = ctypes.c_ulong          # DWORD
		c_ulonglong = ctypes.c_ulonglong  # DWORDLONG
		# MEMORYSTATUSEX has the same layout on 32-bit and 64-bit Windows
		class MEMORYSTATUSEX(ctypes.Structure):
			_fields_ = [
				('dwLength', c_ulong),
				('dwMemoryLoad', c_ulong),
				('ullTotalPhys', c_ulonglong),
				('ullAvailPhys', c_ulonglong),
				('ullTotalPageFile', c_ulonglong),
				('ullAvailPageFile', c_ulonglong),
				('ullTotalVirtual', c_ulonglong),
				('ullAvailVirtual', c_ulonglong),
				('ullAvailExtendedVirtual', c_ulonglong)
			]

		memoryStatus = MEMORYSTATUSEX()
		# the API requires dwLength to be set before the call
		memoryStatus.dwLength = ctypes.sizeof(MEMORYSTATUSEX)
		if not kernel32.GlobalMemoryStatusEx(ctypes.byref(memoryStatus)):
			raise ctypes.WinError()
		self.ramUsage = memoryStatus.dwMemoryLoad

	# TODO implement RAM calc through systeminfo? It actually works very slooooowly

	################################################################################
	#                                 HDD                                          #
	################################################################################
	# through command-line utility 'dd'
	def calculateLinuxHddSpeed(self):
		"""Measure HDD write and read speed with 'dd' and store them (bytes/second).

		Results go to self.hddWriteSpeed and self.hddReadSpeed. The temporary file is
		removed even when dd or the parsing of its output fails.
		Raises IndexError/ValueError when the dd summary cannot be parsed.
		"""
		# dd formats its numbers according to the locale; the C locale guarantees a decimal point
		ddEnvironment = dict(os.environ)
		ddEnvironment['LC_ALL'] = 'C'
		try:
			# write speed estimation, bs=64k & count=2k = ~128 Mb total filesize
			writeCmd = ["dd","if=/dev/zero","of=%s" % self.hddTestTmpFile, "bs=64k","count=2k"]
			bytesWritten, writeSecondsSpent = self.__runDd(writeCmd, ddEnvironment)
			# HDD write speed [bytes/second]
			self.hddWriteSpeed = bytesWritten / writeSecondsSpent
			# read speed estimation
			readCmd = ["dd","if=%s" % self.hddTestTmpFile, "of=/dev/null"]
			bytesRead, readSecondsSpent = self.__runDd(readCmd, ddEnvironment)
			# HDD read speed [bytes/second]
			self.hddReadSpeed = bytesRead / readSecondsSpent
		finally:
			# clean up
			if os.path.exists(self.hddTestTmpFile):
				os.remove(self.hddTestTmpFile)

	def __runDd(self, command, environment):
		"""Run a dd command and return (bytes transferred, seconds spent) from its summary line."""
		ddResult = subprocess.Popen(command, stderr=subprocess.PIPE, env=environment).communicate() # dd return his output via stderr
		# stderr holds 'records in', 'records out' and then the summary line
		summary = ddResult[1].splitlines()[2].strip().split()
		transferred = int(summary[0])
		# The seconds follow 'copied,' (GNU dd) or 'in' (BSD dd); their position is not
		# fixed because newer GNU versions print more size fields before them
		for marker in ('copied,', 'in'):
			if marker in summary:
				return transferred, float(summary[summary.index(marker) + 1])
		raise ValueError("Unexpected dd summary: %s" % ' '.join(summary))

	# native i/o ASCII method, without using buffer
	def calculateHddSpeedViaPythonIO(self, blockSize = 32768, count = 8, amount_of_measurements = 50):
		"""Measure HDD write and read speed with plain unbuffered Python file I/O.

		blockSize              - size of one written block in bytes
		count                  - number of blocks written per measurement
		amount_of_measurements - number of write/read cycles

		Stores bytes/second in self.hddWriteSpeed and self.hddReadSpeed. A speed is set
		to None when its total duration is below the timer resolution (0.0 seconds).
		File handles and the temporary file are released even when a cycle fails.
		"""
		writeTimeDuration = float(0)
		readTimeDuration = float(0)

		try:
			for measurement in range(0, amount_of_measurements):
				# write
				timeStart = time.time()
				wf = open(self.hddTestTmpFile, "w", 0)       # ASCII method, no buffer
				writeTimeDuration += time.time() - timeStart
				try:
					for blockNumber in range(0, count):
						block = getRandomString(blockSize)  # generated outside of the timed section
						timeStart = time.time()
						wf.write(block)
						writeTimeDuration += time.time() - timeStart
					timeStart = time.time()
					wf.close()  # written blockSize * count bytes
					writeTimeDuration += time.time() - timeStart
				finally:
					wf.close()  # no-op when the timed close above already ran

				# read
				timeStart = time.time()
				rf = open(self.hddTestTmpFile, "r", 0)
				try:
					rf.read()
				finally:
					rf.close()  # read blockSize * count bytes
				readTimeDuration += time.time() - timeStart

				# clean up
				os.remove(self.hddTestTmpFile)
		finally:
			# a failed cycle must not leave the temporary file behind
			if os.path.exists(self.hddTestTmpFile):
				os.remove(self.hddTestTmpFile)

		totalBytes = amount_of_measurements * blockSize * count
		# A coarse timer can add up to 0.0 seconds; report "unknown" instead of dividing by zero
		if writeTimeDuration > 0:
			self.hddWriteSpeed = totalBytes / writeTimeDuration
		else:
			self.hddWriteSpeed = None
		if readTimeDuration > 0:
			self.hddReadSpeed = totalBytes / readTimeDuration
		else:
			self.hddReadSpeed = None

	################################################################################
	#                               Network                                        #
	################################################################################
	# through 'rsync' output file
	# WIN: sent 43 bytes  received 110,124,806 bytes  8,809,987.92 bytes/sec
	#   ['sent', '43', 'bytes', 'received', '110,124,806', 'bytes', '8,809,987.92', 'bytes/sec']
	# LIN: sent 66 bytes  received 110111326 bytes  14681518.93 bytes/sec
	#   ['sent', '66', 'bytes', 'received', '110111326', 'bytes', '14681518.93', 'bytes/sec']
	def readNetworkSpeedFromRsyncLogfile(self):
		"""Read the transfer speed from the rsync summary line of RSYNC_INPUT_DATA.

		The first line starting with 'sent' whose 7th field is a number (thousands
		separators allowed) sets self.networkSpeed in bytes/second. A missing path or
		a non-file path is only noted in the OS report.
		"""
		# TODO: get os.path.realpath(RSYNC_INPUT_DATA) here ?!
		if not os.path.exists(RSYNC_INPUT_DATA):
			LOGGER.saveTextToOsReport("[RSYNC-NET] File '%s' not exists" % RSYNC_INPUT_DATA + EOL)
		elif not os.path.isfile(RSYNC_INPUT_DATA):
			LOGGER.saveTextToOsReport("[RSYNC-NET] '%s' is not file" % RSYNC_INPUT_DATA + EOL)
		else:
			f = open(RSYNC_INPUT_DATA, 'r')
			try:
				for line in f:
					lineList = line.strip().split()
					# the summary line has 8 fields, the 7th one is the speed
					if len(lineList) > 7 and lineList[0] == 'sent':
						try:
							self.networkSpeed = float(lineList[6].replace(',',''))
						except ValueError:
							# not a number: keep looking for a well-formed summary line
							continue
						break
			finally:
				f.close()

	# Only network speed value in text file, example: 33558692.0 | 8,809,987.92
	def readNetworkSpeedFromTextfile(self):
		"""Read the network speed (bytes/second) from the text file RSYNC_INPUT_DATA.

		The first token of the first line that parses as a number (thousands
		separators allowed) sets self.networkSpeed. Blank and non-numeric lines are
		skipped. A missing path or a non-file path is only noted in the OS report.
		"""
		# TODO: get os.path.realpath(RSYNC_INPUT_DATA) here ?!
		if not os.path.exists(RSYNC_INPUT_DATA):
			LOGGER.saveTextToOsReport("[RSYNC-NET] File '%s' not exists" % RSYNC_INPUT_DATA + EOL)
		elif not os.path.isfile(RSYNC_INPUT_DATA):
			LOGGER.saveTextToOsReport("[RSYNC-NET] '%s' is not file" % RSYNC_INPUT_DATA + EOL)
		else:
			f = open(RSYNC_INPUT_DATA, 'r')
			try:
				for line in f:
					lineList = line.strip().split()
					if not lineList:
						# blank line
						continue
					try:
						self.networkSpeed = float(lineList[0].replace(',',''))
					except ValueError:
						# not a number: try the next line
						continue
					break
			finally:
				f.close()


################################################################################
#                                 HELPERS                                      #
################################################################################

def getBytesWithSuffix(num, prefix = '', suffix='B'):
	"""Scale `num` by steps of 1024 and return it as text such as '1.5 MB'.

	num    - amount expressed in the unit named by `prefix`
	prefix - unit prefix `num` starts in: '', 'K', 'M', 'G', 'T', 'P', 'E' or 'Z'
	suffix - unit name appended after the prefix

	Raises ValueError for an unknown prefix.
	"""
	scale = ['','K','M','G','T','P','E','Z']
	position = scale.index(prefix)
	value = num
	while position < len(scale):
		if abs(value) < 1024.0:
			return "%3.1f %s%s" % (value, scale[position], suffix)
		value = value / 1024.0
		position += 1
	# larger than everything in the table
	return "%.1f %s%s" % (value, 'Y', suffix)

def getTimeDurationForHuman(seconds):
	"""Format a duration given in seconds as 'MM:SS'.

	The minutes are not wrapped at one hour, so 3661 seconds become '61:01' rather
	than '01:01'. Fractions of a second are dropped and negative input is shown as '00:00'.
	"""
	wholeSeconds = max(int(seconds), 0)
	minutes, restSeconds = divmod(wholeSeconds, 60)
	return "%02d:%02d" % (minutes, restSeconds)

def convertExceptionToText(e):
	"""Return a multi-line text description of exception `e` for the report files.

	Includes `args`, `message` and `__str__()` when the exception provides them,
	followed by the traceback of the exception currently being handled.
	"""
	pieces = [EOL]
	try:
		if hasattr(e, 'args'):
			pieces.append("args: %s" % str(e.args) + EOL)                      # <type 'tuple'>
		if hasattr(e, 'message'):
			pieces.append("message: %s" % str(e.message) + EOL)                # <type 'str'>
		if hasattr(e, '__str__') and callable(getattr(e,'__str__')):
			pieces.append("__str__: %s" % str(e.__str__()) + EOL)              # <type 'str'>
		pieces.append("traceback: %s" % str(traceback.format_exc()) + EOL)     # <type 'str'>
	finally:
		pieces.append(EOL)
	return "".join(pieces)

def getRandomString(length=32768):
	"""Return a string of `length` random lowercase ASCII letters.

	The letters come from string.ascii_lowercase, which does not depend on the locale,
	and are joined in one pass instead of growing the string one character at a time.
	"""
	alphabet = string.ascii_lowercase
	return ''.join([random.choice(alphabet) for position in range(0, length)])

################################################################################
#                                                                              #
#                                     MAIN                                     #
#                                                                              #
################################################################################

print EOL + "Script started" + EOL
python = Python()
python.checkVersion()
# check scan_folder, prepare reports folder and common report files
LOGGER = Logger()
# run
reporter = Reporter()
reporter.run()
# close file flows
LOGGER.closeFileFlows()
print EOL + "Script finished" + EOL
