Active_prgs/Redgrave/EndoMultipleTableFieldCompare.py

"""

EndoMultipleTableFieldCompare

Created by:
Emanuel Borges
07.23.2021

This program will take a list of tables with the fields contained in those tables
and tell you which tables (fields not data) are subsumed by other tables and which
tables (fields not data) are identical.  This is helpful to get an idea of how the
tables are possibly being used.

"""

import os
from datetime import datetime

if __name__ == '__main__':
    ##  Full path on these three
    tableAndFieldsReportName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\DatabaseReports\20210723_Tables_Rows\csvVersion\sif Backup all tab cols.csv"
    subSetOutputFileName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\DatabaseReports\20210723_Tables_Rows\csvVersion\sifBackup_MannyAnalysis(subsetOf).txt"
    identicalOutputFileName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\DatabaseReports\20210723_Tables_Rows\csvVersion\sifBackup_MannyAnalysis(identicalTo).txt"

    ##  Start at zero
    tableNameColumnNumber = 1
    fieldNameColumnNumber = 2


    contents = open(tableAndFieldsReportName).readlines()


    if os.path.exists(subSetOutputFileName):
        print("This file already exists!  Please choose a different name.")
    elif os.path.exists(identicalOutputFileName):
        print("This file already exists!  Please choose a different name.")
    else:
        subSetOutputFile = open(subSetOutputFileName,'w')
        identicalOutputFile = open(identicalOutputFileName,'w')


        contents = contents[1:]
        tableFieldMatrix = {}
        for line in contents:
            line = line.replace("\n","")
            line = line.split("|")
            if line[1] in list(tableFieldMatrix.keys()):
                tableFieldMatrix[line[tableNameColumnNumber]].append(line[fieldNameColumnNumber])
            else:
                tableFieldMatrix[line[tableNameColumnNumber]] = [line[fieldNameColumnNumber],]


        tableFieldList = list(tableFieldMatrix.keys())
        for tbl in tableFieldList:
            allButMeList = list(tableFieldList)
            allButMeList.remove(tbl)
            for newTbl in allButMeList:
                if(set(tableFieldMatrix[tbl]).issubset(set(tableFieldMatrix[newTbl]))):
                    nl = subSetOutputFile.write("The field list in %s is a subset of %s\n"% (tbl, newTbl))
                if sorted(tableFieldMatrix[tbl]) == sorted(tableFieldMatrix[newTbl]):
                    nl = identicalOutputFile.write("The field list in %s is identical to fields in %s\n"% (tbl, newTbl))
        subSetOutputFile.close()
        identicalOutputFile.close()
        
Revision:	762
Committed:	Fri Jul 30 14:43:55 2021 UTC (4 years, 7 months ago) by nino.borges
Content type:	text/x-python
File size:	2660 byte(s)
Log Message:	This program will take a list of tables with the fields contained in those tables and tell you which tables (fields not data) are subsumed by other tables and which tables (fields not data) are identical. This is helpful to get an idea of how the tables are possibly being used. Created for use in the Endo case.
#	User	Rev	Content
1	nino.borges	762	"""
2
3			EndoMultipleTableFieldCompare
4
5			Created by:
6			Emanuel Borges
7			07.23.2021
8
9			This program will take a list of tables with the fields contained in those tables
10			and tell you which tables (fields not data) are subsumed by other tables and which
11			tables (fields not data) are identical. This is helpful to get an idea of how the
12			tables are possibly being used.
13
14			"""
15
16			import os
17			from datetime import datetime
18
19			if __name__ == '__main__':
20			## Full path on these three
21			tableAndFieldsReportName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\DatabaseReports\20210723_Tables_Rows\csvVersion\sif Backup all tab cols.csv"
22			subSetOutputFileName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\DatabaseReports\20210723_Tables_Rows\csvVersion\sifBackup_MannyAnalysis(subsetOf).txt"
23			identicalOutputFileName = r"C:\Users\eborges\Documents\Cases\Endo\Temp\DatabaseReports\20210723_Tables_Rows\csvVersion\sifBackup_MannyAnalysis(identicalTo).txt"
24
25			## Start at zero
26			tableNameColumnNumber = 1
27			fieldNameColumnNumber = 2
28
29
30			contents = open(tableAndFieldsReportName).readlines()
31
32
33			if os.path.exists(subSetOutputFileName):
34			print("This file already exists! Please choose a different name.")
35			elif os.path.exists(identicalOutputFileName):
36			print("This file already exists! Please choose a different name.")
37			else:
38			subSetOutputFile = open(subSetOutputFileName,'w')
39			identicalOutputFile = open(identicalOutputFileName,'w')
40
41
42			contents = contents[1:]
43			tableFieldMatrix = {}
44			for line in contents:
45			line = line.replace("\n","")
46			line = line.split("\|")
47			if line[1] in list(tableFieldMatrix.keys()):
48			tableFieldMatrix[line[tableNameColumnNumber]].append(line[fieldNameColumnNumber])
49			else:
50			tableFieldMatrix[line[tableNameColumnNumber]] = [line[fieldNameColumnNumber],]
51
52
53			tableFieldList = list(tableFieldMatrix.keys())
54			for tbl in tableFieldList:
55			allButMeList = list(tableFieldList)
56			allButMeList.remove(tbl)
57			for newTbl in allButMeList:
58			if(set(tableFieldMatrix[tbl]).issubset(set(tableFieldMatrix[newTbl]))):
59			nl = subSetOutputFile.write("The field list in %s is a subset of %s\n"% (tbl, newTbl))
60			if sorted(tableFieldMatrix[tbl]) == sorted(tableFieldMatrix[newTbl]):
61			nl = identicalOutputFile.write("The field list in %s is identical to fields in %s\n"% (tbl, newTbl))
62			subSetOutputFile.close()
63			identicalOutputFile.close()
64