Coverage for .tox/p311/lib/python3.10/site-packages/scicom/utilities/statistics.py: 100%

57 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-26 14:26 +0200

1"""Prune a network.""" 

2import igraph as ig 

3import numpy as np 

4import pandas as pd 

5 

6 

class PruneNetwork:
    """Create statistics for communication networks by deletion.

    For a given dataset with sender and receiver information,
    create a weighted network with igraph. For a given number
    of iterations, deletion amounts, and deletion types, the
    algorithm then generates network statistics for randomly
    sampled subnetworks.
    """

    # Per-edge sampling-weight columns produced by generatePruningParameters.
    _WEIGHT_COLUMNS = ("degree", "unif", "log_normal", "exp", "beta")

    def __init__(self, dataframe: pd.DataFrame) -> None:
        """Initialize pruning.

        The dataframe is stored unchanged as ``inputDF``; the other
        methods read the columns "sender", "receiver" and "step" from it.
        """
        self.inputDF = dataframe

    def makeNet(self, dataframe: pd.DataFrame) -> ig.Graph:
        """Create network from dataframe.

        Assumes the existence of sender, receiver and step
        column names. Rows are grouped by (sender, receiver) and the
        number of rows per pair is handed to igraph as the third tuple
        element of a directed edge list built with ``weights=True``.
        """
        # Selecting "step" as well keeps the KeyError for inputs lacking it.
        edge_counts = (
            dataframe[["sender", "receiver", "step"]]
            .groupby(["sender", "receiver"])
            .size()
            .reset_index(name="Count")
        )
        return ig.Graph.TupleList(
            edge_counts.itertuples(index=False), directed=True, weights=True,
        )

    def netStats(self, G: ig.Graph) -> pd.DataFrame:
        """Generate network statistics.

        Any statistic calculated on the full
        network can be added in principle.
        Currently implemented are:
            average relative degree (degree divided by vertex count),
            density
            transitivity (``transitivity_undirected``)
            cohesion
            average path length
            modularity (of the clustering returned by ``G.components()``)

        Returns a single-row dataframe with one column per statistic.
        """
        n_vertices = len(G.vs)
        relative_degrees = [deg / n_vertices for deg in G.degree(mode="all")]
        stats = {
            "avg_relative_degree": np.mean(relative_degrees),
            "avg_path_length": G.average_path_length(),
            "density": G.density(),
            "transitivity": G.transitivity_undirected(),
            "cohesion": G.cohesion(),
            "modularity": G.modularity(G.components()),
        }
        return pd.DataFrame([stats])

    def generatePruningParameters(
        self,
        G: ig.Graph,
        rng: "np.random.Generator | None" = None,
    ) -> pd.DataFrame:
        """Generate a random set of pruning weights.

        Every vertex receives one value per weight type: its degree, and
        draws from uniform(0, 1), lognormal(0, 1), exponential(1) and
        beta(2, 3). The weight of an edge is the product of the values of
        its source and target vertex, divided by the sum of these products
        over all edges, so each weight column sums to one.

        Args:
            G: Network whose edges receive weights.
            rng: Random generator used for the draws. When omitted, a
                fresh unseeded generator is created.

        Returns:
            One row per edge with the columns "sender", "receiver" (vertex
            names) and one column per weight type.
        """
        if rng is None:
            rng = np.random.default_rng()
        nodes = G.get_vertex_dataframe()
        n_nodes = len(G.vs)
        node_weights = pd.DataFrame(
            {
                "ids": nodes.index,
                "degree": G.degree(),
                "unif": rng.uniform(0, 1, n_nodes),
                "log_normal": rng.lognormal(0, 1, n_nodes),
                "exp": rng.exponential(1, n_nodes),
                "beta": rng.beta(a=2, b=3, size=n_nodes),
            },
        )
        # After the second merge the source-vertex values carry the suffix
        # "_x" and the target-vertex values the suffix "_y".
        edges = (
            G.get_edge_dataframe()[["source", "target"]]
            .merge(node_weights, left_on="source", right_on="ids")
            .merge(node_weights, left_on="target", right_on="ids")
        )
        for column in self._WEIGHT_COLUMNS:
            source_w = edges[f"{column}_x"]
            target_w = edges[f"{column}_y"]
            edges[column] = source_w * target_w / np.dot(source_w, target_w)
        # Translate igraph vertex ids back to the original vertex names.
        edges["sender"] = edges["source"].map(nodes["name"])
        edges["receiver"] = edges["target"].map(nodes["name"])
        return edges[["sender", "receiver", *self._WEIGHT_COLUMNS]]

    def deleteFromNetwork(
        self,
        iterations: int = 10,
        delAmounts: tuple = (0.1, 0.25, 0.5, 0.75, 0.9),
        delTypes: tuple = ("degree", "unif", "log_normal", "exp", "beta"),
        seed: "int | None" = None,
    ) -> pd.DataFrame:
        """Run the deletion by sampling.

        The statistics of the full network are recorded first
        (delVal=0, delType="NA", delIteration=0). Each iteration then
        draws new pruning weights and, for every combination of deletion
        amount and deletion type, keeps ``round(rows * (1 - amount))``
        rows of the input, sampled with the chosen weight column, rebuilds
        the network from the kept rows and records its statistics.

        Args:
            iterations: Number of times new pruning weights are drawn.
            delAmounts: Fractions of rows to remove.
            delTypes: Weight columns used for sampling.
            seed: Optional seed making weights and sampling reproducible.
                With the default ``None`` the run is unseeded and the row
                sampling uses pandas' default random state.

        Returns:
            Concatenation of all single-row statistics frames, annotated
            with "delVal", "delType" and "delIteration".
        """
        fullNet = self.makeNet(self.inputDF)
        results = [
            self.netStats(fullNet).assign(
                delVal=0, delType="NA", delIteration=0,
            ),
        ]
        rng = np.random.default_rng(seed)
        # Only take over row sampling when a seed was requested, so unseeded
        # calls keep drawing from pandas' default random state.
        sample_state = None if seed is None else rng
        for idx in range(1, iterations + 1):
            prunVals = self.generatePruningParameters(fullNet, rng=rng)
            # Join on the edge key explicitly; relying on all shared column
            # names would change the join if the input had extra columns.
            tempDF = self.inputDF.merge(prunVals, on=["sender", "receiver"])
            for val in delAmounts:
                n_keep = round(len(tempDF) * (1 - val))
                for deltype in delTypes:
                    delDF = tempDF.sample(
                        n_keep, weights=deltype, random_state=sample_state,
                    )
                    delStats = self.netStats(self.makeNet(delDF)).assign(
                        delVal=val, delType=deltype, delIteration=idx,
                    )
                    results.append(delStats)
        return pd.concat(results)

149 

150 

151 

def prune(
    modelparameters: dict,
    network: tuple,
    columns: list,
    iterations: int = 10,
    delAmounts: tuple = (0.1, 0.25, 0.5, 0.75, 0.9),
    delTypes: tuple = ("degree", "unif"),
) -> pd.DataFrame:
    """Generate pruned networks from input.

    Builds a dataframe from ``network`` using ``columns`` as column names,
    runs ``PruneNetwork.deleteFromNetwork`` on it with the given
    iterations, deletion amounts and deletion types, and attaches every
    entry of ``modelparameters`` as an additional column of the result.

    Assumes existence of columns "sender", "receiver", and "step".
    """
    frame = pd.DataFrame(network, columns=columns)
    stats = PruneNetwork(frame).deleteFromNetwork(
        iterations=iterations,
        delAmounts=delAmounts,
        delTypes=delTypes,
    )
    return stats.assign(**modelparameters)