Coverage for .tox/p311/lib/python3.10/site-packages/scicom/historicalletters/utils.py: 100%

39 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-25 15:05 +0200

1"""Utility functions for HistoricalLetters.""" 

2import random 

3 

4import geopandas as gpd 

5import mesa 

6import numpy as np 

7import pandas as pd 

8import shapely 

9from shapely import LineString, contains 

10 

11 

def createData(population: int, populationDistribution: str) -> gpd.GeoDataFrame:
    """Create random agent coordinates from historically motivated choices.

    Settlements are read from a CSV file and ``population`` locations are
    drawn from them with replacement. Each settlement is weighted by its
    ``Area / Pop`` value as read from the file.

    The original CSV dataset is retrieved from
    https://doi.org/10.1371/journal.pone.0162678.s003

    Args:
        population: Number of coordinates (agents) to draw.
        populationDistribution: Location of the CSV file, handed to
            ``pandas.read_csv``. It is read with latin1 encoding, its first
            column becomes the index, and the columns "Area", "Pop",
            "Settlement", "longitude" and "latitude" are accessed.

    Returns:
        A GeoDataFrame converted to EPSG:3857 with the columns "unique_id"
        ("P0", "P1", ...), "longitude", "latitude" and a point geometry
        built from the longitude/latitude pairs.
    """
    initial_population_choices = pd.read_csv(
        populationDistribution,
        encoding="latin1",
        index_col=0,
    )

    # Sampling weight of every settlement, computed column-wise instead of
    # row by row. These weights are the relative probabilities to draw an
    # agent from the corresponding coordinates.
    # NOTE(review): the weight is area divided by population, i.e. the
    # inverse of a population density, while the original description spoke
    # of "population density" -- confirm which ratio is intended.
    relPop = (
        initial_population_choices["Area"] / initial_population_choices["Pop"]
    ).to_numpy()
    initial_population_choices.insert(0, "relPop", relPop)

    # Four coastal cities can not be considered, since the modern NUTS regions
    # give zero overlap to their coordinates, leading to potential errors when
    # agents move.
    # The variable name is referenced from inside the query string below.
    excludeCoastal = ["Great Yarmouth", "Kingston-upon-Hull", "Calais", "Toulon"]
    initial_population_choices = initial_population_choices.query(
        "~Settlement.isin(@excludeCoastal)",
    )

    loc_probabilities = initial_population_choices["relPop"].tolist()
    loc_values = list(
        zip(
            initial_population_choices["longitude"],
            initial_population_choices["latitude"],
        ),
    )

    # Weighted draw with replacement of ``population`` coordinate pairs.
    coordinates = random.choices(
        loc_values,
        loc_probabilities,
        k=population,
    )

    data = pd.DataFrame(
        coordinates,
        columns=["longitude", "latitude"],
    )
    data.insert(
        0,
        "unique_id",
        [f"P{idx}" for idx in range(population)],
    )

    # Build the GeoDataFrame with EPSG:4326 projection.
    geodf = gpd.GeoDataFrame(
        data,
        geometry=gpd.points_from_xy(data.longitude, data.latitude),
        crs="EPSG:4326",
    )

    # Transform to EPSG:3857, since the NUTS shape files are in
    # that projection.
    return geodf.to_crs("EPSG:3857")

81 

82 

def getRegion(geometry: shapely.geometry.point.Point, model: mesa.Model) -> str:
    """Get the ID of the first region that contains the input geometry.

    The regions are visited in the iteration order of ``model.regions`` and
    the search stops at the first region whose geometry contains the input.

    Might e.g. fail if a line of connection crosses international
    waters, since there is no NUTS region assigned then.

    Args:
        geometry: Geometry to locate.
        model: Model whose ``regions`` are searched; every region is
            accessed through its ``geometry`` and ``unique_id`` attributes.

    Returns:
        The ``unique_id`` of the first containing region.

    Raises:
        IndexError: If no region contains ``geometry``.
    """
    for region in model.regions:
        # Stop at the first hit; the remaining regions need no test.
        if contains(region.geometry, geometry):
            return region.unique_id
    text = f"Can not find overlaping region to geometry {geometry}"
    raise IndexError(text)

96 

97 

def getPositionOnLine(start: shapely.Point, target: shapely.Point) -> shapely.Point:
    """Pick a random point on the straight line from start to target.

    A fraction is drawn uniformly from 0.0 to 1.0 and used as the
    normalized distance along the connecting line, measured from start.
    """
    path = LineString([start, target])
    fraction = random.uniform(0.0, 1.0)
    return path.interpolate(fraction, normalized=True)

105 

def getNewTopic(start: tuple, target: tuple) -> tuple:
    """Pick a random topic on the straight line between two topics.

    Both topics are converted to arrays; a factor is drawn uniformly from
    0 to 1 and the result is start plus that factor times the difference
    (target - start), returned as a tuple.
    """
    origin = np.array(start)
    destination = np.array(target)
    weight = random.uniform(0, 1)
    shift = weight * (destination - origin)
    return tuple(origin + shift)