/*
Paradox Script Parser
Copyright (C) 2022 TheZipCreator

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.
*/

module pdxparser;

import std.variant, std.conv, std.array;

/// Thrown whenever a function is called on a node when it's not appropriate
/// (e.g. asking an `Assignment` for its children or a `Block` for its key).
class PDXInvalidTypeException : Exception {
    this() {
        super("Invalid type for operation");
    }
}

/// Thrown whenever parsing fails (unbalanced braces, unterminated string).
class PDXParsingException : Exception {
    this(string msg) {
        super(msg);
    }
}

/// Type of a node
enum NodeType {
    ASSIGNMENT, ///
    BLOCK, ///
    VALUE ///
}

/// Represents a single usable thing in the given script
interface Node {
    /// Gets all children of a block
    Node[] children();
    /// Returns a `NodeType` that corresponds to the type
    NodeType type();
    /// Should be equivalent to `node.children[i]`
    Node opIndex(size_t i);
    /// Returns the first assignment with key `s`
    Node opIndex(string s);
    /// Equivalent to looping over `node.children`
    int opApply(int delegate(Node) dg);
    /// Gets the key of an assignment
    string key();
    /// Untyped accessor backing `value!T`. Template functions declared in an
    /// interface are implicitly final, so implementers override this
    /// non-template method instead.
    Variant value_();
    /// Gets the value of an assignment or the value of a value
    T value(T)() {
        return value_().get!T();
    }
    /// Equivalent to `.value!Block`
    final Block block() {
        return value!Block;
    }
    /// Textual rendering of the node
    string toString();
}

/// Two values separated by an equals. Usually an effect or condition
/// ex:
/// `culture = albanian`
class Assignment : Node {
    string _key;
    Variant _value;

    this(string k, Variant v) {
        this._key = k;
        this._value = v;
    }

    Node[] children() {
        throw new PDXInvalidTypeException;
    }
    NodeType type() {
        return NodeType.ASSIGNMENT;
    }
    Node opIndex(size_t i) {
        throw new PDXInvalidTypeException;
    }
    Node opIndex(string s) {
        throw new PDXInvalidTypeException;
    }
    int opApply(int delegate(Node) dg) {
        throw new PDXInvalidTypeException;
    }
    string key() {
        return _key;
    }
    Variant value_() {
        return _value;
    }
    override string toString() {
        return _key~" = "~_value.toString();
    }
}

/// Represents a series of nodes surrounded by curly braces (typically the value of an assignment)
class Block : Node {
    Node[] _children;

    this(Node[] children) {
        _children = children;
    }

    Node[] children() {
        return _children;
    }
    NodeType type() {
        return NodeType.BLOCK;
    }
    Node opIndex(size_t i) {
        return _children[i];
    }
    /// Returns the first child that is an assignment whose key equals `s`.
    /// Throws: `RangeError` when no such assignment exists.
    Node opIndex(string s) {
        foreach(child; _children) {
            if(child.type == NodeType.ASSIGNMENT && child.key == s)
                return child;
        }
        import core.exception : RangeError;
        // NOTE(review): RangeError's constructor appears to take (file, line, next),
        // so this text may surface as the file name rather than as the message - confirm.
        throw new RangeError("Cannot find key \""~s~"\"");
    }
    /// Calls `dg` for every child in order. A non-zero result from `dg` stops
    /// the iteration and is passed back unchanged, which is what lets `break`,
    /// `return` and `goto` inside a `foreach` body behave correctly.
    int opApply(int delegate(Node) dg) {
        foreach(child; _children) {
            if(auto result = dg(child))
                return result;
        }
        return 0;
    }
    string key() {
        throw new PDXInvalidTypeException;
    }
    Variant value_() {
        throw new PDXInvalidTypeException;
    }
    override string toString() {
        auto ap = appender!string;
        ap.put("{\n");
        foreach(child; this) {
            ap.put(child.toString()~"\n");
        }
        ap.put("\n}");
        return ap[];
    }
}

/// Represents a single value with no = after it.
class Value : Node {
    Variant _value;

    this(Variant v) {
        _value = v;
    }

    Node[] children() {
        throw new PDXInvalidTypeException;
    }
    NodeType type() {
        return NodeType.VALUE;
    }
    Node opIndex(size_t i) {
        throw new PDXInvalidTypeException;
    }
    int opApply(int delegate(Node) dg) {
        throw new PDXInvalidTypeException;
    }
    Node opIndex(string s) {
        throw new PDXInvalidTypeException;
    }
    string key() {
        throw new PDXInvalidTypeException;
    }
    Variant value_() {
        return _value;
    }
    override string toString() {
        return _value.toString();
    }
}

/// Takes a paradox script and returns a Block representing all nodes within it.
///
/// A closing brace is appended to the script so the top level can be parsed
/// like any other block; parsing therefore stops at the first unmatched `}`.
/// Throws: `PDXParsingException` if a block or quoted string is never closed.
Node parse(string script) {
    int loc = 0;
    return parse(script~"\n}", &loc);
}

/// Parses the contents of one block.
///
/// Params:
///   script = the full script text
///   l = index of the next character to read; on return it points just past
///       the `}` that closed this block
/// Returns: a `Block` holding every node read before the closing brace.
/// Throws: `PDXParsingException` if the text ends before the block or a
///         quoted string is closed.
Block parse(string script, int* l) {
    Node[] ret;
    string value = ""; // token currently being accumulated
    string buf = ""; // key of the assignment being parsed (buf != "" means it's currently in an assignment)
    bool pending = false; // a token has been started; needed because a quoted token may be empty
    bool quoted = false; // the current token came (at least partly) from a quoted string
    bool seenSpace = false; // a bare token was followed by whitespace and is waiting to see whether '=' follows

    // Converts the current token to a Variant: float, int, bool or string.
    Variant get() {
        bool number = true;
        bool dot = false;
        bool digit = false;
        foreach(char ch; value) {
            if(ch == '.') {
                if(dot) {
                    number = false;
                    break;
                }
                dot = true;
            } else if(ch < '0' || ch > '9') {
                number = false;
                break;
            } else {
                digit = true;
            }
        }
        // "" and "." contain no digits and must not reach to!int / to!float
        number = number && digit;
        // TODO: maybe add date as a type? (e.g. 1444.4.4 would return a custom struct Date or something)
        if(number && dot)
            return Variant(value.to!float);
        if(number)
            return Variant(value.to!int);
        // only bare yes/no are booleans; "yes" in quotes stays a string
        if(!quoted && value == "yes")
            return Variant(true);
        if(!quoted && value == "no")
            return Variant(false);
        return Variant(value);
    }

    // Emits the pending token (if any) as an Assignment or a bare Value and resets the token state.
    void finishToken() {
        if(pending) {
            if(buf != "")
                ret ~= new Assignment(buf, get());
            else
                ret ~= new Value(get());
        }
        buf = "";
        value = "";
        pending = false;
        quoted = false;
        seenSpace = false;
    }

    while(true) {
        // check before indexing so a missing '}' is reported as a parse error
        if(*l >= script.length)
            throw new PDXParsingException("Unbalanced braces");
        char c = script[*l];
        *l += 1;
        switch(c) {
            case ' ', '\t', '\n':
                if(pending) {
                    if(buf != "")
                        finishToken(); // whitespace ends the right-hand side of an assignment
                    else
                        seenSpace = true; // might still turn out to be a key
                }
                break;
            case '{':
                if(buf != "") {
                    // *l already points at the first character inside the braces
                    Block b = parse(script, l);
                    ret ~= new Assignment(buf, Variant(b));
                    buf = "";
                    value = "";
                    pending = false;
                    quoted = false;
                    seenSpace = false;
                    break;
                }
                finishToken(); // a bare value directly before an anonymous block
                ret ~= parse(script, l);
                break;
            case '}':
                // flush whatever is still pending, e.g. `siege = 0}` or `{1 2 3}`
                finishToken();
                return new Block(ret);
            case '#':
                // comment: skip up to (not including) the newline
                while(*l < script.length && script[*l] != '\n')
                    *l += 1;
                break;
            case '\r':
                break; // windows
            case '=':
                buf = value;
                value = "";
                pending = false;
                quoted = false;
                seenSpace = false;
                break;
            case '"':
                if(seenSpace)
                    finishToken(); // previous bare token was a standalone value
                while(*l < script.length && script[*l] != '"') {
                    value ~= script[*l];
                    *l += 1;
                }
                if(*l >= script.length)
                    throw new PDXParsingException("Unterminated string");
                *l += 1; // skip the closing quote
                pending = true;
                quoted = true;
                break;
            default:
                if(seenSpace)
                    finishToken(); // previous bare token was a standalone value
                value ~= c;
                pending = true;
                break;
        }
    }
}

/// Takes a filename, and parses a script from that file. Supported encodings are UTF-8 and ANSI.
/// The file is first read as UTF-8 text; if that raises a `UnicodeException`
/// the raw bytes are transcoded from Latin-1 instead.
Node parseFromFile(string filename) {
    import std.file, std.encoding;
    import core.exception : UnicodeException;
    // Paradox script files are sometimes UTF-8 and sometimes ANSI, so both are handled here
    try {
        return parse(readText(filename));
    } catch(UnicodeException e) {
        string s;
        transcode(cast(Latin1String)read(filename), s);
        return parse(s);
    }
}

unittest {
    string albania = `government = monarchy
add_government_reform = autocracy_reform
government_rank = 1
primary_culture = albanian
religion = catholic
technology_group = eastern
capital = 4175 # Lezhe

# The League of Lezhe
1443.3.4 = {
    monarch = {
        name = "Gjergj Skanderbeg"
        dynasty = "Kastrioti"
        birth_date = 1405.1.1
        adm = 6
        dip = 5
        mil = 6
        leader = { name = "Skanderbeg" type = general fire = 5 shock = 5 manuever = 5 siege = 0}
    }
    clear_scripted_personalities = yes
    add_ruler_personality = inspiring_leader_personality
    add_ruler_personality = silver_tongue_personality
}`; // excerpt of "history/countries/ALB - Albania.txt" from EU4
    auto res = parse(albania);
    assert(res[0].key == "government");
    assert(res[0].value!string == "monarchy");
    assert(res[2].value!int == 1);
    assert(res[7].key == "1443.3.4");
    assert(res[7].block[1].key == "clear_scripted_personalities");
    assert(res[7].block[0].block[1].value!string == "Kastrioti");
    assert(res[7].block[0].block[2].value!string != "aaa");
    // the assignment directly before '}' must not be dropped
    assert(res[7].block[0].block[6].block.children.length == 6);
    assert(res[7].block[0].block[6].block["siege"].value!int == 0);
}

unittest {
    string valueTest = `values = { 1 1 1 }`;
    auto res = parse(valueTest);
    assert(res[0].block.children.length == 3);
    foreach(v; res[0].block) {
        assert(v.value!int == 1);
    }
}

unittest {
    string utf8file = `a = b`;
    import std.file, std.encoding;
    write("tmp.txt", utf8file);
    auto res = parseFromFile("tmp.txt");
    assert(res[0].key == "a");
    assert(res[0].value!string == "b");
    Latin1String ansifile;
    transcode(utf8file, ansifile);
    write("tmp.txt", ansifile);
    res = parseFromFile("tmp.txt");
    assert(res[0].key == "a");
    assert(res[0].value!string == "b");
    remove("tmp.txt");
}

unittest {
    string file = `# The Kingdom of God on Earth
country_event = {
    id = catholic_flavor.2
    title = catholic_flavor.2.t
    desc = catholic_flavor.2.d
    picture = POPE_PREACHING_eventPicture

    major = yes
    is_triggered_only = yes

    option = {
        name = catholic_flavor.2.a
        add_government_reform = kingdom_of_god_reform
        #set_government_rank = 3
    }

    option = {
        name = catholic_flavor.2.b
        add_prestige = 10
    }
}`; // From EU4: events/Catholic.txt
    auto tree = parse(file);
    assert(tree.children.length == 1);
    assert(tree[0].block["id"].value!string == "catholic_flavor.2");
}

unittest {
    // braces with no surrounding whitespace
    auto tight = parse("a = {b = c}\nx = {}\nv = {1 2 3}");
    assert(tight[0].block["b"].value!string == "c");
    assert(tight[1].block.children.length == 0);
    assert(tight[2].block.children.length == 3);

    // quoted tokens
    auto q = parse("flag = \"yes\"\nreal = yes\nempty = \"\"\nnext = 1");
    assert(q[0].value!string == "yes");
    assert(q[1].value!bool);
    assert(q[2].value!string == "");
    assert(q[3].key == "next");

    // unbalanced input is reported as a parsing error
    bool threw = false;
    try {
        parse("a = {");
    } catch(PDXParsingException e) {
        threw = true;
    }
    assert(threw);
}