# Copyright (c) 2005-2012 Stephen John Machin, Lingfo Pty Ltd
# This module is part of the excelrd package, which is released under a
# BSD-style licence.
# No part of the content of this file was derived from the works of
# David Giffin.
"""
Module for parsing/evaluating Microsoft Excel formulas.
"""
import copy
import operator as opr
from struct import unpack
from .biffh import (
XLRDError,
error_text_from_code,
hex_char_dump,
unpack_string_update_pos,
unpack_unicode_update_pos,
)
from .timemachine import *
__all__ = [
"oBOOL",
"oERR",
"oNUM",
"oREF",
"oREL",
"oSTRG",
"oUNK",
"decompile_formula",
"dump_formula",
"evaluate_name_formula",
"okind_dict",
"rangename3d",
"rangename3drel",
"cellname",
"cellnameabs",
"colname",
"FMLA_TYPE_CELL",
"FMLA_TYPE_SHARED",
"FMLA_TYPE_ARRAY",
"FMLA_TYPE_COND_FMT",
"FMLA_TYPE_DATA_VAL",
"FMLA_TYPE_NAME",
"Operand",
"Ref3D",
]
FMLA_TYPE_CELL = 1
FMLA_TYPE_SHARED = 2
FMLA_TYPE_ARRAY = 4
FMLA_TYPE_COND_FMT = 8
FMLA_TYPE_DATA_VAL = 16
FMLA_TYPE_NAME = 32
ALL_FMLA_TYPES = 63
FMLA_TYPEDESCR_MAP = {
1: "CELL",
2: "SHARED",
4: "ARRAY",
8: "COND-FMT",
16: "DATA-VAL",
32: "NAME",
}
_TOKEN_NOT_ALLOWED = {
0x01: ALL_FMLA_TYPES - FMLA_TYPE_CELL, # tExp
0x02: ALL_FMLA_TYPES - FMLA_TYPE_CELL, # tTbl
0x0F: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tIsect
0x10: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tUnion/List
0x11: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tRange
0x20: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tArray
0x23: FMLA_TYPE_SHARED, # tName
0x39: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tNameX
0x3A: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tRef3d
0x3B: FMLA_TYPE_SHARED + FMLA_TYPE_COND_FMT + FMLA_TYPE_DATA_VAL, # tArea3d
0x2C: FMLA_TYPE_CELL + FMLA_TYPE_ARRAY, # tRefN
0x2D: FMLA_TYPE_CELL + FMLA_TYPE_ARRAY, # tAreaN
# plus weird stuff like tMem*
}.get
oBOOL = 3
oERR = 4
oMSNG = 5 # tMissArg
oNUM = 2
oREF = -1
oREL = -2
oSTRG = 1
oUNK = 0
okind_dict = {
-2: "oREL",
-1: "oREF",
0: "oUNK",
1: "oSTRG",
2: "oNUM",
3: "oBOOL",
4: "oERR",
5: "oMSNG",
}
listsep = "," #### probably should depend on locale
# sztabN[opcode] -> the number of bytes to consume.
# -1 means variable
# -2 means this opcode not implemented in this version.
# Which N to use? Depends on biff_version; see szdict.
sztab0 = [
-2,
4,
4,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
-1,
-2,
-1,
8,
4,
2,
2,
3,
9,
8,
2,
3,
8,
4,
7,
5,
5,
5,
2,
4,
7,
4,
7,
2,
2,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
3,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
]
sztab1 = [
-2,
5,
5,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
-1,
-2,
-1,
11,
5,
2,
2,
3,
9,
9,
2,
3,
11,
4,
7,
7,
7,
7,
3,
4,
7,
4,
7,
3,
3,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
3,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
]
sztab2 = [
-2,
5,
5,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
-1,
-2,
-1,
11,
5,
2,
2,
3,
9,
9,
3,
4,
11,
4,
7,
7,
7,
7,
3,
4,
7,
4,
7,
3,
3,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
]
sztab3 = [
-2,
5,
5,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
-1,
-2,
-1,
-2,
-2,
2,
2,
3,
9,
9,
3,
4,
15,
4,
7,
7,
7,
7,
3,
4,
7,
4,
7,
3,
3,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
25,
18,
21,
18,
21,
-2,
-2,
]
sztab4 = [
-2,
5,
5,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
-1,
-1,
-1,
-2,
-2,
2,
2,
3,
9,
9,
3,
4,
5,
5,
9,
7,
7,
7,
3,
5,
9,
5,
9,
3,
3,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
7,
7,
11,
7,
11,
-2,
-2,
]
szdict = {
20: sztab0,
21: sztab0,
30: sztab1,
40: sztab2,
45: sztab2,
50: sztab3,
70: sztab3,
80: sztab4,
}
# For debugging purposes ... the name for each opcode
# (without the prefix "t" used on OOo docs)
onames = [
"Unk00",
"Exp",
"Tbl",
"Add",
"Sub",
"Mul",
"Div",
"Power",
"Concat",
"LT",
"LE",
"EQ",
"GE",
"GT",
"NE",
"Isect",
"List",
"Range",
"Uplus",
"Uminus",
"Percent",
"Paren",
"MissArg",
"Str",
"Extended",
"Attr",
"Sheet",
"EndSheet",
"Err",
"Bool",
"Int",
"Num",
"Array",
"Func",
"FuncVar",
"Name",
"Ref",
"Area",
"MemArea",
"MemErr",
"MemNoMem",
"MemFunc",
"RefErr",
"AreaErr",
"RefN",
"AreaN",
"MemAreaN",
"MemNoMemN",
"",
"",
"",
"",
"",
"",
"",
"",
"FuncCE",
"NameX",
"Ref3d",
"Area3d",
"RefErr3d",
"AreaErr3d",
"",
"",
]
func_defs = {
# index: (name, min#args, max#args, flags, #known_args, return_type, kargs)
0: ("COUNT", 0, 30, 0x04, 1, "V", "R"),
1: ("IF", 2, 3, 0x04, 3, "V", "VRR"),
2: ("ISNA", 1, 1, 0x02, 1, "V", "V"),
3: ("ISERROR", 1, 1, 0x02, 1, "V", "V"),
4: ("SUM", 0, 30, 0x04, 1, "V", "R"),
5: ("AVERAGE", 1, 30, 0x04, 1, "V", "R"),
6: ("MIN", 1, 30, 0x04, 1, "V", "R"),
7: ("MAX", 1, 30, 0x04, 1, "V", "R"),
8: ("ROW", 0, 1, 0x04, 1, "V", "R"),
9: ("COLUMN", 0, 1, 0x04, 1, "V", "R"),
10: ("NA", 0, 0, 0x02, 0, "V", ""),
11: ("NPV", 2, 30, 0x04, 2, "V", "VR"),
12: ("STDEV", 1, 30, 0x04, 1, "V", "R"),
13: ("DOLLAR", 1, 2, 0x04, 1, "V", "V"),
14: ("FIXED", 2, 3, 0x04, 3, "V", "VVV"),
15: ("SIN", 1, 1, 0x02, 1, "V", "V"),
16: ("COS", 1, 1, 0x02, 1, "V", "V"),
17: ("TAN", 1, 1, 0x02, 1, "V", "V"),
18: ("ATAN", 1, 1, 0x02, 1, "V", "V"),
19: ("PI", 0, 0, 0x02, 0, "V", ""),
20: ("SQRT", 1, 1, 0x02, 1, "V", "V"),
21: ("EXP", 1, 1, 0x02, 1, "V", "V"),
22: ("LN", 1, 1, 0x02, 1, "V", "V"),
23: ("LOG10", 1, 1, 0x02, 1, "V", "V"),
24: ("ABS", 1, 1, 0x02, 1, "V", "V"),
25: ("INT", 1, 1, 0x02, 1, "V", "V"),
26: ("SIGN", 1, 1, 0x02, 1, "V", "V"),
27: ("ROUND", 2, 2, 0x02, 2, "V", "VV"),
28: ("LOOKUP", 2, 3, 0x04, 2, "V", "VR"),
29: ("INDEX", 2, 4, 0x0C, 4, "R", "RVVV"),
30: ("REPT", 2, 2, 0x02, 2, "V", "VV"),
31: ("MID", 3, 3, 0x02, 3, "V", "VVV"),
32: ("LEN", 1, 1, 0x02, 1, "V", "V"),
33: ("VALUE", 1, 1, 0x02, 1, "V", "V"),
34: ("TRUE", 0, 0, 0x02, 0, "V", ""),
35: ("FALSE", 0, 0, 0x02, 0, "V", ""),
36: ("AND", 1, 30, 0x04, 1, "V", "R"),
37: ("OR", 1, 30, 0x04, 1, "V", "R"),
38: ("NOT", 1, 1, 0x02, 1, "V", "V"),
39: ("MOD", 2, 2, 0x02, 2, "V", "VV"),
40: ("DCOUNT", 3, 3, 0x02, 3, "V", "RRR"),
41: ("DSUM", 3, 3, 0x02, 3, "V", "RRR"),
42: ("DAVERAGE", 3, 3, 0x02, 3, "V", "RRR"),
43: ("DMIN", 3, 3, 0x02, 3, "V", "RRR"),
44: ("DMAX", 3, 3, 0x02, 3, "V", "RRR"),
45: ("DSTDEV", 3, 3, 0x02, 3, "V", "RRR"),
46: ("VAR", 1, 30, 0x04, 1, "V", "R"),
47: ("DVAR", 3, 3, 0x02, 3, "V", "RRR"),
48: ("TEXT", 2, 2, 0x02, 2, "V", "VV"),
49: ("LINEST", 1, 4, 0x04, 4, "A", "RRVV"),
50: ("TREND", 1, 4, 0x04, 4, "A", "RRRV"),
51: ("LOGEST", 1, 4, 0x04, 4, "A", "RRVV"),
52: ("GROWTH", 1, 4, 0x04, 4, "A", "RRRV"),
56: ("PV", 3, 5, 0x04, 5, "V", "VVVVV"),
57: ("FV", 3, 5, 0x04, 5, "V", "VVVVV"),
58: ("NPER", 3, 5, 0x04, 5, "V", "VVVVV"),
59: ("PMT", 3, 5, 0x04, 5, "V", "VVVVV"),
60: ("RATE", 3, 6, 0x04, 6, "V", "VVVVVV"),
61: ("MIRR", 3, 3, 0x02, 3, "V", "RVV"),
62: ("IRR", 1, 2, 0x04, 2, "V", "RV"),
63: ("RAND", 0, 0, 0x0A, 0, "V", ""),
64: ("MATCH", 2, 3, 0x04, 3, "V", "VRR"),
65: ("DATE", 3, 3, 0x02, 3, "V", "VVV"),
66: ("TIME", 3, 3, 0x02, 3, "V", "VVV"),
67: ("DAY", 1, 1, 0x02, 1, "V", "V"),
68: ("MONTH", 1, 1, 0x02, 1, "V", "V"),
69: ("YEAR", 1, 1, 0x02, 1, "V", "V"),
70: ("WEEKDAY", 1, 2, 0x04, 2, "V", "VV"),
71: ("HOUR", 1, 1, 0x02, 1, "V", "V"),
72: ("MINUTE", 1, 1, 0x02, 1, "V", "V"),
73: ("SECOND", 1, 1, 0x02, 1, "V", "V"),
74: ("NOW", 0, 0, 0x0A, 0, "V", ""),
75: ("AREAS", 1, 1, 0x02, 1, "V", "R"),
76: ("ROWS", 1, 1, 0x02, 1, "V", "R"),
77: ("COLUMNS", 1, 1, 0x02, 1, "V", "R"),
78: ("OFFSET", 3, 5, 0x04, 5, "R", "RVVVV"),
82: ("SEARCH", 2, 3, 0x04, 3, "V", "VVV"),
83: ("TRANSPOSE", 1, 1, 0x02, 1, "A", "A"),
86: ("TYPE", 1, 1, 0x02, 1, "V", "V"),
92: ("SERIESSUM", 4, 4, 0x02, 4, "V", "VVVA"),
97: ("ATAN2", 2, 2, 0x02, 2, "V", "VV"),
98: ("ASIN", 1, 1, 0x02, 1, "V", "V"),
99: ("ACOS", 1, 1, 0x02, 1, "V", "V"),
100: ("CHOOSE", 2, 30, 0x04, 2, "V", "VR"),
101: ("HLOOKUP", 3, 4, 0x04, 4, "V", "VRRV"),
102: ("VLOOKUP", 3, 4, 0x04, 4, "V", "VRRV"),
105: ("ISREF", 1, 1, 0x02, 1, "V", "R"),
109: ("LOG", 1, 2, 0x04, 2, "V", "VV"),
111: ("CHAR", 1, 1, 0x02, 1, "V", "V"),
112: ("LOWER", 1, 1, 0x02, 1, "V", "V"),
113: ("UPPER", 1, 1, 0x02, 1, "V", "V"),
114: ("PROPER", 1, 1, 0x02, 1, "V", "V"),
115: ("LEFT", 1, 2, 0x04, 2, "V", "VV"),
116: ("RIGHT", 1, 2, 0x04, 2, "V", "VV"),
117: ("EXACT", 2, 2, 0x02, 2, "V", "VV"),
118: ("TRIM", 1, 1, 0x02, 1, "V", "V"),
119: ("REPLACE", 4, 4, 0x02, 4, "V", "VVVV"),
120: ("SUBSTITUTE", 3, 4, 0x04, 4, "V", "VVVV"),
121: ("CODE", 1, 1, 0x02, 1, "V", "V"),
124: ("FIND", 2, 3, 0x04, 3, "V", "VVV"),
125: ("CELL", 1, 2, 0x0C, 2, "V", "VR"),
126: ("ISERR", 1, 1, 0x02, 1, "V", "V"),
127: ("ISTEXT", 1, 1, 0x02, 1, "V", "V"),
128: ("ISNUMBER", 1, 1, 0x02, 1, "V", "V"),
129: ("ISBLANK", 1, 1, 0x02, 1, "V", "V"),
130: ("T", 1, 1, 0x02, 1, "V", "R"),
131: ("N", 1, 1, 0x02, 1, "V", "R"),
140: ("DATEVALUE", 1, 1, 0x02, 1, "V", "V"),
141: ("TIMEVALUE", 1, 1, 0x02, 1, "V", "V"),
142: ("SLN", 3, 3, 0x02, 3, "V", "VVV"),
143: ("SYD", 4, 4, 0x02, 4, "V", "VVVV"),
144: ("DDB", 4, 5, 0x04, 5, "V", "VVVVV"),
148: ("INDIRECT", 1, 2, 0x0C, 2, "R", "VV"),
162: ("CLEAN", 1, 1, 0x02, 1, "V", "V"),
163: ("MDETERM", 1, 1, 0x02, 1, "V", "A"),
164: ("MINVERSE", 1, 1, 0x02, 1, "A", "A"),
165: ("MMULT", 2, 2, 0x02, 2, "A", "AA"),
167: ("IPMT", 4, 6, 0x04, 6, "V", "VVVVVV"),
168: ("PPMT", 4, 6, 0x04, 6, "V", "VVVVVV"),
169: ("COUNTA", 0, 30, 0x04, 1, "V", "R"),
183: ("PRODUCT", 0, 30, 0x04, 1, "V", "R"),
184: ("FACT", 1, 1, 0x02, 1, "V", "V"),
189: ("DPRODUCT", 3, 3, 0x02, 3, "V", "RRR"),
190: ("ISNONTEXT", 1, 1, 0x02, 1, "V", "V"),
193: ("STDEVP", 1, 30, 0x04, 1, "V", "R"),
194: ("VARP", 1, 30, 0x04, 1, "V", "R"),
195: ("DSTDEVP", 3, 3, 0x02, 3, "V", "RRR"),
196: ("DVARP", 3, 3, 0x02, 3, "V", "RRR"),
197: ("TRUNC", 1, 2, 0x04, 2, "V", "VV"),
198: ("ISLOGICAL", 1, 1, 0x02, 1, "V", "V"),
199: ("DCOUNTA", 3, 3, 0x02, 3, "V", "RRR"),
204: ("USDOLLAR", 1, 2, 0x04, 2, "V", "VV"),
205: ("FINDB", 2, 3, 0x04, 3, "V", "VVV"),
206: ("SEARCHB", 2, 3, 0x04, 3, "V", "VVV"),
207: ("REPLACEB", 4, 4, 0x02, 4, "V", "VVVV"),
208: ("LEFTB", 1, 2, 0x04, 2, "V", "VV"),
209: ("RIGHTB", 1, 2, 0x04, 2, "V", "VV"),
210: ("MIDB", 3, 3, 0x02, 3, "V", "VVV"),
211: ("LENB", 1, 1, 0x02, 1, "V", "V"),
212: ("ROUNDUP", 2, 2, 0x02, 2, "V", "VV"),
213: ("ROUNDDOWN", 2, 2, 0x02, 2, "V", "VV"),
214: ("ASC", 1, 1, 0x02, 1, "V", "V"),
215: ("DBCS", 1, 1, 0x02, 1, "V", "V"),
216: ("RANK", 2, 3, 0x04, 3, "V", "VRV"),
219: ("ADDRESS", 2, 5, 0x04, 5, "V", "VVVVV"),
220: ("DAYS360", 2, 3, 0x04, 3, "V", "VVV"),
221: ("TODAY", 0, 0, 0x0A, 0, "V", ""),
222: ("VDB", 5, 7, 0x04, 7, "V", "VVVVVVV"),
227: ("MEDIAN", 1, 30, 0x04, 1, "V", "R"),
228: ("SUMPRODUCT", 1, 30, 0x04, 1, "V", "A"),
229: ("SINH", 1, 1, 0x02, 1, "V", "V"),
230: ("COSH", 1, 1, 0x02, 1, "V", "V"),
231: ("TANH", 1, 1, 0x02, 1, "V", "V"),
232: ("ASINH", 1, 1, 0x02, 1, "V", "V"),
233: ("ACOSH", 1, 1, 0x02, 1, "V", "V"),
234: ("ATANH", 1, 1, 0x02, 1, "V", "V"),
235: ("DGET", 3, 3, 0x02, 3, "V", "RRR"),
244: ("INFO", 1, 1, 0x02, 1, "V", "V"),
247: ("DB", 4, 5, 0x04, 5, "V", "VVVVV"),
252: ("FREQUENCY", 2, 2, 0x02, 2, "A", "RR"),
261: ("ERROR.TYPE", 1, 1, 0x02, 1, "V", "V"),
269: ("AVEDEV", 1, 30, 0x04, 1, "V", "R"),
270: ("BETADIST", 3, 5, 0x04, 1, "V", "V"),
271: ("GAMMALN", 1, 1, 0x02, 1, "V", "V"),
272: ("BETAINV", 3, 5, 0x04, 1, "V", "V"),
273: ("BINOMDIST", 4, 4, 0x02, 4, "V", "VVVV"),
274: ("CHIDIST", 2, 2, 0x02, 2, "V", "VV"),
275: ("CHIINV", 2, 2, 0x02, 2, "V", "VV"),
276: ("COMBIN", 2, 2, 0x02, 2, "V", "VV"),
277: ("CONFIDENCE", 3, 3, 0x02, 3, "V", "VVV"),
278: ("CRITBINOM", 3, 3, 0x02, 3, "V", "VVV"),
279: ("EVEN", 1, 1, 0x02, 1, "V", "V"),
280: ("EXPONDIST", 3, 3, 0x02, 3, "V", "VVV"),
281: ("FDIST", 3, 3, 0x02, 3, "V", "VVV"),
282: ("FINV", 3, 3, 0x02, 3, "V", "VVV"),
283: ("FISHER", 1, 1, 0x02, 1, "V", "V"),
284: ("FISHERINV", 1, 1, 0x02, 1, "V", "V"),
285: ("FLOOR", 2, 2, 0x02, 2, "V", "VV"),
286: ("GAMMADIST", 4, 4, 0x02, 4, "V", "VVVV"),
287: ("GAMMAINV", 3, 3, 0x02, 3, "V", "VVV"),
288: ("CEILING", 2, 2, 0x02, 2, "V", "VV"),
289: ("HYPGEOMDIST", 4, 4, 0x02, 4, "V", "VVVV"),
290: ("LOGNORMDIST", 3, 3, 0x02, 3, "V", "VVV"),
291: ("LOGINV", 3, 3, 0x02, 3, "V", "VVV"),
292: ("NEGBINOMDIST", 3, 3, 0x02, 3, "V", "VVV"),
293: ("NORMDIST", 4, 4, 0x02, 4, "V", "VVVV"),
294: ("NORMSDIST", 1, 1, 0x02, 1, "V", "V"),
295: ("NORMINV", 3, 3, 0x02, 3, "V", "VVV"),
296: ("NORMSINV", 1, 1, 0x02, 1, "V", "V"),
297: ("STANDARDIZE", 3, 3, 0x02, 3, "V", "VVV"),
298: ("ODD", 1, 1, 0x02, 1, "V", "V"),
299: ("PERMUT", 2, 2, 0x02, 2, "V", "VV"),
300: ("POISSON", 3, 3, 0x02, 3, "V", "VVV"),
301: ("TDIST", 3, 3, 0x02, 3, "V", "VVV"),
302: ("WEIBULL", 4, 4, 0x02, 4, "V", "VVVV"),
303: ("SUMXMY2", 2, 2, 0x02, 2, "V", "AA"),
304: ("SUMX2MY2", 2, 2, 0x02, 2, "V", "AA"),
305: ("SUMX2PY2", 2, 2, 0x02, 2, "V", "AA"),
306: ("CHITEST", 2, 2, 0x02, 2, "V", "AA"),
307: ("CORREL", 2, 2, 0x02, 2, "V", "AA"),
308: ("COVAR", 2, 2, 0x02, 2, "V", "AA"),
309: ("FORECAST", 3, 3, 0x02, 3, "V", "VAA"),
310: ("FTEST", 2, 2, 0x02, 2, "V", "AA"),
311: ("INTERCEPT", 2, 2, 0x02, 2, "V", "AA"),
312: ("PEARSON", 2, 2, 0x02, 2, "V", "AA"),
313: ("RSQ", 2, 2, 0x02, 2, "V", "AA"),
314: ("STEYX", 2, 2, 0x02, 2, "V", "AA"),
315: ("SLOPE", 2, 2, 0x02, 2, "V", "AA"),
316: ("TTEST", 4, 4, 0x02, 4, "V", "AAVV"),
317: ("PROB", 3, 4, 0x04, 3, "V", "AAV"),
318: ("DEVSQ", 1, 30, 0x04, 1, "V", "R"),
319: ("GEOMEAN", 1, 30, 0x04, 1, "V", "R"),
320: ("HARMEAN", 1, 30, 0x04, 1, "V", "R"),
321: ("SUMSQ", 0, 30, 0x04, 1, "V", "R"),
322: ("KURT", 1, 30, 0x04, 1, "V", "R"),
323: ("SKEW", 1, 30, 0x04, 1, "V", "R"),
324: ("ZTEST", 2, 3, 0x04, 2, "V", "RV"),
325: ("LARGE", 2, 2, 0x02, 2, "V", "RV"),
326: ("SMALL", 2, 2, 0x02, 2, "V", "RV"),
327: ("QUARTILE", 2, 2, 0x02, 2, "V", "RV"),
328: ("PERCENTILE", 2, 2, 0x02, 2, "V", "RV"),
329: ("PERCENTRANK", 2, 3, 0x04, 2, "V", "RV"),
330: ("MODE", 1, 30, 0x04, 1, "V", "A"),
331: ("TRIMMEAN", 2, 2, 0x02, 2, "V", "RV"),
332: ("TINV", 2, 2, 0x02, 2, "V", "VV"),
336: ("CONCATENATE", 0, 30, 0x04, 1, "V", "V"),
337: ("POWER", 2, 2, 0x02, 2, "V", "VV"),
342: ("RADIANS", 1, 1, 0x02, 1, "V", "V"),
343: ("DEGREES", 1, 1, 0x02, 1, "V", "V"),
344: ("SUBTOTAL", 2, 30, 0x04, 2, "V", "VR"),
345: ("SUMIF", 2, 3, 0x04, 3, "V", "RVR"),
346: ("COUNTIF", 2, 2, 0x02, 2, "V", "RV"),
347: ("COUNTBLANK", 1, 1, 0x02, 1, "V", "R"),
350: ("ISPMT", 4, 4, 0x02, 4, "V", "VVVV"),
351: ("DATEDIF", 3, 3, 0x02, 3, "V", "VVV"),
352: ("DATESTRING", 1, 1, 0x02, 1, "V", "V"),
353: ("NUMBERSTRING", 2, 2, 0x02, 2, "V", "VV"),
354: ("ROMAN", 1, 2, 0x04, 2, "V", "VV"),
358: ("GETPIVOTDATA", 2, 2, 0x02, 2, "V", "RV"),
359: ("HYPERLINK", 1, 2, 0x04, 2, "V", "VV"),
360: ("PHONETIC", 1, 1, 0x02, 1, "V", "V"),
361: ("AVERAGEA", 1, 30, 0x04, 1, "V", "R"),
362: ("MAXA", 1, 30, 0x04, 1, "V", "R"),
363: ("MINA", 1, 30, 0x04, 1, "V", "R"),
364: ("STDEVPA", 1, 30, 0x04, 1, "V", "R"),
365: ("VARPA", 1, 30, 0x04, 1, "V", "R"),
366: ("STDEVA", 1, 30, 0x04, 1, "V", "R"),
367: ("VARA", 1, 30, 0x04, 1, "V", "R"),
368: ("BAHTTEXT", 1, 1, 0x02, 1, "V", "V"),
369: ("THAIDAYOFWEEK", 1, 1, 0x02, 1, "V", "V"),
370: ("THAIDIGIT", 1, 1, 0x02, 1, "V", "V"),
371: ("THAIMONTHOFYEAR", 1, 1, 0x02, 1, "V", "V"),
372: ("THAINUMSOUND", 1, 1, 0x02, 1, "V", "V"),
373: ("THAINUMSTRING", 1, 1, 0x02, 1, "V", "V"),
374: ("THAISTRINGLENGTH", 1, 1, 0x02, 1, "V", "V"),
375: ("ISTHAIDIGIT", 1, 1, 0x02, 1, "V", "V"),
376: ("ROUNDBAHTDOWN", 1, 1, 0x02, 1, "V", "V"),
377: ("ROUNDBAHTUP", 1, 1, 0x02, 1, "V", "V"),
378: ("THAIYEAR", 1, 1, 0x02, 1, "V", "V"),
379: ("RTD", 2, 5, 0x04, 1, "V", "V"),
}
tAttrNames = {
0x00: "Skip??", # seen in SAMPLES.XLS which shipped with Excel 5.0
0x01: "Volatile",
0x02: "If",
0x04: "Choose",
0x08: "Skip",
0x10: "Sum",
0x20: "Assign",
0x40: "Space",
0x41: "SpaceVolatile",
}
error_opcodes = {0x07, 0x08, 0x0A, 0x0B, 0x1C, 0x1D, 0x2F}
tRangeFuncs = (min, max, min, max, min, max)
tIsectFuncs = (max, min, max, min, max, min)
def do_box_funcs(box_funcs, boxa, boxb):
return tuple(func(numa, numb) for func, numa, numb in zip(box_funcs, boxa.coords, boxb.coords))
def adjust_cell_addr_biff8(rowval, colval, reldelta, browx=None, bcolx=None):
row_rel = (colval >> 15) & 1
col_rel = (colval >> 14) & 1
rowx = rowval
colx = colval & 0xFF
if reldelta:
if row_rel and rowx >= 32768:
rowx -= 65536
if col_rel and colx >= 128:
colx -= 256
else:
if row_rel:
rowx -= browx
if col_rel:
colx -= bcolx
return rowx, colx, row_rel, col_rel
def adjust_cell_addr_biff_le7(rowval, colval, reldelta, browx=None, bcolx=None):
row_rel = (rowval >> 15) & 1
col_rel = (rowval >> 14) & 1
rowx = rowval & 0x3FFF
colx = colval
if reldelta:
if row_rel and rowx >= 8192:
rowx -= 16384
if col_rel and colx >= 128:
colx -= 256
else:
if row_rel:
rowx -= browx
if col_rel:
colx -= bcolx
return rowx, colx, row_rel, col_rel
def get_cell_addr(data, pos, bv, reldelta, browx=None, bcolx=None):
if bv >= 80:
rowval, colval = unpack("<HH", data[pos : pos + 4])
# print " rv=%04xh cv=%04xh" % (rowval, colval)
return adjust_cell_addr_biff8(rowval, colval, reldelta, browx, bcolx)
else:
rowval, colval = unpack("<HB", data[pos : pos + 3])
# print " rv=%04xh cv=%04xh" % (rowval, colval)
return adjust_cell_addr_biff_le7(rowval, colval, reldelta, browx, bcolx)
def get_cell_range_addr(data, pos, bv, reldelta, browx=None, bcolx=None):
if bv >= 80:
row1val, row2val, col1val, col2val = unpack("<HHHH", data[pos : pos + 8])
# print " rv=%04xh cv=%04xh" % (row1val, col1val)
# print " rv=%04xh cv=%04xh" % (row2val, col2val)
res1 = adjust_cell_addr_biff8(row1val, col1val, reldelta, browx, bcolx)
res2 = adjust_cell_addr_biff8(row2val, col2val, reldelta, browx, bcolx)
return res1, res2
else:
row1val, row2val, col1val, col2val = unpack("<HHBB", data[pos : pos + 6])
# print " rv=%04xh cv=%04xh" % (row1val, col1val)
# print " rv=%04xh cv=%04xh" % (row2val, col2val)
res1 = adjust_cell_addr_biff_le7(row1val, col1val, reldelta, browx, bcolx)
res2 = adjust_cell_addr_biff_le7(row2val, col2val, reldelta, browx, bcolx)
return res1, res2
def get_externsheet_local_range(bk, refx, blah=0):
try:
info = bk._externsheet_info[refx]
except IndexError:
print(
"!!! get_externsheet_local_range: refx=%d, not in range(%d)"
% (refx, len(bk._externsheet_info)),
file=bk.logfile,
)
return (-101, -101)
ref_recordx, ref_first_sheetx, ref_last_sheetx = info
if ref_recordx == bk._supbook_addins_inx:
if blah:
print(
"/// get_externsheet_local_range(refx=%d) -> addins %r" % (refx, info),
file=bk.logfile,
)
assert ref_first_sheetx == 0xFFFE == ref_last_sheetx
return (-5, -5)
if ref_recordx != bk._supbook_locals_inx:
if blah:
print(
"/// get_externsheet_local_range(refx=%d) -> external %r" % (refx, info),
file=bk.logfile,
)
return (-4, -4) # external reference
if ref_first_sheetx == 0xFFFE == ref_last_sheetx:
if blah:
print(
"/// get_externsheet_local_range(refx=%d) -> unspecified sheet %r" % (refx, info),
file=bk.logfile,
)
return (-1, -1) # internal reference, any sheet
if ref_first_sheetx == 0xFFFF == ref_last_sheetx:
if blah:
print(
"/// get_externsheet_local_range(refx=%d) -> deleted sheet(s)" % (refx,),
file=bk.logfile,
)
return (-2, -2) # internal reference, deleted sheet(s)
nsheets = len(bk._all_sheets_map)
if not (0 <= ref_first_sheetx <= ref_last_sheetx < nsheets):
if blah:
print("/// get_externsheet_local_range(refx=%d) -> %r" % (refx, info), file=bk.logfile)
print("--- first/last sheet not in range(%d)" % nsheets, file=bk.logfile)
return (-102, -102) # stuffed up somewhere :-(
xlrd_sheetx1 = bk._all_sheets_map[ref_first_sheetx]
xlrd_sheetx2 = bk._all_sheets_map[ref_last_sheetx]
if not (0 <= xlrd_sheetx1 <= xlrd_sheetx2):
return (-3, -3) # internal reference, but to a macro sheet
return xlrd_sheetx1, xlrd_sheetx2
def get_externsheet_local_range_b57(bk, raw_extshtx, ref_first_sheetx, ref_last_sheetx, blah=0):
if raw_extshtx > 0:
if blah:
print(
"/// get_externsheet_local_range_b57(raw_extshtx=%d) -> external" % raw_extshtx,
file=bk.logfile,
)
return (-4, -4) # external reference
if ref_first_sheetx == -1 and ref_last_sheetx == -1:
return (-2, -2) # internal reference, deleted sheet(s)
nsheets = len(bk._all_sheets_map)
if not (0 <= ref_first_sheetx <= ref_last_sheetx < nsheets):
if blah:
print(
"/// get_externsheet_local_range_b57(%d, %d, %d) -> ???"
% (raw_extshtx, ref_first_sheetx, ref_last_sheetx),
file=bk.logfile,
)
print("--- first/last sheet not in range(%d)" % nsheets, file=bk.logfile)
return (-103, -103) # stuffed up somewhere :-(
xlrd_sheetx1 = bk._all_sheets_map[ref_first_sheetx]
xlrd_sheetx2 = bk._all_sheets_map[ref_last_sheetx]
if not (0 <= xlrd_sheetx1 <= xlrd_sheetx2):
return (-3, -3) # internal reference, but to a macro sheet
return xlrd_sheetx1, xlrd_sheetx2
class FormulaError(Exception):
pass
[docs]class Operand:
"""
Used in evaluating formulas.
The following table describes the kinds and how their values
are represented.
.. raw:: html
<table border="1" cellpadding="7">
<tr>
<th>Kind symbol</th>
<th>Kind number</th>
<th>Value representation</th>
</tr>
<tr>
<td>oBOOL</td>
<td align="center">3</td>
<td>integer: 0 => False; 1 => True</td>
</tr>
<tr>
<td>oERR</td>
<td align="center">4</td>
<td>None, or an int error code (same as XL_CELL_ERROR in the Cell class).
</td>
</tr>
<tr>
<td>oMSNG</td>
<td align="center">5</td>
<td>Used by Excel as a placeholder for a missing (not supplied) function
argument. Should *not* appear as a final formula result. Value is None.</td>
</tr>
<tr>
<td>oNUM</td>
<td align="center">2</td>
<td>A float. Note that there is no way of distinguishing dates.</td>
</tr>
<tr>
<td>oREF</td>
<td align="center">-1</td>
<td>The value is either None or a non-empty list of
absolute Ref3D instances.<br>
</td>
</tr>
<tr>
<td>oREL</td>
<td align="center">-2</td>
<td>The value is None or a non-empty list of
fully or partially relative Ref3D instances.
</td>
</tr>
<tr>
<td>oSTRG</td>
<td align="center">1</td>
<td>A Unicode string.</td>
</tr>
<tr>
<td>oUNK</td>
<td align="center">0</td>
<td>The kind is unknown or ambiguous. The value is None</td>
</tr>
</table>
"""
#: None means that the actual value of the operand is a variable
#: (depends on cell data), not a constant.
value = None
#: oUNK means that the kind of operand is not known unambiguously.
kind = oUNK
#: The reconstituted text of the original formula. Function names will be
#: in English irrespective of the original language, which doesn't seem
#: to be recorded anywhere. The separator is ",", not ";" or whatever else
#: might be more appropriate for the end-user's locale; patches welcome.
text = "?"
def __init__(self, akind=None, avalue=None, arank=0, atext="?"):
if akind is not None:
self.kind = akind
if avalue is not None:
self.value = avalue
self.rank = arank
# rank is an internal gizmo (operator precedence);
# it's used in reconstructing formula text.
self.text = atext
def __repr__(self):
kind_text = okind_dict.get(self.kind, "?Unknown kind?")
return f"Operand(kind={kind_text}, value={self.value!r}, text={self.text!r})"
[docs]class Ref3D(tuple):
"""
Represents an absolute or relative 3-dimensional reference to a box
of one or more cells.
The ``coords`` attribute is a tuple of the form::
(shtxlo, shtxhi, rowxlo, rowxhi, colxlo, colxhi)
where ``0 <= thingxlo <= thingx < thingxhi``.
.. note::
It is quite possible to have ``thingx > nthings``; for example
``Print_Titles`` could have ``colxhi == 256`` and/or ``rowxhi == 65536``
irrespective of how many columns/rows are actually used in the worksheet.
The caller will need to decide how to handle this situation.
Keyword: :class:`IndexError` :-)
The components of the coords attribute are also available as individual
attributes: ``shtxlo``, ``shtxhi``, ``rowxlo``, ``rowxhi``, ``colxlo``, and
``colxhi``.
The ``relflags`` attribute is a 6-tuple of flags which indicate whether
the corresponding (sheet|row|col)(lo|hi) is relative (1) or absolute (0).
.. note::
There is necessarily no information available as to what cell(s)
the reference could possibly be relative to. The caller must decide what
if any use to make of ``oREL`` operands.
.. note:
A partially relative reference may well be a typo.
For example, define name ``A1Z10`` as ``$a$1:$z10`` (missing ``$`` after
``z``) while the cursor is on cell ``Sheet3!A27``.
The resulting :class:`Ref3D` instance will have
``coords = (2, 3, 0, -16, 0, 26)``
and ``relflags = (0, 0, 0, 1, 0, 0).<br>
So far, only one possibility of a sheet-relative component in
a reference has been noticed: a 2D reference located in the
"current sheet".
This will appear as ``coords = (0, 1, ...)`` and
``relflags = (1, 1, ...)``.
.. versionadded:: 0.6.0
"""
def __init__(self, atuple):
self.coords = atuple[0:6]
self.relflags = atuple[6:12]
if not self.relflags:
self.relflags = (0, 0, 0, 0, 0, 0)
(self.shtxlo, self.shtxhi, self.rowxlo, self.rowxhi, self.colxlo, self.colxhi) = self.coords
def __repr__(self):
if not self.relflags or self.relflags == (0, 0, 0, 0, 0, 0):
return f"Ref3D(coords={self.coords!r})"
else:
return f"Ref3D(coords={self.coords!r}, relflags={self.relflags!r})"
tAdd = 0x03
tSub = 0x04
tMul = 0x05
tDiv = 0x06
tPower = 0x07
tConcat = 0x08
tLT, tLE, tEQ, tGE, tGT, tNE = range(0x09, 0x0F)
def nop(x):
return x
def _opr_pow(x, y):
return x**y
def _opr_lt(x, y):
return x < y
def _opr_le(x, y):
return x <= y
def _opr_eq(x, y):
return x == y
def _opr_ge(x, y):
return x >= y
def _opr_gt(x, y):
return x > y
def _opr_ne(x, y):
return x != y
def num2strg(num):
"""
Attempt to emulate Excel's default conversion from number to string.
"""
s = str(num)
if s.endswith(".0"):
s = s[:-2]
return s
_arith_argdict = {oNUM: nop, oSTRG: float}
_cmp_argdict = {oNUM: nop, oSTRG: nop}
# Seems no conversions done on relops; in Excel, "1" > 9 produces TRUE.
_strg_argdict = {oNUM: num2strg, oSTRG: nop}
binop_rules = {
tAdd: (_arith_argdict, oNUM, opr.add, 30, "+"),
tSub: (_arith_argdict, oNUM, opr.sub, 30, "-"),
tMul: (_arith_argdict, oNUM, opr.mul, 40, "*"),
tDiv: (_arith_argdict, oNUM, opr.truediv, 40, "/"),
tPower: (_arith_argdict, oNUM, _opr_pow, 50, "^"),
tConcat: (_strg_argdict, oSTRG, opr.add, 20, "&"),
tLT: (_cmp_argdict, oBOOL, _opr_lt, 10, "<"),
tLE: (_cmp_argdict, oBOOL, _opr_le, 10, "<="),
tEQ: (_cmp_argdict, oBOOL, _opr_eq, 10, "="),
tGE: (_cmp_argdict, oBOOL, _opr_ge, 10, ">="),
tGT: (_cmp_argdict, oBOOL, _opr_gt, 10, ">"),
tNE: (_cmp_argdict, oBOOL, _opr_ne, 10, "<>"),
}
unop_rules = {
0x13: (lambda x: -x, 70, "-", ""), # unary minus
0x12: (lambda x: x, 70, "+", ""), # unary plus
0x14: (lambda x: x / 100.0, 60, "", "%"), # percent
}
LEAF_RANK = 90
FUNC_RANK = 90
STACK_ALARM_LEVEL = 5
STACK_PANIC_LEVEL = 10
def evaluate_name_formula(bk, nobj, namex, blah=0, level=0):
if level > STACK_ALARM_LEVEL:
blah = 1
data = nobj.raw_formula
fmlalen = nobj.basic_formula_len
bv = bk.biff_version
reldelta = 1 # All defined name formulas use "Method B" [OOo docs]
if blah:
print(
"::: evaluate_name_formula %r %r %d %d %r level=%d"
% (namex, nobj.name, fmlalen, bv, data, level),
file=bk.logfile,
)
hex_char_dump(data, 0, fmlalen, fout=bk.logfile)
if level > STACK_PANIC_LEVEL:
raise XLRDError("Excessive indirect references in NAME formula")
sztab = szdict[bv]
pos = 0
stack = []
any_rel = 0
any_err = 0
any_external = 0
unk_opnd = Operand(oUNK, None)
error_opnd = Operand(oERR, None)
spush = stack.append
def do_binop(opcd, stk):
assert len(stk) >= 2
bop = stk.pop()
aop = stk.pop()
argdict, result_kind, func, rank, sym = binop_rules[opcd]
otext = "".join(
[
"("[: aop.rank < rank],
aop.text,
")"[: aop.rank < rank],
sym,
"("[: bop.rank < rank],
bop.text,
")"[: bop.rank < rank],
]
)
resop = Operand(result_kind, None, rank, otext)
try:
bconv = argdict[bop.kind]
aconv = argdict[aop.kind]
except KeyError:
stk.append(resop)
return
if bop.value is None or aop.value is None:
stk.append(resop)
return
bval = bconv(bop.value)
aval = aconv(aop.value)
result = func(aval, bval)
if result_kind == oBOOL:
result = 1 if result else 0
resop.value = result
stk.append(resop)
def do_unaryop(opcode, result_kind, stk):
assert len(stk) >= 1
aop = stk.pop()
val = aop.value
func, rank, sym1, sym2 = unop_rules[opcode]
otext = "".join(
[
sym1,
"("[: aop.rank < rank],
aop.text,
")"[: aop.rank < rank],
sym2,
]
)
if val is not None:
val = func(val)
stk.append(Operand(result_kind, val, rank, otext))
def not_in_name_formula(op_arg, oname_arg):
msg = f"ERROR *** Token 0x{op_arg:02x} ({oname_arg}) found in NAME formula"
raise FormulaError(msg)
if fmlalen == 0:
stack = [unk_opnd]
while 0 <= pos < fmlalen:
op = BYTES_ORD(data[pos])
opcode = op & 0x1F
optype = (op & 0x60) >> 5
if optype:
opx = opcode + 32
else:
opx = opcode
oname = onames[opx] # + [" RVA"][optype]
sz = sztab[opx]
if blah:
print(
"Pos:%d Op:0x%02x Name:t%s Sz:%d opcode:%02xh optype:%02xh"
% (pos, op, oname, sz, opcode, optype),
file=bk.logfile,
)
print("Stack =", stack, file=bk.logfile)
if sz == -2:
msg = 'ERROR *** Unexpected token 0x%02x ("%s"); biff_version=%d' % (op, oname, bv)
raise FormulaError(msg)
if not optype:
if 0x00 <= opcode <= 0x02: # unk_opnd, tExp, tTbl
not_in_name_formula(op, oname)
elif 0x03 <= opcode <= 0x0E:
# Add, Sub, Mul, Div, Power
# tConcat
# tLT, ..., tNE
do_binop(opcode, stack)
elif opcode == 0x0F: # tIsect
if blah:
print("tIsect pre", stack, file=bk.logfile)
assert len(stack) >= 2
bop = stack.pop()
aop = stack.pop()
sym = " "
rank = 80 ########## check #######
otext = "".join(
[
"("[: aop.rank < rank],
aop.text,
")"[: aop.rank < rank],
sym,
"("[: bop.rank < rank],
bop.text,
")"[: bop.rank < rank],
]
)
res = Operand(oREF)
res.text = otext
if bop.kind == oERR or aop.kind == oERR:
res.kind = oERR
elif bop.kind == oUNK or aop.kind == oUNK:
# This can happen with undefined
# (go search in the current sheet) labels.
# For example =Bob Sales
# Each label gets a NAME record with an empty formula (!)
# Evaluation of the tName token classifies it as oUNK
# res.kind = oREF
pass
elif bop.kind == oREF == aop.kind:
if aop.value is not None and bop.value is not None:
assert len(aop.value) == 1
assert len(bop.value) == 1
coords = do_box_funcs(tIsectFuncs, aop.value[0], bop.value[0])
res.value = [Ref3D(coords)]
elif bop.kind == oREL == aop.kind:
res.kind = oREL
if aop.value is not None and bop.value is not None:
assert len(aop.value) == 1
assert len(bop.value) == 1
coords = do_box_funcs(tIsectFuncs, aop.value[0], bop.value[0])
relfa = aop.value[0].relflags
relfb = bop.value[0].relflags
if relfa == relfb:
res.value = [Ref3D(coords + relfa)]
else:
pass
spush(res)
if blah:
print("tIsect post", stack, file=bk.logfile)
elif opcode == 0x10: # tList
if blah:
print("tList pre", stack, file=bk.logfile)
assert len(stack) >= 2
bop = stack.pop()
aop = stack.pop()
sym = ","
rank = 80 ########## check #######
otext = "".join(
[
"("[: aop.rank < rank],
aop.text,
")"[: aop.rank < rank],
sym,
"("[: bop.rank < rank],
bop.text,
")"[: bop.rank < rank],
]
)
res = Operand(oREF, None, rank, otext)
if bop.kind == oERR or aop.kind == oERR:
res.kind = oERR
elif bop.kind in (oREF, oREL) and aop.kind in (oREF, oREL):
res.kind = oREF
if aop.kind == oREL or bop.kind == oREL:
res.kind = oREL
if aop.value is not None and bop.value is not None:
assert len(aop.value) >= 1
assert len(bop.value) == 1
res.value = aop.value + bop.value
else:
pass
spush(res)
if blah:
print("tList post", stack, file=bk.logfile)
elif opcode == 0x11: # tRange
if blah:
print("tRange pre", stack, file=bk.logfile)
assert len(stack) >= 2
bop = stack.pop()
aop = stack.pop()
sym = ":"
rank = 80 ########## check #######
otext = "".join(
[
"("[: aop.rank < rank],
aop.text,
")"[: aop.rank < rank],
sym,
"("[: bop.rank < rank],
bop.text,
")"[: bop.rank < rank],
]
)
res = Operand(oREF, None, rank, otext)
if bop.kind == oERR or aop.kind == oERR:
res = oERR
elif bop.kind == oREF == aop.kind:
if aop.value is not None and bop.value is not None:
assert len(aop.value) == 1
assert len(bop.value) == 1
coords = do_box_funcs(tRangeFuncs, aop.value[0], bop.value[0])
res.value = [Ref3D(coords)]
elif bop.kind == oREL == aop.kind:
res.kind = oREL
if aop.value is not None and bop.value is not None:
assert len(aop.value) == 1
assert len(bop.value) == 1
coords = do_box_funcs(tRangeFuncs, aop.value[0], bop.value[0])
relfa = aop.value[0].relflags
relfb = bop.value[0].relflags
if relfa == relfb:
res.value = [Ref3D(coords + relfa)]
else:
pass
spush(res)
if blah:
print("tRange post", stack, file=bk.logfile)
elif 0x12 <= opcode <= 0x14: # tUplus, tUminus, tPercent
do_unaryop(opcode, oNUM, stack)
elif opcode == 0x15: # tParen
# source cosmetics
pass
elif opcode == 0x16: # tMissArg
spush(Operand(oMSNG, None, LEAF_RANK, ""))
elif opcode == 0x17: # tStr
if bv <= 70:
strg, newpos = unpack_string_update_pos(data, pos + 1, bk.encoding, lenlen=1)
else:
strg, newpos = unpack_unicode_update_pos(data, pos + 1, lenlen=1)
sz = newpos - pos
if blah:
print(" sz=%d strg=%r" % (sz, strg), file=bk.logfile)
text = '"' + strg.replace('"', '""') + '"'
spush(Operand(oSTRG, strg, LEAF_RANK, text))
elif opcode == 0x18: # tExtended
# new with BIFF 8
assert bv >= 80
# not in OOo docs
raise FormulaError("tExtended token not implemented")
elif opcode == 0x19: # tAttr
subop, nc = unpack("<BH", data[pos + 1 : pos + 4])
subname = tAttrNames.get(subop, "??Unknown??")
if subop == 0x04: # Choose
sz = nc * 2 + 6
elif subop == 0x10: # Sum (single arg)
sz = 4
if blah:
print("tAttrSum", stack, file=bk.logfile)
assert len(stack) >= 1
aop = stack[-1]
otext = "SUM(%s)" % aop.text
stack[-1] = Operand(oNUM, None, FUNC_RANK, otext)
else:
sz = 4
if blah:
print(
" subop=%02xh subname=t%s sz=%d nc=%02xh" % (subop, subname, sz, nc),
file=bk.logfile,
)
elif 0x1A <= opcode <= 0x1B: # tSheet, tEndSheet
assert bv < 50
raise FormulaError("tSheet & tEndsheet tokens not implemented")
elif 0x1C <= opcode <= 0x1F: # tErr, tBool, tInt, tNum
inx = opcode - 0x1C
nb = [1, 1, 2, 8][inx]
kind = [oERR, oBOOL, oNUM, oNUM][inx]
(value,) = unpack("<" + "BBHd"[inx], data[pos + 1 : pos + 1 + nb])
if inx == 2: # tInt
value = float(value)
text = str(value)
elif inx == 3: # tNum
text = str(value)
elif inx == 1: # tBool
text = ("FALSE", "TRUE")[value]
else:
text = '"' + error_text_from_code[value] + '"'
spush(Operand(kind, value, LEAF_RANK, text))
else:
raise FormulaError("Unhandled opcode: 0x%02x" % opcode)
if sz <= 0:
raise FormulaError("Size not set for opcode 0x%02x" % opcode)
pos += sz
continue
if opcode == 0x00: # tArray
spush(unk_opnd)
elif opcode == 0x01: # tFunc
nb = 1 + int(bv >= 40)
funcx = unpack("<" + " BH"[nb], data[pos + 1 : pos + 1 + nb])[0]
func_attrs = func_defs.get(funcx, None)
if not func_attrs:
print("*** formula/tFunc unknown FuncID:%d" % funcx, file=bk.logfile)
spush(unk_opnd)
else:
func_name, nargs = func_attrs[:2]
if blah:
print(
" FuncID=%d name=%s nargs=%d" % (funcx, func_name, nargs),
file=bk.logfile,
)
assert len(stack) >= nargs
if nargs:
argtext = listsep.join(arg.text for arg in stack[-nargs:])
otext = f"{func_name}({argtext})"
del stack[-nargs:]
else:
otext = func_name + "()"
res = Operand(oUNK, None, FUNC_RANK, otext)
spush(res)
elif opcode == 0x02: # tFuncVar
nb = 1 + int(bv >= 40)
nargs, funcx = unpack("<B" + " BH"[nb], data[pos + 1 : pos + 2 + nb])
prompt, nargs = divmod(nargs, 128)
macro, funcx = divmod(funcx, 32768)
if blah:
print(
" FuncID=%d nargs=%d macro=%d prompt=%d" % (funcx, nargs, macro, prompt),
file=bk.logfile,
)
func_attrs = func_defs.get(funcx, None)
if not func_attrs:
print("*** formula/tFuncVar unknown FuncID:%d" % funcx, file=bk.logfile)
spush(unk_opnd)
else:
func_name, minargs, maxargs = func_attrs[:3]
if blah:
print(
" name: %r, min~max args: %d~%d" % (func_name, minargs, maxargs),
file=bk.logfile,
)
assert minargs <= nargs <= maxargs
assert len(stack) >= nargs
assert len(stack) >= nargs
argtext = listsep.join(arg.text for arg in stack[-nargs:])
otext = f"{func_name}({argtext})"
res = Operand(oUNK, None, FUNC_RANK, otext)
if funcx == 1: # IF
testarg = stack[-nargs]
if testarg.kind not in (oNUM, oBOOL):
if blah and testarg.kind != oUNK:
print("IF testarg kind?", file=bk.logfile)
elif testarg.value not in (0, 1):
if blah and testarg.value is not None:
print("IF testarg value?", file=bk.logfile)
else:
if nargs == 2 and not testarg.value:
# IF(FALSE, tv) => FALSE
res.kind, res.value = oBOOL, 0
else:
respos = -nargs + 2 - int(testarg.value)
chosen = stack[respos]
if chosen.kind == oMSNG:
res.kind, res.value = oNUM, 0
else:
res.kind, res.value = chosen.kind, chosen.value
if blah:
print("$$$$$$ IF => constant", file=bk.logfile)
elif funcx == 100: # CHOOSE
testarg = stack[-nargs]
if testarg.kind == oNUM:
if 1 <= testarg.value < nargs:
chosen = stack[-nargs + int(testarg.value)]
if chosen.kind == oMSNG:
res.kind, res.value = oNUM, 0
else:
res.kind, res.value = chosen.kind, chosen.value
del stack[-nargs:]
spush(res)
elif opcode == 0x03: # tName
tgtnamex = unpack("<H", data[pos + 1 : pos + 3])[0] - 1
# Only change with BIFF version is number of trailing UNUSED bytes!
if blah:
print(" tgtnamex=%d" % tgtnamex, file=bk.logfile)
tgtobj = bk.name_obj_list[tgtnamex]
if not tgtobj.evaluated:
### recursive ###
evaluate_name_formula(bk, tgtobj, tgtnamex, blah, level + 1)
if tgtobj.macro or tgtobj.binary or tgtobj.any_err:
if blah:
tgtobj.dump(
bk.logfile,
header="!!! tgtobj has problems!!!",
footer="----------- --------",
)
res = Operand(oUNK, None)
any_err = any_err or tgtobj.macro or tgtobj.binary or tgtobj.any_err
any_rel = any_rel or tgtobj.any_rel
else:
assert len(tgtobj.stack) == 1
res = copy.deepcopy(tgtobj.stack[0])
res.rank = LEAF_RANK
if tgtobj.scope == -1:
res.text = tgtobj.name
else:
res.text = f"{bk._sheet_names[tgtobj.scope]}!{tgtobj.name}"
if blah:
print(" tName: setting text to", repr(res.text), file=bk.logfile)
spush(res)
elif opcode == 0x04: # tRef
# not_in_name_formula(op, oname)
res = get_cell_addr(data, pos + 1, bv, reldelta)
if blah:
print(" ", res, file=bk.logfile)
rowx, colx, row_rel, col_rel = res
shx1 = shx2 = 0 ####### N.B. relative to the CURRENT SHEET
any_rel = 1
coords = (shx1, shx2 + 1, rowx, rowx + 1, colx, colx + 1)
if blah:
print(" ", coords, file=bk.logfile)
res = Operand(oUNK, None)
if optype == 1:
relflags = (1, 1, row_rel, row_rel, col_rel, col_rel)
res = Operand(oREL, [Ref3D(coords + relflags)])
spush(res)
elif opcode == 0x05: # tArea
# not_in_name_formula(op, oname)
res1, res2 = get_cell_range_addr(data, pos + 1, bv, reldelta)
if blah:
print(" ", res1, res2, file=bk.logfile)
rowx1, colx1, row_rel1, col_rel1 = res1
rowx2, colx2, row_rel2, col_rel2 = res2
shx1 = shx2 = 0 ####### N.B. relative to the CURRENT SHEET
any_rel = 1
coords = (shx1, shx2 + 1, rowx1, rowx2 + 1, colx1, colx2 + 1)
if blah:
print(" ", coords, file=bk.logfile)
res = Operand(oUNK, None)
if optype == 1:
relflags = (1, 1, row_rel1, row_rel2, col_rel1, col_rel2)
res = Operand(oREL, [Ref3D(coords + relflags)])
spush(res)
elif opcode == 0x06: # tMemArea
not_in_name_formula(op, oname)
elif opcode == 0x09: # tMemFunc
nb = unpack("<H", data[pos + 1 : pos + 3])[0]
if blah:
print(" %d bytes of cell ref formula" % nb, file=bk.logfile)
# no effect on stack
elif opcode == 0x0C: # tRefN
not_in_name_formula(op, oname)
# res = get_cell_addr(data, pos+1, bv, reldelta=1)
# # note *ALL* tRefN usage has signed offset for relative addresses
# any_rel = 1
# if blah: print >> bk.logfile, " ", res
# spush(res)
elif opcode == 0x0D: # tAreaN
not_in_name_formula(op, oname)
# res = get_cell_range_addr(data, pos+1, bv, reldelta=1)
# # note *ALL* tAreaN usage has signed offset for relative addresses
# any_rel = 1
# if blah: print >> bk.logfile, " ", res
elif opcode == 0x1A: # tRef3d
if bv >= 80:
res = get_cell_addr(data, pos + 3, bv, reldelta)
refx = unpack("<H", data[pos + 1 : pos + 3])[0]
shx1, shx2 = get_externsheet_local_range(bk, refx, blah)
else:
res = get_cell_addr(data, pos + 15, bv, reldelta)
raw_extshtx, raw_shx1, raw_shx2 = unpack("<hxxxxxxxxhh", data[pos + 1 : pos + 15])
if blah:
print("tRef3d", raw_extshtx, raw_shx1, raw_shx2, file=bk.logfile)
shx1, shx2 = get_externsheet_local_range_b57(
bk, raw_extshtx, raw_shx1, raw_shx2, blah
)
rowx, colx, row_rel, col_rel = res
is_rel = row_rel or col_rel
any_rel = any_rel or is_rel
coords = (shx1, shx2 + 1, rowx, rowx + 1, colx, colx + 1)
any_err |= shx1 < -1
if blah:
print(" ", coords, file=bk.logfile)
res = Operand(oUNK, None)
if is_rel:
relflags = (0, 0, row_rel, row_rel, col_rel, col_rel)
ref3d = Ref3D(coords + relflags)
res.kind = oREL
res.text = rangename3drel(bk, ref3d, r1c1=1)
else:
ref3d = Ref3D(coords)
res.kind = oREF
res.text = rangename3d(bk, ref3d)
res.rank = LEAF_RANK
if optype == 1:
res.value = [ref3d]
spush(res)
elif opcode == 0x1B: # tArea3d
if bv >= 80:
res1, res2 = get_cell_range_addr(data, pos + 3, bv, reldelta)
refx = unpack("<H", data[pos + 1 : pos + 3])[0]
shx1, shx2 = get_externsheet_local_range(bk, refx, blah)
else:
res1, res2 = get_cell_range_addr(data, pos + 15, bv, reldelta)
raw_extshtx, raw_shx1, raw_shx2 = unpack("<hxxxxxxxxhh", data[pos + 1 : pos + 15])
if blah:
print("tArea3d", raw_extshtx, raw_shx1, raw_shx2, file=bk.logfile)
shx1, shx2 = get_externsheet_local_range_b57(
bk, raw_extshtx, raw_shx1, raw_shx2, blah
)
any_err |= shx1 < -1
rowx1, colx1, row_rel1, col_rel1 = res1
rowx2, colx2, row_rel2, col_rel2 = res2
is_rel = row_rel1 or col_rel1 or row_rel2 or col_rel2
any_rel = any_rel or is_rel
coords = (shx1, shx2 + 1, rowx1, rowx2 + 1, colx1, colx2 + 1)
if blah:
print(" ", coords, file=bk.logfile)
res = Operand(oUNK, None)
if is_rel:
relflags = (0, 0, row_rel1, row_rel2, col_rel1, col_rel2)
ref3d = Ref3D(coords + relflags)
res.kind = oREL
res.text = rangename3drel(bk, ref3d, r1c1=1)
else:
ref3d = Ref3D(coords)
res.kind = oREF
res.text = rangename3d(bk, ref3d)
res.rank = LEAF_RANK
if optype == 1:
res.value = [ref3d]
spush(res)
elif opcode == 0x19: # tNameX
dodgy = 0
res = Operand(oUNK, None)
if bv >= 80:
refx, tgtnamex = unpack("<HH", data[pos + 1 : pos + 5])
tgtnamex -= 1
origrefx = refx
else:
refx, tgtnamex = unpack("<hxxxxxxxxH", data[pos + 1 : pos + 13])
tgtnamex -= 1
origrefx = refx
if refx > 0:
refx -= 1
elif refx < 0:
refx = -refx - 1
else:
dodgy = 1
if blah:
print(
" origrefx=%d refx=%d tgtnamex=%d dodgy=%d"
% (origrefx, refx, tgtnamex, dodgy),
file=bk.logfile,
)
if tgtnamex == namex:
if blah:
print("!!!! Self-referential !!!!", file=bk.logfile)
dodgy = any_err = 1
if not dodgy:
if bv >= 80:
shx1, shx2 = get_externsheet_local_range(bk, refx, blah)
elif origrefx > 0:
shx1, shx2 = (-4, -4) # external ref
else:
exty = bk._externsheet_type_b57[refx]
if exty == 4: # non-specific sheet in own doc't
shx1, shx2 = (-1, -1) # internal, any sheet
else:
shx1, shx2 = (-666, -666)
if dodgy or shx1 < -1:
otext = "<<Name #%d in external(?) file #%d>>" % (tgtnamex, origrefx)
res = Operand(oUNK, None, LEAF_RANK, otext)
else:
tgtobj = bk.name_obj_list[tgtnamex]
if not tgtobj.evaluated:
### recursive ###
evaluate_name_formula(bk, tgtobj, tgtnamex, blah, level + 1)
if tgtobj.macro or tgtobj.binary or tgtobj.any_err:
if blah:
tgtobj.dump(
bk.logfile,
header="!!! bad tgtobj !!!",
footer="------------------",
)
res = Operand(oUNK, None)
any_err = any_err or tgtobj.macro or tgtobj.binary or tgtobj.any_err
any_rel = any_rel or tgtobj.any_rel
else:
assert len(tgtobj.stack) == 1
res = copy.deepcopy(tgtobj.stack[0])
res.rank = LEAF_RANK
if tgtobj.scope == -1:
res.text = tgtobj.name
else:
res.text = f"{bk._sheet_names[tgtobj.scope]}!{tgtobj.name}"
if blah:
print(" tNameX: setting text to", repr(res.text), file=bk.logfile)
spush(res)
elif opcode in error_opcodes:
any_err = 1
spush(error_opnd)
else:
if blah:
print("FORMULA: /// Not handled yet: t" + oname, file=bk.logfile)
any_err = 1
if sz <= 0:
raise FormulaError("Fatal: token size is not positive")
pos += sz
any_rel = not not any_rel
if blah:
fprintf(
bk.logfile,
"End of formula. level=%d any_rel=%d any_err=%d stack=%r\n",
level,
not not any_rel,
any_err,
stack,
)
if len(stack) >= 2:
print("*** Stack has unprocessed args", file=bk.logfile)
print(file=bk.logfile)
nobj.stack = stack
if len(stack) != 1:
nobj.result = None
else:
nobj.result = stack[0]
nobj.any_rel = any_rel
nobj.any_err = any_err
nobj.any_external = any_external
nobj.evaluated = 1
#### under construction #############################################################################
def decompile_formula(
bk, fmla, fmlalen, fmlatype=None, browx=None, bcolx=None, blah=0, level=0, r1c1=0
):
if level > STACK_ALARM_LEVEL:
blah = 1
reldelta = fmlatype in (
FMLA_TYPE_SHARED,
FMLA_TYPE_NAME,
FMLA_TYPE_COND_FMT,
FMLA_TYPE_DATA_VAL,
)
data = fmla
bv = bk.biff_version
if blah:
print(
"::: decompile_formula len=%d fmlatype=%r browx=%r bcolx=%r reldelta=%d %r level=%d"
% (fmlalen, fmlatype, browx, bcolx, reldelta, data, level),
file=bk.logfile,
)
hex_char_dump(data, 0, fmlalen, fout=bk.logfile)
if level > STACK_PANIC_LEVEL:
raise XLRDError("Excessive indirect references in formula")
sztab = szdict[bv]
pos = 0
stack = []
any_rel = 0
any_err = 0
unk_opnd = Operand(oUNK, None)
error_opnd = Operand(oERR, None)
spush = stack.append
def do_binop(opcd, stk):
assert len(stk) >= 2
bop = stk.pop()
aop = stk.pop()
argdict, result_kind, func, rank, sym = binop_rules[opcd]
otext = "".join(
[
"("[: aop.rank < rank],
aop.text,
")"[: aop.rank < rank],
sym,
"("[: bop.rank < rank],
bop.text,
")"[: bop.rank < rank],
]
)
resop = Operand(result_kind, None, rank, otext)
stk.append(resop)
def do_unaryop(opcode, result_kind, stk):
assert len(stk) >= 1
aop = stk.pop()
func, rank, sym1, sym2 = unop_rules[opcode]
otext = "".join(
[
sym1,
"("[: aop.rank < rank],
aop.text,
")"[: aop.rank < rank],
sym2,
]
)
stk.append(Operand(result_kind, None, rank, otext))
def unexpected_opcode(op_arg, oname_arg):
msg = "ERROR *** Unexpected token 0x{:02x} ({}) found in formula type {}".format(
op_arg,
oname_arg,
FMLA_TYPEDESCR_MAP[fmlatype],
)
print(msg, file=bk.logfile)
# raise FormulaError(msg)
if fmlalen == 0:
stack = [unk_opnd]
while 0 <= pos < fmlalen:
op = BYTES_ORD(data[pos])
opcode = op & 0x1F
optype = (op & 0x60) >> 5
if optype:
opx = opcode + 32
else:
opx = opcode
oname = onames[opx] # + [" RVA"][optype]
sz = sztab[opx]
if blah:
print(
"Pos:%d Op:0x%02x opname:t%s Sz:%d opcode:%02xh optype:%02xh"
% (pos, op, oname, sz, opcode, optype),
file=bk.logfile,
)
print("Stack =", stack, file=bk.logfile)
if sz == -2:
msg = 'ERROR *** Unexpected token 0x%02x ("%s"); biff_version=%d' % (op, oname, bv)
raise FormulaError(msg)
if _TOKEN_NOT_ALLOWED(opx, 0) & fmlatype:
unexpected_opcode(op, oname)
if not optype:
if opcode <= 0x01: # tExp
if bv >= 30:
fmt = "<x2H"
else:
fmt = "<xHB"
assert pos == 0 and fmlalen == sz and not stack
rowx, colx = unpack(fmt, data)
text = "SHARED FMLA at rowx=%d colx=%d" % (rowx, colx)
spush(Operand(oUNK, None, LEAF_RANK, text))
if not fmlatype & (FMLA_TYPE_CELL | FMLA_TYPE_ARRAY):
unexpected_opcode(op, oname)
elif 0x03 <= opcode <= 0x0E:
# Add, Sub, Mul, Div, Power
# tConcat
# tLT, ..., tNE
do_binop(opcode, stack)
elif opcode == 0x0F: # tIsect
if blah:
print("tIsect pre", stack, file=bk.logfile)
assert len(stack) >= 2
bop = stack.pop()
aop = stack.pop()
sym = " "
rank = 80 ########## check #######
otext = "".join(
[
"("[: aop.rank < rank],
aop.text,
")"[: aop.rank < rank],
sym,
"("[: bop.rank < rank],
bop.text,
")"[: bop.rank < rank],
]
)
res = Operand(oREF)
res.text = otext
if bop.kind == oERR or aop.kind == oERR:
res.kind = oERR
elif bop.kind == oUNK or aop.kind == oUNK:
# This can happen with undefined
# (go search in the current sheet) labels.
# For example =Bob Sales
# Each label gets a NAME record with an empty formula (!)
# Evaluation of the tName token classifies it as oUNK
# res.kind = oREF
pass
elif bop.kind == oREF == aop.kind:
pass
elif bop.kind == oREL == aop.kind:
res.kind = oREL
else:
pass
spush(res)
if blah:
print("tIsect post", stack, file=bk.logfile)
elif opcode == 0x10: # tList
if blah:
print("tList pre", stack, file=bk.logfile)
assert len(stack) >= 2
bop = stack.pop()
aop = stack.pop()
sym = ","
rank = 80 ########## check #######
otext = "".join(
[
"("[: aop.rank < rank],
aop.text,
")"[: aop.rank < rank],
sym,
"("[: bop.rank < rank],
bop.text,
")"[: bop.rank < rank],
]
)
res = Operand(oREF, None, rank, otext)
if bop.kind == oERR or aop.kind == oERR:
res.kind = oERR
elif bop.kind in (oREF, oREL) and aop.kind in (oREF, oREL):
res.kind = oREF
if aop.kind == oREL or bop.kind == oREL:
res.kind = oREL
else:
pass
spush(res)
if blah:
print("tList post", stack, file=bk.logfile)
elif opcode == 0x11: # tRange
if blah:
print("tRange pre", stack, file=bk.logfile)
assert len(stack) >= 2
bop = stack.pop()
aop = stack.pop()
sym = ":"
rank = 80 ########## check #######
otext = "".join(
[
"("[: aop.rank < rank],
aop.text,
")"[: aop.rank < rank],
sym,
"("[: bop.rank < rank],
bop.text,
")"[: bop.rank < rank],
]
)
res = Operand(oREF, None, rank, otext)
if bop.kind == oERR or aop.kind == oERR:
res = oERR
elif bop.kind == oREF == aop.kind:
pass
else:
pass
spush(res)
if blah:
print("tRange post", stack, file=bk.logfile)
elif 0x12 <= opcode <= 0x14: # tUplus, tUminus, tPercent
do_unaryop(opcode, oNUM, stack)
elif opcode == 0x15: # tParen
# source cosmetics
pass
elif opcode == 0x16: # tMissArg
spush(Operand(oMSNG, None, LEAF_RANK, ""))
elif opcode == 0x17: # tStr
if bv <= 70:
strg, newpos = unpack_string_update_pos(data, pos + 1, bk.encoding, lenlen=1)
else:
strg, newpos = unpack_unicode_update_pos(data, pos + 1, lenlen=1)
sz = newpos - pos
if blah:
print(" sz=%d strg=%r" % (sz, strg), file=bk.logfile)
text = '"' + strg.replace('"', '""') + '"'
spush(Operand(oSTRG, None, LEAF_RANK, text))
elif opcode == 0x18: # tExtended
# new with BIFF 8
assert bv >= 80
# not in OOo docs, don't even know how to determine its length
raise FormulaError("tExtended token not implemented")
elif opcode == 0x19: # tAttr
subop, nc = unpack("<BH", data[pos + 1 : pos + 4])
subname = tAttrNames.get(subop, "??Unknown??")
if subop == 0x04: # Choose
sz = nc * 2 + 6
elif subop == 0x10: # Sum (single arg)
sz = 4
if blah:
print("tAttrSum", stack, file=bk.logfile)
assert len(stack) >= 1
aop = stack[-1]
otext = "SUM(%s)" % aop.text
stack[-1] = Operand(oNUM, None, FUNC_RANK, otext)
else:
sz = 4
if blah:
print(
" subop=%02xh subname=t%s sz=%d nc=%02xh" % (subop, subname, sz, nc),
file=bk.logfile,
)
elif 0x1A <= opcode <= 0x1B: # tSheet, tEndSheet
assert bv < 50
raise FormulaError("tSheet & tEndsheet tokens not implemented")
elif 0x1C <= opcode <= 0x1F: # tErr, tBool, tInt, tNum
inx = opcode - 0x1C
nb = [1, 1, 2, 8][inx]
kind = [oERR, oBOOL, oNUM, oNUM][inx]
(value,) = unpack("<" + "BBHd"[inx], data[pos + 1 : pos + 1 + nb])
if inx == 2: # tInt
value = float(value)
text = str(value)
elif inx == 3: # tNum
text = str(value)
elif inx == 1: # tBool
text = ("FALSE", "TRUE")[value]
else:
text = '"' + error_text_from_code[value] + '"'
spush(Operand(kind, None, LEAF_RANK, text))
else:
raise FormulaError("Unhandled opcode: 0x%02x" % opcode)
if sz <= 0:
raise FormulaError("Size not set for opcode 0x%02x" % opcode)
pos += sz
continue
if opcode == 0x00: # tArray
spush(unk_opnd)
elif opcode == 0x01: # tFunc
nb = 1 + int(bv >= 40)
funcx = unpack("<" + " BH"[nb], data[pos + 1 : pos + 1 + nb])[0]
func_attrs = func_defs.get(funcx, None)
if not func_attrs:
print("*** formula/tFunc unknown FuncID:%d" % funcx, file=bk.logfile)
spush(unk_opnd)
else:
func_name, nargs = func_attrs[:2]
if blah:
print(
" FuncID=%d name=%s nargs=%d" % (funcx, func_name, nargs),
file=bk.logfile,
)
assert len(stack) >= nargs
if nargs:
argtext = listsep.join(arg.text for arg in stack[-nargs:])
otext = f"{func_name}({argtext})"
del stack[-nargs:]
else:
otext = func_name + "()"
res = Operand(oUNK, None, FUNC_RANK, otext)
spush(res)
elif opcode == 0x02: # tFuncVar
nb = 1 + int(bv >= 40)
nargs, funcx = unpack("<B" + " BH"[nb], data[pos + 1 : pos + 2 + nb])
prompt, nargs = divmod(nargs, 128)
macro, funcx = divmod(funcx, 32768)
if blah:
print(
" FuncID=%d nargs=%d macro=%d prompt=%d" % (funcx, nargs, macro, prompt),
file=bk.logfile,
)
#### TODO #### if funcx == 255: # call add-in function
if funcx == 255:
func_attrs = ("CALL_ADDIN", 1, 30)
else:
func_attrs = func_defs.get(funcx, None)
if not func_attrs:
print("*** formula/tFuncVar unknown FuncID:%d" % funcx, file=bk.logfile)
spush(unk_opnd)
else:
func_name, minargs, maxargs = func_attrs[:3]
if blah:
print(
" name: %r, min~max args: %d~%d" % (func_name, minargs, maxargs),
file=bk.logfile,
)
assert minargs <= nargs <= maxargs
assert len(stack) >= nargs
assert len(stack) >= nargs
argtext = listsep.join(arg.text for arg in stack[-nargs:])
otext = f"{func_name}({argtext})"
res = Operand(oUNK, None, FUNC_RANK, otext)
del stack[-nargs:]
spush(res)
elif opcode == 0x03: # tName
tgtnamex = unpack("<H", data[pos + 1 : pos + 3])[0] - 1
# Only change with BIFF version is number of trailing UNUSED bytes!
if blah:
print(" tgtnamex=%d" % tgtnamex, file=bk.logfile)
tgtobj = bk.name_obj_list[tgtnamex]
if tgtobj.scope == -1:
otext = tgtobj.name
else:
otext = f"{bk._sheet_names[tgtobj.scope]}!{tgtobj.name}"
if blah:
print(" tName: setting text to", repr(otext), file=bk.logfile)
res = Operand(oUNK, None, LEAF_RANK, otext)
spush(res)
elif opcode == 0x04: # tRef
res = get_cell_addr(data, pos + 1, bv, reldelta, browx, bcolx)
if blah:
print(" ", res, file=bk.logfile)
rowx, colx, row_rel, col_rel = res
is_rel = row_rel or col_rel
if is_rel:
okind = oREL
else:
okind = oREF
otext = cellnamerel(rowx, colx, row_rel, col_rel, browx, bcolx, r1c1)
res = Operand(okind, None, LEAF_RANK, otext)
spush(res)
elif opcode == 0x05: # tArea
res1, res2 = get_cell_range_addr(data, pos + 1, bv, reldelta, browx, bcolx)
if blah:
print(" ", res1, res2, file=bk.logfile)
rowx1, colx1, row_rel1, col_rel1 = res1
rowx2, colx2, row_rel2, col_rel2 = res2
coords = (rowx1, rowx2 + 1, colx1, colx2 + 1)
relflags = (row_rel1, row_rel2, col_rel1, col_rel2)
if sum(relflags): # relative
okind = oREL
else:
okind = oREF
if blah:
print(" ", coords, relflags, file=bk.logfile)
otext = rangename2drel(coords, relflags, browx, bcolx, r1c1)
res = Operand(okind, None, LEAF_RANK, otext)
spush(res)
elif opcode == 0x06: # tMemArea
not_in_name_formula(op, oname)
elif opcode == 0x09: # tMemFunc
nb = unpack("<H", data[pos + 1 : pos + 3])[0]
if blah:
print(" %d bytes of cell ref formula" % nb, file=bk.logfile)
# no effect on stack
elif opcode == 0x0C: # tRefN
res = get_cell_addr(data, pos + 1, bv, reldelta, browx, bcolx)
# note *ALL* tRefN usage has signed offset for relative addresses
any_rel = 1
if blah:
print(" ", res, file=bk.logfile)
rowx, colx, row_rel, col_rel = res
is_rel = row_rel or col_rel
if is_rel:
okind = oREL
else:
okind = oREF
otext = cellnamerel(rowx, colx, row_rel, col_rel, browx, bcolx, r1c1)
res = Operand(okind, None, LEAF_RANK, otext)
spush(res)
elif opcode == 0x0D: # tAreaN
# res = get_cell_range_addr(data, pos+1, bv, reldelta, browx, bcolx)
# # note *ALL* tAreaN usage has signed offset for relative addresses
# any_rel = 1
# if blah: print >> bk.logfile, " ", res
res1, res2 = get_cell_range_addr(data, pos + 1, bv, reldelta, browx, bcolx)
if blah:
print(" ", res1, res2, file=bk.logfile)
rowx1, colx1, row_rel1, col_rel1 = res1
rowx2, colx2, row_rel2, col_rel2 = res2
coords = (rowx1, rowx2 + 1, colx1, colx2 + 1)
relflags = (row_rel1, row_rel2, col_rel1, col_rel2)
if sum(relflags): # relative
okind = oREL
else:
okind = oREF
if blah:
print(" ", coords, relflags, file=bk.logfile)
otext = rangename2drel(coords, relflags, browx, bcolx, r1c1)
res = Operand(okind, None, LEAF_RANK, otext)
spush(res)
elif opcode == 0x1A: # tRef3d
if bv >= 80:
res = get_cell_addr(data, pos + 3, bv, reldelta, browx, bcolx)
refx = unpack("<H", data[pos + 1 : pos + 3])[0]
shx1, shx2 = get_externsheet_local_range(bk, refx, blah)
else:
res = get_cell_addr(data, pos + 15, bv, reldelta, browx, bcolx)
raw_extshtx, raw_shx1, raw_shx2 = unpack("<hxxxxxxxxhh", data[pos + 1 : pos + 15])
if blah:
print("tRef3d", raw_extshtx, raw_shx1, raw_shx2, file=bk.logfile)
shx1, shx2 = get_externsheet_local_range_b57(
bk, raw_extshtx, raw_shx1, raw_shx2, blah
)
rowx, colx, row_rel, col_rel = res
is_rel = row_rel or col_rel
any_rel = any_rel or is_rel
coords = (shx1, shx2 + 1, rowx, rowx + 1, colx, colx + 1)
any_err |= shx1 < -1
if blah:
print(" ", coords, file=bk.logfile)
res = Operand(oUNK, None)
if is_rel:
relflags = (0, 0, row_rel, row_rel, col_rel, col_rel)
ref3d = Ref3D(coords + relflags)
res.kind = oREL
res.text = rangename3drel(bk, ref3d, browx, bcolx, r1c1)
else:
ref3d = Ref3D(coords)
res.kind = oREF
res.text = rangename3d(bk, ref3d)
res.rank = LEAF_RANK
res.value = None
spush(res)
elif opcode == 0x1B: # tArea3d
if bv >= 80:
res1, res2 = get_cell_range_addr(data, pos + 3, bv, reldelta)
refx = unpack("<H", data[pos + 1 : pos + 3])[0]
shx1, shx2 = get_externsheet_local_range(bk, refx, blah)
else:
res1, res2 = get_cell_range_addr(data, pos + 15, bv, reldelta)
raw_extshtx, raw_shx1, raw_shx2 = unpack("<hxxxxxxxxhh", data[pos + 1 : pos + 15])
if blah:
print("tArea3d", raw_extshtx, raw_shx1, raw_shx2, file=bk.logfile)
shx1, shx2 = get_externsheet_local_range_b57(
bk, raw_extshtx, raw_shx1, raw_shx2, blah
)
any_err |= shx1 < -1
rowx1, colx1, row_rel1, col_rel1 = res1
rowx2, colx2, row_rel2, col_rel2 = res2
is_rel = row_rel1 or col_rel1 or row_rel2 or col_rel2
any_rel = any_rel or is_rel
coords = (shx1, shx2 + 1, rowx1, rowx2 + 1, colx1, colx2 + 1)
if blah:
print(" ", coords, file=bk.logfile)
res = Operand(oUNK, None)
if is_rel:
relflags = (0, 0, row_rel1, row_rel2, col_rel1, col_rel2)
ref3d = Ref3D(coords + relflags)
res.kind = oREL
res.text = rangename3drel(bk, ref3d, browx, bcolx, r1c1)
else:
ref3d = Ref3D(coords)
res.kind = oREF
res.text = rangename3d(bk, ref3d)
res.rank = LEAF_RANK
spush(res)
elif opcode == 0x19: # tNameX
dodgy = 0
res = Operand(oUNK, None)
if bv >= 80:
refx, tgtnamex = unpack("<HH", data[pos + 1 : pos + 5])
tgtnamex -= 1
origrefx = refx
else:
refx, tgtnamex = unpack("<hxxxxxxxxH", data[pos + 1 : pos + 13])
tgtnamex -= 1
origrefx = refx
if refx > 0:
refx -= 1
elif refx < 0:
refx = -refx - 1
else:
dodgy = 1
if blah:
print(
" origrefx=%d refx=%d tgtnamex=%d dodgy=%d"
% (origrefx, refx, tgtnamex, dodgy),
file=bk.logfile,
)
# if tgtnamex == namex:
# if blah: print >> bk.logfile, "!!!! Self-referential !!!!"
# dodgy = any_err = 1
if not dodgy:
if bv >= 80:
shx1, shx2 = get_externsheet_local_range(bk, refx, blah)
elif origrefx > 0:
shx1, shx2 = (-4, -4) # external ref
else:
exty = bk._externsheet_type_b57[refx]
if exty == 4: # non-specific sheet in own doc't
shx1, shx2 = (-1, -1) # internal, any sheet
else:
shx1, shx2 = (-666, -666)
okind = oUNK
ovalue = None
if shx1 == -5: # addin func name
okind = oSTRG
ovalue = bk.addin_func_names[tgtnamex]
otext = '"' + ovalue.replace('"', '""') + '"'
elif dodgy or shx1 < -1:
otext = "<<Name #%d in external(?) file #%d>>" % (tgtnamex, origrefx)
else:
tgtobj = bk.name_obj_list[tgtnamex]
if tgtobj.scope == -1:
otext = tgtobj.name
else:
otext = f"{bk._sheet_names[tgtobj.scope]}!{tgtobj.name}"
if blah:
print(" tNameX: setting text to", repr(res.text), file=bk.logfile)
res = Operand(okind, ovalue, LEAF_RANK, otext)
spush(res)
elif opcode in error_opcodes:
any_err = 1
spush(error_opnd)
else:
if blah:
print("FORMULA: /// Not handled yet: t" + oname, file=bk.logfile)
any_err = 1
if sz <= 0:
raise FormulaError("Fatal: token size is not positive")
pos += sz
any_rel = not not any_rel
if blah:
print(
"End of formula. level=%d any_rel=%d any_err=%d stack=%r"
% (level, not not any_rel, any_err, stack),
file=bk.logfile,
)
if len(stack) >= 2:
print("*** Stack has unprocessed args", file=bk.logfile)
print(file=bk.logfile)
if len(stack) != 1:
result = None
else:
result = stack[0].text
return result
#### under deconstruction ###
def dump_formula(bk, data, fmlalen, bv, reldelta, blah=0, isname=0):
if blah:
print("dump_formula", fmlalen, bv, len(data), file=bk.logfile)
hex_char_dump(data, 0, fmlalen, fout=bk.logfile)
assert bv >= 80 #### this function needs updating ####
sztab = szdict[bv]
pos = 0
stack = []
any_rel = 0
any_err = 0
spush = stack.append
while 0 <= pos < fmlalen:
op = BYTES_ORD(data[pos])
opcode = op & 0x1F
optype = (op & 0x60) >> 5
if optype:
opx = opcode + 32
else:
opx = opcode
oname = onames[opx] # + [" RVA"][optype]
sz = sztab[opx]
if blah:
print(
"Pos:%d Op:0x%02x Name:t%s Sz:%d opcode:%02xh optype:%02xh"
% (pos, op, oname, sz, opcode, optype),
file=bk.logfile,
)
if not optype:
if 0x01 <= opcode <= 0x02: # tExp, tTbl
# reference to a shared formula or table record
rowx, colx = unpack("<HH", data[pos + 1 : pos + 5])
if blah:
print(" ", (rowx, colx), file=bk.logfile)
elif opcode == 0x10: # tList
if blah:
print("tList pre", stack, file=bk.logfile)
assert len(stack) >= 2
bop = stack.pop()
aop = stack.pop()
spush(aop + bop)
if blah:
print("tlist post", stack, file=bk.logfile)
elif opcode == 0x11: # tRange
if blah:
print("tRange pre", stack, file=bk.logfile)
assert len(stack) >= 2
bop = stack.pop()
aop = stack.pop()
assert len(aop) == 1
assert len(bop) == 1
result = do_box_funcs(tRangeFuncs, aop[0], bop[0])
spush(result)
if blah:
print("tRange post", stack, file=bk.logfile)
elif opcode == 0x0F: # tIsect
if blah:
print("tIsect pre", stack, file=bk.logfile)
assert len(stack) >= 2
bop = stack.pop()
aop = stack.pop()
assert len(aop) == 1
assert len(bop) == 1
result = do_box_funcs(tIsectFuncs, aop[0], bop[0])
spush(result)
if blah:
print("tIsect post", stack, file=bk.logfile)
elif opcode == 0x19: # tAttr
subop, nc = unpack("<BH", data[pos + 1 : pos + 4])
subname = tAttrNames.get(subop, "??Unknown??")
if subop == 0x04: # Choose
sz = nc * 2 + 6
else:
sz = 4
if blah:
print(
" subop=%02xh subname=t%s sz=%d nc=%02xh" % (subop, subname, sz, nc),
file=bk.logfile,
)
elif opcode == 0x17: # tStr
if bv <= 70:
nc = BYTES_ORD(data[pos + 1])
strg = data[pos + 2 : pos + 2 + nc] # left in 8-bit encoding
sz = nc + 2
else:
strg, newpos = unpack_unicode_update_pos(data, pos + 1, lenlen=1)
sz = newpos - pos
if blah:
print(" sz=%d strg=%r" % (sz, strg), file=bk.logfile)
else:
if sz <= 0:
print("**** Dud size; exiting ****", file=bk.logfile)
return
pos += sz
continue
if opcode == 0x00: # tArray
pass
elif opcode == 0x01: # tFunc
nb = 1 + int(bv >= 40)
funcx = unpack("<" + " BH"[nb], data[pos + 1 : pos + 1 + nb])
if blah:
print(" FuncID=%d" % funcx, file=bk.logfile)
elif opcode == 0x02: # tFuncVar
nb = 1 + int(bv >= 40)
nargs, funcx = unpack("<B" + " BH"[nb], data[pos + 1 : pos + 2 + nb])
prompt, nargs = divmod(nargs, 128)
macro, funcx = divmod(funcx, 32768)
if blah:
print(
" FuncID=%d nargs=%d macro=%d prompt=%d" % (funcx, nargs, macro, prompt),
file=bk.logfile,
)
elif opcode == 0x03: # tName
namex = unpack("<H", data[pos + 1 : pos + 3])
# Only change with BIFF version is the number of trailing UNUSED bytes!!!
if blah:
print(" namex=%d" % namex, file=bk.logfile)
elif opcode == 0x04: # tRef
res = get_cell_addr(data, pos + 1, bv, reldelta)
if blah:
print(" ", res, file=bk.logfile)
elif opcode == 0x05: # tArea
res = get_cell_range_addr(data, pos + 1, bv, reldelta)
if blah:
print(" ", res, file=bk.logfile)
elif opcode == 0x09: # tMemFunc
nb = unpack("<H", data[pos + 1 : pos + 3])[0]
if blah:
print(" %d bytes of cell ref formula" % nb, file=bk.logfile)
elif opcode == 0x0C: # tRefN
res = get_cell_addr(data, pos + 1, bv, reldelta=1)
# note *ALL* tRefN usage has signed offset for relative addresses
any_rel = 1
if blah:
print(" ", res, file=bk.logfile)
elif opcode == 0x0D: # tAreaN
res = get_cell_range_addr(data, pos + 1, bv, reldelta=1)
# note *ALL* tAreaN usage has signed offset for relative addresses
any_rel = 1
if blah:
print(" ", res, file=bk.logfile)
elif opcode == 0x1A: # tRef3d
refx = unpack("<H", data[pos + 1 : pos + 3])[0]
res = get_cell_addr(data, pos + 3, bv, reldelta)
if blah:
print(" ", refx, res, file=bk.logfile)
rowx, colx, row_rel, col_rel = res
any_rel = any_rel or row_rel or col_rel
shx1, shx2 = get_externsheet_local_range(bk, refx, blah)
any_err |= shx1 < -1
coords = (shx1, shx2 + 1, rowx, rowx + 1, colx, colx + 1)
if blah:
print(" ", coords, file=bk.logfile)
if optype == 1:
spush([coords])
elif opcode == 0x1B: # tArea3d
refx = unpack("<H", data[pos + 1 : pos + 3])[0]
res1, res2 = get_cell_range_addr(data, pos + 3, bv, reldelta)
if blah:
print(" ", refx, res1, res2, file=bk.logfile)
rowx1, colx1, row_rel1, col_rel1 = res1
rowx2, colx2, row_rel2, col_rel2 = res2
any_rel = any_rel or row_rel1 or col_rel1 or row_rel2 or col_rel2
shx1, shx2 = get_externsheet_local_range(bk, refx, blah)
any_err |= shx1 < -1
coords = (shx1, shx2 + 1, rowx1, rowx2 + 1, colx1, colx2 + 1)
if blah:
print(" ", coords, file=bk.logfile)
if optype == 1:
spush([coords])
elif opcode == 0x19: # tNameX
refx, namex = unpack("<HH", data[pos + 1 : pos + 5])
if blah:
print(" refx=%d namex=%d" % (refx, namex), file=bk.logfile)
elif opcode in error_opcodes:
any_err = 1
else:
if blah:
print("FORMULA: /// Not handled yet: t" + oname, file=bk.logfile)
any_err = 1
if sz <= 0:
print("**** Dud size; exiting ****", file=bk.logfile)
return
pos += sz
if blah:
print(
"End of formula. any_rel=%d any_err=%d stack=%r" % (not not any_rel, any_err, stack),
file=bk.logfile,
)
if len(stack) >= 2:
print("*** Stack has unprocessed args", file=bk.logfile)
# === Some helper functions for displaying cell references ===
# I'm aware of only one possibility of a sheet-relative component in
# a reference: a 2D reference located in the "current sheet".
# excelrd stores this internally with bounds of (0, 1, ...) and
# relative flags of (1, 1, ...). These functions display the
# sheet component as empty, just like Excel etc.
def rownamerel(rowx, rowxrel, browx=None, r1c1=0):
# if no base rowx is provided, we have to return r1c1
if browx is None:
r1c1 = True
if not rowxrel:
if r1c1:
return "R%d" % (rowx + 1)
return "$%d" % (rowx + 1)
if r1c1:
if rowx:
return "R[%d]" % rowx
return "R"
return "%d" % ((browx + rowx) % 65536 + 1)
def colnamerel(colx, colxrel, bcolx=None, r1c1=0):
# if no base colx is provided, we have to return r1c1
if bcolx is None:
r1c1 = True
if not colxrel:
if r1c1:
return "C%d" % (colx + 1)
return "$" + colname(colx)
if r1c1:
if colx:
return "C[%d]" % colx
return "C"
return colname((bcolx + colx) % 256)
[docs]def cellname(rowx, colx):
"""Utility function: ``(5, 7)`` => ``'H6'``"""
return "%s%d" % (colname(colx), rowx + 1)
[docs]def cellnameabs(rowx, colx, r1c1=0):
"""Utility function: ``(5, 7)`` => ``'$H$6'``"""
if r1c1:
return "R%dC%d" % (rowx + 1, colx + 1)
return "$%s$%d" % (colname(colx), rowx + 1)
def cellnamerel(rowx, colx, rowxrel, colxrel, browx=None, bcolx=None, r1c1=0):
if not rowxrel and not colxrel:
return cellnameabs(rowx, colx, r1c1)
if (rowxrel and browx is None) or (colxrel and bcolx is None):
# must flip the whole cell into R1C1 mode
r1c1 = True
c = colnamerel(colx, colxrel, bcolx, r1c1)
r = rownamerel(rowx, rowxrel, browx, r1c1)
if r1c1:
return r + c
return c + r
[docs]def colname(colx):
"""Utility function: ``7`` => ``'H'``, ``27`` => ``'AB'``"""
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
if colx <= 25:
return alphabet[colx]
else:
xdiv26, xmod26 = divmod(colx, 26)
return alphabet[xdiv26 - 1] + alphabet[xmod26]
def rangename2d(rlo, rhi, clo, chi, r1c1=0):
"""``(5, 20, 7, 10)`` => ``'$H$6:$J$20'``"""
if r1c1:
return
if rhi == rlo + 1 and chi == clo + 1:
return cellnameabs(rlo, clo, r1c1)
return f"{cellnameabs(rlo, clo, r1c1)}:{cellnameabs(rhi - 1, chi - 1, r1c1)}"
def rangename2drel(rlo_rhi_clo_chi, rlorel_rhirel_clorel_chirel, browx=None, bcolx=None, r1c1=0):
rlo, rhi, clo, chi = rlo_rhi_clo_chi
rlorel, rhirel, clorel, chirel = rlorel_rhirel_clorel_chirel
if (rlorel or rhirel) and browx is None:
r1c1 = True
if (clorel or chirel) and bcolx is None:
r1c1 = True
return "{}:{}".format(
cellnamerel(rlo, clo, rlorel, clorel, browx, bcolx, r1c1),
cellnamerel(rhi - 1, chi - 1, rhirel, chirel, browx, bcolx, r1c1),
)
[docs]def rangename3d(book, ref3d):
"""
Utility function:
``Ref3D(1, 4, 5, 20, 7, 10)`` =>
``'Sheet2:Sheet3!$H$6:$J$20'``
(assuming Excel's default sheetnames)
"""
coords = ref3d.coords
return f"{sheetrange(book, *coords[:2])}!{rangename2d(*coords[2:6])}"
[docs]def rangename3drel(book, ref3d, browx=None, bcolx=None, r1c1=0):
"""
Utility function:
``Ref3D(coords=(0, 1, -32, -22, -13, 13), relflags=(0, 0, 1, 1, 1, 1))``
In R1C1 mode => ``'Sheet1!R[-32]C[-13]:R[-23]C[12]'``
In A1 mode => depends on base cell ``(browx, bcolx)``
"""
coords = ref3d.coords
relflags = ref3d.relflags
shdesc = sheetrangerel(book, coords[:2], relflags[:2])
rngdesc = rangename2drel(coords[2:6], relflags[2:6], browx, bcolx, r1c1)
if not shdesc:
return rngdesc
return f"{shdesc}!{rngdesc}"
def quotedsheetname(shnames, shx):
if shx >= 0:
shname = shnames[shx]
else:
shname = {
-1: "?internal; any sheet?",
-2: "internal; deleted sheet",
-3: "internal; macro sheet",
-4: "<<external>>",
}.get(shx, "?error %d?" % shx)
if "'" in shname:
return "'" + shname.replace("'", "''") + "'"
if " " in shname:
return "'" + shname + "'"
return shname
def sheetrange(book, slo, shi):
shnames = book.sheet_names()
shdesc = quotedsheetname(shnames, slo)
if slo != shi - 1:
shdesc += ":" + quotedsheetname(shnames, shi - 1)
return shdesc
def sheetrangerel(book, srange, srangerel):
slo, shi = srange
slorel, shirel = srangerel
if not slorel and not shirel:
return sheetrange(book, slo, shi)
assert (slo == 0 == shi - 1) and slorel and shirel
return ""
# ==============================================================