Evaluating a string as a mathematical expression in JavaScript
Neel Somani -
I recently made a graphing calculator for my calculus class. In doing so, I realized that I needed to first write a string parser to convert a user inputted equation into the actual mathematical result. This is the portion of the code that I'm going to focus on for this post.
For simplicity, we will not cover adding functions such as sine, cosine, or tangent. Instead, this tutorial will cover how to handle the basic order of operations in JavaScript -- that is, PEMDAS.
You can try out the string parser right here:
I figured that I would work backwards in programming this parser. For those who don't know, PEMDAS = parentheses, exponents, division/multiplication, and addition/subtraction. Addition/subtraction and multiplication/division are handled in similar ways, and we can extend this procedure to solve for exponents. Lastly, we will add recursion to handle parentheses.
1. String standardization
Before I started parsing any equation, I wanted to have it in a form that was more or less accounted for. For example, "-" should be changed to "+-", to make all subtraction into addition. The format "2(" should become "2*(".
First I used a clever little function to replace all of the instances of a string with a given substring, using split()
and join()
.
function replaceAll(haystack, needle, replace) { return haystack.split(needle).join(replace); } // replace all fx
My formatting function needed to handle a few things:
- make the string lowercase
- change "-(" to "-1*(", change "n(" to "n*(" where n is an integer, change ")(" to ")*("
- make all substraction into addition of negatives, and simplify signs like "--" to "+"
- remove "+" if it is the first character in the equation (as this is meaningless)
Here is a function I wrote to standardize strings:
function reformat(s) { s = s.toLowerCase(); s = replaceAll(s, " ", ""); s = replaceAll(s, "-(", "-1*("); s = replaceAll(s, ")(", ")*("); s = replaceAll(s, "-", "+-"); s = replaceAll(s, "--", "+"); s = replaceAll(s, "++", "+"); s = replaceAll(s, "(+", "("); for (var i = 0; i < 10; i++) { s = replaceAll(s, i + "(", i + "*" + "("); } while(s.charAt(0) == "+") s = s.substr(1); return s; } // standardize string format
Before we handle any string, we should pass it to reformat()
.
2. Handling addition
Now that our strings are in an expected format, we can try handling equations such as "2+4+5".
The way that I handled this is by first checking if there were any plus signs, and if so, get the terms on the left and right and add them together. We would get a term by iterating through the string in a particular direction, stringing together each character until we hit a symbol (or something else that isn't a number).
First I needed a function to detect if a string contained a substring:
function strContain(haystack, needle) { return haystack.indexOf(needle) > -1; } // custom true/false contains
Then I needed a function to tell me if a particular character should be consider a part of a term:
function isParseable(n) { return (!isNaN(n) || n == "."); } // determine if char should be added to side
Then I wrote a function that was specific for solving addition:
function solveStr(eq) { while (strContain(eq, "+")) { var middleIndex = eq.indexOf("+"); var rightSearch = middleIndex + 1; var right = ""; while (rightSearch <= eq.length) { if (isParseable(eq[rightSearch])) { right = right + eq[rightSearch]; rightSearch++; } else { break; } } var leftSearch = middleIndex - 1; var left = ""; while (leftSearch >= 0) { if (isParseable(eq[leftSearch])) { left = eq[leftSearch] + left; leftSearch--; } else { break; } } eq = replaceAll(eq, left + "+" + right, parseFloat(left) + parseFloat(right)); } return eq; }
Great! This function can add. Clearly there is a lot of redundancy however, and this code could be more general to handle multiplication/division as well.
3. Generalizing the addition function
I rewrote the function to make it more general, separating the key parts into their own functions. I generalized the code for getting a term on a particular side of a symbol:
function getSide(haystack, middle, direction) { var i = middle + direction; var term = ""; var limit = (direction == -1) ? 0 : haystack.length; // set the stopping point, when you have gone too far while (i * direction <= limit) { // while the current position is >= 0, or <= upper limit if (isParseable(haystack[i])) { if (direction == 1) term = term + haystack[i]; else term = haystack[i] + term; i += direction; } else { return term; } } return term; } // general fx to get two terms of any fx (multiply, add, etc)
The function takes "-1" as the direction to get the term to the left of a symbol, and "1" to get the term to the right.
This makes our solveStr()
into a much shorter piece of code:
function solveStr(eq) { while (strContain(eq, "+")) { var middleIndex = eq.indexOf("+"); var left = getSide(eq, middleIndex, -1); var right = getSide(eq, middleIndex, 1); eq = replaceAll(eq, left+"+"+right, parseFloat(left) + parseFloat(right)); } return eq; }
Still, this function is not quite general enough. We could separate much of this into its own function as well.
function allocFx(eq, symbol, alloc) { if (strContain(eq, symbol)) { var middleIndex = eq.indexOf(symbol); var left = getSide(eq, middleIndex, -1); var right = getSide(eq, middleIndex, 1); eq = replaceAll(eq, left+symbol+right, alloc(left, right)); } return eq; } // fx to generically map a symbol to a function for parsing function solveStr(eq) { while (strContain(eq, "+")) eq = allocFx(eq, "+", function(l,r) { return parseFloat(r) + parseFloat(l); }); return eq; }
Now I can easily add any symbol (for example, "*"), and parse it properly.
function solveStr(eq) { while (strContain(eq, "*")) eq = allocFx(eq, "*", function(l,r) { return parseFloat(r) * parseFloat(l); }); while (strContain(eq, "+")) eq = allocFx(eq, "+", function(l,r) { return parseFloat(r) + parseFloat(l); }); return eq; }
4. Handling multiplication/division
I realized that multiplication and division must be handled from left to right, since (4*3)/(2*3) != 4*(3/2)*3.
Here is a loop to handle the symbols from left to right.
function solveStr(eq) { while (strContain(eq, "*") || strContain(eq, "/")) { var multiply = true; if (eq.indexOf("*") < eq.indexOf("/")) { multiply = (strContain(eq, "*")); } else { multiply = !(strContain(eq, "/")); } eq = (multiply) ? allocFx(eq, "*", function(l, r) { return parseFloat(l)*parseFloat(r); }) : allocFx(eq, "/", function(l, r) { return parseFloat(l)/parseFloat(r); }); } while (strContain(eq, "+")) eq = allocFx(eq, "+", function(l, r) { return parseFloat(l)+parseFloat(r); }); return eq; }
5. Exponents
Initially I thought that exponents would be as easy as implementing the same allocFx()
function that we had been using. Then I realized that -3^2 is equal to -9, not 9. getSide()
includes minus signs as parseable, since they are parseable for addition, subtraction, multiplication, and division. This is not the case for exponents, so I rewrote the functions to pass an optional boolean parameter (minus
) to indicate whether the left-most minus sign is to be appended to the term.
First we edit isParseable
to take a boolean minus
, which should be false if we want to take the minus sign.
function isParseable(n, minus) { return (!isNaN(n) || (n == "-" && !minus) || n == "."); } // determine if char should be added to side
Then I edit getSide
to take the minus
parameter. The parameter is only relevant for the left term -- the right term never needs to neglect the minus sign.
function getSide(haystack, middle, direction, minus) { var i = middle + direction; var term = ""; var limit = (direction == -1) ? 0 : haystack.length; // set the stopping point, when you have gone too far while (i * direction <= limit) { // while the current position is >= 0, or <= upper limit if (isParseable(haystack[i], minus)) { if (direction == 1) term = term + haystack[i]; else term = haystack[i] + term; i += direction; } else { return term; } } return term; } // general fx to get two terms of any fx (multiply, add, etc)
And finally, we edit the allocFx()
code to make the parameter optional. If the parameter is not sent, we will make the variable false (to pick up minus signs by default).
function allocFx(eq, symbol, alloc, minus) { minus = (typeof minus !== 'undefined'); // sometimes we want to capture minus signs, sometimes not if (strContain(eq, symbol)) { var middleIndex = eq.indexOf(symbol); var left = getSide(eq, middleIndex, -1, minus); var right = getSide(eq, middleIndex, 1, false); eq = replaceAll(eq, left+symbol+right, alloc(left, right)); } return eq; } // fx to generically map a symbol to a function for parsing
And now we can simply edit the solveStr()
function to include exponents.
function solveStr(eq) { while (strContain(eq, "^")) eq = allocFx(eq, "^", function(l, r) { return Math.pow(parseFloat(l),parseFloat(r)); }, false); while (strContain(eq, "*") || strContain(eq, "/")) { var multiply = true; if (eq.indexOf("*") < eq.indexOf("/")) { multiply = (strContain(eq, "*")); } else { multiply = !(strContain(eq, "/")); } eq = (multiply) ? allocFx(eq, "*", function(l, r) { return parseFloat(l)*parseFloat(r); }) : allocFx(eq, "/", function(l, r) { return parseFloat(l)/parseFloat(r); }); } while (strContain(eq, "+")) eq = allocFx(eq, "+", function(l, r) { return parseFloat(l)+parseFloat(r); }); return eq; }
6. Recursion for parentheses
The final step is to account for parentheses. Parentheses introduce the obvious need for recursion. Aside from this, we now want to distinguish between -3^2 (which is -9) and (-3)^2 (which is 9).
First I wrote some code to handle the recursion. The code should find the outermost set of parentheses, pass this expression back to itself, and get the result. Then it can replace the parenthetical with the actual value.
Next I replaced our traditional "^" for exponentiation with "&" if it happens to be next to a parenthesis. We will handle the ampersand differently than the "^" -- the ampersand should treat minus signs as parseable. For example, (-3)^2 should become -3&2, which will be parsed as 9. On the other hand, -3^2 will simply become -9. I heavily commented the code I wrote:
function solveStr(eq) { firstNest: while (strContain(eq, "(")) { // while the string has any parentheses var first = eq.indexOf("("); // first get the earliest open parentheses var last = first + 1; // start searching at the character after var layer = 1; // we might run into more parentheses, so this integer will keep track while (layer != 0) { // loop this until we've found the close parenthesis if (eq[last] == ")") { // if we run into a close parenthesis, then subtract one from "layer" layer--; if (layer == 0) break; // if it is the corresponding closing parenthesis for our outermost open parenthesis, then we can deal with this expression } else if (eq[last] == "(") { // if we see an open parenthesis, add one to "layer" layer++; } last++; // increment the character we're looking at if (last > eq.length) break firstNest; // if the parentheses are incorrectly nested, don't bother with this string } var nested = eq.substr(first + 1, last - first - 1); // get the expression between the parentheses if (last + 1 <= eq.length) { // if there is exponentiation, change to a different symbol if (eq[last + 1] == "^") { eq = eq.substr(0, last + 1) + "&" + eq.substr((last+1)+1); } } var solvedStr = solveStr(nested); var preStr = "(" + nested + ")"; eq = eq.replace(preStr, solvedStr); // replace parenthetical with value } while (strContain(eq, "^")) eq = allocFx(eq, "^", function(l, r) { return Math.pow(parseFloat(l),parseFloat(r)); }, false); while (strContain(eq, "&")) eq = allocFx(eq, "&", function(l, r) { return Math.pow(parseFloat(l),parseFloat(r)); }); // account for things like (-3)^2 while (strContain(eq, "*") || strContain(eq, "/")) { var multiply = true; if (eq.indexOf("*") < eq.indexOf("/")) { multiply = (strContain(eq, "*")); } else { multiply = !(strContain(eq, "/")); } eq = (multiply) ? allocFx(eq, "*", function(l, r) { return parseFloat(l)*parseFloat(r); }) : allocFx(eq, "/", function(l, r) { return parseFloat(l)/parseFloat(r); }); } while (strContain(eq, "+")) eq = allocFx(eq, "+", function(l, r) { return parseFloat(l)+parseFloat(r); }); return eq; } // main recursive fx + PEMDAS
Conclusion
Our string parser is complete. This is obviously not a comprehensive solution, and nor is it perfect. But parsing an equation is a good exercise in string manipulation and recursion, and it is certainly worth trying out.
To use this parser, we would first format the equation with reformat
, and then pass it to solveStr
. Feel free to check out the full code if anything is unclear!