Switched to bigint in lexer

2022-06-06 10:00:53 +02:00
parent 960523b443
commit 4f876a36a4
4 changed files with 46 additions and 17 deletions
--- a/big_int_c3.cpp
+++ b/big_int_c3.cpp
@@ -2,6 +2,7 @@
 // Use of this source code is governed by the GNU LGPLv3.0 license
 // a copy of which can be found in the LICENSE file.

+struct Token;
 function void parsing_error(Token *token, const char *str, ...);
 #define malloc_arena(x) exp_alloc(&pernament_arena, x)
 #define ALLOC_DIGITS(_digits) (uint64_t *)((_digits) ? malloc_arena(sizeof(uint64_t) * (_digits)) : NULL)
@@ -70,6 +71,33 @@ void bigint_incr(BigInt *x);
 size_t bigint_popcount_signed(const BigInt *bi, size_t bit_count);
 size_t bigint_popcount_unsigned(const BigInt *big_int);

+//-----------------------------------------------------------------------------
+// Value-returning convenience wrappers around the bigint_* init/mul routines
+//-----------------------------------------------------------------------------
+// Returns, by value, a BigInt initialized from the unsigned 64-bit `value` via bigint_init_unsigned.
+static BigInt bigint_u64(U64 value){
+  BigInt out;
+  bigint_init_unsigned(&out, value);
+  return out;
+}
+
+// Returns, by value, a BigInt initialized from the signed 64-bit `value` via bigint_init_signed.
+static BigInt bigint_s64(S64 value){
+  BigInt out;
+  bigint_init_signed(&out, value);
+  return out;
+}
+
+// Value-returning overload: computes a * b into a fresh local through the three-argument bigint_mul and returns it.
+function BigInt bigint_mul(const BigInt *a, const BigInt *b){
+  BigInt product;
+  bigint_mul(&product, a, b);
+  return product;
+}
+
+//-----------------------------------------------------------------------------
+//
+//-----------------------------------------------------------------------------
 static inline uint32_t u32_min(uint32_t a, uint32_t b)
 {
  return a < b ? a : b;
--- a/main.cpp
+++ b/main.cpp
@@ -32,7 +32,6 @@ For now I don't thing it should be overloadable.
 -------------------------------------------------------------------------------

@todo
-[ ] - Converting from U64 token to S64 Atom introduces unnanounced error (negates) - probably need big int
 [ ] - Passing down program to compile through command line
 [ ] - More for loop variations
 [ ] - Write up on order independent declarations
@@ -65,6 +64,7 @@ For now I don't thing it should be overloadable.
 [x] - More basic types
 [x] - Implementing required operations int128
 [x] - Add basic support for floats
+[x] - Converting from U64 token to S64 Atom introduces unannounced error (negates) - probably need big int
 [x] - Add basic setup for new type system
 [x] - Access through struct names to constants Arena.CONSTANT
 [x] - Enums
@@ -93,9 +93,9 @@ For now I don't thing it should be overloadable.

 #include "base.cpp"
 #include "base_unicode.cpp"
+#include "big_int_c3.cpp"
 #include "new_lex.cpp"
 #include "types.h"
-#include "big_int_c3.cpp"
 // #include "big_int.cpp"
 #include "new_ast.cpp"
 #include "new_parse.cpp"
@@ -120,7 +120,6 @@ int main(){

  String result = {};
 #if 0
-#endif
  result = compile_file("globals.kl"_s);
  printf("%s", result.str);
  result = compile_file("enums.kl"_s);
@@ -131,6 +130,7 @@ int main(){
  printf("%s", result.str);
  result = compile_file("lambdas.kl"_s);
  printf("%s", result.str);
+#endif
  result = compile_file("new_types.kl"_s);
  printf("%s", result.str);

--- a/new_ast.cpp
+++ b/new_ast.cpp
@@ -316,13 +316,18 @@ ast_float(Token *pos, F64 value){
 }

 function Ast_Atom *
-ast_int(Token *pos, U64 integer){
+ast_int(Token *pos, BigInt val){ // Builds an Atom at `pos`, types it untyped_int, and initializes its big_int_val from `val`.
  AST_NEW(Atom, VALUE, pos, AST_EXPR | AST_ATOM);
  result->type = untyped_int;
-  bigint_init_unsigned(&result->big_int_val, integer);
+  bigint_init_bigint(&result->big_int_val, &val); // NOTE(review): presumably copies val; confirm whether digit storage is duplicated or shared
  return result;
 }

+// U64 convenience overload: wraps `value` with bigint_u64 and forwards to ast_int(Token *, BigInt).
+function Ast_Atom *ast_int(Token *pos, U64 value){
+  return ast_int(pos, bigint_u64(value));
+}
+
 function Ast_Expr *
 ast_expr_binary(Ast_Expr *left, Ast_Expr *right, Token *op){
  AST_NEW(Binary, BINARY, op, AST_EXPR);
--- a/new_lex.cpp
+++ b/new_lex.cpp
@@ -103,7 +103,7 @@ struct Token{

  union {
    U32 unicode;
-    U64 int_val;
+    BigInt int_val;
    F64 f64_val;
    String error_val;
    Intern_String intern_val;
@@ -209,19 +209,15 @@ token_error(Token *t, String error_val){
 function void
 lex_parse_u64(Token *t){
  t->kind = TK_Integer;
-  U64 result = 0;
-  U64 m = 1;
+  // Parses the token's decimal digits, least significant first, accumulating the value into t->int_val as a BigInt.
+  BigInt m = bigint_u64(1), val10 = bigint_u64(10); // m is the place value of the current digit; @leak, it accumulates and potentially needs allocation
+  t->int_val = bigint_u64(0); // explicit zero: the sum below reads int_val, and nothing in this function sets it earlier
  for(S64 i = t->len - 1; i >= 0; --i){
-    U64 val = t->str[i] - '0';
-    U64 new_val = val * m;
-    if((result + new_val) < result){
-      token_error(t, "Integer overflow"_s);
-      return;
+    BigInt val = bigint_u64(t->str[i] - '0'); // I don't think this is a leak, too small
+    BigInt new_val = bigint_mul(&val, &m); // @leak
+    bigint_add(&t->int_val, &t->int_val, &new_val); // NOTE(review): dest aliases the first operand; confirm bigint_add permits this
+    m = bigint_mul(&m, &val10); // value-returning overload: the product is built in a temporary, never written into its own operand
  }
-    result+=new_val;
-    m *= 10;
-  }
-  t->int_val = result;
 }

 function void