Satellite Basic v.001 r.03

This is Satellite Basic version 001 revision 03. It was created about three days before the date of this post. I haven’t played around with the file much; in fact, I don’t think you can even get it to run at this point, at least not without some effort, so I am still trying to think of a solution to this problem. When the AI produces the code, it doesn’t check whether the code even works, which is a major step in software development. Most of the time in software development is spent making sure the code works, and right now that step is being skipped altogether. You can get the AI to look at the file again and search for errors, but even then it doesn’t really test the code all that well; it just writes it. It is still worth a lot, though, so let’s look at Satellite Basic C++ 001.03:
// common.hpp
#ifndef COMMON_HPP
#define COMMON_HPP
#include <string>
#include <vector>
#include <cstdint>
#include <variant>
// Instruction set of the Satellite Basic virtual machine.
// Every opcode is a single byte; any operands follow it in the bytecode stream.
enum OpCode : uint8_t {
    // --- Core stack, variable, I/O and control-flow instructions ---
    OP_HALT                      = 0x00, // Stop execution.
    OP_PUSH_INT                  = 0x01, // Push a 32-bit integer. Operand: 4-byte int.
    OP_POP                       = 0x02, // Pop the top value and discard it.
    OP_STORE_LOCAL               = 0x03, // Pop a value into a local. Operand: 1-byte local_idx.
    OP_LOAD_LOCAL                = 0x04, // Push a local's value. Operand: 1-byte local_idx.
    OP_ADD_INT                   = 0x05, // Pop two integers, push their sum.
    OP_PRINT_INT                 = 0x06, // Pop an integer and print it.
    OP_INPUT_INT_AND_STORE_LOCAL = 0x07, // Read an integer into a local. Operand: 1-byte local_idx.
    OP_PRINT_STRING_LITERAL      = 0x08, // Print a string. Operands: 2-byte length, then the bytes.
    OP_CALL                      = 0x09, // Call a function. Operands: 2-byte address, 1-byte arg_count.
    OP_RETURN_VALUE              = 0x0A, // Return from a function with a value on the stack.
    OP_JUMP                      = 0x0B, // Unconditional jump. Operand: 2-byte address.

    // --- Booleans and comparisons (Step 1) ---
    OP_PUSH_TRUE                 = 0x0C, // Push boolean true (1).
    OP_PUSH_FALSE                = 0x0D, // Push boolean false (0).
    OP_EQUAL                     = 0x0E, // Pop two values, push 1 if equal, else 0.
    OP_NOT_EQUAL                 = 0x0F, // Pop two values, push 1 if not equal, else 0.
    OP_GREATER                   = 0x10, // Pop two values (a, b -> b > a), push 1 if true, else 0.
    OP_GREATER_EQUAL             = 0x11, // Pop two values (a, b -> b >= a), push 1 if true, else 0.
    OP_LESS                      = 0x12, // Pop two values (a, b -> b < a), push 1 if true, else 0.
    OP_LESS_EQUAL                = 0x13, // Pop two values (a, b -> b <= a), push 1 if true, else 0.
    OP_PRINT_BOOL                = 0x14, // Pop a boolean and print "true" or "false".
    OP_JUMP_IF_FALSE             = 0x15, // Pop a value; jump if it is 0. Operand: 2-byte address.

    // --- Logical operators (Step 3) ---
    OP_LOGICAL_NOT               = 0x16, // Pop a value, push 1 if it was 0, else push 0.

    // --- Native math functions (Step 5) ---
    OP_NATIVE_ABS                = 0x17, // Pop int, push abs(int).
    OP_NATIVE_POW                = 0x18, // Pop exponent, pop base, push pow(base, exponent).
    OP_NATIVE_SQRT               = 0x19, // Pop int, push the integer part of sqrt(int).
};
// Token categories produced by the Lexer and consumed by the Compiler.
enum class TokenType {
    // --- Keywords ---
    INT,                 // Type name 'int'
    // Type name 'bool' (Step 1). The lexer keyword table and the compiler
    // refer to this token as BOOL_KEYWORD, so that is the primary spelling.
    BOOL_KEYWORD,
    BOOL = BOOL_KEYWORD, // Alias kept for code that used the older spelling
    TRUE_KEYWORD,        // Literal 'true' (Step 1)
    FALSE_KEYWORD,       // Literal 'false' (Step 1)
    PRINT,               // 'print' function-like statement
    INPUT_INT,           // 'input_int' function-like statement
    RETURN,              // 'return' statement
    IF,                  // 'if' statement (Step 2)
    ELSE,                // 'else' clause (Step 2)
    WHILE,               // 'while' statement (Step 4)

    // --- Operators and punctuation ---
    EQUALS,              // =  (assignment)
    PLUS,                // +
    LPAREN,              // (
    RPAREN,              // )
    LBRACE,              // {
    RBRACE,              // }
    SEMICOLON,           // ;
    COMMA,               // ,

    // --- Comparison operators (Step 1) ---
    EQUAL_EQUAL,         // ==
    BANG_EQUAL,          // !=
    LESS,                // <
    LESS_EQUAL,          // <=
    GREATER,             // >
    GREATER_EQUAL,       // >=

    // --- Logical operators (Step 3) ---
    BANG,                // !
    AMPERSAND_AMPERSAND, // &&
    PIPE_PIPE,           // ||

    // --- Literals ---
    IDENTIFIER,
    INTEGER_LITERAL,
    STRING_LITERAL,

    // --- Control ---
    END_OF_FILE,

    // --- Errors / unknown input ---
    UNKNOWN
};
// Static type the compiler assigns to a variable or an expression.
enum class ValueType {
    TYPE_INT,           // Integer value
    TYPE_BOOL,          // Boolean value
    TYPE_STRING,        // String value
    TYPE_VOID,          // No value
    TYPE_UNINITIALIZED, // No type assigned yet (default for Token::value_type_hint)
    TYPE_ERROR          // Result of an expression that failed type checking
};
// One lexical token: its category, source text, optional literal payload and
// the line it was recorded on.
struct Token {
    TokenType type;     // Category of the token
    std::string lexeme; // Source text the token was built from
    // Literal payload; std::monostate when the token carries no value.
    std::variant<std::monostate, int32_t, std::string, bool> literal;
    int line;           // Line number supplied by the producer of the token
    ValueType value_type_hint = ValueType::TYPE_UNINITIALIZED;

    // Members are initialised in declaration order (type, lexeme, literal, line).
    Token(TokenType t,
          std::string l,
          int ln,
          std::variant<std::monostate, int32_t, std::string, bool> lit = std::monostate{})
        : type(t),
          lexeme(std::move(l)),
          literal(std::move(lit)),
          line(ln) {}
};
// Appends the four bytes of `value` to `bytes`, most significant byte first.
inline void int32_to_bytes_big_endian(int32_t value, std::vector<uint8_t>& bytes) {
    // Walk from the top byte (shift 24) down to the bottom byte (shift 0).
    for (int shift = 24; shift >= 0; shift -= 8) {
        bytes.push_back(static_cast<uint8_t>((value >> shift) & 0xFF));
    }
}
// Reads four bytes starting at `bytes` (most significant first) and returns
// them as an int32_t. The caller must supply at least four readable bytes.
inline int32_t bytes_to_int32_big_endian(const uint8_t* bytes) {
    // Assemble in an unsigned type so that a top byte >= 0x80 is never shifted
    // into the sign bit of a signed integer.
    const uint32_t assembled = (static_cast<uint32_t>(bytes[0]) << 24) |
                               (static_cast<uint32_t>(bytes[1]) << 16) |
                               (static_cast<uint32_t>(bytes[2]) << 8) |
                               static_cast<uint32_t>(bytes[3]);
    return static_cast<int32_t>(assembled);
}
// Appends the two bytes of `value` to `bytes`, high byte first.
inline void uint16_to_bytes_big_endian(uint16_t value, std::vector<uint8_t>& bytes) {
    const uint8_t high_byte = static_cast<uint8_t>((value >> 8) & 0xFF);
    const uint8_t low_byte = static_cast<uint8_t>(value & 0xFF);
    bytes.insert(bytes.end(), {high_byte, low_byte});
}
// Reads two bytes starting at `bytes` (high byte first) and returns them as a
// uint16_t. The caller must supply at least two readable bytes.
inline uint16_t bytes_to_uint16_big_endian(const uint8_t* bytes) {
    const unsigned high_byte = bytes[0];
    const unsigned low_byte = bytes[1];
    return static_cast<uint16_t>((high_byte << 8) | low_byte);
}
#endif // COMMON_HPP
// lexer.hpp
#ifndef LEXER_HPP
#define LEXER_HPP
#include "common.hpp"
#include <string>
#include <vector>
#include <map>
// Converts Satellite Basic source text into a flat list of tokens.
class Lexer {
public:
    // Keeps its own copy of `source`.
    Lexer(const std::string& source);

    // Scans the whole source from the beginning and returns the tokens,
    // terminated by an END_OF_FILE token.
    std::vector<Token> scan_tokens();

private:
    // --- Scanner state ---
    std::string source_code;   // Text being scanned
    size_t start = 0;          // Index of the first character of the current lexeme
    size_t current = 0;        // Index of the next character to read
    int line = 1;              // Current line number (1-based)
    std::vector<Token> tokens; // Tokens produced so far

    // Reserved words mapped to their token types.
    static const std::map<std::string, TokenType> keywords;

    // --- Cursor helpers ---
    bool is_at_end();          // True once `current` has reached the end of the text
    char advance();            // Consume and return the next character

    // --- Token construction (lexeme is source_code[start, current)) ---
    void add_token(TokenType type);
    void add_token(TokenType type, const std::variant<std::monostate, int32_t, std::string, bool>& literal_value);

    // --- Lookahead ---
    bool match(char expected); // Consume the next character only if it equals `expected`
    char peek();               // Next character, or '\0' at the end
    char peek_next();          // Character after the next one, or '\0' past the end

    // --- Multi-character lexemes ---
    void consume_string();
    void consume_number();
    void consume_identifier();
    void skip_whitespace_and_comments();
};
#endif // LEXER_HPP
// lexer.cpp
#include "lexer.hpp"
#include <algorithm>
#include <cctype>
#include <iostream>
// Reserved words of the language (listed alphabetically).
// Native function names (abs, pow, sqrt) are deliberately absent: the lexer
// emits them as IDENTIFIER tokens and the compiler tells them apart.
const std::map<std::string, TokenType> Lexer::keywords = {
    {"bool",      TokenType::BOOL_KEYWORD},
    {"else",      TokenType::ELSE},
    {"false",     TokenType::FALSE_KEYWORD},
    {"if",        TokenType::IF},
    {"input_int", TokenType::INPUT_INT},
    {"int",       TokenType::INT},
    {"print",     TokenType::PRINT},
    {"return",    TokenType::RETURN},
    {"true",      TokenType::TRUE_KEYWORD},
    {"while",     TokenType::WHILE},
};
// Copies the program text; nothing is scanned until scan_tokens() is called.
Lexer::Lexer(const std::string& source)
    : source_code(source) {
}
// True when every character of the source has been consumed.
bool Lexer::is_at_end() {
    const size_t text_length = source_code.length();
    return !(current < text_length);
}
// Consumes one character and returns it. Does not check for end of input.
char Lexer::advance() {
    const char consumed = source_code[current];
    ++current;
    return consumed;
}
// Adds a token that carries no literal payload.
void Lexer::add_token(TokenType type) {
    // A default-constructed variant holds its first alternative, std::monostate.
    const std::variant<std::monostate, int32_t, std::string, bool> no_literal{};
    add_token(type, no_literal);
}
// Adds a token whose lexeme is source_code[start, current) and whose payload
// is `literal_value`, tagged with the current line.
void Lexer::add_token(TokenType type, const std::variant<std::monostate, int32_t, std::string, bool>& literal_value) {
    const size_t lexeme_length = current - start;
    tokens.emplace_back(type, source_code.substr(start, lexeme_length), line, literal_value);
}
// Consumes the next character only when it equals `expected`.
// Returns whether a character was consumed.
bool Lexer::match(char expected) {
    const bool is_match = !is_at_end() && source_code[current] == expected;
    if (is_match) {
        ++current;
    }
    return is_match;
}
// Returns the next character without consuming it, or '\0' at end of input.
char Lexer::peek() {
    return is_at_end() ? '\0' : source_code[current];
}
// Returns the character after the next one, or '\0' when there is none.
char Lexer::peek_next() {
    const size_t lookahead_idx = current + 1;
    return lookahead_idx < source_code.length() ? source_code[lookahead_idx] : '\0';
}
// Scans the rest of a string literal; the opening quote is already consumed.
// On success adds a STRING_LITERAL token whose payload is the text between the
// quotes. An unterminated string is reported and produces no token.
void Lexer::consume_string() {
    // Run up to the closing quote, counting newlines inside the literal.
    while (!is_at_end() && source_code[current] != '"') {
        if (source_code[current] == '\n') {
            ++line;
        }
        ++current;
    }

    if (is_at_end()) {
        std::cerr << "Line " << line << ": Unterminated string." << std::endl;
        return;
    }

    ++current; // Step over the closing quote.

    // Payload excludes both quotes: skip one at the front, drop two from the length.
    const size_t content_begin = start + 1;
    const size_t content_length = current - start - 2;
    add_token(TokenType::STRING_LITERAL, source_code.substr(content_begin, content_length));
}
// Scans the remaining digits of an integer literal (the first digit is already
// consumed) and adds an INTEGER_LITERAL token. A literal that does not fit in
// an int is reported and added as an UNKNOWN token carrying its text.
void Lexer::consume_number() {
    // <cctype> classifiers require a value representable as unsigned char;
    // passing a negative plain char (non-ASCII input) is undefined behaviour.
    while (std::isdigit(static_cast<unsigned char>(peek()))) {
        advance();
    }
    const std::string num_str = source_code.substr(start, current - start);
    try {
        add_token(TokenType::INTEGER_LITERAL, std::stoi(num_str));
    } catch (const std::out_of_range&) {
        std::cerr << "Line " << line << ": Integer literal out of range: " << num_str << std::endl;
        add_token(TokenType::UNKNOWN, num_str);
    }
}
void Lexer::consume_identifier() {
while (isalnum(peek()) || peek() == '_') advance();
std::string text = source_code.substr(start, current - start);
TokenType type;
auto it = keywords.find(text);
if (it != keywords.end()) {
type = it->second;
if (type == TokenType::TRUE_KEYWORD) {
add_token(type, true);
} else if (type == TokenType::FALSE_KEYWORD) {
add_token(type, false);
} else {
add_token(type);
}
} else {
type = TokenType::IDENTIFIER; // Native functions like 'abs' will be IDENTIFIERs here
add_token(type);
}
}
// Skips blanks, newlines (counting lines) and '//' comments until the next
// significant character or the end of input.
void Lexer::skip_whitespace_and_comments() {
    for (;;) {
        const char c = peek();

        if (c == ' ' || c == '\r' || c == '\t') {
            advance();
            continue;
        }
        if (c == '\n') {
            ++line;
            advance();
            continue;
        }
        if (c == '/' && peek_next() == '/') {
            // Line comment: stop before the newline so the branch above counts it.
            while (peek() != '\n' && !is_at_end()) {
                advance();
            }
            continue;
        }
        // Anything else (including a lone '/') belongs to the caller.
        return;
    }
}
// Scans the whole source from the beginning and returns the token list,
// always terminated by an END_OF_FILE token. Unrecognised characters become
// UNKNOWN tokens carrying the offending character as their payload.
std::vector<Token> Lexer::scan_tokens() {
    // Reset all state so the lexer can be reused.
    tokens.clear();
    line = 1;
    current = 0;
    start = 0;

    while (!is_at_end()) {
        skip_whitespace_and_comments();
        if (is_at_end()) break;

        start = current;
        const char c = advance();
        // <cctype> classifiers are undefined for negative char values, so
        // classify through unsigned char (matters for non-ASCII bytes).
        const unsigned char uc = static_cast<unsigned char>(c);

        if (std::isalpha(uc) || c == '_') {
            consume_identifier();
            continue;
        }
        if (std::isdigit(uc)) {
            consume_number();
            continue;
        }

        switch (c) {
            case '(': add_token(TokenType::LPAREN); break;
            case ')': add_token(TokenType::RPAREN); break;
            case '{': add_token(TokenType::LBRACE); break;
            case '}': add_token(TokenType::RBRACE); break;
            case ';': add_token(TokenType::SEMICOLON); break;
            case ',': add_token(TokenType::COMMA); break;
            case '+': add_token(TokenType::PLUS); break;
            // One- or two-character operators: look for a trailing '='.
            case '=':
                add_token(match('=') ? TokenType::EQUAL_EQUAL : TokenType::EQUALS);
                break;
            case '!':
                add_token(match('=') ? TokenType::BANG_EQUAL : TokenType::BANG);
                break;
            case '<':
                add_token(match('=') ? TokenType::LESS_EQUAL : TokenType::LESS);
                break;
            case '>':
                add_token(match('=') ? TokenType::GREATER_EQUAL : TokenType::GREATER);
                break;
            // '&' and '|' are only valid when doubled.
            case '&':
                if (match('&')) { add_token(TokenType::AMPERSAND_AMPERSAND); }
                else { add_token(TokenType::UNKNOWN, std::string(1, c)); }
                break;
            case '|':
                if (match('|')) { add_token(TokenType::PIPE_PIPE); }
                else { add_token(TokenType::UNKNOWN, std::string(1, c)); }
                break;
            case '"': consume_string(); break;
            default:
                std::cerr << "Line " << line << ": Unexpected character '" << c << "'" << std::endl;
                add_token(TokenType::UNKNOWN, std::string(1, c));
                break;
        }
    }

    tokens.emplace_back(TokenType::END_OF_FILE, "", line);
    return tokens;
}
// compiler.hpp
#ifndef COMPILER_HPP
#define COMPILER_HPP
#include "common.hpp"
#include "lexer.hpp"
#include <vector>
#include <string>
#include <map>
#include <stdexcept>
// Compile-time record of a user-defined function.
struct FunctionInfo {
    std::string name;                            // Function name as written in the source
    uint16_t address;                            // Bytecode offset of the function's first instruction
    uint8_t arity;                               // Number of declared parameters
    uint8_t locals_count;                        // Locals declared in the body, parameters excluded
    ValueType return_type = ValueType::TYPE_INT; // Declared return type
};
// A local variable or parameter known to the compiler in the current scope.
struct Local {
    Token name_token;       // Identifier token that declared the variable
    ValueType type;         // Declared type
    uint8_t index_in_frame; // Slot used as the operand of the local load/store opcodes
};
// Step 5: Description of a built-in (native) function.
struct NativeFunctionSignature {
    std::string name;                   // Name used to call the function from source code
    uint8_t arity;                      // Required number of arguments
    std::vector<ValueType> param_types; // Expected argument types, used for type checking
    ValueType return_type;              // Type of the value the call produces
    OpCode opcode_to_emit;              // Dedicated opcode emitted for this native function
};
// Single-pass compiler: parses the token list by recursive descent and emits
// Satellite Basic bytecode as it goes.
class Compiler {
public:
    // Binds to `tokens` by reference; the vector must outlive the Compiler.
    Compiler(const std::vector<Token>& tokens);

    // Compiles the bound token list and returns the generated bytecode.
    std::vector<uint8_t> compile();

private:
    // --- Parser state ---
    const std::vector<Token>& tokens_list; // Input tokens (not owned)
    size_t current_token_idx = 0;          // Index of the next token to consume
    std::vector<uint8_t> bytecode;         // Output buffer

    // --- Scope state ---
    std::vector<Local> current_function_locals; // Locals/parameters of the scope being compiled
    // Initialised here so the counter is never read with an indeterminate value.
    uint8_t current_locals_count = 0;
    bool is_in_function_scope = false;
    ValueType current_function_return_type = ValueType::TYPE_INT;

    // --- Function tables ---
    std::vector<FunctionInfo> functions_table;
    std::map<std::string, size_t> function_name_to_table_idx;
    // Step 5: Table for native functions
    static const std::map<std::string, NativeFunctionSignature> native_functions;

    // --- Token stream helpers ---
    Token advance();
    Token peek() const;
    Token previous() const;
    bool is_at_end() const;
    bool check(TokenType type) const;
    bool match(TokenType type);
    // Consumes a token of `type` or throws ParseError with `error_message`.
    Token consume(TokenType type, const std::string& error_message);
    // Skips tokens after an error until a likely statement boundary.
    void synchronize();

    // --- Local variable handling ---
    uint8_t declare_local_variable(const Token& name_token, ValueType type);
    const Local* resolve_local(const Token& name_token);

    // --- Declarations and statements ---
    void program();
    void top_level_item();
    void function_declaration();
    ValueType parse_type();
    void var_declaration_statement();
    void statement();
    void print_statement();
    void assignment_statement();
    void expression_statement();
    void block();
    void return_statement();
    void if_statement();
    void while_statement();

    // --- Expressions, lowest precedence first; each returns the expression's type ---
    ValueType expression();
    ValueType logical_or();
    ValueType logical_and();
    ValueType comparison();
    ValueType term();
    ValueType unary();
    ValueType primary();
    ValueType call_expression(const Token& callee_name_token);

    // --- Bytecode emission ---
    void emit_byte(uint8_t byte);
    void emit_op(OpCode op);
    void emit_op_byte(OpCode op, uint8_t byte_operand);
    void emit_op_word(OpCode op, uint16_t word_operand);
    void emit_int32_operand(int32_t value);
    void emit_uint16_operand(uint16_t value);
    // Emits a jump with a placeholder operand; returns the operand's offset.
    size_t emit_jump(OpCode jump_instruction);
    // Rewrites the operand at `jump_offset` to the current end of the bytecode.
    void patch_jump(size_t jump_offset);

    // Exception used to unwind the parser after a reported error.
    class ParseError : public std::runtime_error {
    public:
        ParseError(const std::string& message) : std::runtime_error(message) {}
    };
    // Prints a diagnostic for `token` and returns the exception for the caller to throw.
    ParseError error(const Token& token, const std::string& message);
};
#endif // COMPILER_HPP
// compiler.cpp
#include "compiler.hpp"
#include <iostream>
#include <algorithm>
// Step 5: Built-in functions, keyed by the name used in source code.
// Entry layout: {name, arity, parameter types, return type, opcode}.
const std::map<std::string, NativeFunctionSignature> Compiler::native_functions = {
    {"abs",
     {"abs", 1, {ValueType::TYPE_INT}, ValueType::TYPE_INT, OP_NATIVE_ABS}},
    {"pow",
     {"pow", 2, {ValueType::TYPE_INT, ValueType::TYPE_INT}, ValueType::TYPE_INT, OP_NATIVE_POW}},
    {"sqrt",
     {"sqrt", 1, {ValueType::TYPE_INT}, ValueType::TYPE_INT, OP_NATIVE_SQRT}},
};
// Binds the compiler to `tokens` without copying them.
Compiler::Compiler(const std::vector<Token>& tokens)
    : tokens_list(tokens) {
}
// --- Helper Methods (mostly unchanged) ---
// Moves past the current token unless the parser is at the end, then returns
// the token that is now "previous".
Token Compiler::advance() {
    const bool can_move = !is_at_end();
    if (can_move) {
        ++current_token_idx;
    }
    return previous();
}
// Returns the current token without consuming it. Past the end of a non-empty
// list the last token is returned; for an empty list a synthetic END_OF_FILE.
Token Compiler::peek() const {
    if (current_token_idx < tokens_list.size()) {
        return tokens_list[current_token_idx];
    }
    if (!tokens_list.empty()) {
        return tokens_list.back();
    }
    static Token eof_token(TokenType::END_OF_FILE, "", 0);
    return eof_token;
}
// Returns the most recently consumed token. Before anything has been consumed
// this is the first token, or a synthetic UNKNOWN token for an empty list.
Token Compiler::previous() const {
    if (current_token_idx > 0) {
        return tokens_list[current_token_idx - 1];
    }
    if (!tokens_list.empty()) {
        return tokens_list.front();
    }
    static Token dummy_token(TokenType::UNKNOWN, "", 0);
    return dummy_token;
}
// True for an empty token list or when the current token is END_OF_FILE.
bool Compiler::is_at_end() const {
    return tokens_list.empty() || peek().type == TokenType::END_OF_FILE;
}
// True when the parser is not at the end and the current token has type `type`.
bool Compiler::check(TokenType type) const {
    return !is_at_end() && peek().type == type;
}
// Consumes the current token when it has type `type`; reports whether it did.
bool Compiler::match(TokenType type) {
    const bool found = check(type);
    if (found) {
        advance();
    }
    return found;
}
// Consumes and returns the current token if it has type `type`; otherwise
// reports `error_message` at the current token and throws ParseError.
Token Compiler::consume(TokenType type, const std::string& error_message) {
    if (!check(type)) {
        throw error(peek(), error_message);
    }
    return advance();
}
// Writes a diagnostic for `token` to stderr and returns a ParseError carrying
// `message`. The caller decides whether to throw it.
Compiler::ParseError Compiler::error(const Token& token, const std::string& message) {
    // Location suffix: " at end" for END_OF_FILE, otherwise the quoted lexeme.
    std::string where;
    if (token.type == TokenType::END_OF_FILE) {
        where = " at end";
    } else {
        where = " at '" + token.lexeme + "'";
    }
    std::cerr << "Line " << token.line << " [Error]" << where << ": " << message << std::endl;
    return ParseError(message);
}
void Compiler::synchronize() {
advance();
while (!is_at_end()) {
if (previous().type == TokenType::SEMICOLON) return;
switch (peek().type) {
case TokenType::INT: case TokenType::BOOL_KEYWORD: case TokenType::PRINT:
case TokenType::RETURN: case TokenType::IF: case TokenType::WHILE:
case TokenType::LBRACE: case TokenType::RBRACE:
return;
default: break;
}
advance();
}
}
// --- Variable/Local Handling (unchanged) ---
uint8_t Compiler::declare_local_variable(const Token& name_token, ValueType type) {
for (const auto& local : current_function_locals) {
if (local.name_token.lexeme == name_token.lexeme) {
throw error(name_token, "Variable '" + name_token.lexeme + "' already declared in this scope.");
}
}
if (current_locals_count >= 255) {
throw error(name_token, "Too many local variables/parameters in function (max 255).");
}
current_function_locals.push_back({name_token, type, current_locals_count});
return current_locals_count++;
}
// Looks up a local by name, most recently declared first.
// Returns a pointer into current_function_locals (valid until that vector is
// modified); throws ParseError when the name is not declared, so the result is
// never null.
const Local* Compiler::resolve_local(const Token& name_token) {
    // Reverse iterators avoid the signed/unsigned index arithmetic of a
    // count-down loop.
    for (auto it = current_function_locals.rbegin(); it != current_function_locals.rend(); ++it) {
        if (it->name_token.lexeme == name_token.lexeme) {
            return &*it;
        }
    }
    throw error(name_token, "Undeclared variable '" + name_token.lexeme + "'.");
}
// --- Bytecode Emission (unchanged) ---
// Appends one raw byte to the bytecode.
void Compiler::emit_byte(uint8_t byte) {
    bytecode.push_back(byte);
}

// Appends an opcode with no operand.
void Compiler::emit_op(OpCode op) {
    emit_byte(static_cast<uint8_t>(op));
}

// Appends an opcode followed by a one-byte operand.
void Compiler::emit_op_byte(OpCode op, uint8_t byte_operand) {
    emit_op(op);
    emit_byte(byte_operand);
}

// Appends an opcode followed by a two-byte big-endian operand.
void Compiler::emit_op_word(OpCode op, uint16_t word_operand) {
    emit_op(op);
    emit_uint16_operand(word_operand);
}

// Appends a four-byte big-endian integer operand.
void Compiler::emit_int32_operand(int32_t value) {
    int32_to_bytes_big_endian(value, bytecode);
}

// Appends a two-byte big-endian operand.
void Compiler::emit_uint16_operand(uint16_t value) {
    uint16_to_bytes_big_endian(value, bytecode);
}
// Emits `jump_instruction` with a two-byte 0xFFFF placeholder operand and
// returns the offset of that operand so patch_jump() can fill it in later.
size_t Compiler::emit_jump(OpCode jump_instruction) {
    emit_op(jump_instruction);
    const size_t operand_offset = bytecode.size();
    emit_byte(0xFF);
    emit_byte(0xFF);
    return operand_offset;
}
void Compiler::patch_jump(size_t jump_offset) {
uint16_t target_address = bytecode.size();
if (target_address > UINT16_MAX) { error(peek(), "Jump target address too large."); }
bytecode[jump_offset] = static_cast<uint8_t>((target_address >> 8) & 0xFF);
bytecode[jump_offset + 1] = static_cast<uint8_t>(target_address & 0xFF);
}
// --- Statement Parsers (mostly unchanged; if_statement and while_statement are now implemented) ---
void Compiler::program() {
size_t main_jump = emit_jump(OP_JUMP);
while(!is_at_end() && (peek().type == TokenType::INT || peek().type == TokenType::BOOL_KEYWORD) &&
(current_token_idx + 2 < tokens_list.size() && tokens_list[current_token_idx + 2].type == TokenType::LPAREN) ) {
try { function_declaration(); } catch (const ParseError&) { synchronize(); }
}
patch_jump(main_jump);
is_in_function_scope = false;
current_function_locals.clear(); current_locals_count = 0;
current_function_return_type = ValueType::TYPE_VOID;
while (!is_at_end()) {
try { top_level_item(); } catch (const ParseError&) { synchronize(); }
}
}
void Compiler::top_level_item() {
if (peek().type == TokenType::INT || peek().type == TokenType::BOOL_KEYWORD) {
if (current_token_idx + 2 < tokens_list.size() && tokens_list[current_token_idx + 2].type != TokenType::LPAREN) {
var_declaration_statement();
} else if (current_token_idx + 2 >= tokens_list.size() || (tokens_list[current_token_idx + 1].type == TokenType::IDENTIFIER && tokens_list[current_token_idx + 2].type == TokenType::SEMICOLON) ) {
var_declaration_statement();
}
} else { statement(); }
}
// Consumes a type keyword and returns the matching ValueType.
// Throws ParseError when the current token is neither 'int' nor 'bool'.
ValueType Compiler::parse_type() {
    if (check(TokenType::INT)) {
        advance();
        return ValueType::TYPE_INT;
    }
    if (check(TokenType::BOOL_KEYWORD)) {
        advance();
        return ValueType::TYPE_BOOL;
    }
    throw error(peek(), "Expected a type name ('int' or 'bool').");
}
void Compiler::function_declaration() {
ValueType return_type = parse_type();
Token func_name_token = consume(TokenType::IDENTIFIER, "Expect function name.");
if (function_name_to_table_idx.count(func_name_token.lexeme) || native_functions.count(func_name_token.lexeme)) { // Check native too
error(func_name_token, "Function or native function '" + func_name_token.lexeme + "' already defined."); return;
}
FunctionInfo func_info; func_info.name = func_name_token.lexeme; func_info.address = static_cast<uint16_t>(bytecode.size());
func_info.arity = 0; func_info.locals_count = 0; func_info.return_type = return_type;
is_in_function_scope = true; ValueType previous_func_return_type = current_function_return_type; current_function_return_type = return_type;
std::vector<Local> previous_locals = current_function_locals; uint8_t previous_locals_count = current_locals_count;
current_function_locals.clear(); current_locals_count = 0;
consume(TokenType::LPAREN, "Expect '(' after function name.");
if (!check(TokenType::RPAREN)) {
do {
ValueType param_type = parse_type(); Token param_name = consume(TokenType::IDENTIFIER, "Expect parameter name.");
declare_local_variable(param_name, param_type); func_info.arity++;
} while (match(TokenType::COMMA));
}
consume(TokenType::RPAREN, "Expect ')' after parameters.");
consume(TokenType::LBRACE, "Expect '{' before function body.");
while (!check(TokenType::RBRACE) && !is_at_end()) {
if (peek().type == TokenType::INT || peek().type == TokenType::BOOL_KEYWORD) { var_declaration_statement(); } else { statement(); }
}
consume(TokenType::RBRACE, "Expect '}' after function body.");
func_info.locals_count = current_locals_count - func_info.arity;
if (bytecode.empty() || bytecode.back() != static_cast<uint8_t>(OP_RETURN_VALUE)) { // Ensure function returns
if (return_type == ValueType::TYPE_INT) { emit_op(OP_PUSH_INT); emit_int32_operand(0); }
else if (return_type == ValueType::TYPE_BOOL) { emit_op(OP_PUSH_FALSE); }
if (return_type != ValueType::TYPE_VOID) { emit_op(OP_RETURN_VALUE); }
}
functions_table.push_back(func_info); function_name_to_table_idx[func_info.name] = functions_table.size() - 1;
current_function_locals = previous_locals; current_locals_count = previous_locals_count;
current_function_return_type = previous_func_return_type; is_in_function_scope = false;
}
void Compiler::var_declaration_statement() {
ValueType type = parse_type(); Token name = consume(TokenType::IDENTIFIER, "Expect variable name.");
declare_local_variable(name, type); consume(TokenType::SEMICOLON, "Expect ';' after variable declaration.");
}
void Compiler::statement() {
if (match(TokenType::PRINT)) { print_statement(); }
else if (match(TokenType::LBRACE)) { block(); }
else if (match(TokenType::RETURN)) { return_statement(); }
else if (match(TokenType::IF)) { if_statement(); }
else if (match(TokenType::WHILE)) { while_statement(); }
else if (peek().type == TokenType::IDENTIFIER && current_token_idx + 1 < tokens_list.size() && tokens_list[current_token_idx + 1].type == TokenType::EQUALS) { assignment_statement(); }
else if (peek().type == TokenType::IDENTIFIER && current_token_idx + 1 < tokens_list.size() && tokens_list[current_token_idx + 1].type == TokenType::LPAREN) { expression_statement(); }
else if (peek().type == TokenType::SEMICOLON) { advance(); }
else { if (!is_at_end()) { throw error(peek(), "Expected a statement."); } }
}
void Compiler::block() {
while (!check(TokenType::RBRACE) && !is_at_end()) {
if (peek().type == TokenType::INT || peek().type == TokenType::BOOL_KEYWORD) { var_declaration_statement(); } else { statement(); }
}
consume(TokenType::RBRACE, "Expect '}' after block.");
}
void Compiler::return_statement() {
if (!is_in_function_scope) { error(previous(), "Cannot return from top-level code."); }
if (match(TokenType::SEMICOLON)) {
if (current_function_return_type == ValueType::TYPE_INT) { emit_op(OP_PUSH_INT); emit_int32_operand(0); }
else if (current_function_return_type == ValueType::TYPE_BOOL) { emit_op(OP_PUSH_FALSE); }
else { error(previous(), "Non-void function must return a value."); }
} else {
ValueType returned_type = expression();
if (returned_type != current_function_return_type && !( (current_function_return_type == ValueType::TYPE_INT && returned_type == ValueType::TYPE_BOOL) || (current_function_return_type == ValueType::TYPE_BOOL && returned_type == ValueType::TYPE_INT) ) ) {
// error(previous(), "Return type mismatch.");
}
consume(TokenType::SEMICOLON, "Expect ';' after return value.");
}
emit_op(OP_RETURN_VALUE);
}
void Compiler::print_statement() {
consume(TokenType::LPAREN, "Expect '(' after 'print'.");
if (peek().type == TokenType::STRING_LITERAL) {
Token str_token = advance(); const std::string& str_val = std::get<std::string>(str_token.literal);
if (str_val.length() > UINT16_MAX) { throw error(str_token, "String literal too long."); }
emit_op(OP_PRINT_STRING_LITERAL); emit_uint16_operand(static_cast<uint16_t>(str_val.length()));
for (char c : str_val) { emit_byte(static_cast<uint8_t>(c)); }
} else {
ValueType expr_type = expression();
if (expr_type == ValueType::TYPE_INT) { emit_op(OP_PRINT_INT); }
else if (expr_type == ValueType::TYPE_BOOL) { emit_op(OP_PRINT_BOOL); }
else { error(previous(), "Cannot print this expression type."); }
}
consume(TokenType::RPAREN, "Expect ')' after print arguments.");
consume(TokenType::SEMICOLON, "Expect ';' after print statement.");
}
void Compiler::assignment_statement() {
Token name = consume(TokenType::IDENTIFIER, "Expect variable name for assignment.");
const Local* local = resolve_local(name); if (!local) return;
consume(TokenType::EQUALS, "Expect '=' for assignment.");
if (peek().type == TokenType::INPUT_INT) {
if (local->type != ValueType::TYPE_INT) { error(name, "Cannot assign result of 'input_int()' to non-int variable."); }
advance(); consume(TokenType::LPAREN, "Expect '(' after 'input_int'."); consume(TokenType::RPAREN, "Expect ')' after 'input_int()'.");
emit_op_byte(OP_INPUT_INT_AND_STORE_LOCAL, local->index_in_frame);
} else {
ValueType rhs_type = expression();
if (local->type == ValueType::TYPE_INT && rhs_type == ValueType::TYPE_BOOL) {}
else if (local->type == ValueType::TYPE_BOOL && rhs_type == ValueType::TYPE_INT) {}
else if (local->type != rhs_type) { /* error(name, "Type mismatch in assignment."); */ }
emit_op_byte(OP_STORE_LOCAL, local->index_in_frame);
}
consume(TokenType::SEMICOLON, "Expect ';' after assignment.");
}
void Compiler::expression_statement() {
ValueType expr_type = expression();
consume(TokenType::SEMICOLON, "Expect ';' after expression statement.");
if (expr_type != ValueType::TYPE_VOID) { emit_op(OP_POP); }
}
void Compiler::if_statement() {
consume(TokenType::LPAREN, "Expect '(' after 'if'.");
ValueType condition_type = expression();
if (condition_type != ValueType::TYPE_BOOL) { error(previous(), "If condition must be a boolean expression."); }
consume(TokenType::RPAREN, "Expect ')' after if condition.");
size_t then_jump = emit_jump(OP_JUMP_IF_FALSE);
statement();
if (match(TokenType::ELSE)) {
size_t else_jump = emit_jump(OP_JUMP);
patch_jump(then_jump);
statement();
patch_jump(else_jump);
} else { patch_jump(then_jump); }
}
void Compiler::while_statement() {
size_t loop_start = bytecode.size();
consume(TokenType::LPAREN, "Expect '(' after 'while'.");
ValueType condition_type = expression();
if (condition_type != ValueType::TYPE_BOOL) { error(previous(), "While condition must be a boolean expression."); }
consume(TokenType::RPAREN, "Expect ')' after while condition.");
size_t exit_jump = emit_jump(OP_JUMP_IF_FALSE);
statement();
emit_op(OP_JUMP); emit_uint16_operand(static_cast<uint16_t>(loop_start));
patch_jump(exit_jump);
}
// --- Expression Parsers ---
// Entry point of the expression grammar: starts at the lowest-precedence rule.
ValueType Compiler::expression() {
    const ValueType result_type = logical_or();
    return result_type;
}
// Compiles `a || b || ...` with short-circuit evaluation and returns the
// resulting type. Non-boolean operands are reported but compilation continues.
//
// OP_JUMP_IF_FALSE is documented in the opcode table as popping its operand,
// and if_statement()/while_statement() rely on that. The left operand is
// therefore gone after the test and must not be popped a second time:
//     <left>
//     JUMP_IF_FALSE rhs    ; pops left
//     PUSH_TRUE            ; left was true -> result is true
//     JUMP end
//   rhs:
//     <right>              ; left was false -> result is right
//   end:
ValueType Compiler::logical_or() {
    ValueType left_type = logical_and();
    while (match(TokenType::PIPE_PIPE)) {
        const Token op = previous();
        if (left_type != ValueType::TYPE_BOOL) { error(op, "Left operand of '||' must be boolean."); }

        const size_t rhs_jump = emit_jump(OP_JUMP_IF_FALSE);
        emit_op(OP_PUSH_TRUE);
        const size_t end_jump = emit_jump(OP_JUMP);

        patch_jump(rhs_jump);
        const ValueType right_type = logical_and();
        if (right_type != ValueType::TYPE_BOOL) { error(op, "Right operand of '||' must be boolean."); }

        patch_jump(end_jump);
        left_type = ValueType::TYPE_BOOL;
    }
    return left_type;
}
// Compiles `a && b && ...` with short-circuit evaluation and returns the
// resulting type. Non-boolean operands are reported but compilation continues.
//
// OP_JUMP_IF_FALSE is documented in the opcode table as popping its operand,
// and if_statement()/while_statement() rely on that. The left operand is
// therefore gone after the test, so the false result is pushed explicitly:
//     <left>
//     JUMP_IF_FALSE is_false   ; pops left
//     <right>                  ; left was true -> result is right
//     JUMP end
//   is_false:
//     PUSH_FALSE               ; left was false -> result is false
//   end:
ValueType Compiler::logical_and() {
    ValueType left_type = comparison();
    while (match(TokenType::AMPERSAND_AMPERSAND)) {
        const Token op = previous();
        if (left_type != ValueType::TYPE_BOOL) { error(op, "Left operand of '&&' must be boolean."); }

        const size_t false_jump = emit_jump(OP_JUMP_IF_FALSE);
        const ValueType right_type = comparison();
        if (right_type != ValueType::TYPE_BOOL) { error(op, "Right operand of '&&' must be boolean."); }
        const size_t end_jump = emit_jump(OP_JUMP);

        patch_jump(false_jump);
        emit_op(OP_PUSH_FALSE);

        patch_jump(end_jump);
        left_type = ValueType::TYPE_BOOL;
    }
    return left_type;
}
// Compiles chains of ==, !=, <, <=, >, >= over integer operands and returns
// the resulting type. A non-integer operand is reported and yields TYPE_ERROR
// without emitting the comparison.
ValueType Compiler::comparison() {
    // Consumes the current token if it is any comparison operator.
    const auto match_comparison_operator = [this]() {
        static const TokenType kOperators[] = {
            TokenType::EQUAL_EQUAL, TokenType::BANG_EQUAL,
            TokenType::LESS,        TokenType::LESS_EQUAL,
            TokenType::GREATER,     TokenType::GREATER_EQUAL,
        };
        for (const TokenType candidate : kOperators) {
            if (match(candidate)) return true;
        }
        return false;
    };

    ValueType left_type = term();
    while (match_comparison_operator()) {
        const Token op = previous();
        const ValueType right_type = term();

        const bool both_ints = left_type == ValueType::TYPE_INT && right_type == ValueType::TYPE_INT;
        if (!both_ints) {
            error(op, "Comparison operators currently only support integers.");
            return ValueType::TYPE_ERROR;
        }

        if (op.type == TokenType::EQUAL_EQUAL)        { emit_op(OP_EQUAL); }
        else if (op.type == TokenType::BANG_EQUAL)    { emit_op(OP_NOT_EQUAL); }
        else if (op.type == TokenType::LESS)          { emit_op(OP_LESS); }
        else if (op.type == TokenType::LESS_EQUAL)    { emit_op(OP_LESS_EQUAL); }
        else if (op.type == TokenType::GREATER)       { emit_op(OP_GREATER); }
        else if (op.type == TokenType::GREATER_EQUAL) { emit_op(OP_GREATER_EQUAL); }
        else {
            error(op, "Unhandled comparison operator.");
            return ValueType::TYPE_ERROR;
        }

        left_type = ValueType::TYPE_BOOL;
    }
    return left_type;
}
// Compiles chains of integer addition (`a + b + ...`) and returns the
// resulting type. A non-integer operand is reported and yields TYPE_ERROR
// without emitting the addition.
ValueType Compiler::term() {
    const ValueType left_type = unary();
    for (;;) {
        if (!match(TokenType::PLUS)) {
            break;
        }
        const Token op = previous();
        const ValueType right_type = unary();
        const bool both_ints = left_type == ValueType::TYPE_INT && right_type == ValueType::TYPE_INT;
        if (!both_ints) {
            error(op, "Addition currently only supports integers.");
            return ValueType::TYPE_ERROR;
        }
        emit_op(OP_ADD_INT);
    }
    return left_type;
}
// Compiles a prefix '!' (which may be repeated) or falls through to primary().
// A non-boolean operand of '!' is reported; the result type is still bool.
ValueType Compiler::unary() {
    if (!match(TokenType::BANG)) {
        return primary();
    }

    const Token op = previous();
    const ValueType operand_type = unary();
    if (operand_type != ValueType::TYPE_BOOL) {
        error(op, "Operand of '!' must be boolean.");
    }
    emit_op(OP_LOGICAL_NOT);
    return ValueType::TYPE_BOOL;
}
// Compiles the highest-precedence expressions: literals, native and
// user-defined calls, variable reads and parenthesised expressions. Returns
// the expression's type; throws ParseError when no primary expression starts
// at the current token.
ValueType Compiler::primary() {
    // --- Literals ---
    if (match(TokenType::INTEGER_LITERAL)) {
        emit_op(OP_PUSH_INT);
        emit_int32_operand(std::get<int32_t>(previous().literal));
        return ValueType::TYPE_INT;
    }
    if (match(TokenType::TRUE_KEYWORD)) {
        emit_op(OP_PUSH_TRUE);
        return ValueType::TYPE_BOOL;
    }
    if (match(TokenType::FALSE_KEYWORD)) {
        emit_op(OP_PUSH_FALSE);
        return ValueType::TYPE_BOOL;
    }

    // --- Calls: an identifier immediately followed by '(' ---
    const size_t after_idx = current_token_idx + 1;
    const bool is_call = peek().type == TokenType::IDENTIFIER &&
                         after_idx < tokens_list.size() &&
                         tokens_list[after_idx].type == TokenType::LPAREN;
    if (is_call) {
        const Token callee_name_token = advance();

        // Step 5: native functions take precedence over user-defined ones.
        const auto native_it = native_functions.find(callee_name_token.lexeme);
        if (native_it == native_functions.end()) {
            return call_expression(callee_name_token);
        }

        const NativeFunctionSignature& native_sig = native_it->second;
        consume(TokenType::LPAREN, "Expect '(' for native function call arguments.");
        uint8_t arg_count = 0;
        if (!check(TokenType::RPAREN)) {
            do {
                const ValueType arg_type = expression();
                // Only arguments that have a declared parameter are type-checked;
                // surplus ones are caught by the arity check below.
                const bool has_param = arg_count < native_sig.param_types.size();
                if (has_param && arg_type != native_sig.param_types[arg_count]) {
                    error(previous(), "Type mismatch for argument " + std::to_string(arg_count + 1) + " of native function '" + native_sig.name + "'.");
                }
                arg_count++;
            } while (match(TokenType::COMMA));
        }
        consume(TokenType::RPAREN, "Expect ')' after native function call arguments.");
        if (arg_count != native_sig.arity) {
            throw error(callee_name_token, "Native function '" + native_sig.name + "' called with incorrect number of arguments. Expected " +
                        std::to_string(native_sig.arity) + ", got " + std::to_string(arg_count) + ".");
        }
        emit_op(native_sig.opcode_to_emit);
        return native_sig.return_type;
    }

    // --- Variable read ---
    if (match(TokenType::IDENTIFIER)) {
        const Local* local = resolve_local(previous());
        if (local == nullptr) {
            return ValueType::TYPE_ERROR;
        }
        emit_op_byte(OP_LOAD_LOCAL, local->index_in_frame);
        return local->type;
    }

    // --- Parenthesised expression ---
    if (match(TokenType::LPAREN)) {
        const ValueType inner_type = expression();
        consume(TokenType::RPAREN, "Expect ')' after expression.");
        return inner_type;
    }

    throw error(peek(), "Expect expression primary.");
}
// Compiles a call to a user-defined function and returns the callee's
// declared return type.
//
// On entry the callee identifier has already been consumed and the current
// token is the opening '(': primary() only peeks at it as lookahead. This
// function therefore consumes '(' itself, compiles each comma-separated
// argument expression left to right, and emits
// OP_CALL <2-byte address> <1-byte arg_count>.
//
// Throws (via error()) when the function is undefined or the argument count
// does not match its arity.
ValueType Compiler::call_expression(const Token& callee_name_token) {
    // Without this the '(' would be parsed as the start of a parenthesised
    // expression and every call would fail at the first ',' or ')'.
    consume(TokenType::LPAREN, "Expect '(' for function call arguments.");

    uint8_t arg_count = 0;
    if (!check(TokenType::RPAREN)) {
        do {
            // arg_count is emitted as a single byte; refuse to let it wrap.
            if (arg_count == UINT8_MAX) {
                throw error(peek(), "Function '" + callee_name_token.lexeme + "' called with more than 255 arguments.");
            }
            expression();
            ++arg_count;
        } while (match(TokenType::COMMA));
    }
    consume(TokenType::RPAREN, "Expect ')' after function call arguments.");

    const auto it = function_name_to_table_idx.find(callee_name_token.lexeme);
    if (it == function_name_to_table_idx.end()) {
        throw error(callee_name_token, "Undefined function '" + callee_name_token.lexeme + "'.");
    }

    const FunctionInfo& func_info = functions_table[it->second];
    if (arg_count != func_info.arity) {
        throw error(callee_name_token, "Function '" + callee_name_token.lexeme + "' called with incorrect number of arguments. Expected " +
                                           std::to_string(func_info.arity) + ", got " + std::to_string(arg_count) + ".");
    }

    emit_op(OP_CALL);
    emit_uint16_operand(func_info.address);
    emit_byte(arg_count);
    return func_info.return_type;
}
// Compiles the token list into bytecode and returns it.
//
// All per-compilation state is reset first, so the compiler can be reused.
// An empty token list (or one holding only END_OF_FILE) compiles to a single
// OP_HALT. Otherwise program() emits the code and a terminating OP_HALT is
// appended.
std::vector<uint8_t> Compiler::compile() {
    bytecode.clear();
    functions_table.clear();
    function_name_to_table_idx.clear();
    current_function_locals.clear();
    current_locals_count = 0;
    is_in_function_scope = false;
    current_token_idx = 0;

    const bool only_eof =
        tokens_list.size() == 1 && tokens_list[0].type == TokenType::END_OF_FILE;
    if (tokens_list.empty() || only_eof) {
        emit_op(OP_HALT);
        return bytecode;
    }

    program();

    // Always terminate the stream. Inspecting raw bytes to decide whether a
    // HALT is "already there" is unreliable: OP_HALT is 0x00, which also
    // occurs inside operands (e.g. the high bytes of a small OP_PUSH_INT
    // value), and the final byte may itself be an operand. A redundant
    // trailing HALT is harmless because it is either unreachable or stops a
    // program that would have run off the end anyway.
    emit_op(OP_HALT);
    return bytecode;
}
// vm.hpp
#ifndef VM_HPP
#define VM_HPP
#include "common.hpp"
#include <vector>
#include <string>
#include <iostream>
#include <limits>
#include <cmath> // For std::abs, std::pow, std::sqrt
// Maximum number of entries the VM operand stack may hold.
constexpr size_t STACK_MAX = 1024;
// Maximum depth of the VM call-frame stack.
constexpr size_t CALL_STACK_MAX = 256;
// Bookkeeping for one active function invocation.
struct CallFrame {
    // Bytecode offset at which execution resumes after the call returns.
    // Kept as size_t, the same type as the VM instruction pointer, so that
    // storing the pointer neither narrows nor truncates offsets above 0xFFFF.
    size_t return_address;
    // Index into the operand stack of this frame's first local slot.
    size_t stack_base_offset;
};
// Stack-based interpreter for Satellite Basic bytecode.
class VM {
public:
    VM();

    // Replaces the loaded program and resets all execution state.
    void load_bytecode(const std::vector<uint8_t>& code);

    // Executes the loaded program.
    void run();

private:
    // --- operand stack access (bounds-checked) ---
    void push_operand(int32_t value);
    int32_t pop_operand();
    int32_t peek_operand() const;

    // --- operand decoding; each call advances `ip` past what it read ---
    uint8_t read_byte_operand();
    uint16_t read_short_operand();
    int32_t read_int_operand();

    // --- state ---
    std::vector<uint8_t> bytecode_stream;      // program being executed
    size_t ip;                                 // offset of the next byte to read
    std::vector<int32_t> operand_stack;        // values and frame-local slots
    std::vector<CallFrame> call_frames_stack;  // active calls, innermost last
};
#endif // VM_HPP
// vm.cpp
#include "vm.hpp"
#include <iostream>
#include <stdexcept>
#include <iomanip>
#include <algorithm>
#include <cmath> // For std::abs, std::pow, std::sqrt
VM::VM() : ip(0) {
operand_stack.reserve(STACK_MAX);
call_frames_stack.reserve(CALL_STACK_MAX);
}
// Installs `code` as the program to run and rewinds the VM: the instruction
// pointer returns to offset 0 and both stacks are emptied.
void VM::load_bytecode(const std::vector<uint8_t>& code) {
    bytecode_stream = code;

    // Drop everything left over from a previous run.
    call_frames_stack.clear();
    operand_stack.clear();
    ip = 0;
}
// Pushes `value` onto the operand stack.
// Reports on stderr and throws std::runtime_error when the stack already
// holds STACK_MAX entries.
void VM::push_operand(int32_t value) {
    const bool has_room = operand_stack.size() < STACK_MAX;
    if (has_room) {
        operand_stack.push_back(value);
        return;
    }
    std::cerr << "Runtime Error: Operand stack overflow." << std::endl;
    throw std::runtime_error("Operand stack overflow");
}
// Removes and returns the top of the operand stack.
// Reports on stderr and throws std::runtime_error when the stack is empty.
int32_t VM::pop_operand() {
    if (!operand_stack.empty()) {
        const int32_t top = operand_stack.back();
        operand_stack.pop_back();
        return top;
    }
    std::cerr << "Runtime Error: Operand stack underflow (pop)." << std::endl;
    throw std::runtime_error("Operand stack underflow");
}
// Returns the top of the operand stack without removing it.
// Reports on stderr and throws std::runtime_error when the stack is empty.
int32_t VM::peek_operand() const {
    if (!operand_stack.empty()) {
        return operand_stack.back();
    }
    std::cerr << "Runtime Error: Operand stack underflow (peek)." << std::endl;
    throw std::runtime_error("Operand stack underflow");
}
// Returns the byte at `ip` and advances `ip` by one.
// Reports on stderr and throws std::runtime_error when `ip` is at or past
// the end of the bytecode.
uint8_t VM::read_byte_operand() {
    if (ip < bytecode_stream.size()) {
        const uint8_t next_byte = bytecode_stream[ip];
        ++ip;
        return next_byte;
    }
    std::cerr << "Runtime Error: Attempt to read past end of bytecode (byte)." << std::endl;
    throw std::runtime_error("Bytecode read error");
}
// Reads the two bytes at `ip`, advances `ip` past them, and returns them
// decoded by bytes_to_uint16_big_endian.
// Reports on stderr and throws std::runtime_error when fewer than two bytes
// remain.
uint16_t VM::read_short_operand() {
    const bool two_bytes_available = ip + 1 < bytecode_stream.size();
    if (!two_bytes_available) {
        std::cerr << "Runtime Error: Attempt to read past end of bytecode (short)." << std::endl;
        throw std::runtime_error("Bytecode read error for short");
    }
    uint8_t raw[2] = {bytecode_stream[ip], bytecode_stream[ip + 1]};
    ip += 2;
    return bytes_to_uint16_big_endian(raw);
}
// Reads the four bytes at `ip`, advances `ip` past them, and returns them
// decoded by bytes_to_int32_big_endian.
// Reports on stderr and throws std::runtime_error when fewer than four bytes
// remain.
int32_t VM::read_int_operand() {
    const bool four_bytes_available = ip + 3 < bytecode_stream.size();
    if (!four_bytes_available) {
        std::cerr << "Runtime Error: Attempt to read past end of bytecode (int)." << std::endl;
        throw std::runtime_error("Bytecode read error for int");
    }
    uint8_t raw[4] = {
        bytecode_stream[ip],
        bytecode_stream[ip + 1],
        bytecode_stream[ip + 2],
        bytecode_stream[ip + 3],
    };
    ip += 4;
    return bytes_to_int32_big_endian(raw);
}
// Executes the loaded bytecode from the current instruction pointer until
// OP_HALT, the end of the stream, or a runtime error.
//
// Locals live on the operand stack: local `i` of the active frame occupies
// slot `stack_base_offset + i`. Booleans are the integers 1 (true) and
// 0 (false). Failures raised as std::runtime_error are reported on stderr and
// end execution; they are not propagated to the caller.
void VM::run() {
    if (bytecode_stream.empty()) {
        return;
    }
    if (call_frames_stack.empty()) {
        // Frame for top-level code. OP_RETURN_VALUE refuses to pop the last
        // remaining frame, so this placeholder return address is never used.
        call_frames_stack.push_back({0xFFFF, 0});
    }

    // Reports a runtime error on stderr, then aborts execution. Used for
    // every failure detected in this function so that none is swallowed
    // silently by the catch block at the bottom.
    const auto fail = [](const char* message) {
        std::cerr << "Runtime Error: " << message << std::endl;
        throw std::runtime_error(message);
    };

    try {
        while (ip < bytecode_stream.size()) {
            const uint8_t instruction = read_byte_operand();
            switch (static_cast<OpCode>(instruction)) {
                case OP_HALT:
                    return;
                case OP_PUSH_INT:
                    push_operand(read_int_operand());
                    break;
                case OP_PUSH_TRUE:
                    push_operand(1);
                    break;
                case OP_PUSH_FALSE:
                    push_operand(0);
                    break;
                case OP_POP:
                    pop_operand();
                    break;
                case OP_STORE_LOCAL: {
                    const uint8_t local_idx = read_byte_operand();
                    if (call_frames_stack.empty()) fail("Store local: No call frame.");
                    const size_t slot = call_frames_stack.back().stack_base_offset + local_idx;
                    if (slot >= STACK_MAX) fail("Store local: Invalid stack access.");
                    // Pop BEFORE sizing the stack. On the first store to a
                    // local, the value being stored sits in (or below) the
                    // local's own slot; checking the size first and popping
                    // afterwards would shrink the stack past the slot and
                    // write out of bounds.
                    const int32_t value = pop_operand();
                    if (slot >= operand_stack.size()) {
                        operand_stack.resize(slot + 1, 0);
                    }
                    operand_stack[slot] = value;
                    break;
                }
                case OP_LOAD_LOCAL: {
                    const uint8_t local_idx = read_byte_operand();
                    if (call_frames_stack.empty()) fail("Load local: No call frame.");
                    const size_t slot = call_frames_stack.back().stack_base_offset + local_idx;
                    if (slot >= operand_stack.size()) {
                        fail("Load local: Invalid stack access, index out of bounds.");
                    }
                    push_operand(operand_stack[slot]);
                    break;
                }
                case OP_ADD_INT: {
                    const int32_t rhs = pop_operand();
                    const int32_t lhs = pop_operand();
                    // Add in unsigned arithmetic so overflow wraps instead of
                    // being undefined behaviour.
                    push_operand(static_cast<int32_t>(static_cast<uint32_t>(lhs) + static_cast<uint32_t>(rhs)));
                    break;
                }
                case OP_PRINT_INT:
                    std::cout << pop_operand() << std::endl;
                    break;
                case OP_PRINT_BOOL:
                    std::cout << (pop_operand() != 0 ? "true" : "false") << std::endl;
                    break;
                case OP_INPUT_INT_AND_STORE_LOCAL: {
                    const uint8_t local_idx = read_byte_operand();
                    if (call_frames_stack.empty()) fail("Input local: No call frame.");
                    const size_t slot = call_frames_stack.back().stack_base_offset + local_idx;
                    if (slot >= STACK_MAX) fail("Input local: Invalid stack access.");
                    if (slot >= operand_stack.size()) {
                        operand_stack.resize(slot + 1, 0);
                    }
                    int32_t val;
                    if (!(std::cin >> val)) {
                        std::cerr << "Runtime Error: Invalid integer input. Storing 0." << std::endl;
                        std::cin.clear();
                        std::cin.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
                        operand_stack[slot] = 0;
                    } else {
                        operand_stack[slot] = val;
                        // Discard the rest of the input line.
                        std::cin.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
                    }
                    break;
                }
                case OP_PRINT_STRING_LITERAL: {
                    const uint16_t length = read_short_operand();
                    if (ip + length > bytecode_stream.size()) {
                        std::cerr << "Runtime Error: String literal data exceeds bytecode boundary." << std::endl;
                        throw std::runtime_error("Bytecode read error for string data");
                    }
                    std::string str_val;
                    str_val.reserve(length);
                    for (uint16_t i = 0; i < length; ++i) {
                        str_val += static_cast<char>(read_byte_operand());
                    }
                    std::cout << str_val << std::endl;
                    break;
                }
                case OP_CALL: {
                    const uint16_t func_address = read_short_operand();
                    const uint8_t arg_count = read_byte_operand();
                    if (call_frames_stack.size() >= CALL_STACK_MAX) fail("Call stack overflow.");
                    // The arguments become the callee's first locals, so they
                    // must actually be on the stack; otherwise the unsigned
                    // subtraction below would wrap around.
                    if (arg_count > operand_stack.size()) {
                        fail("Call: Not enough arguments on the operand stack.");
                    }
                    if (func_address >= bytecode_stream.size()) {
                        fail("Call: Function address out of range.");
                    }
                    // Convert explicitly to the frame's address type: a
                    // braced initializer may not narrow implicitly, and an
                    // address that does not fit must not be truncated.
                    using ReturnAddress = decltype(CallFrame::return_address);
                    if (ip > static_cast<size_t>(std::numeric_limits<ReturnAddress>::max())) {
                        fail("Call: Return address out of range.");
                    }
                    const size_t new_frame_base = operand_stack.size() - arg_count;
                    call_frames_stack.push_back({static_cast<ReturnAddress>(ip), new_frame_base});
                    ip = func_address;
                    break;
                }
                case OP_RETURN_VALUE: {
                    if (call_frames_stack.size() <= 1) {
                        std::cerr << "Runtime Error: Cannot return from main scope with OP_RETURN_VALUE." << std::endl;
                        return;
                    }
                    const int32_t return_value = pop_operand();
                    const CallFrame returning_frame = call_frames_stack.back();
                    call_frames_stack.pop_back();
                    ip = returning_frame.return_address;
                    if (operand_stack.size() < returning_frame.stack_base_offset) {
                        fail("Return: Inconsistent stack state.");
                    }
                    // Drop the callee's arguments and locals, then leave the
                    // result on top for the caller.
                    operand_stack.resize(returning_frame.stack_base_offset);
                    push_operand(return_value);
                    break;
                }
                case OP_JUMP:
                    ip = read_short_operand();
                    break;
                case OP_JUMP_IF_FALSE: {
                    const uint16_t jump_address = read_short_operand();
                    if (pop_operand() == 0) {
                        ip = jump_address;
                    }
                    break;
                }
                // For every binary operator the right operand is on top of
                // the stack, so it is popped first.
                case OP_EQUAL: {
                    const int32_t rhs = pop_operand();
                    const int32_t lhs = pop_operand();
                    push_operand(lhs == rhs ? 1 : 0);
                    break;
                }
                case OP_NOT_EQUAL: {
                    const int32_t rhs = pop_operand();
                    const int32_t lhs = pop_operand();
                    push_operand(lhs != rhs ? 1 : 0);
                    break;
                }
                case OP_GREATER: {
                    const int32_t rhs = pop_operand();
                    const int32_t lhs = pop_operand();
                    push_operand(lhs > rhs ? 1 : 0);
                    break;
                }
                case OP_GREATER_EQUAL: {
                    const int32_t rhs = pop_operand();
                    const int32_t lhs = pop_operand();
                    push_operand(lhs >= rhs ? 1 : 0);
                    break;
                }
                case OP_LESS: {
                    const int32_t rhs = pop_operand();
                    const int32_t lhs = pop_operand();
                    push_operand(lhs < rhs ? 1 : 0);
                    break;
                }
                case OP_LESS_EQUAL: {
                    const int32_t rhs = pop_operand();
                    const int32_t lhs = pop_operand();
                    push_operand(lhs <= rhs ? 1 : 0);
                    break;
                }
                case OP_LOGICAL_NOT:
                    push_operand(pop_operand() == 0 ? 1 : 0);
                    break;
                // Step 5: Native Math Functions
                case OP_NATIVE_ABS: {
                    const int32_t value = pop_operand();
                    // The most negative value has no positive counterpart;
                    // std::abs on it is undefined, so it is returned as is.
                    const bool is_most_negative = value == std::numeric_limits<int32_t>::min();
                    push_operand(is_most_negative ? value : std::abs(value));
                    break;
                }
                case OP_NATIVE_POW: {
                    const int32_t exponent = pop_operand();
                    const int32_t base = pop_operand();
                    if (exponent < 0) {  // Integer exponentiation only
                        std::cerr << "Runtime Error: pow() does not support negative exponents." << std::endl;
                        push_operand(0);
                    } else {
                        // Square-and-multiply in unsigned arithmetic: the
                        // same result as repeated multiplication whenever it
                        // fits, wrapping (not undefined) when it does not,
                        // and logarithmic in the exponent.
                        uint32_t result = 1;
                        uint32_t factor = static_cast<uint32_t>(base);
                        uint32_t remaining = static_cast<uint32_t>(exponent);
                        while (remaining != 0) {
                            if ((remaining & 1u) != 0) {
                                result *= factor;
                            }
                            factor *= factor;
                            remaining >>= 1;
                        }
                        push_operand(static_cast<int32_t>(result));
                    }
                    break;
                }
                case OP_NATIVE_SQRT: {
                    const int32_t value = pop_operand();
                    if (value < 0) {
                        std::cerr << "Runtime Error: sqrt() input cannot be negative." << std::endl;
                        push_operand(0);
                    } else {
                        // Truncates toward zero.
                        push_operand(static_cast<int32_t>(std::sqrt(static_cast<double>(value))));
                    }
                    break;
                }
                default:
                    std::cerr << "Runtime Error: Unknown opcode 0x" << std::hex << static_cast<int>(instruction) << std::dec << " at ip " << (ip - 1) << std::endl;
                    throw std::runtime_error("Unknown opcode");
            }
        }
    } catch (const std::runtime_error&) {
        // Nothing more to report: every throw site in this function, and the
        // stack/bytecode helpers it calls, prints its message before throwing.
    }
}
// main.cpp
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <string>
#include <iomanip>
#include "lexer.hpp"
#include "compiler.hpp"
#include "vm.hpp"
#include "common.hpp"
// Returns the full contents of the file at `filepath`.
// If the file cannot be opened, an error is written to stderr and an empty
// string is returned (indistinguishable from an empty file).
std::string read_file_to_string(const std::string& filepath) {
    std::ifstream input(filepath);
    if (input.is_open()) {
        std::ostringstream contents;
        contents << input.rdbuf();
        return contents.str();
    }
    std::cerr << "Error: Could not open file " << filepath << std::endl;
    return "";
}
// Writes `bytecode_data` to `filepath` as raw bytes, replacing any existing
// file. Returns true on success; on failure an error is written to stderr and
// false is returned.
bool write_bytecode_to_file(const std::string& filepath, const std::vector<uint8_t>& bytecode_data) {
    std::ofstream out_file(filepath, std::ios::binary);
    if (!out_file.is_open()) {
        std::cerr << "Error: Could not open file " << filepath << " for writing." << std::endl;
        return false;
    }

    out_file.write(reinterpret_cast<const char*>(bytecode_data.data()),
                   static_cast<std::streamsize>(bytecode_data.size()));
    // write() may only fill the stream buffer. Flush before checking the
    // stream state so that a failure which surfaces when the data actually
    // reaches the file (e.g. a full disk) is not reported as success.
    out_file.flush();
    if (!out_file) {
        std::cerr << "Error: Failed to write all bytecode to " << filepath << std::endl;
        return false;
    }
    return true;  // the stream closes itself on destruction
}
// Reads the whole file at `filepath` as raw bytes and returns them.
// Returns an empty vector when the file is empty, cannot be opened, or cannot
// be read; the two failure cases also write an error to stderr.
std::vector<uint8_t> read_bytecode_from_file(const std::string& filepath) {
    // Open positioned at the end so tellg() yields the file size.
    std::ifstream in_file(filepath, std::ios::binary | std::ios::ate);
    if (!in_file.is_open()) {
        std::cerr << "Error: Could not open file " << filepath << " for reading." << std::endl;
        return {};
    }

    const std::streamsize size = in_file.tellg();
    if (size < 0) {
        // tellg() reports failure as -1; using that as a buffer size would
        // request an enormous allocation.
        std::cerr << "Error: Failed to read bytecode from " << filepath << std::endl;
        return {};
    }
    if (size == 0) {
        return {};
    }

    in_file.seekg(0, std::ios::beg);
    std::vector<uint8_t> bytecode_buffer(static_cast<size_t>(size));
    if (!in_file.read(reinterpret_cast<char*>(bytecode_buffer.data()), size)) {
        std::cerr << "Error: Failed to read bytecode from " << filepath << std::endl;
        return {};
    }
    return bytecode_buffer;  // the stream closes itself on destruction
}
// Prints the command-line help text to stdout, one line per entry.
void print_usage() {
    static const char* const usage_lines[] = {
        "Satellite Basic (v1.r3 - Complete with Std Lib Math) CLI",  // Updated for Step 5
        "Usage:",
        " satellite_basic_cli compile <source_file.satbl> <output_file.satbe>",
        " satellite_basic_cli run <executable_file.satbe>",
        " satellite_basic_cli lex <source_file.satbl>",
    };
    for (const char* line : usage_lines) {
        std::cout << line << std::endl;
    }
}
void debug_print_tokens(const std::vector<Token>& tokens_list) {
std::cout << "--- Tokens ---" << std::endl;
for (const auto& token : tokens_list) {
std::cout << "Line " << token.line << ": \"" << token.lexeme << "\" (Type: " << static_cast<int>(token.type) << ")";
if (std::holds_alternative<int32_t>(token.literal)) {
std::cout << " Value: " << std::get<int32_t>(token.literal);
} else if (std::holds_alternative<std::string>(token.literal)) {
std::cout << " Value: \"" << std::get<std::string>(token.literal) << "\"";
} else if (std::holds_alternative<bool>(token.literal)) {
std::cout << " Value: " << (std::get<bool>(token.literal) ? "true" : "false");
}
std::cout << std::endl;
}
std::cout << "--------------" << std::endl;
}
// Dumps `bytecode_to_print` to stdout as two-digit lowercase hex values,
// 16 bytes per row, framed by a header and a footer line. An empty buffer is
// shown as "(empty)". The formatting state of std::cout is left as it was
// found.
void debug_print_bytecode(const std::vector<uint8_t>& bytecode_to_print) {
    std::cout << "--- Bytecode (hex) ---" << std::endl;
    if (bytecode_to_print.empty()) {
        std::cout << "(empty)" << std::endl;
    } else {
        // std::hex and the fill character are sticky; remember the current
        // settings so later output is not silently zero-padded or hex.
        const std::ios_base::fmtflags saved_flags = std::cout.flags();
        const char saved_fill = std::cout.fill();
        std::cout << std::hex << std::setfill('0');

        const size_t total = bytecode_to_print.size();
        for (size_t i = 0; i < total; ++i) {
            std::cout << std::setw(2) << static_cast<int>(bytecode_to_print[i]) << " ";
            const bool row_complete = (i + 1) % 16 == 0;
            const bool last_byte = (i + 1) == total;
            // Ending the row on the last byte as well keeps the footer on its
            // own line even when the size is an exact multiple of 16.
            if (row_complete || last_byte) {
                std::cout << std::endl;
            }
        }

        std::cout.flags(saved_flags);
        std::cout.fill(saved_fill);
    }
    std::cout << "----------------------" << std::endl;
}
// Returns true when `path` ends with `lower_ext` or with `upper_ext`.
static bool path_has_extension(const std::string& path, const std::string& lower_ext, const std::string& upper_ext) {
    const auto ends_with = [&path](const std::string& suffix) {
        return path.size() >= suffix.size() &&
               path.compare(path.size() - suffix.size(), suffix.size(), suffix) == 0;
    };
    return ends_with(lower_ext) || ends_with(upper_ext);
}

// Returns true when a non-empty `path` cannot be opened for reading. Used to
// tell "the file is missing" apart from "the file exists but is empty" after
// a reader has returned no data.
static bool input_file_missing(const std::string& path) {
    std::ifstream probe(path);
    return !probe.is_open() && !path.empty();
}

// Command-line entry point.
//   compile <source.satbl> <output.satbe>  lex, compile and write bytecode
//   run <executable.satbe>                 load bytecode and execute it
//   lex <source.satbl>                     print the token stream
// Returns 0 on success and 1 on a usage error or a failed file/compile step.
int main(int argc, char* argv[]) {
    if (argc < 2) {
        print_usage();
        return 1;
    }
    const std::string command = argv[1];

    if (command == "compile" && argc == 4) {
        const std::string source_filepath = argv[2];
        const std::string output_filepath = argv[3];
        // The extension is checked as a real suffix, so names such as
        // "prog.satbl.bak" are flagged too. These are warnings only.
        if (!path_has_extension(source_filepath, ".satbl", ".SATBL")) {
            std::cerr << "Warning: Source file '" << source_filepath << "' may not have expected .satbl extension." << std::endl;
        }
        if (!path_has_extension(output_filepath, ".satbe", ".SATBE")) {
            std::cerr << "Warning: Output file '" << output_filepath << "' may not have expected .satbe extension." << std::endl;
        }

        std::string source_code = read_file_to_string(source_filepath);
        // The reader has already reported an open failure; just exit.
        if (source_code.empty() && input_file_missing(source_filepath)) {
            return 1;
        }

        Lexer lexer(source_code);
        std::vector<Token> tokens_list = lexer.scan_tokens();
        // debug_print_tokens(tokens_list);
        Compiler compiler(tokens_list);
        std::vector<uint8_t> bytecode_result = compiler.compile();

        const bool source_has_no_tokens =
            tokens_list.empty() ||
            (tokens_list.size() == 1 && tokens_list[0].type == TokenType::END_OF_FILE);
        if (bytecode_result.empty() && !source_has_no_tokens) {
            std::cerr << "Compilation failed. No bytecode generated due to errors." << std::endl;
            return 1;
        }
        // debug_print_bytecode(bytecode_result);

        if (!write_bytecode_to_file(output_filepath, bytecode_result)) {
            return 1;
        }
        std::cout << "Compiled " << source_filepath << " to " << output_filepath << std::endl;
    } else if (command == "run" && argc == 3) {
        const std::string executable_filepath = argv[2];
        if (!path_has_extension(executable_filepath, ".satbe", ".SATBE")) {
            std::cerr << "Warning: Executable file '" << executable_filepath << "' may not have .satbe extension." << std::endl;
        }

        std::vector<uint8_t> bytecode_to_run = read_bytecode_from_file(executable_filepath);
        if (bytecode_to_run.empty() && input_file_missing(executable_filepath)) {
            return 1;
        }

        VM vm;
        vm.load_bytecode(bytecode_to_run);
        vm.run();
    } else if (command == "lex" && argc == 3) {
        const std::string source_filepath = argv[2];
        std::string source_code = read_file_to_string(source_filepath);
        if (source_code.empty() && input_file_missing(source_filepath)) {
            return 1;
        }

        Lexer lexer(source_code);
        std::vector<Token> tokens_list = lexer.scan_tokens();
        debug_print_tokens(tokens_list);
    } else {
        print_usage();
        return 1;
    }
    return 0;
}
I must admit it is pretty impressive to come up with that after only a few minutes. That took me about 15 minutes x 15 tries, because each revision comes as a 5-step process: each revision is made of 5 steps. It takes these smaller steps to achieve an ultimate goal, such as creating a standard library or adding threading support to the language. So that takes about 1 hour and 15 minutes to make something that doesn’t even run yet. The testing is not there at all, and testing is a key part of software development, a very key part that is being left out. Anyway, see if you can get it to run…