From d706b10aa3752b3ba3d69cc3ca20bb13b7560b60 Mon Sep 17 00:00:00 2001
From: Lewis Revill
Date: Wed, 19 Jun 2024 14:12:38 +0100
Subject: [PATCH] feat: Generate calls via the GOT

This change moves the generation of the GOT from variable_generator.rs
to codegen.rs so that it covers not only global variables but also
functions and 'programs'. Once these have been given an associated
index in the GOT, we can use that index to replace normal direct
function calls with indirect calls through a function pointer stored in
the GOT. We don't do this for calls with external linkage, since these
won't be subject to online change.
---
 src/codegen.rs                                | 104 +++++++++++++++++-
 .../generators/expression_generator.rs        |  49 ++++++++-
 src/codegen/generators/variable_generator.rs  |  86 +--------------
 3 files changed, 150 insertions(+), 89 deletions(-)

diff --git a/src/codegen.rs b/src/codegen.rs
index dd98c3b7aca..935a27d5e42 100644
--- a/src/codegen.rs
+++ b/src/codegen.rs
@@ -1,6 +1,8 @@
 // Copyright (c) 2020 Ghaith Hachem and Mathias Rieder
 use std::{
     cell::RefCell,
+    collections::HashMap,
+    fs::{read_to_string, write},
     ops::Deref,
     path::{Path, PathBuf},
 };
@@ -34,6 +36,7 @@ use inkwell::{
     module::Module,
     passes::PassBuilderOptions,
     targets::{CodeModel, FileType, InitializationConfig, RelocMode},
+    types::BasicTypeEnum,
 };
 use plc_ast::ast::{CompilationUnit, LinkageType};
 use plc_diagnostics::diagnostics::Diagnostic;
@@ -84,6 +87,40 @@ pub struct GeneratedModule<'ink> {
 type MainFunction<T, U> = unsafe extern "C" fn(*mut T) -> U;
 type MainEmptyFunction<U> = unsafe extern "C" fn() -> U;

+pub fn read_got_layout(location: &str, format: ConfigFormat) -> Result<HashMap<String, u64>, Diagnostic> {
+    if !Path::new(location).is_file() {
+        // Assume if the file doesn't exist that there is no existing GOT layout yet. write_got_layout will handle
+        // creating our file when we want to.
+        return Ok(HashMap::new());
+    }
+
+    let s =
+        read_to_string(location).map_err(|_| Diagnostic::new("GOT layout could not be read from file"))?;
+    match format {
+        ConfigFormat::JSON => serde_json::from_str(&s)
+            .map_err(|_| Diagnostic::new("Could not deserialize GOT layout from JSON")),
+        ConfigFormat::TOML => {
+            toml::de::from_str(&s).map_err(|_| Diagnostic::new("Could not deserialize GOT layout from TOML"))
+        }
+    }
+}
+
+pub fn write_got_layout(
+    got_entries: HashMap<String, u64>,
+    location: &str,
+    format: ConfigFormat,
+) -> Result<(), Diagnostic> {
+    let s = match format {
+        ConfigFormat::JSON => serde_json::to_string(&got_entries)
+            .map_err(|_| Diagnostic::new("Could not serialize GOT layout to JSON"))?,
+        ConfigFormat::TOML => toml::ser::to_string(&got_entries)
+            .map_err(|_| Diagnostic::new("Could not serialize GOT layout to TOML"))?,
+    };
+
+    write(location, s).map_err(|_| Diagnostic::new("GOT layout could not be written to file"))?;
+    Ok(())
+}
+
 impl<'ink> CodeGen<'ink> {
     /// constructs a new code-generator that generates CompilationUnits into a module with the given module_name
     pub fn new(
@@ -127,7 +164,6 @@ impl<'ink> CodeGen<'ink> {
             annotations,
             &index,
             &mut self.debug,
-            self.got_layout_file.clone(),
         );

         //Generate global variables
@@ -135,6 +171,72 @@ impl<'ink> CodeGen<'ink> {
             variable_generator.generate_global_variables(dependencies, &self.module_location)?;
         index.merge(llvm_gv_index);

+        // Build our GOT layout here. We need to find all the names for globals, programs, and
+        // functions and assign them indices in the GOT, taking into account prior indices.
+        let program_globals = global_index
+            .get_program_instances()
+            .into_iter()
+            .fold(Vec::new(), |mut acc, p| {
+                acc.push(p.get_name());
+                acc.push(p.get_qualified_name());
+                acc
+            });
+        let functions = global_index.get_pous().values()
+            .filter_map(|p| match p {
+                PouIndexEntry::Function { name, linkage: LinkageType::Internal, is_generated: false, .. }
+                | PouIndexEntry::Function { name, linkage: LinkageType::Internal, .. } => Some(name.as_ref()),
+                _ => None,
+            });
+        let all_names = global_index
+            .get_globals()
+            .values()
+            .map(|g| g.get_qualified_name())
+            .chain(program_globals)
+            .chain(functions)
+            .map(|n| n.to_lowercase());
+
+        if let Some((location, format)) = &self.got_layout_file {
+            let got_entries = read_got_layout(location.as_str(), *format)?;
+            let mut new_symbols = Vec::new();
+            let mut new_got_entries = HashMap::new();
+            let mut new_got = HashMap::new();
+
+            for name in all_names {
+                if let Some(idx) = got_entries.get(&name.to_string()) {
+                    new_got_entries.insert(name.to_string(), *idx);
+                    index.associate_got_index(&name, *idx)?;
+                    new_got.insert(*idx, name.to_string());
+                } else {
+                    new_symbols.push(name.to_string());
+                }
+            }
+
+            // Put any names that weren't there last time in any free space in the GOT.
+            let mut idx: u64 = 0;
+            for name in &new_symbols {
+                while new_got.contains_key(&idx) {
+                    idx += 1;
+                }
+                new_got_entries.insert(name.to_string(), idx);
+                index.associate_got_index(name, idx)?;
+                new_got.insert(idx, name.to_string());
+            }
+
+            // Now we can write new_got_entries back out to a file.
+            write_got_layout(new_got_entries, location.as_str(), *format)?;
+
+            // Construct our GOT as a new global array. We initialise this array in the loader code.
+            let got_size = new_got.keys().max().map_or(0, |m| *m + 1);
+            let _got = llvm.create_global_variable(
+                &self.module,
+                "__custom_got",
+                BasicTypeEnum::ArrayType(Llvm::get_array_type(
+                    BasicTypeEnum::PointerType(llvm.context.i8_type().ptr_type(0.into())),
+                    got_size.try_into().expect("the computed custom GOT size is too large"),
+                )),
+            );
+        }
+
         //Generate opaque functions for implementations and associate them with their types
         let llvm = Llvm::new(context, context.create_builder());
         let llvm_impl_index = pou_generator::generate_implementation_stubs(
diff --git a/src/codegen/generators/expression_generator.rs b/src/codegen/generators/expression_generator.rs
index 9450d7c2788..4eaf0c9f00c 100644
--- a/src/codegen/generators/expression_generator.rs
+++ b/src/codegen/generators/expression_generator.rs
@@ -17,10 +17,10 @@ use crate::{
 };
 use inkwell::{
     builder::Builder,
-    types::{BasicType, BasicTypeEnum},
+    types::{BasicType, BasicTypeEnum, FunctionType},
     values::{
-        ArrayValue, BasicMetadataValueEnum, BasicValue, BasicValueEnum, FloatValue, IntValue, PointerValue,
-        StructValue, VectorValue,
+        ArrayValue, BasicMetadataValueEnum, BasicValue, BasicValueEnum, CallableValue,
+        CallSiteValue, FloatValue, IntValue, PointerValue, StructValue, VectorValue,
     },
     AddressSpace, FloatPredicate, IntPredicate,
 };
@@ -316,6 +316,39 @@ impl<'ink, 'b> ExpressionCodeGenerator<'ink, 'b> {
         }
     }

+    /// Generate an access to the appropriate GOT entry to achieve a call to the given function.
+    pub fn generate_got_call(
+        &self,
+        qualified_name: &str,
+        function_type: &FunctionType<'ink>,
+        args: &[BasicMetadataValueEnum<'ink>],
+    ) -> Result<Option<CallSiteValue<'ink>>, Diagnostic> {
+        // We will generate a GEP, which has as its base address the magic constant which
+        // will eventually be replaced by the location of the GOT.
+        let base = self
+            .llvm
+            .context
+            .i64_type()
+            .const_int(0xdeadbeef00000000, false)
+            .const_to_pointer(function_type.ptr_type(0.into()).ptr_type(0.into()));
+
+        self.llvm_index
+            .find_got_index(qualified_name)
+            .map(|idx| {
+                let mut ptr = self.llvm.load_array_element(
+                    base,
+                    &[self.llvm.context.i32_type().const_int(idx, false)],
+                    "",
+                )?;
+                ptr = self.llvm.load_pointer(&ptr, "").into_pointer_value();
+                let callable = CallableValue::try_from(ptr)
+                    .map_err(|_| Diagnostic::new("Pointer was not a function pointer"))?;
+
+                Ok(self.llvm.builder.build_call(callable, args, "call"))
+            })
+            .transpose()
+    }
+
     /// generates a binary expression (e.g. a + b, x AND y, etc.) and returns the resulting `BasicValueEnum`
     /// - `left` the AstStatement left of the operator
     /// - `right` the AstStatement right of the operator
@@ -512,9 +545,17 @@ impl<'ink, 'b> ExpressionCodeGenerator<'ink, 'b> {
             None
         };

+        // Check for the function within the GOT. If it's there, we need to generate an indirect
+        // call to its location within the GOT, which should contain a function pointer.
+        // First get the function type so our function pointer can have the correct type.
+        let qualified_name = self.annotations.get_qualified_name(operator)
+            .expect("Shouldn't have got this far without a name for the function");
+        let function_type = function.get_type();
+        let call = self.generate_got_call(qualified_name, &function_type, &arguments_list)?
+            .unwrap_or_else(|| self.llvm.builder.build_call(function, &arguments_list, "call"));
+
         // if the target is a function, declare the struct locally
         // assign all parameters into the struct values
-        let call = &self.llvm.builder.build_call(function, &arguments_list, "call");

         // so grab either:
         // - the out-pointer if we generated one in by_ref_func_out
diff --git a/src/codegen/generators/variable_generator.rs b/src/codegen/generators/variable_generator.rs
index 760cb41f8f5..71b4c59d9e7 100644
--- a/src/codegen/generators/variable_generator.rs
+++ b/src/codegen/generators/variable_generator.rs
@@ -5,15 +5,11 @@ use crate::{
     codegen::{debug::Debug, llvm_index::LlvmTypedIndex, llvm_typesystem::cast_if_needed},
     index::{get_initializer_name, Index, PouIndexEntry, VariableIndexEntry},
     resolver::{AnnotationMap, AstAnnotations, Dependency},
-    ConfigFormat,
 };
 use indexmap::IndexSet;
-use inkwell::{module::Module, types::BasicTypeEnum, values::GlobalValue};
+use inkwell::{module::Module, values::GlobalValue};
 use plc_ast::ast::LinkageType;
 use plc_diagnostics::diagnostics::Diagnostic;
-use std::collections::HashMap;
-use std::fs::{read_to_string, write};
-use std::path::Path;

 use super::{
     data_type_generator::get_default_for,
@@ -24,40 +20,6 @@ use super::{
 use crate::codegen::debug::DebugBuilderEnum;
 use crate::index::FxIndexSet;

-pub fn read_got_layout(location: &str, format: ConfigFormat) -> Result<HashMap<String, u64>, Diagnostic> {
-    if !Path::new(location).is_file() {
-        // Assume if the file doesn't exist that there is no existing GOT layout yet. write_got_layout will handle
-        // creating our file when we want to.
-        return Ok(HashMap::new());
-    }
-
-    let s =
-        read_to_string(location).map_err(|_| Diagnostic::new("GOT layout could not be read from file"))?;
-    match format {
-        ConfigFormat::JSON => serde_json::from_str(&s)
-            .map_err(|_| Diagnostic::new("Could not deserialize GOT layout from JSON")),
-        ConfigFormat::TOML => {
-            toml::de::from_str(&s).map_err(|_| Diagnostic::new("Could not deserialize GOT layout from TOML"))
-        }
-    }
-}
-
-pub fn write_got_layout(
-    got_entries: HashMap<String, u64>,
-    location: &str,
-    format: ConfigFormat,
-) -> Result<(), Diagnostic> {
-    let s = match format {
-        ConfigFormat::JSON => serde_json::to_string(&got_entries)
-            .map_err(|_| Diagnostic::new("Could not serialize GOT layout to JSON"))?,
-        ConfigFormat::TOML => toml::ser::to_string(&got_entries)
-            .map_err(|_| Diagnostic::new("Could not serialize GOT layout to TOML"))?,
-    };
-
-    write(location, s).map_err(|_| Diagnostic::new("GOT layout could not be written to file"))?;
-    Ok(())
-}
-
 pub struct VariableGenerator<'ctx, 'b> {
     module: &'b Module<'ctx>,
     llvm: &'b Llvm<'ctx>,
@@ -65,7 +27,6 @@ pub struct VariableGenerator<'ctx, 'b> {
     annotations: &'b AstAnnotations,
     types_index: &'b LlvmTypedIndex<'ctx>,
     debug: &'b mut DebugBuilderEnum<'ctx>,
-    got_layout_file: Option<(String, ConfigFormat)>,
 }

 impl<'ctx, 'b> VariableGenerator<'ctx, 'b> {
@@ -76,9 +37,8 @@ impl<'ctx, 'b> VariableGenerator<'ctx, 'b> {
         annotations: &'b AstAnnotations,
         types_index: &'b LlvmTypedIndex<'ctx>,
         debug: &'b mut DebugBuilderEnum<'ctx>,
-        got_layout_file: Option<(String, ConfigFormat)>,
     ) -> Self {
-        VariableGenerator { module, llvm, global_index, annotations, types_index, debug, got_layout_file }
+        VariableGenerator { module, llvm, global_index, annotations, types_index, debug }
     }

     pub fn generate_global_variables(
@@ -140,48 +100,6 @@ impl<'ctx, 'b> VariableGenerator<'ctx, 'b> {
             );
         }

-        if let Some((location, format)) = &self.got_layout_file {
-            let got_entries = read_got_layout(location.as_str(), *format)?;
-            let mut new_globals = Vec::new();
-            let mut new_got_entries = HashMap::new();
-            let mut new_got = HashMap::new();
-
-            for (name, _) in &globals {
-                if let Some(idx) = got_entries.get(&name.to_string()) {
-                    new_got_entries.insert(name.to_string(), *idx);
-                    index.associate_got_index(name, *idx);
-                    new_got.insert(*idx, name.to_string());
-                } else {
-                    new_globals.push(name.to_string());
-                }
-            }
-
-            // Put any globals that weren't there last time in any free space in the GOT.
-            let mut idx: u64 = 0;
-            for name in &new_globals {
-                while new_got.contains_key(&idx) {
-                    idx += 1;
-                }
-                new_got_entries.insert(name.to_string(), idx);
-                index.associate_got_index(name, idx);
-                new_got.insert(idx, name.to_string());
-            }
-
-            // Now we can write new_got_entries back out to a file.
-            write_got_layout(new_got_entries, location.as_str(), *format)?;
-
-            // Construct our GOT as a new global array. We initialise this array in the loader code.
-            let got_size = new_got.keys().max().map_or(0, |m| *m + 1);
-            let _got = self.llvm.create_global_variable(
-                self.module,
-                "__custom_got",
-                BasicTypeEnum::ArrayType(Llvm::get_array_type(
-                    BasicTypeEnum::PointerType(self.llvm.context.i8_type().ptr_type(0.into())),
-                    got_size.try_into().expect("the computed custom GOT size is too large"),
-                )),
-            );
-        }
-
         Ok(index)
     }
