commit clang-extract for openSUSE:Factory
Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package clang-extract for openSUSE:Factory checked in at 2024-07-04 16:22:42 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/clang-extract (Old) and /work/SRC/openSUSE:Factory/.clang-extract.new.2080 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "clang-extract" Thu Jul 4 16:22:42 2024 rev:2 rq:1185287 version:0~20240703.0b3e33c Changes: -------- --- /work/SRC/openSUSE:Factory/clang-extract/clang-extract.changes 2024-06-25 23:07:31.058393581 +0200 +++ /work/SRC/openSUSE:Factory/.clang-extract.new.2080/clang-extract.changes 2024-07-04 16:23:33.324126476 +0200 @@ -1,0 +2,31 @@ +Thu Jul 04 06:42:17 UTC 2024 - mvetter@suse.com + +- Update to version 0~20240703.0b3e33c: + * Initialize OutputFunctionPrototypeHeader + +------------------------------------------------------------------- +Tue Jul 02 17:56:02 UTC 2024 - mvetter@suse.com + +- Update to version 0~20240702.cbdca46: + * README.md: Remove dependency on python311-subprocess-tee + * testsuite: attr: Adapt testcase to be executed on ppc64le +- Remove BR python3-subprocess-tee + +------------------------------------------------------------------- +Tue Jul 02 16:46:42 UTC 2024 - mvetter@suse.com + +- Update to version 0~20240702.1f3826f: + * testsuite/attr: Skip tests on s390x + * libtest: Add special dg-skip-on-archs option + +------------------------------------------------------------------- +Fri Jun 28 05:08:31 UTC 2024 - mvetter@suse.com + +- Update to version 0~20240627.52683d1: + * testsuite: Adapt registers usage to aarch64 + * SymbolExternalizer: Refactor Get_Range_Of_Identifier + * SymbolExternalizer: Rewrite to use RecursiveASTVisitor + * SymbolExternalizer: Reduce indentation of _Externalize_Symbol + * SymbolExternalizer: Simplify Externalize_Symbols + +------------------------------------------------------------------- Old: ---- clang-extract-0~20240619.f7c935b.tar.xz New: ---- _service clang-extract-0~20240703.0b3e33c.tar.xz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ clang-extract.spec ++++++ --- /var/tmp/diff_new_pack.KJjwQX/_old 2024-07-04 16:23:33.904147658 +0200 +++ /var/tmp/diff_new_pack.KJjwQX/_new 2024-07-04 16:23:33.904147658 +0200 @@ -17,7 +17,7 @@ Name: clang-extract -Version: 0~20240619.f7c935b +Version: 0~20240703.0b3e33c Release: 0 Summary: A tool to extract code content from source files License: Apache-2.0 WITH LLVM-exception AND NCSA @@ -34,7 +34,6 @@ BuildRequires: python3-pexpect BuildRequires: python3-psutil BuildRequires: python3-pytest -BuildRequires: python3-subprocess-tee %description A tool to extract code content from source files using the clang and LLVM infrastructure. ++++++ _service ++++++ <services> <service name="tar_scm" mode="manual"> <param name="scm">git</param> <param name="url">https://github.com/SUSE/clang-extract</param> <param name="revision">0b3e33c774708b32937c21c8a61dec4bd4fa5027</param> <param name="versionformat">0~%cd.%h</param> <param name="changesgenerate">enable</param> <param name="changesauthor">mvetter@suse.com</param> </service> <service name="recompress" mode="manual"> <param name="file">*.tar</param> <param name="compression">xz</param> </service> <service name="set_version" mode="manual"/> </services> ++++++ clang-extract-0~20240619.f7c935b.tar.xz -> clang-extract-0~20240703.0b3e33c.tar.xz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/clang-extract-0~20240619.f7c935b/.github/workflows/testsuite.yml new/clang-extract-0~20240703.0b3e33c/.github/workflows/testsuite.yml --- old/clang-extract-0~20240619.f7c935b/.github/workflows/testsuite.yml 2024-06-19 21:48:44.000000000 +0200 +++ new/clang-extract-0~20240703.0b3e33c/.github/workflows/testsuite.yml 2024-07-03 21:24:37.000000000 +0200 @@ -29,8 +29,7 @@ run: zypper -n install libelf-devel llvm16-libclang13 clang16-devel libclang-cpp16 clang-tools libLLVM16 llvm16 llvm16-devel meson ninja - python311-psutil python311-pexpect python311-subprocess-tee - python311-pytest gcc findutils bash + python311-psutil python311-pexpect python311-pytest gcc findutils bash - uses: actions/checkout@v2 - name: meson run: meson setup build --buildtype=${{ matrix.build-type }} --native-file ce-native.ini diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/clang-extract-0~20240619.f7c935b/README.md new/clang-extract-0~20240703.0b3e33c/README.md --- old/clang-extract-0~20240619.f7c935b/README.md 2024-06-19 21:48:44.000000000 +0200 +++ new/clang-extract-0~20240703.0b3e33c/README.md 2024-07-03 21:24:37.000000000 +0200 @@ -12,7 +12,7 @@ ``` $ sudo zypper install clang18 clang18-devel libclang-cpp18 \ clang-tools libLLVM18 llvm18 llvm18-devel libelf-devel meson ninja \ - python311-psutil python311-pexpect python311-subprocess-tee python311-pytest + python311-psutil python311-pexpect python311-pytest ``` It's advised to use LLVM 18 and higher, since it's well tested. But there support for LLVM 16 and 17 as well, but you might find issues with it. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/clang-extract-0~20240619.f7c935b/libcextract/ArgvParser.cpp new/clang-extract-0~20240703.0b3e33c/libcextract/ArgvParser.cpp --- old/clang-extract-0~20240619.f7c935b/libcextract/ArgvParser.cpp 2024-06-19 21:48:44.000000000 +0200 +++ new/clang-extract-0~20240703.0b3e33c/libcextract/ArgvParser.cpp 2024-07-03 21:24:37.000000000 +0200 @@ -70,7 +70,8 @@ IpaclonesPath(nullptr), SymversPath(nullptr), DescOutputPath(nullptr), - IncExpansionPolicy(nullptr) + IncExpansionPolicy(nullptr), + OutputFunctionPrototypeHeader(nullptr) { for (int i = 0; i < argc; i++) { if (!Handle_Clang_Extract_Arg(argv[i])) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/clang-extract-0~20240619.f7c935b/libcextract/Passes.cpp new/clang-extract-0~20240703.0b3e33c/libcextract/Passes.cpp --- old/clang-extract-0~20240619.f7c935b/libcextract/Passes.cpp 2024-06-19 21:48:44.000000000 +0200 +++ new/clang-extract-0~20240703.0b3e33c/libcextract/Passes.cpp 2024-07-03 21:24:37.000000000 +0200 @@ -438,12 +438,12 @@ { /* Issue externalization. */ SymbolExternalizer externalizer(ctx->AST.get(), ctx->IA, ctx->Ibt, ctx->PatchObject, ctx->DumpPasses); - externalizer.Externalize_Symbols(ctx->Externalize); - if (ctx->RenameSymbols) { - /* The FuncExtractNames will be modified, as the function will be - renamed. */ - externalizer.Rename_Symbols(ctx->FuncExtractNames); - } + if (ctx->RenameSymbols) + /* The FuncExtractNames will be modified, as the function will be renamed. */ + externalizer.Externalize_Symbols(ctx->Externalize, ctx->FuncExtractNames); + else + externalizer.Externalize_Symbols(ctx->Externalize); + externalizer.Commit_Changes_To_Source(ctx->OFS, ctx->MFS, ctx->HeadersToExpand); /* Store the changed names. */ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/clang-extract-0~20240619.f7c935b/libcextract/SymbolExternalizer.cpp new/clang-extract-0~20240703.0b3e33c/libcextract/SymbolExternalizer.cpp --- old/clang-extract-0~20240619.f7c935b/libcextract/SymbolExternalizer.cpp 2024-06-19 21:48:44.000000000 +0200 +++ new/clang-extract-0~20240703.0b3e33c/libcextract/SymbolExternalizer.cpp 2024-07-03 21:24:37.000000000 +0200 @@ -33,34 +33,53 @@ FileEntry => FileID that we are sure to have modifications. */ #pragma GCC poison translateFile -/****** Begin hack: used to get a pointer to a private member of a class. *****/ -struct ASTUnit_TopLevelDecls +/* Tokenize away the function-like macro stuff or expression, we only want + the identifier. */ +#define TOKEN_VECTOR " ().,;+-*/^|&{}[]<>^&|\r\n\t" + +/* Return the ranges for all identifiers on the ids vector */ +template <typename T> +static std::vector<std::pair<std::string, SourceRange>> +Get_Range_Of_Identifier(const SourceRange &range, const T &ids) { - typedef std::vector<Decl*> ASTUnit::*type; - friend type Get(ASTUnit_TopLevelDecls); -}; + std::vector< std::pair < std::string, SourceRange> > ret = {}; + StringRef string = PrettyPrint::Get_Source_Text(range); + + /* Create temporary buff, strtok modifies it. */ + unsigned len = string.size(); + char buf[len + 1]; + memcpy(buf, string.data(), len); + buf[len] = '\0'; + + char *tok = strtok(buf, TOKEN_VECTOR); + while (tok != nullptr) { + if (ids.find(StringRef(tok)) != ids.end()) { + /* Found. */ + ptrdiff_t distance = (ptrdiff_t)(tok - buf); + assert(distance >= 0); -template<typename Tag, typename Tag::type M> -struct Rob { - friend typename Tag::type Get(Tag) { - return M; + /* Compute the distance from the original SourceRange of the + MacroExpansion. */ + int32_t offset = (int32_t) distance; + SourceLocation start = range.getBegin().getLocWithOffset(offset); + SourceLocation end = start.getLocWithOffset(strlen(tok)-1); + + /* Add to the list of output. */ + ret.push_back(std::make_pair(std::string(tok), SourceRange(start, end))); + } + + tok = strtok(nullptr, TOKEN_VECTOR); } -}; -template struct Rob<ASTUnit_TopLevelDecls, &ASTUnit::TopLevelDecls>; + return ret; +} -/** Get pointer to the TopLevelDecls vector in the ASTUnit. - * - * The TopLevelDecls attribute from the AST is private, but we need to - * access that in order to remove nodes from the AST toplevel vector, - * else we can't remove further declarations of the function we need - * to externalize. - */ -static std::vector<Decl *>* Get_Pointer_To_Toplev(ASTUnit *obj) +static std::vector<std::pair<std::string, SourceRange>> +Get_Range_Of_Identifier(const SourceRange &range, const StringRef &id) { - return &(obj->*Get(ASTUnit_TopLevelDecls())); + std::set<StringRef> ids = { id }; + return Get_Range_Of_Identifier(range, ids); } -/****************************** End hack. ***********************************/ #define EXTERNALIZED_PREFIX "klpe_" #define RENAME_PREFIX "klpp_" @@ -71,46 +90,13 @@ /* IntervalTree. */ using namespace Intervals; -// For debugging purposes. -#if 0 -extern "C" void Debug_Range(const SourceRange &range) -{ - llvm::outs() << PrettyPrint::Get_Source_Text_Raw(range) << '\n'; -} - -#define DEBUG_RANGE(x) do { \ - llvm::outs() << "line " << __LINE__ << " "; \ - Debug_Range(x); \ - } while (0) -#endif - -/** Define a Visitor just to update TypeOfType instances: - * - * Kernel code sometime does things like this: - * - * typeof(symbol_to_externalize) x; - * - * this bizarre constructs come from macros, which sometimes explodes because - * clang-extract is unable to determine which part of it generates the Decl - * in question. - * - * Now we have to update those typeofs, but there is no easy way of parsing Types - * to get to the Expr. But the RecursiveASTVisitor knows how to do it, so we - * hack the class in order to setup a call to - * FunctionUpdate::Update_References_To_Symbol and update those references. - * - */ -class TypeUpdaterVisitor : public RecursiveASTVisitor<TypeUpdaterVisitor> +class ExternalizerVisitor: public RecursiveASTVisitor<ExternalizerVisitor> { public: /* Constructor. Should match the FunctionUpdate constructor so we can instantiate it in the Visitor. */ - TypeUpdaterVisitor(SymbolExternalizer &se, ValueDecl *new_decl, - const std::string &old_decl_name, bool wrap) - : SE(se), - NewSymbolDecl(new_decl), - OldSymbolName(old_decl_name), - Wrap(wrap) + ExternalizerVisitor(SymbolExternalizer &se) + : SE(se) {} enum { @@ -118,95 +104,150 @@ VISITOR_STOP = false, // Return this for the AST tranversal to stop completely; }; - /* The updator method. This will be called by the Visitor when traversing the - code. */ - bool VisitTypeOfExprType(TypeOfExprType *type) + /* This method will be used by LLVM whenever it finds a symbol declaration, + * being a variable or a method/function. + */ + bool VisitDeclaratorDecl(DeclaratorDecl *decl) { - /* Create a instance of our tested-in-battle FunctionUpdater... */ - SymbolExternalizer::FunctionUpdater fu(SE, NewSymbolDecl, OldSymbolName, Wrap); - - /* ... and call the method which updates the body of a function. (but that - is not a function! But who cares, and Expr is a special type of Stmt - in clang so everything works! */ - fu.Update_References_To_Symbol(type->getUnderlyingExpr()); + /* As we are dealing with variables that should be externalized, we are only + * interested in the symbols that are present in the hash. If the symbol + * found is not in the hash, just continue to the next symbol. + */ + SymbolUpdateStatus *sym = SE.getSymbolsUpdateStatus(decl->getName()); + if (sym == nullptr) + return VISITOR_CONTINUE; + + ExternalizationType type = sym->ExtType; + if (type == ExternalizationType::STRONG) { + if (!sym->Done) { + std::string sym_name = decl->getName().str(); + + /* If we found the first instance of the function we want to externalize, + then proceed to create and replace the function declaration node with + a variable declaration node of proper type. */ + const std::string new_name = EXTERNALIZED_PREFIX + sym_name; + sym->NewName = new_name; + DeclaratorDecl *new_decl = SE.Create_Externalized_Var(decl, new_name); + SE.Log.push_back({.OldName = sym_name, + .NewName = new_name, + .Type = type}); + + /* Create a string with the new variable type and name. */ + std::string o; + llvm::raw_string_ostream outstr(o); + + /* + * It won't be a problem to add the code below multiple times, since + * clang-extract will remove ifndefs for already defined macros + */ + if (SE.Ibt) { + outstr << "#ifndef KLP_RELOC_SYMBOL_POS\n" + "# define KLP_RELOC_SYMBOL_POS(LP_OBJ_NAME, SYM_OBJ_NAME, SYM_NAME, SYM_POS) \\\n" + " asm(\"\\\".klp.sym.rela.\" #LP_OBJ_NAME \".\" #SYM_OBJ_NAME \".\" #SYM_NAME \",\" #SYM_POS \"\\\"\")\n" + "# define KLP_RELOC_SYMBOL(LP_OBJ_NAME, SYM_OBJ_NAME, SYM_NAME) \\\n" + " KLP_RELOC_SYMBOL_POS(LP_OBJ_NAME, SYM_OBJ_NAME, SYM_NAME, 0)\n" + "#endif\n\n"; + } - return VISITOR_CONTINUE; - } + new_decl->print(outstr); - /* To handle symbol renames on situations like the sizeof below - * - * static char x[4]; - * - * void f(void) { - * char y[sizeof(x)]; - * } - */ - bool VisitArrayTypeLoc(const ArrayTypeLoc &type) - { - SymbolExternalizer::FunctionUpdater fu(SE, NewSymbolDecl, OldSymbolName, Wrap); - fu.Update_References_To_Symbol(type.getSizeExpr()); + if (SE.Ibt) { + std::string sym_mod = SE.IA.Get_Symbol_Module(sym_name); + if (sym_mod == "") + sym_mod = "vmlinux"; - return VISITOR_CONTINUE; - } + outstr << " \\\n" << "\tKLP_RELOC_SYMBOL(" << SE.PatchObject << ", " << + sym_mod << ", " << sym_name << ")"; + } + outstr << ";\n"; - private: + SE.Replace_Text(decl->getSourceRange(), outstr.str(), 1000); - /** A reference to SymbolExternalizer. */ - SymbolExternalizer &SE; + sym->Done = true; + sym->Wrap = !SE.Ibt; + } else { + /* If we externalized this function, then all further declarations of + this function shall be discarded. */ - /** The new variable declaration to replace the to be externalized function. */ - ValueDecl *NewSymbolDecl; + /* Get source location of old function declaration. */ + SE.Remove_Text(decl->getSourceRange(), 1000); + } + } else if (type == ExternalizationType::WEAK) { + /* Now checks if this is a function or a variable delcaration. */ + if (FunctionDecl *func = dyn_cast<FunctionDecl>(decl)) { + /* In the case it is a function we need to remove its declaration that + have a body. */ + if (func->hasBody()) { + FunctionDecl *with_body = func->getDefinition(); + if (with_body != func) + SE.Remove_Text(with_body->getSourceRange(), 1000); + } + } + } else if (type == ExternalizationType::RENAME) { + /* Get SourceRange where the function identifier is. */ + auto ids = Get_Range_Of_Identifier(decl->getSourceRange(), + decl->getName()); + assert(ids.size() > 0 && "Decl name do not match required identifier?"); + + SourceRange id_range = ids[0].second; + const std::string new_name = RENAME_PREFIX + decl->getName().str(); + sym->NewName = new_name; + if (!sym->Done) { + /* Only register the first decl rename of the same variable. */ + SE.Log.push_back({.OldName = decl->getName().str(), + .NewName = new_name, + .Type = type}); + sym->Done = true; + } - /** Name of the to be replaced function. */ - const std::string &OldSymbolName; + /* In the case there is a `static` modifier in function, try to drop it. */ + if (FunctionDecl *fdecl = dyn_cast<FunctionDecl>(decl)) { + SE.Drop_Static(fdecl); + } - bool Wrap; -}; + /* Replace text content of old declaration. */ + SE.Replace_Text(id_range, new_name, 100); -static std::vector<SourceRange> -Get_Range_Of_Identifier_In_SrcRange(const SourceRange &range, const char *id) -{ - std::vector<SourceRange> ret = {}; - StringRef string = PrettyPrint::Get_Source_Text(range); + sym->Wrap = false; + } - /* Tokenize away the function-like macro stuff or expression, we only want - the identifier. */ - const char *token_vector = " ().,;+-*/^|&{}[]<>^&|\r\n\t"; + return VISITOR_CONTINUE; + } - /* Create temporary buff, strtok modifies it. */ - unsigned len = string.size(); - char buf[len + 1]; - memcpy(buf, string.data(), len); - buf[len] = '\0'; + /* This visitor will be called by LLVM whenever a symbol is being referenced, being it a + * variable or a function call. We are only interested in the symbols that are + * present in the SymbolsMap. Once found, it updates the code to the new symbol externalized + * name of the symbol. */ + bool VisitDeclRefExpr(DeclRefExpr *expr) + { + SourceLocation begin = expr->getBeginLoc(); + SourceLocation end = expr->getEndLoc(); + SourceRange range(begin, end); + const StringRef &sym_name = PrettyPrint::Get_Source_Text(range); + SymbolUpdateStatus *sym = SE.getSymbolsUpdateStatus(sym_name); - char *tok = strtok(buf, token_vector); - while (tok != nullptr) { - if (strcmp(tok, id) == 0) { - /* Found. */ - ptrdiff_t distance = (ptrdiff_t)(tok - buf); - assert(distance >= 0); + /* + * Only execute the code in the visitor if we have already externalized the + * symbol + */ + if (sym == nullptr || !sym->Done) + return VISITOR_CONTINUE; - /* Compute the distance from the original SourceRange of the - MacroExpansion. */ - int32_t offset = (int32_t) distance; - SourceLocation start = range.getBegin().getLocWithOffset(offset); - SourceLocation end = start.getLocWithOffset(strlen(tok)-1); + ValueDecl *decl = expr->getDecl(); - /* Add to the list of output. */ - ret.push_back(SourceRange(start, end)); + if (decl->getName() == sym_name) { + /* Issue a text modification. */ + SE.Replace_Text(range, sym->getUseName(), 100); } - tok = strtok(nullptr, token_vector); + return VISITOR_CONTINUE; } - return ret; -} + private: -static std::vector<SourceRange> -Get_Range_Of_Identifier_In_SrcRange(const SourceRange &range, const StringRef id) -{ - return Get_Range_Of_Identifier_In_SrcRange(range, id.str().c_str()); -} + /** A reference to SymbolExternalizer. */ + SymbolExternalizer &SE; +}; static SourceRange Get_Range_For_Rewriter(const ASTUnit *ast, const SourceRange &range) { @@ -234,10 +275,10 @@ bool SymbolExternalizer::Drop_Static(FunctionDecl *decl) { if (decl->isStatic()) { - auto ids = Get_Range_Of_Identifier_In_SrcRange(decl->getSourceRange(), "static"); + auto ids = Get_Range_Of_Identifier(decl->getSourceRange(), StringRef("static")); assert(ids.size() > 0 && "static decl without static keyword?"); - SourceRange static_range = ids[0]; + SourceRange static_range = ids[0].second; Remove_Text(static_range, 10); /* Update the storage class. */ @@ -477,86 +518,6 @@ /* ---- End of Deltas class -------- */ -bool SymbolExternalizer::FunctionUpdater::Update_References_To_Symbol(Stmt *stmt) -{ - if (!stmt) - return false; - - bool replaced = false; - - if (DeclRefExpr::classof(stmt)) { - DeclRefExpr *expr = (DeclRefExpr *) stmt; - ValueDecl *decl = expr->getDecl(); - - /* In case we modified the Identifier of the original function, getName() - will return the name of the new function but the SourceText will not - be updated. Hence check if the SourceRange has it as well. */ - auto vec_of_ranges = Get_Range_Of_Identifier_In_SrcRange(expr->getSourceRange(), - OldSymbolName.c_str()); - StringRef old_name_src_txt = ""; - if (!vec_of_ranges.empty()) { - old_name_src_txt = PrettyPrint::Get_Source_Text(vec_of_ranges[0]); - } - - if (decl->getName() == OldSymbolName || old_name_src_txt == OldSymbolName) { - /* Rewrite the source code. */ - SourceLocation begin = expr->getBeginLoc(); - SourceLocation end = expr->getEndLoc(); - - SourceRange range(begin, end); - StringRef str = PrettyPrint::Get_Source_Text(range); - - /* Ensure that we indeed got the old symbol. */ - if (str == OldSymbolName) { - /* Prepare the text modification. */ - std::string new_name; - if (Wrap) { - new_name = NewSymbolDecl->getName().str(); - } else { - new_name = "(*" + NewSymbolDecl->getName().str() + ")"; - } - - /* Issue a text modification. */ - SE.Replace_Text(range, new_name, 100); - } else { - /* If we did not get the old symbol, it mostly means that the - references comes from a macro. */ - - //std::cout << "WARNING: Unable to find location of symbol name: " << OldSymbolName << '\n'; - } - - /* Replace reference with the rewiten name. */ - expr->setDecl(NewSymbolDecl); - replaced = true; - } - } - - /* Repeat the process to child statements. */ - clang::Stmt::child_iterator it, it_end; - for (it = stmt->child_begin(), it_end = stmt->child_end(); - it != it_end; ++it) { - - Stmt *child = *it; - replaced |= Update_References_To_Symbol(child); - } - - return replaced; -} - -bool SymbolExternalizer::FunctionUpdater::Update_References_To_Symbol(DeclaratorDecl *to_update) -{ - ToUpdate = to_update; - if (to_update) { - if (VarDecl *vdecl = dyn_cast<VarDecl>(to_update)) { - return Update_References_To_Symbol(vdecl->getInit()); - } - if (FunctionDecl *fdecl = dyn_cast<FunctionDecl>(to_update)) { - return Update_References_To_Symbol(fdecl->getBody()); - } - } - return false; -} - void SymbolExternalizer::Replace_Text(const SourceRange &range, StringRef new_name, int prio) { SourceRange rw_range = Get_Range_For_Rewriter(AST, range); @@ -732,28 +693,18 @@ return std::string(main_buf.begin(), main_buf.end()); } -void SymbolExternalizer::Strongly_Externalize_Symbol(const std::string &to_externalize) -{ - _Externalize_Symbol(to_externalize, ExternalizationType::STRONG); -} - -void SymbolExternalizer::Weakly_Externalize_Symbol(const std::string &to_externalize) -{ - _Externalize_Symbol(to_externalize, ExternalizationType::WEAK); -} - /** Given a MacroExpansion object, we try to get the location of where the token appears on it. TODO: clang may provide a way of doing this with a tokenizer, so maybe this code can become cleaner with it. */ -static std::vector<SourceRange> -Get_Range_Of_Identifier_In_Macro_Expansion(const MacroExpansion *exp, const char *id) +std::vector<std::pair<std::string, SourceRange>> +SymbolExternalizer::Get_Range_Of_Identifier_In_Macro_Expansion(const MacroExpansion *exp) { - return Get_Range_Of_Identifier_In_SrcRange(exp->getSourceRange(), id); + return Get_Range_Of_Identifier(exp->getSourceRange(), SymbolsMap); } -void SymbolExternalizer::Rewrite_Macros(std::string const &to_look_for, std::string const &replace_with) +void SymbolExternalizer::Rewrite_Macros(void) { PreprocessingRecord *rec = AST->getPreprocessor().getPreprocessingRecord(); @@ -772,236 +723,68 @@ MacroInfo *maybe_macro = MW.Get_Macro_Info(id_info, def->getLocation()); if (!maybe_macro && !MacroWalker::Is_Identifier_Macro_Argument(info, id_info)) { - if (id_info->getName() == to_look_for) { - Replace_Text(SourceRange(tok.getLocation(), tok.getLastLoc()), replace_with, 10); - } + SymbolUpdateStatus *sym = getSymbolsUpdateStatus(id_info->getName()); + if (sym) + Replace_Text(SourceRange(tok.getLocation(), tok.getLastLoc()), sym->getUseName(), 10); } } } else if (MacroExpansion *exp = dyn_cast<MacroExpansion>(entity)) { /* We must look for references to externalized variables in funcion-like macro expansions on the program's toplevel. */ - auto ranges = Get_Range_Of_Identifier_In_Macro_Expansion(exp, to_look_for.c_str()); + auto ranges = Get_Range_Of_Identifier_In_Macro_Expansion(exp); - for (SourceRange &tok_range : ranges) { - Replace_Text(tok_range, replace_with, 10); + for (auto &tok_range : ranges) { + // At this point, tok_range will contain a valid symbol + SymbolUpdateStatus *sym = getSymbolsUpdateStatus(tok_range.first); + Replace_Text(tok_range.second, sym->getUseName(), 10); } } } } -bool SymbolExternalizer::_Externalize_Symbol(const std::string &to_externalize, - ExternalizationType type) +enum ExternalizationType SymbolExternalizer::Get_Symbol_Ext_Type(const std::string &to_externalize) { - ASTUnit::top_level_iterator it; - bool first = true; - ValueDecl *new_decl = nullptr; - bool must_update = false; - bool wrap = false; - bool externalized = false; - - /* The TopLevelDecls attribute from the AST is private, but we need to - access that in order to remove nodes from the AST toplevel vector, - else we can't remove further declarations of the function we need - to externalize. */ - std::vector<Decl *> *topleveldecls = Get_Pointer_To_Toplev(AST); - - for (it = AST->top_level_begin(); it != AST->top_level_end(); ++it) { - DeclaratorDecl *decl = dynamic_cast<DeclaratorDecl *>(*it); - - /* If we externalized some function, then we must start analyzing for further - functions in order to find if there is a reference to the function we - externalized. */ - if (must_update) { - /* Call our hack to update the TypeOfTypes. */ - TypeUpdaterVisitor(*this, new_decl, to_externalize, wrap || Ibt) - .TraverseDecl(decl); - - FunctionUpdater(*this, new_decl, to_externalize, wrap || Ibt) - .Update_References_To_Symbol(decl); - } - - if (decl && decl->getName() == to_externalize) { - if (type == ExternalizationType::STRONG) { - if (first) { - /* If we found the first instance of the function we want to externalize, - then proceed to create and replace the function declaration node with - a variable declaration node of proper type. */ - std::string old_name = decl->getName().str(); - std::string new_name = EXTERNALIZED_PREFIX + old_name; - new_decl = Create_Externalized_Var(decl, new_name); - Log.push_back({.OldName = old_name, - .NewName = new_name, - .Type = ExternalizationType::STRONG}); - - /* Create a string with the new variable type and name. */ - std::string o; - llvm::raw_string_ostream outstr(o); - - /* - * It won't be a problem to add the code below multiple times, since - * clang-extract will remove ifndefs for already defined macros - */ - if (Ibt) { - outstr << "#ifndef KLP_RELOC_SYMBOL_POS\n" - "# define KLP_RELOC_SYMBOL_POS(LP_OBJ_NAME, SYM_OBJ_NAME, SYM_NAME, SYM_POS) \\\n" - " asm(\"\\\".klp.sym.rela.\" #LP_OBJ_NAME \".\" #SYM_OBJ_NAME \".\" #SYM_NAME \",\" #SYM_POS \"\\\"\")\n" - "# define KLP_RELOC_SYMBOL(LP_OBJ_NAME, SYM_OBJ_NAME, SYM_NAME) \\\n" - " KLP_RELOC_SYMBOL_POS(LP_OBJ_NAME, SYM_OBJ_NAME, SYM_NAME, 0)\n" - "#endif\n\n"; - } - - new_decl->print(outstr, AST->getLangOpts()); - - if (Ibt) { - std::string sym_mod = IA.Get_Symbol_Module(old_name); - if (sym_mod == "") - sym_mod = "vmlinux"; - - outstr << " \\\n" << "\tKLP_RELOC_SYMBOL(" << PatchObject << ", " << - sym_mod << ", " << old_name << ")"; - } - outstr << ";\n"; - - Replace_Text(decl->getSourceRange(), outstr.str(), 1000); - - must_update = true; - wrap = false; - - std::string replacement = "(*" + new_decl->getName().str() + ")"; - /* - * IBT uses extern variables, so we need to use the same type from the - * private symbol. - */ - if (Ibt) - replacement = new_decl->getName().str(); - - /* Update any macros that may reference the symbol. */ - Rewrite_Macros(to_externalize, replacement); - - /* Slaps the new node into the position of where was the function - to be externalized. */ - *it = new_decl; - first = false; - externalized = true; - - } else { - /* If we externalized this function, then all further delcarations of - this function shall be discarded. */ - - /* Get source location of old function declaration. */ - Remove_Text(decl->getSourceRange(), 1000); - - /* Remove node from AST. */ - topleveldecls->erase(it); - /* We must decrease the iterator because we deleted an element from the - vector. */ - it--; - } - } else if (type == ExternalizationType::WEAK) { - /* Now checks if this is a function or a variable delcaration. */ - if (FunctionDecl *func = dyn_cast<FunctionDecl>(decl)) { - /* In the case it is a function we need to remove its declaration that - have a body. */ - if (func->hasBody()) { - FunctionDecl *with_body = func->getDefinition(); - externalized = true; - if (with_body == func) { - /* Damn. This function do not have a prototype, we will have to - craft it ourself. */ - - /* FIXME: This reults in unwanted intersections. */ -#if 0 - Stmt *body = with_body->getBody(); - Replace_Text(body->getSourceRange(), ";\n", 1000); - - /* Remove the body from the AST. */ - with_body->setBody(nullptr); -#endif - } else { - Remove_Text(with_body->getSourceRange(), 1000); - topleveldecls->erase(it); - - /* We must decrease the iterator because we deleted an element from the - vector. */ - it--; - } - } - } - } else if (type == ExternalizationType::RENAME) { - /* Get SourceRange where the function identifier is. */ - auto ids = Get_Range_Of_Identifier_In_SrcRange(decl->getSourceRange(), - decl->getName()); - assert(ids.size() > 0 && "Decl name do not match required identifier?"); - - SourceRange id_range = ids[0]; - std::string new_name = RENAME_PREFIX + decl->getName().str(); - if (first) { - /* Only register the first decl rename of the same variable. */ - Log.push_back({.OldName = decl->getName().str(), - .NewName = new_name, - .Type = ExternalizationType::RENAME}); - first = false; - } - - /* In the case there is a `static` modifier in function, try to drop it. */ - if (FunctionDecl *fdecl = dyn_cast<FunctionDecl>(decl)) { - Drop_Static(fdecl); - } + /* If the symbol is available in the debuginfo and is an EXTERN symbol, we + do not need to rewrite it, but rather we need to erase any declaration + with body of it. */ + if (IA.Can_Decide_Visibility()) { + if (IA.Is_Externally_Visible(to_externalize)) + return ExternalizationType::WEAK; - /* Rename the declaration. */ - IdentifierInfo *new_id = AST->getPreprocessor().getIdentifierInfo(new_name); - DeclarationName new_decl_name(new_id); - - decl->setDeclName(new_decl_name); - new_decl = decl; - - /* Replace text content of old declaration. */ - Replace_Text(id_range, new_name, 100); - - must_update = true; - wrap = true; - - /* Update any macros that may reference the symbol. */ - Rewrite_Macros(to_externalize, new_name); - externalized = true; - } - } + return ExternalizationType::STRONG; } - return externalized; + /* Well, we don't have information so we simply strongly externalize + everything. */ + return ExternalizationType::STRONG; } -void SymbolExternalizer::Externalize_Symbol(const std::string &to_externalize) +SymbolUpdateStatus *SymbolExternalizer::getSymbolsUpdateStatus(const StringRef &sym) { - /* If the symbol is available in the debuginfo and is an EXTERN symbol, we - do not need to rewrite it, but rather we need to erase any declaration - with body of it. */ - if (IA.Can_Decide_Visibility()) { - if (IA.Is_Externally_Visible(to_externalize)) { - Weakly_Externalize_Symbol(to_externalize); - } else { - Strongly_Externalize_Symbol(to_externalize); - } - } else { - /* Well, we don't have information so we simply strongly externalize - everything. */ - Strongly_Externalize_Symbol(to_externalize); - } + auto ret = SymbolsMap.find(sym); + if (ret == SymbolsMap.end()) + return nullptr; + + return &ret->second; } -void SymbolExternalizer::Externalize_Symbols(std::vector<std::string> const &to_externalize_array) +void SymbolExternalizer::Externalize_Symbols(std::vector<std::string> const &to_externalize_array, + std::vector<std::string> &to_rename_array) { for (const std::string &to_externalize : to_externalize_array) { - Externalize_Symbol(to_externalize); + SymbolsMap.insert({to_externalize, SymbolUpdateStatus(Get_Symbol_Ext_Type(to_externalize))}); } -} -void SymbolExternalizer::Rename_Symbols(std::vector<std::string> &to_rename_array) -{ for (std::string &to_externalize : to_rename_array) { - if (_Externalize_Symbol(to_externalize, ExternalizationType::RENAME)) { - /* Update the function names for the ClosurePass. */ - to_externalize = RENAME_PREFIX + to_externalize; - } + SymbolsMap.insert({to_externalize, SymbolUpdateStatus(ExternalizationType::RENAME)}); + to_externalize = RENAME_PREFIX + to_externalize; } + + /* Start traversing the AST to find all references to the symbols that we want + * to externalize or rename. */ + ExternalizerVisitor(*this).TraverseDecl(AST->getASTContext().getTranslationUnitDecl()); + + /* Search for all macros and macro expansions and rewrite them using the new + * names for the externalized variables. */ + Rewrite_Macros(); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/clang-extract-0~20240619.f7c935b/libcextract/SymbolExternalizer.hh new/clang-extract-0~20240703.0b3e33c/libcextract/SymbolExternalizer.hh --- old/clang-extract-0~20240619.f7c935b/libcextract/SymbolExternalizer.hh 2024-06-19 21:48:44.000000000 +0200 +++ new/clang-extract-0~20240703.0b3e33c/libcextract/SymbolExternalizer.hh 2024-07-03 21:24:37.000000000 +0200 @@ -20,6 +20,7 @@ #include <clang/Tooling/Tooling.h> #include <clang/Rewrite/Core/Rewriter.h> +#include "llvm/ADT/StringMap.h" using namespace clang; @@ -30,6 +31,48 @@ ExternalizationType Type; }; +/* Contains the context of an externalized symbol. It's necessary when the + * symbol is being handled in different visitors to know why/if it was already + * externalized. */ +struct SymbolUpdateStatus +{ + + SymbolUpdateStatus(std::string new_name, ExternalizationType ext, bool done, bool wrap) : + NewName(new_name), + ExtType(ext), + Done(done), + Wrap(wrap) + { + } + + SymbolUpdateStatus(ExternalizationType ext) : + NewName(""), + ExtType(ext), + Done(false), + Wrap(false) + { + } + + /* The name that was used when the symbol was externalized. */ + std::string NewName; + + /* The type of the externalization: STRONG, WEAK of RENAME. */ + ExternalizationType ExtType; + + /* Set when the symbol was externalized. */ + bool Done; + + /* Set when the symbol usage should be dereferenced when used. */ + bool Wrap; + + /* Get the correct name used when the code was transformed */ + std::string getUseName(void) + { + return Wrap ? "(*" + NewName + ")" + : NewName; + } +}; + /** Text Modification class wrapping Clang's Rewriter. * * Clang has a Rewriter class in order to issue Text Modifications into the @@ -192,59 +235,33 @@ TM(ast, dump), IA(ia), Ibt(ibt), - PatchObject(patch_object) + PatchObject(patch_object), + SymbolsMap({}) { } - class FunctionUpdater - { - public: - /** A reference to SymbolExternalizer. */ - SymbolExternalizer &SE; - - /** The new variable declaration to replace the to be externalized function. */ - ValueDecl *NewSymbolDecl; - - /** Name of the to be replaced function. */ - const std::string &OldSymbolName; - - FunctionUpdater(SymbolExternalizer &se, ValueDecl *new_decl, - const std::string &old_decl_name, bool wrap) - : SE(se), - NewSymbolDecl(new_decl), - OldSymbolName(old_decl_name), - Wrap(wrap) - {} - - /** Sweeps the function and update any reference to the old function, replacing - it with the externalized variable. */ - bool Update_References_To_Symbol(DeclaratorDecl *to_update); - bool Update_References_To_Symbol(Stmt *); - - private: - - /* Decl to update. */ - DeclaratorDecl *ToUpdate; - - /* Do we need to wrap the use in (*name)? */ - bool Wrap; - }; - friend class FunctionUpdater; + friend class ExternalizerVisitor; /* Create the externalized var as a AST node ready to be slapped into the AST. */ VarDecl *Create_Externalized_Var(DeclaratorDecl *decl, const std::string &name); - /** Externalize a symbol, that means transforming functions into a function - pointer, or an global variable into a variable pointer. */ - void Externalize_Symbol(DeclaratorDecl *to_externalize); - void Externalize_Symbol(const std::string &to_externalize); - void Externalize_Symbols(std::vector<std::string> const &to_externalize_array); + /* Externalize a symbol, that means transforming functions into a function + pointer, or an global variable into a variable pointer. + + WARNING: Modifies the to_rename_array vector. */ + void Externalize_Symbols(std::vector<std::string> const &to_externalize_array, + std::vector<std::string> &to_rename_array); + inline void Externalize_Symbols(std::vector<std::string> const &to_externalize_array) + { + std::vector<std::string> empty = {}; + Externalize_Symbols(to_externalize_array, empty); + } - /* WARNING: Modifies the given vector. */ - void Rename_Symbols(std::vector<std::string> &to_rename_array); + std::vector<std::pair<std::string, SourceRange>> + Get_Range_Of_Identifier_In_Macro_Expansion(const MacroExpansion *exp); - bool _Externalize_Symbol(const std::string &to_externalize, ExternalizationType type); + SymbolUpdateStatus *getSymbolsUpdateStatus(const StringRef &sym); /* Drop `static` keyword in decl. */ bool Drop_Static(FunctionDecl *decl); @@ -265,11 +282,9 @@ private: - void Strongly_Externalize_Symbol(const std::string &to_externalize); - void Weakly_Externalize_Symbol(const std::string &to_externalize); - void _Externalize_Symbol(const std::string &to_externalize); + enum ExternalizationType Get_Symbol_Ext_Type(const std::string &to_externalize); - void Rewrite_Macros(std::string const &to_look_for, std::string const &replace_with); + void Rewrite_Macros(void); /* Issue a Text Replacement with a given `priority`. The priority will be used in case that there are two replacements to the same piece of text. */ @@ -297,4 +312,7 @@ /* Name of the object that will be patched. */ std::string PatchObject; + + /** Symbols and its externalization type */ + llvm::StringMap<SymbolUpdateStatus> SymbolsMap; }; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/clang-extract-0~20240619.f7c935b/testsuite/lib/libtest.py new/clang-extract-0~20240703.0b3e33c/testsuite/lib/libtest.py --- old/clang-extract-0~20240619.f7c935b/testsuite/lib/libtest.py 2024-06-19 21:48:44.000000000 +0200 +++ new/clang-extract-0~20240703.0b3e33c/testsuite/lib/libtest.py 2024-07-03 21:24:37.000000000 +0200 @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import os +import platform import pathlib import re import signal @@ -60,6 +61,7 @@ self.skip_silently = self.should_skip_test_silently() self.no_debuginfo = self.without_debuginfo() self.no_ipa_clones = self.without_ipaclones() + self.skip_on_archs = self.should_skip_test_on_archs() self.binaries_path = binaries_path @@ -171,6 +173,17 @@ return False + # The architectures should be specified with speaces between them, e.g. + # /* { dg-skip-on-archs "x86_64 ppc64le" }*/ + def should_skip_test_on_archs(self): + p = re.compile('{ *dg-skip-on-archs "(.*)" }') + matched = re.search(p, self.file_content) + if matched is not None: + matches = matched.group(1).split(' ') + return platform.processor() in matches + + return False + def without_debuginfo(self): p = re.compile('{ *dg-no-debuginfo *}') matched = re.search(p, self.file_content) @@ -346,6 +359,10 @@ clang_extract = self.binaries_path + 'clang-extract' ce_output_path = '/tmp/' + next(tempfile._get_candidate_names()) + '.CE.c' + if self.skip_on_archs: + self.print_result(77) + return 77 + command = [ clang_extract, '-DCE_OUTPUT_FILE=' + ce_output_path, self.test_path ] command.extend(self.options) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/clang-extract-0~20240619.f7c935b/testsuite/small/attr-1.c new/clang-extract-0~20240703.0b3e33c/testsuite/small/attr-1.c --- old/clang-extract-0~20240619.f7c935b/testsuite/small/attr-1.c 2024-06-19 21:48:44.000000000 +0200 +++ new/clang-extract-0~20240703.0b3e33c/testsuite/small/attr-1.c 2024-07-03 21:24:37.000000000 +0200 @@ -1,5 +1,12 @@ /* { dg-options "-DCE_EXTRACT_FUNCTIONS=f -DCE_NO_EXTERNALIZATION" }*/ +/* { dg-skip-on-archs "s390x" }*/ +#ifdef __x86_64__ register unsigned long current_stack_pointer asm("rsp"); +#elif __aarch64__ +register unsigned long current_stack_pointer asm("sp"); +#elif __PPC64__ +register unsigned long current_stack_pointer asm("r1"); +#endif unsigned long f() { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/clang-extract-0~20240619.f7c935b/testsuite/small/attr-6.c new/clang-extract-0~20240703.0b3e33c/testsuite/small/attr-6.c --- old/clang-extract-0~20240619.f7c935b/testsuite/small/attr-6.c 2024-06-19 21:48:44.000000000 +0200 +++ new/clang-extract-0~20240703.0b3e33c/testsuite/small/attr-6.c 2024-07-03 21:24:37.000000000 +0200 @@ -1,5 +1,12 @@ /* { dg-options "-DCE_EXTRACT_FUNCTIONS=f -DCE_NO_EXTERNALIZATION" }*/ +/* { dg-skip-on-archs "s390x" }*/ +#ifdef __x86_64__ #define REG "rsp" +#elif __aarch64__ +#define REG "sp" +#elif __PPC64__ +#define REG "r1" +#endif register unsigned long current_stack_pointer asm(REG); @@ -8,6 +15,6 @@ return current_stack_pointer; } -/* { dg-final { scan-tree-dump "#define REG \"rsp\"" } } */ +/* { dg-final { scan-tree-dump "#define REG \"(rsp|sp|r1)\"" } } */ /* { dg-final { scan-tree-dump "current_stack_pointer asm\(REG\)" } } */ /* { dg-final { scan-tree-dump "unsigned long f" } } */
participants (1)
-
Source-Sync