From 49011b52fcba02a6051957b84705159f52fae4e4 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Thu, 7 Oct 2010 16:32:35 +0000 Subject: Vendor import of llvm 2.8 release: http://llvm.org/svn/llvm-project/llvm/tags/RELEASE_28@115866 Approved by: rpaulo (mentor) --- Makefile.rules | 5 + autoconf/configure.ac | 2 +- configure | 20 +- docs/ReleaseNotes.html | 1011 ++++++++++++++++++++++++++--------- include/llvm/ADT/SmallVector.h | 2 +- lib/CodeGen/MachineCSE.cpp | 9 +- lib/Target/ARM/ARMBaseInstrInfo.cpp | 7 +- lib/Target/X86/X86ISelLowering.cpp | 15 +- 8 files changed, 781 insertions(+), 290 deletions(-) diff --git a/Makefile.rules b/Makefile.rules index 2e18c66e2b7a..9cff1053d0d9 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -941,6 +941,11 @@ ifdef EXPORTED_SYMBOL_FILE # First, set up the native export file, which may differ from the source # export file. +# The option --version-script is not effective on GNU ld win32. +ifneq (,$(filter $(HOST_OS),Cygwin MingW)) + HAVE_LINK_VERSION_SCRIPT := 0 +endif + ifeq ($(HOST_OS),Darwin) # Darwin convention prefixes symbols with underscores. NativeExportsFile := $(ObjDir)/$(notdir $(EXPORTED_SYMBOL_FILE)).sed diff --git a/autoconf/configure.ac b/autoconf/configure.ac index de327449e23d..0596dd60d230 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -31,7 +31,7 @@ dnl=== dnl===-----------------------------------------------------------------------=== dnl Initialize autoconf and define the package name, version number and dnl email address for reporting bugs. -AC_INIT([[llvm]],[[2.8rc]],[llvmbugs@cs.uiuc.edu]) +AC_INIT([[llvm]],[[2.8]],[llvmbugs@cs.uiuc.edu]) dnl Provide a copyright substitution and ensure the copyright notice is included dnl in the output of --version option of the generated configure script. diff --git a/configure b/configure index d2b98bfe661f..776de364bb20 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! 
/bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.60 for llvm 2.8rc. +# Generated by GNU Autoconf 2.60 for llvm 2.8. # # Report bugs to . # @@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh} # Identity of this package. PACKAGE_NAME='llvm' PACKAGE_TARNAME='-llvm-' -PACKAGE_VERSION='2.8rc' -PACKAGE_STRING='llvm 2.8rc' +PACKAGE_VERSION='2.8' +PACKAGE_STRING='llvm 2.8' PACKAGE_BUGREPORT='llvmbugs@cs.uiuc.edu' ac_unique_file="lib/VMCore/Module.cpp" @@ -1318,7 +1318,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures llvm 2.8rc to adapt to many kinds of systems. +\`configure' configures llvm 2.8 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1384,7 +1384,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of llvm 2.8rc:";; + short | recursive ) echo "Configuration of llvm 2.8:";; esac cat <<\_ACEOF @@ -1394,7 +1394,7 @@ Optional Features: --enable-optimized Compile with optimizations enabled (default is NO) --enable-profiling Compile with profiling enabled (default is NO) --enable-assertions Compile with assertion checks enabled (default is - YES) + NO) --enable-expensive-checks Compile with expensive debug checks enabled (default is NO) @@ -1533,7 +1533,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -llvm configure 2.8rc +llvm configure 2.8 generated by GNU Autoconf 2.60 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, @@ -1549,7 +1549,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. 
-It was created by llvm $as_me 2.8rc, which was +It was created by llvm $as_me 2.8, which was generated by GNU Autoconf 2.60. Invocation command line was $ $0 $@ @@ -21045,7 +21045,7 @@ exec 6>&1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by llvm $as_me 2.8rc, which was +This file was extended by llvm $as_me 2.8, which was generated by GNU Autoconf 2.60. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -21098,7 +21098,7 @@ Report bugs to ." _ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ -llvm config.status 2.8rc +llvm config.status 2.8 configured by $0, generated by GNU Autoconf 2.60, with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html index d346e1ccb1f3..9b6d5e847e94 100644 --- a/docs/ReleaseNotes.html +++ b/docs/ReleaseNotes.html @@ -3,6 +3,7 @@ + LLVM 2.8 Release Notes @@ -19,7 +20,6 @@
  • External Projects Using LLVM 2.8
  • What's New in LLVM 2.8?
  • Installation Instructions
  • -
  • Portability and Supported Platforms
  • Known Problems
  • Additional Information
  • @@ -28,11 +28,13 @@

    Written by the LLVM Team

    +
    @@ -66,23 +68,20 @@ current one. To see the release notes for a specific release, please see the Almost dead code. include/llvm/Analysis/LiveValues.h => Dan lib/Transforms/IPO/MergeFunctions.cpp => consider for 2.8. - llvm/Analysis/PointerTracking.h => Edwin wants this, consider for 2.8. GEPSplitterPass --> - - - + + +
    @@ -115,13 +114,32 @@ through expressive diagnostics, a high level of conformance to language standards, fast compilation, and low memory use. Like LLVM, Clang provides a modular, library-based architecture that makes it suitable for creating or integrating with other development tools. Clang is considered a -production-quality compiler for C and Objective-C on x86 (32- and 64-bit).

    +production-quality compiler for C, Objective-C, C++ and Objective-C++ on x86 +(32- and 64-bit), and for darwin-arm targets.

    In the LLVM 2.8 time-frame, the Clang team has made many improvements:

    -
      - -
    +
      +
    • Clang C++ is now feature-complete with respect to the ISO C++ 1998 and 2003 standards.
    • +
    • Added support for Objective-C++.
    • +
    • Clang now uses LLVM-MC to directly generate object code and to parse inline assembly (on Darwin).
    • +
    • Introduced many new warnings, including -Wmissing-field-initializers, -Wshadow, -Wno-protocol, -Wtautological-compare, -Wstrict-selector-match, -Wcast-align, -Wunused improvements, and greatly improved format-string checking.
    • +
    • Introduced the "libclang" library, a C interface to Clang intended to support IDE clients.
    • +
    • Added support for #pragma GCC visibility, #pragma align, and others.
    • +
    • Added support for SSE, ARM NEON, and Altivec.
    • +
    • Improved support for many Microsoft extensions.
    • +
    • Implemented support for blocks in C++.
    • +
    • Implemented precompiled headers for C++.
    • +
    • Improved abstract syntax trees to retain more accurate source information.
    • +
    • Added driver support for handling LLVM IR and bitcode files directly.
    • +
    • Major improvements to compiler correctness for exception handling.
    • +
    • Improved generated code quality in some areas: +
        +
      • Good code generation for X86-32 and X86-64 ABI handling.
      • +
      • Improved code generation for bit-fields, although important work remains.
      • +
      +
    • +
    @@ -138,27 +156,64 @@ production-quality compiler for C and Objective-C on x86 (32- and 64-bit).

    future!). The tool is very good at finding bugs that occur on specific paths through code, such as on error conditions.

    -

    In the LLVM 2.8 time-frame, +

    The LLVM 2.8 release fixes a number of bugs and slightly improves precision + over 2.7, but there are no major new features in the release.

    -The VMKit project is an implementation of -a JVM and a CLI Virtual Machine (Microsoft .NET is an -implementation of the CLI) using LLVM for static and just-in-time -compilation.

    +DragonEgg is a port of llvm-gcc to +gcc-4.5. Unlike llvm-gcc, dragonegg in theory does not require any gcc-4.5 +modifications whatsoever (currently one small patch is needed) thanks to the +new gcc plugin architecture. +DragonEgg is a gcc plugin that makes gcc-4.5 use the LLVM optimizers and code +generators instead of gcc's, just like with llvm-gcc. +

    -

    With the release of LLVM 2.8, ...

    +

    +DragonEgg is still a work in progress, but it is able to compile a lot of code, +for example all of gcc, LLVM and clang. Currently Ada, C, C++ and Fortran work +well, while all other languages either don't work at all or only work poorly. +For the moment only the x86-32 and x86-64 targets are supported, and only on +linux and darwin (darwin may need additional gcc patches). +

    + +

    +The 2.8 release has the following notable changes: +

      +
    • The plugin loads faster due to exporting fewer symbols.
    • +
    • Additional vector operations such as addps256 are now supported.
    • +
    • Ada global variables with no initial value are no longer zero initialized, +resulting in better optimization.
    • +
    • The '-fplugin-arg-dragonegg-enable-gcc-optzns' flag now runs all gcc +optimizers, rather than just a handful.
    • +
    • Fortran programs using common variables now link correctly.
    • +
    • GNU OMP constructs no longer crash the compiler.
    • +
    + + + +
    +

    +The VMKit project is an implementation of +a Java Virtual Machine (Java VM or JVM) that uses LLVM for static and +just-in-time compilation. As of LLVM 2.8, VMKit now supports copying garbage +collectors, and can be configured to use MMTk's copy mark-sweep garbage +collector. In LLVM 2.8, the VMKit .NET VM is no longer being maintained. +

    +
    @@ -178,67 +233,91 @@ libgcc routines).

    All of the code in the compiler-rt project is available under the standard LLVM -License, a "BSD-style" license. New in LLVM 2.8: - -Soft float support -

    +License, a "BSD-style" license. New in LLVM 2.8, compiler_rt now supports +soft floating point (for targets that don't have a real floating point unit), +and includes an extensive testsuite for the "blocks" language feature and the +blocks runtime included in compiler_rt.

    -DragonEgg is a port of llvm-gcc to -gcc-4.5. Unlike llvm-gcc, which makes many intrusive changes to the underlying -gcc-4.2 code, dragonegg in theory does not require any gcc-4.5 modifications -whatsoever (currently one small patch is needed). This is thanks to the new -gcc plugin architecture, which -makes it possible to modify the behaviour of gcc at runtime by loading a plugin, -which is nothing more than a dynamic library which conforms to the gcc plugin -interface. DragonEgg is a gcc plugin that causes the LLVM optimizers to be run -instead of the gcc optimizers, and the LLVM code generators instead of the gcc -code generators, just like llvm-gcc. To use it, you add -"-fplugin=path/dragonegg.so" to the gcc-4.5 command line, and gcc-4.5 magically -becomes llvm-gcc-4.5! -

    +LLDB is a brand new member of the LLVM +umbrella of projects. LLDB is a next generation, high-performance debugger. It +is built as a set of reusable components which highly leverage existing +libraries in the larger LLVM Project, such as the Clang expression parser, the +LLVM disassembler and the LLVM JIT.

    -DragonEgg is still a work in progress. Currently C works very well, while C++, -Ada and Fortran work fairly well. All other languages either don't work at all, -or only work poorly. For the moment only the x86-32 and x86-64 targets are -supported, and only on linux and darwin (darwin needs an additional gcc patch). +LLDB is in early development and not included as part of the LLVM 2.8 release, +but is mature enough to support basic debugging scenarios on Mac OS X in C, +Objective-C and C++. We'd really like help extending and expanding LLDB to +support new platforms, new languages, new architectures, and new features.

    +
    + + + + +

    -2.8 status here. +libc++ is another new member of the LLVM +family. It is an implementation of the C++ standard library, written from the +ground up to specifically target the forthcoming C++0x standard and focus on +delivering great performance.

    + +

    +As of the LLVM 2.8 release, libc++ is virtually feature complete, but would +benefit from more testing and better integration with Clang++. It is also +looking forward to the C++ committee finalizing the C++0x standard.

    +

    -The LLVM Machine Code (aka MC) sub-project of LLVM was created to solve a number -of problems in the realm of assembly, disassembly, object file format handling, -and a number of other related areas that CPU instruction-set level tools work -in. It is a sub-project of LLVM which provides it with a number of advantages -over other compilers that do not have tightly integrated assembly-level tools. -For a gentle introduction, please see the Intro to the -LLVM MC Project Blog Post. +KLEE is a symbolic execution framework for +programs in LLVM bitcode form. KLEE tries to symbolically evaluate "all" paths +through the application and records state transitions that lead to fault +states. This allows it to construct testcases that lead to faults and can even +be used to verify some algorithms.

    -

    2.8 status here

    -
    +

    Although KLEE does not have any major new features as of 2.8, we have made +various minor improvements, particularly to ease development:

    +
      +
    • Added support for LLVM 2.8. KLEE currently maintains compatibility with + LLVM 2.6, 2.7, and 2.8.
    • +
    • Added a buildbot for 2.6, 2.7, and trunk. A 2.8 buildbot will be coming + soon following release.
    • +
    • Fixed many C++ code issues to allow building with Clang++. Mostly + complete, except for the version of MiniSAT which is inside the KLEE STP + version.
    • +
    • Improved support for building with separate source and build + directories.
    • +
    • Added support for "long double" on x86.
    • +
    • Initial work on KLEE support for using 'lit' test runner instead of + DejaGNU.
    • +
    • Added configure support for using an external version of + STP.
    • +
    + + @@ -254,233 +333,607 @@ LLVM MC Project Blog Post. projects that have already been updated to work with LLVM 2.8.

    + + + +
    +

    +TCE is a toolset for designing +application-specific processors (ASP) based on the Transport triggered +architecture (TTA). The toolset provides a complete co-design flow from C/C++ +programs down to synthesizable VHDL and parallel program binaries. Processor +customization points include the register files, function units, supported +operations, and the interconnection network.

    + +

    TCE uses llvm-gcc/Clang and LLVM for C/C++ language support, target +independent optimizations and also for parts of code generation. It generates +new LLVM-based code generators "on the fly" for the designed TTA processors and +loads them into the compiler backend as runtime libraries to avoid per-target +recompilation of larger parts of the compiler chain.

    - - - + + +
    +

    +Horizon is a bytecode +language and compiler written on top of LLVM, intended for producing +single-address-space managed code operating systems that +run faster than the equivalent multiple-address-space C systems. +A more in-depth blurb is available on the wiki.

    -

    This release includes a huge number of bug fixes, performance tweaks and -minor improvements. Some of the major improvements and new features are listed -in this section. +

    + + + + +
    +

    +Clam AntiVirus is an open source (GPL) +anti-virus toolkit for UNIX, designed especially for e-mail scanning on mail +gateways. Since version 0.96 it has bytecode +signatures that allow writing detections for complex malware. It +uses LLVM's JIT to speed up the execution of bytecode on +X86, X86-64, PPC32/64, falling back to its own interpreter otherwise. +The git version was updated to work with LLVM 2.8.

    +

    The +ClamAV bytecode compiler uses Clang and LLVM to compile a C-like +language, insert runtime checks, and generate ClamAV bytecode.

    +
    +

    +Pure +is an algebraic/functional +programming language based on term rewriting. Programs are collections +of equations which are used to evaluate expressions in a symbolic +fashion. Pure offers dynamic typing, eager and lazy evaluation, lexical +closures, a hygienic macro system (also based on term rewriting), +built-in list and matrix support (including list and matrix +comprehensions) and an easy-to-use C interface. The interpreter uses +LLVM as a backend to JIT-compile Pure programs to fast native code.

    -

    In addition to changes to the code, between LLVM 2.7 and 2.8, a number of -organization changes have happened: -

    +

    Pure versions 0.44 and later have been tested and are known to work with +LLVM 2.8 (and continue to work with older LLVM releases >= 2.5).

    -
      -
    +

    +GHC is an open source, +state-of-the-art programming suite for +Haskell, a standard lazy functional programming language. It includes +an optimizing static compiler generating good code for a variety of +platforms, together with an interactive system for convenient, quick +development.

    -

    LLVM 2.8 includes several major new capabilities:

    +

    In addition to the existing C and native code generators, GHC 7.0 now +supports an LLVM +code generator. GHC supports LLVM 2.7 and later.

    -
      -
    • .
    • -
    +
    + + + + +
    +

    +Clay is a new systems programming +language that is specifically designed for generic programming. It makes +generic programming very concise thanks to whole program type propagation. It +uses LLVM as its backend.

    -

    LLVM IR has several new features for better support of new targets and that -expose new optimization opportunities:

    +

    +llvm-py has been updated to work +with LLVM 2.8. llvm-py provides Python bindings for LLVM, allowing you to write a +compiler backend or a VM in Python.

    -
      +
    -
  • LLVM 2.8 changes the internal order of operands in InvokeInst - and CallInst. - To be portable across releases, resort to CallSite and the - high-level accessors, such as getCalledValue and setUnwindDest. -
  • -
  • - You can no longer pass use_iterators directly to cast<> (and similar), because - these routines tend to perform costly dereference operations more than once. You - have to dereference the iterators yourself and pass them in. -
  • -
  • - llvm.memcpy.*, llvm.memset.*, llvm.memmove.* (and possibly other?) intrinsics - take an extra parameter now (i1 isVolatile), totaling 5 parameters. - If you were creating these intrinsic calls and prototypes yourself (as opposed - to using Intrinsic::getDeclaration), you can use UpgradeIntrinsicFunction/UpgradeIntrinsicCall - to be portable accross releases. - Note that you cannot use Intrinsic::getDeclaration() in a backwards compatible - way (needs 2/3 types now, in 2.7 it needed just 1). -
  • -
  • - SetCurrentDebugLocation takes a DebugLoc now instead of a MDNode. - Change your code to use - SetCurrentDebugLocation(DebugLoc::getFromDILocation(...)). -
  • -
  • - VISIBILITY_HIDDEN is gone. -
  • -
  • - The RegisterPass and RegisterAnalysisGroup templates are - considered deprecated, but continue to function in LLVM 2.8. Clients are - strongly advised to use the upcoming INITIALIZE_PASS() and - INITIALIZE_AG_PASS() macros instead. -
  • - SMDiagnostic takes different parameters now. //FIXME: how to upgrade? -
  • -
  • - The constructor for the Triple class no longer tries to understand odd triple - specifications. Frontends should ensure that they only pass valid triples to - LLVM. The Triple::normalize utility method has been added to help front-ends - deal with funky triples. -
  • - Some APIs got renamed: -
      -
    • llvm_report_error -> report_fatal_error
    • -
    • llvm_install_error_handler -> install_fatal_error_handler
    • -
    • llvm::DwarfExceptionHandling -> llvm::JITExceptionHandling
    • -
    -
  • - + + + + +
    +

    +FAUST is a compiled language for real-time +audio signal processing. The name FAUST stands for Functional AUdio STream. Its +programming model combines two approaches: functional programming and block +diagram composition. In addition to the C, C++, and Java output formats, the +Faust compiler can now generate LLVM bitcode, and works with LLVM 2.7 and +2.8.

    +

    Jade +(Just-in-time Adaptive Decoder Engine) is a generic video decoder engine using +LLVM for just-in-time compilation of video decoder configurations. Those +configurations are designed by MPEG Reconfigurable Video Coding (RVC) committee. +MPEG RVC standard is built on a stream-based dataflow representation of +decoders. It is composed of a standard library of coding tools written in +RVC-CAL language and a dataflow configuration — block diagram — +of a decoder.

    -

    In addition to a large array of minor performance tweaks and bug fixes, this -release includes a few major enhancements and additions to the optimizers:

    +

    The Jade project is hosted as part of the Open +RVC-CAL Compiler and requires it to translate the RVC-CAL standard library +of video coding tools into LLVM assembly code.

    -
      +
    -
  • + + - +
    +

    Neko LLVM JIT +replaces the standard Neko JIT with an LLVM-based implementation. While not +fully complete, it is already providing a 1.5x speedup on 64-bit systems. +Neko LLVM JIT requires LLVM 2.8 or later.

    + + + +
    +

    +Crack aims to provide +the ease of development of a scripting language with the performance of a +compiled language. The language derives concepts from C++, Java and Python, +incorporating object-oriented programming, operator overloading and strong +typing. Crack 0.2 works with LLVM 2.7, and the forthcoming Crack 0.2.1 release +builds on LLVM 2.8.

    + +
    +

    +DTMC provides support for +Transactional Memory, which is an easy-to-use and efficient way to synchronize +accesses to shared memory. Transactions can contain normal C/C++ code (e.g., +__transaction { list.remove(x); x.refCount--; }) and will be executed +virtually atomically and isolated from other transactions.

    -
      -
    • +
    - + + + +
    +

    +Kai (Japanese 会 for +meeting/gathering) is an experimental interpreter that provides a highly +extensible runtime environment and explicit control over the compilation +process. Programs are defined using nested symbolic expressions, which are all +parsed into first-class values with minimal intrinsic semantics. Kai can +generate optimised code at run-time (using LLVM) in order to exploit the nature +of the underlying hardware and to integrate with external software libraries. +It is a unique exploration into the world of dynamic code compilation, and the +interaction between high level and low level semantics.

    +

    +OSL is a shading +language designed for use in physically based renderers and in particular +production rendering. By using LLVM instead of the interpreter, it was able to +meet its performance goals (>= C-code) while retaining the benefits of +runtime specialization and a portable high-level language. +

    -

    We have put a significant amount of work into the code generator -infrastructure, which allows us to implement more aggressive algorithms and make -it run faster:

    +
    + + + + + + + +
    + +

    This release includes a huge number of bug fixes, performance tweaks and +minor improvements. Some of the major improvements and new features are listed +in this section. +

    -
      -
    • MachO writer works.
    • -
    -

    New features of the X86 target include: -

    -
      -
    • The X86 backend now supports holding X87 floating point stack values - in registers across basic blocks, dramatically improving performance of code - that uses long double, and when targetting CPUs that don't support SSE.
    • +

      LLVM 2.8 includes several major new capabilities:

      +
        +
      • As mentioned above, libc++ and LLDB are major new additions to the LLVM collective.
      • +
      • LLVM 2.8 now has pretty decent support for debugging optimized code. You + should be able to reliably get debug info for function arguments, assuming + that the value is actually available where you have stopped.
      • +
      • A new 'llvm-diff' tool is available that does a semantic diff of .ll + files.
      • +
    • The MC subproject has made major progress in this release. + Direct .o file writing support for darwin/x86[-64] is now reliable and + support for other targets and object file formats is in progress.
    -

    New features of the ARM target include: -

    +

    LLVM IR has several new features for better support of new targets and that +expose new optimization opportunities:

      +
    • The memcpy, memmove, and memset + intrinsics now take address space qualified pointers and a bit to indicate + whether the transfer is "volatile" or not. +
    • +
    • Per-instruction debug info metadata is much faster and uses less memory by + using the new DebugLoc class.
    • +
    • LLVM IR now has a more formalized concept of "trap values", which allow the optimizer + to optimize more aggressively in the presence of undefined behavior, while + still producing predictable results.
    • +
    • LLVM IR now supports two new linkage + types (linker_private_weak and linker_private_weak_def_auto) which map + onto some obscure MachO concepts.
    • +
    -
  • +
    + + + +
    + +

    In addition to a large array of minor performance tweaks and bug fixes, this +release includes a few major enhancements and additions to the optimizers:

    + +
      +
    • As mentioned above, the optimizer now has support for updating debug + information as it goes. A key aspect of this is the new llvm.dbg.value + intrinsic. This intrinsic represents debug info for variables that are + promoted to SSA values (typically by mem2reg or the -scalarrepl passes).
    • + +
    • The JumpThreading pass is now much more aggressive about implied value + relations, allowing it to thread conditions like "a == 4" when a is known to + be 13 in one of the predecessors of a block. It does this in conjunction + with the new LazyValueInfo analysis pass.
    • +
    • The new RegionInfo analysis pass identifies single-entry single-exit regions + in the CFG. You can play with it with the "opt -regions analyze" or + "opt -view-regions" commands.
    • +
    • The loop optimizer has significantly improved strength reduction and analysis + capabilities. Notably it is able to build on the trap value and signed + integer overflow information to optimize <= and >= loops.
    • +
    • The CallGraphSCCPassManager now has some basic support for iterating within + an SCC when an optimizer devirtualizes a function call. This allows inlining + through indirect call sites that are devirtualized by store-load forwarding + and other optimizations.
    • +
    • The new -loweratomic pass is available + to lower atomic instructions into their non-atomic form. This can be useful + to optimize generic code that expects to run in a single-threaded + environment.
    +
    +

    +The LLVM Machine Code (aka MC) subsystem was created to solve a number +of problems in the realm of assembly, disassembly, object file format handling, +and a number of other related areas that CPU instruction-set level tools work +in.

    -

    This release includes a number of new APIs that are used internally, which - may also be useful for external clients. -

    +

    The MC subproject has made great leaps in LLVM 2.8. For example, support for + directly writing .o files from LLC (and clang) now works reliably for + darwin/x86[-64] (including inline assembly support) and the integrated + assembler is turned on by default in Clang for these targets. This provides + improved compile times among other things.

      -
    • +
    • The entire compiler has converted over to using the MCStreamer assembler API + instead of writing out a .s file textually.
    • +
    • The "assembler parser" is far more mature than in 2.7, supporting a full + complement of directives, now supports assembler macros, etc.
    • +
    • The "assembler backend" has been completed, including support for relaxation + relocation processing and all the other things that an assembler does.
    • +
    • The MachO file format support is now fully functional and works.
    • +
    • The MC disassembler now fully supports ARM and Thumb. ARM assembler support + is still in early development though.
    • +
    • The X86 MC assembler now supports the X86 AES and AVX instruction sets.
    • +
    • Work on ELF and COFF object files and ARM target support is well underway, + but isn't useful yet in LLVM 2.8. Please contact the llvmdev mailing list + if you're interested in this.
    +

    For more information, please see the Intro to the +LLVM MC Project Blog Post. +

    + +
    + + + + +
    + +

    We have put a significant amount of work into the code generator +infrastructure, which allows us to implement more aggressive algorithms and make +it run faster:

    + +
      +
    • The clang/gcc -momit-leaf-frame-pointer argument is now supported.
    • +
    • The clang/gcc -ffunction-sections and -fdata-sections arguments are now + supported on ELF targets (like GCC).
    • +
    • The MachineCSE pass is now tuned and on by default. It eliminates common + subexpressions that are exposed when lowering to machine instructions.
    • +
    • The "local" register allocator was replaced by a new "fast" register + allocator. This new allocator (which is often used at -O0) is substantially + faster and produces better code than the old local register allocator.
    • +
    • A new LLC "-regalloc=default" option is available, which automatically + chooses a register allocator based on the -O optimization level.
    • +
    • The common code generator code was modified to promote illegal argument and + return value vectors to wider ones when possible instead of scalarizing + them. For example, <3 x float> will now pass in one SSE register + instead of 3 on X86. This generates substantially better code since the + rest of the code generator was already expecting this.
    • +
    • The code generator uses a new "COPY" machine instruction. This speeds up + the code generator and eliminates the need for targets to implement the + isMoveInstr hook. Also, the copyRegToReg hook was renamed to copyPhysReg + and simplified.
    • +
    • The code generator now has a "LocalStackSlotPass", which optimizes stack + slot access for targets (like ARM) that have limited stack displacement + addressing.
    • +
    • A new "PeepholeOptimizer" is available, which eliminates sign and zero + extends, and optimizes away compare instructions when the condition result + is available from a previous instruction.
    • +
    • Atomic operations now get legalized into simpler atomic operations if not + natively supported, easing the implementation burden on targets.
    • +
    • We have added two new bottom-up pre-allocation register pressure aware schedulers: +
        +
      1. The hybrid scheduler schedules aggressively to minimize schedule length when registers are available and avoids overscheduling in high pressure situations.
      2. +
      3. The instruction-level-parallelism scheduler schedules for maximum ILP when registers are available and avoids overscheduling in high pressure situations.
      4. +
    • +
    • The tblgen type inference algorithm was rewritten to be more consistent and + diagnose more target bugs. If you have an out-of-tree backend, you may + find that it finds bugs in your target description. This support also + allows limited support for writing patterns for instructions that return + multiple results (e.g. a virtual register and a flag result). The + 'parallel' modifier in tblgen was removed, you should use the new support + for multiple results instead.
    • +
    • A new (experimental) "-rendermf" pass is available which renders a + MachineFunction into HTML, showing live ranges and other useful + details.
    • +
    • The new SubRegIndex tablegen class allows subregisters to be indexed + symbolically instead of numerically. If your target uses subregisters you + will need to adapt to use SubRegIndex when you upgrade to 2.8.
    • + + +
    • The -fast-isel instruction selection path (used at -O0 on X86) was rewritten + to work bottom-up on basic blocks instead of top down. This makes it + slightly faster (because the MachineDCE pass is not needed any longer) and + allows it to generate better code in some cases.
    • + +
    -

    Other miscellaneous features include:

    +

    New features and major changes in the X86 target include: +

      -
    • +
    • The X86 backend now supports holding X87 floating point stack values + in registers across basic blocks, dramatically improving performance of code + that uses long double, and when targeting CPUs that don't support SSE.
    • + +
    • The X86 backend now uses an SSEDomainFix pass to optimize SSE operations. On + Nehalem ("Core i7") and newer CPUs there is a 2 cycle latency penalty on + using a register in a different domain than where it was defined. This pass + optimizes away these stalls.
    • + +
    • The X86 backend now promotes 16-bit integer operations to 32 bits when + possible. This avoids 0x66 prefixes, which are slow on some + microarchitectures and bloat the code on all of them.
    • + +
    • The X86 backend now supports the Microsoft "thiscall" calling convention, + and a calling convention to support + ghc.
    • + +
    • The X86 backend supports a new "llvm.x86.int" intrinsic, which maps onto + the X86 "int $42" and "int3" instructions.
    • + +
    • At the IR level, the <2 x float> datatype is now promoted and passed + around as a <4 x float> instead of being passed and returned as an MMX + vector. If you have a frontend that uses this, please pass and return a + <2 x i32> instead (using bitcasts).
    • + +
    • When printing .s files in verbose assembly mode (the default for clang -S), + the X86 backend now decodes X86 shuffle instructions and prints human + readable comments after the most inscrutable of them, e.g.: + +
      +  insertps $113, %xmm3, %xmm0 # xmm0 = zero,xmm0[1,2],xmm3[1]
      +  unpcklps %xmm1, %xmm0       # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
      +  pshufd   $1, %xmm1, %xmm1   # xmm1 = xmm1[1,0,0,0]
      +
      +
    • +
    + + + +
    +

    New features of the ARM target include: +

    + +
      +
    • The ARM backend now optimizes tail calls into jumps.
    • +
    • Scheduling is improved through the new list-hybrid scheduler as well + as through better modeling of structural hazards.
    • +
    • Half float instructions are now + supported.
    • +
    • NEON support has been improved to model instructions which operate on + multiple consecutive registers more aggressively. This avoids lots of + extraneous register copies.
    • +
    • The ARM backend now uses a new "ARMGlobalMerge" pass, which merges several + global variables into one, saving extra address computation (all the global + variables can be accessed via the same base address) and potentially reducing + register pressure.
    • + +
    • The ARM backend has received many minor improvements and tweaks which lead to +substantially better performance in a wide range of different scenarios.
    • + +
    • The ARM NEON intrinsics have been substantially reworked to reduce + redundancy and improve code generation. Some of the major changes are: +
        +
      1. + All of the NEON load and store intrinsics (llvm.arm.neon.vld* and + llvm.arm.neon.vst*) take an extra parameter to specify the alignment in bytes + of the memory being accessed. +
      2. +
      3. + The llvm.arm.neon.vaba intrinsic (vector absolute difference and + accumulate) has been removed. This operation is now represented using + the llvm.arm.neon.vabd intrinsic (vector absolute difference) followed by a + vector add. +
      4. +
      5. + The llvm.arm.neon.vabdl and llvm.arm.neon.vabal intrinsics (lengthening + vector absolute difference with and without accumulation) have been removed. + They are represented using the llvm.arm.neon.vabd intrinsic (vector absolute + difference) followed by a vector zero-extend operation, and for vabal, + a vector add. +
      6. +
      7. + The llvm.arm.neon.vmovn intrinsic has been removed. Calls of this intrinsic + are now replaced by vector truncate operations. +
      8. +
      9. + The llvm.arm.neon.vmovls and llvm.arm.neon.vmovlu intrinsics have been + removed. They are now represented as vector sign-extend (vmovls) and + zero-extend (vmovlu) operations. +
      10. +
      11. + The llvm.arm.neon.vaddl*, llvm.arm.neon.vaddw*, llvm.arm.neon.vsubl*, and + llvm.arm.neon.vsubw* intrinsics (lengthening vector add and subtract) have + been removed. They are replaced by vector add and vector subtract operations + where one (vaddw, vsubw) or both (vaddl, vsubl) of the operands are either + sign-extended or zero-extended. +
      12. +
      13. + The llvm.arm.neon.vmulls, llvm.arm.neon.vmullu, llvm.arm.neon.vmlal*, and + llvm.arm.neon.vmlsl* intrinsics (lengthening vector multiply with and without + accumulation and subtraction) have been removed. These operations are now + represented as vector multiplications where the operands are either + sign-extended or zero-extended, followed by a vector add for vmlal or a + vector subtract for vmlsl. Note that the polynomial vector multiply + intrinsic, llvm.arm.neon.vmullp, remains unchanged. +
      14. +
      +
    • + +
    +
    +
    @@ -494,51 +947,129 @@ on LLVM 2.7, this section lists some "gotchas" that you may run into upgrading from the previous release.

      -
    • .ll file doesn't produce #uses comments anymore, to get them, run a .bc file - through "llvm-dis --show-annotations".
    • -
    • MSIL Backend removed.
    • -
    • ABCD and SSI passes removed.
    • -
    • 'Union' LLVM IR feature removed.
    • +
    • The build configuration machinery changed the output directory names. It + wasn't clear to many people that a "Release-Asserts" build was a release build + without asserts. To make this more clear, "Release" does not include + assertions and "Release+Asserts" does (likewise, "Debug" and + "Debug+Asserts").
    • +
    • The MSIL Backend was removed; it was unsupported and broken.
    • +
    • The ABCD, SSI, and SCCVN passes were removed. These were not fully + functional and their behavior has been or will be subsumed by the + LazyValueInfo pass.
    • +
    • The LLVM IR 'Union' feature was removed. While this is a desirable feature + for LLVM IR to support, the existing implementation was half baked and + barely useful. We'd really like anyone interested to resurrect the work and + finish it for a future release.
    • +
    • If you're used to reading .ll files, you'll probably notice that .ll file + dumps don't produce #uses comments anymore. To get them, run a .bc file + through "llvm-dis --show-annotations".
    • +
    • Target triples are now stored in a normalized form, and all inputs from + humans are expected to be normalized by Triple::normalize before being + stored in a module triple or passed to another library.
    + +

    In addition, many APIs have changed in this release. Some of the major LLVM API changes are:

    -
      +
    • LLVM 2.8 changes the internal order of operands in InvokeInst + and CallInst. + To be portable across releases, please use the CallSite class and the + high-level accessors, such as getCalledValue and + setUnwindDest. +
    • +
    • + You can no longer pass use_iterators directly to cast<> (and similar), + because these routines tend to perform costly dereference operations more + than once. You have to dereference the iterators yourself and pass them in. +
    • +
    • + llvm.memcpy.*, llvm.memset.*, llvm.memmove.* intrinsics take an extra + parameter now ("i1 isVolatile"), totaling 5 parameters, and the pointer + operands are now address-space qualified. + If you were creating these intrinsic calls and prototypes yourself (as opposed + to using Intrinsic::getDeclaration), you can use + UpgradeIntrinsicFunction/UpgradeIntrinsicCall to be portable across releases. +
    • +
    • + SetCurrentDebugLocation takes a DebugLoc now instead of a MDNode. + Change your code to use + SetCurrentDebugLocation(DebugLoc::getFromDILocation(...)). +
    • +
    • + The RegisterPass and RegisterAnalysisGroup templates are + considered deprecated, but continue to function in LLVM 2.8. Clients are + strongly advised to use the upcoming INITIALIZE_PASS() and + INITIALIZE_AG_PASS() macros instead. +
    • +
    • + The constructor for the Triple class no longer tries to understand odd triple + specifications. Frontends should ensure that they only pass valid triples to + LLVM. The Triple::normalize utility method has been added to help front-ends + deal with funky triples. +
    • + +
    • + Some APIs were renamed: +
        +
      • llvm_report_error -> report_fatal_error
      • +
      • llvm_install_error_handler -> install_fatal_error_handler
      • +
      • llvm::DwarfExceptionHandling -> llvm::JITExceptionHandling
      • +
      • VISIBILITY_HIDDEN -> LLVM_LIBRARY_VISIBILITY
      • +
      +
    • + +
    • + Some public headers were renamed: +
        +
      • llvm/Assembly/AsmAnnotationWriter.h was renamed + to llvm/Assembly/AssemblyAnnotationWriter.h +
      • +
    - - - -
    - Portability and Supported Platforms + + -
    -

    LLVM is known to work on the following platforms:

    +

    This section lists changes to the LLVM development infrastructure. This +mostly impacts users who actively work on LLVM or follow development on +mainline, but may also impact users who leverage the LLVM build infrastructure +or are interested in LLVM qualification.

      -
    • Intel and AMD machines (IA32, X86-64, AMD64, EMT-64) running Red Hat - Linux, Fedora Core, FreeBSD and AuroraUX (and probably other unix-like - systems).
    • -
    • PowerPC and X86-based Mac OS X systems, running 10.4 and above in 32-bit - and 64-bit modes.
    • -
    • Intel and AMD machines running on Win32 using MinGW libraries (native).
    • -
    • Intel and AMD machines running on Win32 with the Cygwin libraries (limited - support is available for native builds with Visual C++).
    • -
    • Sun x86 and AMD64 machines running Solaris 10, OpenSolaris 0906.
    • -
    • Alpha-based machines running Debian GNU/Linux.
    • +
    • The default for make check is now to use + the lit testing tool, which is + part of LLVM itself. You can use lit directly as well, or use + the llvm-lit tool which is created as part of a Makefile or CMake + build (and knows how to find the appropriate tools). See the lit + documentation and the blog + post, and PR5217 + for more information.
    • + +
    • The LLVM test-suite infrastructure has a new "simple" test format + (make TEST=simple). The new format is intended to require only a + compiler and not a full set of LLVM tools. This makes it useful for testing + released compilers, for running the test suite with other compilers (for + performance comparisons), and makes sure that we are testing the compiler as + users would see it. The new format is also designed to work using reference + outputs instead of comparison to a baseline compiler, which makes it run much + faster and makes it less system dependent.
    • + +
    • Significant progress has been made on a new interface to running the + LLVM test-suite (aka the LLVM "nightly tests") using + the LNT infrastructure. The LNT + interface to the test-suite brings significantly improved reporting + capabilities for monitoring the correctness and generated code quality + produced by LLVM over time.
    - -

    The core LLVM infrastructure uses GNU autoconf to adapt itself -to the machine and operating system on which it is built. However, minor -porting may be required to get LLVM to work on new platforms. We welcome your -portability patches and reports of successful builds or error messages.

    -
    @@ -554,18 +1085,6 @@ listed by component. If you run into a problem, please check the LLVM bug database and submit a bug if there isn't already one.

    -
      -
    • LLVM will not correctly compile on Solaris and/or OpenSolaris -using the stock GCC 3.x.x series 'out the box', -See: Broken versions of GCC and other tools. -However, A Modern GCC Build -for x86/x86-64 has been made available from the third party AuroraUX Project -that has been meticulously tested for bootstrapping LLVM & Clang.
    • -
    • There have been reports of Solaris and/or OpenSolaris build failures due -to an incompatibility in the nm program as well. The nm from binutils does seem -to work.
    • -
    -
    @@ -583,10 +1102,10 @@ components, please contact us on the LLVMdev list.

      -
    • The Alpha, SPU, MIPS, PIC16, Blackfin, MSP430, SystemZ and MicroBlaze - backends are experimental.
    • -
    • llc "-filetype=asm" (the default) is the only - supported value for this option. XXX Update me
    • +
    • The Alpha, Blackfin, CellSPU, MicroBlaze, MSP430, MIPS, PIC16, SystemZ + and XCore backends are experimental.
    • +
    • llc "-filetype=obj" is experimental on all targets + other than darwin-i386 and darwin-x86_64.
    @@ -694,6 +1213,9 @@ appropriate nops inserted to ensure restartability.
    +

    The C backend has numerous problems and is not being actively maintained. +Depending on it for anything serious is not advised.

    +
    • The C backend has only basic support for inline assembly code.
    • @@ -709,69 +1231,28 @@ appropriate nops inserted to ensure restartability.
      -

      The only major language feature of GCC not supported by llvm-gcc is - the __builtin_apply family of builtins. However, some extensions - are only supported on some targets. For example, trampolines are only - supported on some targets (these are used when you take the address of a - nested function).

      +

      llvm-gcc is generally very stable for the C family of languages. The only + major language feature of GCC not supported by llvm-gcc is the + __builtin_apply family of builtins. However, some extensions + are only supported on some targets. For example, trampolines are only + supported on some targets (these are used when you take the address of a + nested function).

      -
      +

      Fortran support generally works, but there are still several unresolved bugs + in Bugzilla. Please see the + tools/gfortran component for details. Note that llvm-gcc is missing major + Fortran performance work in the frontend and library that went into GCC after + 4.2. If you are interested in Fortran, we recommend that you consider using + dragonegg instead.

      - - - -
      -
        -
      • Fortran support generally works, but there are still several unresolved bugs - in Bugzilla. Please see the - tools/gfortran component for details.
      • -
      -
      - - - - -
      -The llvm-gcc 4.2 Ada compiler works fairly well; however, this is not a mature -technology, and problems should be expected. -
        -
      • The Ada front-end currently only builds on X86-32. This is mainly due -to lack of trampoline support (pointers to nested functions) on other platforms. -However, it also fails to build on X86-64 -which does support trampolines.
      • -
      • The Ada front-end fails to bootstrap. -This is due to lack of LLVM support for setjmp/longjmp style -exception handling, which is used internally by the compiler. -Workaround: configure with --disable-bootstrap.
      • -
      • The c380004, c393010 -and cxg2021 ACATS tests fail -(c380004 also fails with gcc-4.2 mainline). -If the compiler is built with checks disabled then c393010 -causes the compiler to go into an infinite loop, using up all system memory.
      • -
      • Some GCC specific Ada tests continue to crash the compiler.
      • -
      • The -E binder option (exception backtraces) -does not work and will result in programs -crashing if an exception is raised. Workaround: do not use -E.
      • -
      • Only discrete types are allowed to start -or finish at a non-byte offset in a record. Workaround: do not pack records -or use representation clauses that result in a field of a non-discrete type -starting or finishing in the middle of a byte.
      • -
      • The lli interpreter considers -'main' as generated by the Ada binder to be invalid. -Workaround: hand edit the file to use pointers for argv and -envp rather than integers.
      • -
      • The -fstack-check option is -ignored.
      • -
      +

      The llvm-gcc 4.2 Ada compiler has basic functionality, but is no longer being +actively maintained. If you are interested in Ada, we recommend that you +consider using dragonegg instead.

      @@ -806,7 +1287,7 @@ lists.

      src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-09-03 01:22:50 +0200 (Fri, 03 Sep 2010) $ + Last modified: $Date: 2010-10-04 22:41:06 +0200 (Mon, 04 Oct 2010) $ diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h index 1d6181a95da3..fec6bcd628cc 100644 --- a/include/llvm/ADT/SmallVector.h +++ b/include/llvm/ADT/SmallVector.h @@ -61,7 +61,7 @@ protected: // number of union instances for the space, which guarantee maximal alignment. struct U { #ifdef __GNUC__ - char X __attribute__((aligned(8))); + char X __attribute__((aligned)); #else union { double D; diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 92e2299ec62f..272b54dea1fa 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -54,6 +54,11 @@ namespace { AU.addPreserved(); } + virtual void releaseMemory() { + ScopeMap.clear(); + Exps.clear(); + } + private: const unsigned LookAheadLimit; typedef ScopedHashTableScopehasOneUse(Reg)) + if (!MRI->hasOneNonDBGUse(Reg)) // Only coalesce single use copies. This ensure the copy will be // deleted. continue; @@ -469,6 +474,8 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { DenseMap ParentMap; DenseMap OpenChildren; + CurrVN = 0; + // Perform a DFS walk to determine the order of visit. WorkList.push_back(Node); do { diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 3a8bebe0dd24..e4f10f93fb74 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1389,7 +1389,8 @@ ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const { // Check that CPSR isn't set between the comparison instruction and the one we // want to change. 
- MachineBasicBlock::const_iterator I = CmpInstr, E = MI; + MachineBasicBlock::const_iterator I = CmpInstr, E = MI, + B = MI->getParent()->begin(); --I; for (; I != E; --I) { const MachineInstr &Instr = *I; @@ -1403,6 +1404,10 @@ ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const { if (MO.getReg() == ARM::CPSR) return false; } + + if (I == B) + // The 'and' is below the comparison instruction. + return false; } // Set the "zero" bit in CPSR. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 95dbb6176687..a6db979925ad 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5414,11 +5414,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getMOVL(DAG, dl, VT, V2, V1); } - if (X86::isUNPCKLMask(SVOp)) + if (X86::isUNPCKL_v_undef_Mask(SVOp) || X86::isUNPCKLMask(SVOp)) return (isMMX) ? Op : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG); - if (X86::isUNPCKHMask(SVOp)) + if (X86::isUNPCKH_v_undef_Mask(SVOp) || X86::isUNPCKHMask(SVOp)) return (isMMX) ? Op : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG); @@ -5443,11 +5443,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SDValue NewOp = CommuteVectorShuffle(SVOp, DAG); ShuffleVectorSDNode *NewSVOp = cast(NewOp); - if (X86::isUNPCKLMask(NewSVOp)) + if (X86::isUNPCKL_v_undef_Mask(NewSVOp) || X86::isUNPCKLMask(NewSVOp)) return (isMMX) ? NewOp : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG); - if (X86::isUNPCKHMask(NewSVOp)) + if (X86::isUNPCKH_v_undef_Mask(NewSVOp) || X86::isUNPCKHMask(NewSVOp)) return (isMMX) ? 
NewOp : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG); } @@ -5494,13 +5494,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { TargetMask, DAG); } - if (X86::isUNPCKL_v_undef_Mask(SVOp)) - if (VT != MVT::v2i64 && VT != MVT::v2f64) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); - if (X86::isUNPCKH_v_undef_Mask(SVOp)) - if (VT != MVT::v2i64 && VT != MVT::v2f64) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); - // Handle v8i16 specifically since SSE can do byte extraction and insertion. if (VT == MVT::v8i16) { SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, DAG); -- cgit v1.2.3