четверг, 27 февраля 2014 г.

How to find out which installed RPM packages provide the dependencies of a binary?

Very often you need to know which files (dynamically linked libraries, in most cases) the binary you're developing relies on. Of course, you can check this using the ldd utility:

[vitaly@thermaltake miscelanous]$ ldd /usr/lib64/firefox/firefox
 linux-vdso.so.1 =>  (0x00007fffeb1fe000)
 libpthread.so.0 => /lib64/libpthread.so.0 (0x00007f63c0f49000)
 libdl.so.2 => /lib64/libdl.so.2 (0x00007f63c0d45000)
 libstdc++.so.6 => /lib64/libstdc++.so.6 (0x00007f63c0a3c000)
 libm.so.6 => /lib64/libm.so.6 (0x00007f63c0735000)
 libgcc_s.so.1 => /lib64/libgcc_s.so.1 (0x00007f63c051f000)
 libc.so.6 => /lib64/libc.so.6 (0x00007f63c015f000)
 /lib64/ld-linux-x86-64.so.2 (0x00007f63c1183000)

But what if we go a step further and resolve the discovered dependencies into packages? This would be useful for a wide range of tasks, e.g. for writing specfiles that explicitly list the deps in the "Requires" field.

This is my quick implementation, built around queries to the rpm database... In short, we query the db for every dependency recursively until no new dependency can be added to the resolvedPackages dictionary.

#!/usr/bin/env python
import sys
import subprocess
import re
import pprint
import json

# Module-level state shared by every rpmSpecRequires instance. Each recursion
# step creates a new instance, so these objects are the only place where the
# results of the whole run are accumulated.

# Maps a package name to the list of dependencies (library paths, binary
# paths or capability names) that were resolved to it. When a dependency
# resolves to a package that is already a key here, the package is not
# walked again; only the dependency is appended to its list.
resolvedPackages = {}

# Absolute-path dependencies reported by `rpm -q --requires` that have already
# been processed. Despite the name, every such path is stored here, whether
# or not it is a dynamic executable.
notDynamicalBinaries = []

# Library names (entries containing ".so") reported by `rpm -q --requires`
# that have already been passed to `rpm -q --whatprovides`.
resolvableLibraries = []


class rpmSpecRequires:
    """
    Resolve the dependencies of a binary to the installed packages in order
    to provide the full list of deps for the Requires field of a specfile.
    The tool is built around three command line utilities:
        1. ldd
        2. rpm -q --whatprovides
        3. rpm -q --requires

    All the work is done by the constructor. Results are accumulated in the
    module-level `resolvedPackages`, `notDynamicalBinaries` and
    `resolvableLibraries` objects, and every recursion step is performed by
    constructing a new instance with `recursionLevel + 1`.

    The code runs on both Python 2.7 and Python 3.
    """

    def __init__(self, binary=None, rpm=None, recursionLevel=1, checkPackagesOfBinaries=False):
        """
        Start the resolution either from a binary or from a package.

        binary                  -- path of the file to inspect with `ldd`
        rpm                     -- name of an installed package to inspect with
                                   `rpm -q --requires` (used when `binary` is
                                   not given)
        recursionLevel          -- depth of this step; only affects the
                                   indentation of the printed log
        checkPackagesOfBinaries -- when True, also look up the package that
                                   owns `binary` itself

        If neither `binary` nor `rpm` is given, the collected packages are
        printed and the process exits with status 1.
        """
        self.recursionLevel = recursionLevel
        self.checkPackagesOfBinaries = checkPackagesOfBinaries
        print("\n--------------rpmSpecRequires (recursion level = {0})-------------".format(self.recursionLevel))
        if binary:
            self.binary = binary
            self.startFromBinary()
        elif rpm:
            self.rpm = rpm
            self.startFromRpm()
        else:
            print("Wrong parameters")
            pprint.pprint(resolvedPackages)
            sys.exit(1)

    def startFromBinary(self):
        """
        Resolve `self.binary`: run `ldd` on it and, if requested, look up the
        package that owns the binary itself.
        """
        print("{0}Resolving the dependcies for a binary file {1}".format(self.indent(), self.binary))
        self.callLdd(self.binary)
        if self.checkPackagesOfBinaries:
            self.callRpmWhatprovides(self.binary)

    def startFromRpm(self):
        """
        Resolve `self.rpm`: walk everything the installed package requires.
        """
        print("{0}Resolving the dependcies for a rpm {1}".format(self.indent(), self.rpm))
        self.callRpmRequires(self.rpm)

    def _runCommand(self, argv):
        """
        Run `argv` and return everything it wrote to stdout as text.
        stderr is not captured.
        """
        # universal_newlines=True yields text instead of bytes on Python 3.
        p = subprocess.Popen(argv, stdout=subprocess.PIPE, universal_newlines=True)
        # communicate() drains the pipe, closes it and waits for the child,
        # so no zombie process is left behind for every query.
        output, _ = p.communicate()
        return output

    def _registerPackage(self, package, origin):
        """
        Record that the dependency `origin` is satisfied by `package`.
        A package seen for the first time is walked recursively; a known
        package only gets `origin` added to its list.
        """
        if package in resolvedPackages:
            print("{0}Package {1} is already captured".format(self.indent(), package))
            if origin not in resolvedPackages[package]:
                resolvedPackages[package].append(origin)
            return

        print("{0}New package {1} was found for {2}".format(self.indent(), package, origin))
        # The package must be registered BEFORE recursing: this is what stops
        # the walk when packages require each other.
        resolvedPackages[package] = [origin]
        rpmSpecRequires(rpm=package, recursionLevel=self.recursionLevel + 1)

    def callLdd(self, binary):
        """
        Wrapper of the `ldd`: collect the library paths of `binary` into
        `self.libs` and resolve each of them to a package.
        """
        answer = self._runCommand(["ldd", binary])
        if "not a dynamic executable" in answer:
            print("{0}Not a dynamic executable: {1}".format(self.indent(), binary))
            return

        # Parse the whole output first, then resolve, so the log keeps the
        # parser messages together. Explicit list/loop instead of
        # filter()/map(): those are lazy on Python 3 and would do nothing.
        parsed = [self.parseLdd(raw) for raw in answer.split('\n')]
        self.libs = [path for path in parsed if path is not None]
        for lib in self.libs:
            self.callRpmWhatprovides(lib)

    def parseLdd(self, raw):
        """
        Parser of the `ldd` output. Returns the library path found in the
        line `raw`, or None when the line carries no path.
        """
        line = raw.strip()
        if not line:
            # blank line, e.g. the one produced by the trailing newline
            return None

        # regular entry: "libm.so.6 => /lib64/libm.so.6 (0x...)"
        match = re.search(r'=>(.*)\(', line)
        if match is None:
            # dynamic loader entry: "/lib64/ld-linux-x86-64.so.2 (0x...)"
            match = re.match(r'(/\S+)\s+\(', line)
        if match is None:
            print("{0}Failed to parse: {1} {2}".format(self.indent(), raw, "no library path found"))
            return None

        # "linux-vdso.so.1 =>  (0x...)" has no file behind it: the path is
        # empty and there is nothing to ask the rpm database about.
        path = match.group(1).strip()
        return path or None

    def callRpmWhatprovides(self, lib):
        """
        Ask rpm database which rpm owns the discovered dependency
        """
        answer = self._runCommand(["rpm", "-q", "--whatprovides", lib]).strip()
        # Empty output (rpm reported only on stderr) means nothing was found.
        if not answer or "no package provides" in answer:
            print("{0}No package was found for {1}".format(self.indent(), lib))
            return

        for package in answer.split('\n'):
            if package:
                self._registerPackage(package, lib)

    def callRpmRequires(self, package):
        """
        Ask rpm database which rpms the discovered package depends on
        """
        answer = self._runCommand(["rpm", "-q", "--requires", package])
        if "is not installed" in answer:
            # same marker callRpmQ checks; do not parse the error as a dep
            print("{0}No package was found for {1}".format(self.indent(), package))
            return

        for dep in answer.strip().split('\n'):
            self.parseRpmRequires(dep)

    def parseRpmRequires(self, dep):
        """
        Parser of the `rpm -q --requires` output: classify one line `dep`
        and follow it.
        """
        dep = dep.strip()
        if not dep:
            # nothing required / blank line
            return

        #it's a library that cannot be resolved with a `rpm -q --whatprovides`
        if "(" in dep:
            print("{0}Library dependency {1} was found. Bypassing".format(self.indent(), dep))
            return

        #it's a full path to the binary: need to check them
        if "/" in dep:
            if dep in notDynamicalBinaries:
                print("{0}Binary dependency {1} is already captured".format(self.indent(), dep))
                return
            print("{0}Binary dependency {1} was found".format(self.indent(), dep))
            notDynamicalBinaries.append(dep)
            rpmSpecRequires(binary=dep, recursionLevel=self.recursionLevel + 1, checkPackagesOfBinaries=True)
            return

        #anything else: "name" or "name >= version"; keep the name only
        package = dep.split(' ')[0]
        if package in resolvedPackages:
            self._registerPackage(package, dep)

        #resolvable library
        elif ".so" in package:
            if package in resolvableLibraries:
                print("{0}Resolvable library {1} is already captured".format(self.indent(), package))
            else:
                print("{0}Resolvable library {1} was found".format(self.indent(), package))
                resolvableLibraries.append(package)
                self.callRpmWhatprovides(package)

        #package without provided version
        elif re.search(r'[0-9]+', package) is None:
            self.callRpmQ(package)

        else:
            self._registerPackage(package, dep)

    def callRpmQ(self, dep):
        """
        Simple check if the package has been already installed
        """
        answer = self._runCommand(["rpm", "-q", dep])
        if "is not installed" in answer:
            print("{0}No package was found for {1}".format(self.indent(), dep))
            return

        for package in answer.strip().split('\n'):
            if package:
                self._registerPackage(package, dep)

    def indent(self):
        """
        Return the log prefix: one tab per recursion level above the first.
        """
        return '\t' * (self.recursionLevel - 1)

def generateRequires(dep):
    """
    Build one `Requires:` line of a spec file from a full package name.

    Everything starting at the distribution tag (`.el...` on RHEL-like
    systems, `.fcNN` on Fedora) is cut off, which removes the dist tag and
    the architecture: 'bash-4.2.45-4.fc20.x86_64' gives
    'Requires:\\tbash-4.2.45-4\\n'.

    Returns an empty string when `dep` carries no recognised distribution
    tag (or is not a string), so the caller can write the result as is.
    """
    try:
        # Greedy (.*) cuts at the LAST dist tag in the name.
        match = re.search(r'(.*)\.(?:el|fc[0-9])', dep)
    except TypeError:
        # non-string input: produce nothing instead of raising
        return ""
    if match is None:
        return ""
    no_arch_no_repo = match.group(1).strip()
    return "Requires:\t{0}\n".format(no_arch_no_repo)


if __name__ == "__main__":
    # Usage: full_dependencies.py /path/to/binary
    #
    # Writes two files into the current directory, named after the binary:
    #   <name>.unique -- JSON list of the resolved package names
    #   <name>.spec   -- one "Requires:" line per package (see generateRequires)
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: {0} <path-to-binary>\n".format(sys.argv[0]))
        sys.exit(1)

    # The constructor performs the whole recursive resolution and fills
    # resolvedPackages.
    Resolver = rpmSpecRequires(binary=sys.argv[1])
    name = sys.argv[1].split('/')[-1]
    print("\n\n------------------------------------RESULTS--------------------------------------")
    pprint.pprint(resolvedPackages)

    # A sorted list: dict.keys() is a view on Python 3, which json cannot
    # serialize, and sorting makes both output files deterministic.
    packages = sorted(resolvedPackages)
    with open(name + ".unique", "w") as f:
        json.dump(packages, f, indent=4)
    with open(name + ".spec", "w") as f:
        for package in packages:
            f.write(generateRequires(package))


At the end of the output you will see the packages required by your binary, each with the dependencies that resolved to it:
[vitaly@thermaltake miscelanous]$ ./full_dependencies.py /usr/lib64/firefox/firefox

(...)

------------------------------------RESULTS--------------------------------------
{'basesystem-10.0-9.fc20.noarch': ['basesystem'],
 'bash-4.2.45-4.fc20.x86_64': ['/bin/sh', '/usr/bin/bash'],
 'filesystem-3.2-19.fc20.x86_64': ['filesystem'],
 'glibc-2.18-12.fc20.x86_64': ['/lib64/libpthread.so.0',
                               '/sbin/ldconfig',
                               '/usr/sbin/glibc_post_upgrade.x86_64',
                               '/lib64/libdl.so.2',
                               '/lib64/libc.so.6',
                               'glibc',
                               '/lib64/libm.so.6'],
 'glibc-common-2.18-12.fc20.x86_64': ['glibc-common'],
 'libgcc-4.8.2-7.fc20.x86_64': ['libgcc', '/lib64/libgcc_s.so.1'],
 'libstdc++-4.8.2-7.fc20.x86_64': ['/lib64/libstdc++.so.6'],
 'ncurses-base-5.9-12.20130511.fc20.noarch': ['ncurses-base'],
 'ncurses-libs-5.9-12.20130511.fc20.x86_64': ['/lib64/libtinfo.so.5'],
 'setup-2.8.71-2.fc20.noarch': ['setup'],
 'tzdata-2013i-2.fc20.noarch': ['tzdata']}