Python pandas 模块,ExcelFile() 实例源码

我们从Python开源项目中,提取了以下28个代码示例,用于说明如何使用pandas.ExcelFile()

项目:Multiple-factor-risk-model    作者:icezerowjj    | 项目源码 | 文件源码
def load_file(file_name, sheet_name_list):
    '''
    Load an xlsx file into a dictionary indexed by sheet names.

    :param string file_name: name of the xlsx file
    :param [string] sheet_name_list: names of the selected sheets in the file
    :return: {string: DataFrame} raw_data: {sheet name: pure data retrieved from
        the xlsx, re-indexed with columns and index 0, 1, 2, ...}
    '''
    print('loading file...')
    cut_head = 2  # number of header rows to discard from each sheet
    xlsx_file = pd.ExcelFile(file_name)
    raw_data = {}
    # iterate over every requested sheet and retrieve useful data into raw_data
    for sheet_name in sheet_name_list:
        print('parsing sheet', sheet_name)
        # parse a sheet from the whole file into a DataFrame with headers cut off
        temp = xlsx_file.parse(sheet_name).iloc[cut_head:, :]
        # the parsed data has dtype=object because it contains strings;
        # convert it to float (np.float was removed in numpy 1.24 — use float)
        temp = temp.astype(float)
        # reset index and columns to 0, 1, 2, ...
        temp.columns = range(temp.shape[1])
        temp.index = range(temp.shape[0])
        temp.fillna(0, inplace=True)
        raw_data[sheet_name] = temp
    return raw_data
项目:prodyn    作者:yabata    | 项目源码 | 文件源码
def read_data(file):
    """Read the system description from the given excel workbook and split
    it into the parameters used by the optimization.

    Args:
            file: excel file, which stores all data about considered system

    Returns:
            cst: constants, which describe the system
            srs: parameters, which are variable with time
            U: list of possible decisions
            states: values, which set number of states and characterize
            all possible ones
    """

    workbook = pd.ExcelFile(file)
    cst = workbook.parse('Constants', index_col=[0])['Value']
    srs = workbook.parse('Time-Series', index_col=[0])
    U = workbook.parse('DP-Decisions', index_col=[0])['Decisions'].values
    states = workbook.parse('DP-States', index_col=[0])
    return cst, srs, U, states

###################################
#for 2 states - temperature and heat-storage
###################################
项目:AutoTrading    作者:curme    | 项目源码 | 文件源码
def getExcelData(self):
        """
        Get data from 'hsi_futures.xlsx'.
        Date | Open | High | Low | Close | SMAVG5 | SMAVG10 | SMAVG15 | Volume | VolumeSMAVG5
        :return: data table (DataFrame sorted by Date, row index 0..n-1)
        """
        path = "../dataManager/hsi_futures.xlsx"
        xl = pd.ExcelFile(path)
        # read every sheet and concatenate into one frame
        # (DataFrame.append was removed in pandas 2.0 — use pd.concat)
        frames = [pd.read_excel(path, sheet) for sheet in xl.sheet_names]
        df = pd.concat(frames, ignore_index=True)
        df['Date'] = pd.to_datetime(df['Date'])
        df.sort_values("Date", ascending=True, inplace=True)
        # renumber the rows 0..n-1 after sorting
        data = df.reset_index(drop=True)
        return data
项目:tmtk    作者:thehyve    | 项目源码 | 文件源码
def get_clinical_template(study):
    """Try to detect the clinical template file in the source dir and open it with pandas."""
    candidates = [f for f in study.excel_files
                  if "clin" in f.lower() and "~$" not in f]

    if len(candidates) == 1:
        name = candidates[0]
        template = pd.ExcelFile(name, comment="#")
        print("[INFO] Clinical data template detected: " + name)
        return template

    if not candidates:
        # nothing matched: record the absence on the study object
        study.clinical_template_present = False
        print("[WARNING] No clinical data template could be detected. Assuming only high-dimensional templates " +
              "are available.")
        return None

    # more than one candidate: report and let the validator flag it
    print("[ERROR] Too many clinical data templates were found. " +
          "Make sure only one file has 'clinical' in its name.")
    Validity.list_length(candidates, expected=1)
    return None
项目:tmtk    作者:thehyve    | 项目源码 | 文件源码
def add_general_study_metadata(study, study_metadata_template_path):
    """Read the data from general study level metadata template and write to tags file."""
    workbook = pd.ExcelFile(study_metadata_template_path, comment="#")

    if len(workbook.sheet_names) > 1:
        print("[WARNING] Multiple sheets detected in general study metadata template. Assuming first sheet.")
    frame = workbook.parse(0, header=None)
    tag_index = 10
    for _, row in frame.iterrows():
        # strip leading NaNs, keep only the populated cells of the row
        cells = row[row.first_valid_index():].dropna().tolist()
        if len(cells) != 2:
            continue
        tag, value = cells
        study.all_metadata.add(("\\", tag, value, tag_index))
        tag_index += 1
    study.write_metadata()
项目:dhmin    作者:tum-ens    | 项目源码 | 文件源码
def read_excel(filename):
    """Read input Excel file and return dict of DataFrames for each sheet.

    Read an Excel spreadsheet with geographic input data.

    Args:
        filename: filename to an Excel spreadsheet with 'Vertex' and 'Edge'

    Returns:
        dict of 2 pandas DataFrames
    """
    with pd.ExcelFile(filename) as xls:
        data = {
            'Vertex': xls.parse('Vertex').set_index('Vertex'),
            'Edge': xls.parse('Edge').set_index(['Edge', 'Vertex1', 'Vertex2']),
        }
    return data
项目:xgbfir    作者:limexp    | 项目源码 | 文件源码
def _compare_xlsx(self, file1, file2, rtol=1e-02, atol=1e-03):
        """Assert that two xlsx workbooks match: same sheet names, same shapes,
        equal strings and numerically-close values."""
        book1 = pd.ExcelFile(file1)
        book2 = pd.ExcelFile(file2)
        self.assertEqual(book1.sheet_names, book2.sheet_names)

        for sheet in book1.sheet_names:
            frame1 = book1.parse(sheet)
            frame2 = book2.parse(sheet)
            self.assertEqual(len(list(frame1)), len(list(frame2)))

            values1 = frame1.values
            values2 = frame2.values
            self.assertEqual(values1.shape, values2.shape)

            # cell-by-cell: strings must match exactly, numbers approximately
            for pos in np.ndindex(values1.shape):
                a = values1[pos]
                b = values2[pos]
                if isinstance(a, six.string_types) or isinstance(b, six.string_types):
                    self.assertEqual(a, b)
                else:
                    npt.assert_allclose(a, b, rtol=rtol, atol=atol)
项目:Behavior    作者:danustc    | 项目源码 | 文件源码
def parse_xls(fpath):
    '''
    Load the xls data.

    :param fpath: path to the .xls workbook
    :return: a pandas DataFrame parsed from the first sheet
    '''
    book = xlrd.open_workbook(fpath)
    sheet_names = book.sheet_names()
    print("sheet_names:", sheet_names)
    xl_sheet = book.sheet_by_name(sheet_names[0])
    title_row = xl_sheet.row_values(0)  # title row
    print(title_row)
    xls_file = pd.ExcelFile(fpath)
    first_sheet = xls_file.sheet_names[0]
    df = xls_file.parse(first_sheet)
    # DataFrame.ix was removed in pandas 1.0; use positional .iloc instead
    print(df.iloc[0])
    print(df.iloc[1])
    print(df[3:])
    # the docstring promises a data frame; previously nothing was returned
    return df
项目:correctiv-nursinghomes    作者:correctiv    | 项目源码 | 文件源码
def load_supervision_authorities(self, *args, **options):
        """Load supervision authorities from an Excel workbook (one sheet per
        state) and upsert them into the database."""
        workbook = pd.ExcelFile(options['filename'])

        for state_name in workbook.sheet_names:
            state = State.objects.get(name=state_name)
            sheet = workbook.parse(state_name)
            for _, row in sheet.iterrows():
                try:
                    # take the first line of the email cell and extract a
                    # lowercase address from it, if any
                    email = stringify(row['email'])
                    if email:
                        first_line = email.splitlines()[0]
                        match = EMAIL_RE.search(first_line)
                        email = match.group(0).strip().lower() if match is not None else ''
                    defaults = dict(
                        address=stringify(row['address']),
                        contact=stringify(row['contact']),
                        email=email,
                        url=stringify(row['url']),
                        report_url=stringify(row.get(u'Verfügbare Berichte', ''))
                    )
                    authority, created = SupervisionAuthority.objects.update_or_create(
                        state=state, name=stringify(row['name']), defaults=defaults)
                    if created:
                        print(authority)
                except Exception:
                    # identify the offending row before re-raising
                    print(row['name'])
                    raise
项目:correctiv-nursinghomes    作者:correctiv    | 项目源码 | 文件源码
def assign_brandenburg(self, *args, **options):
        """Assign supervision authorities to Brandenburg nursing homes.

        Reads the 'Brandenburg' sheet of the given workbook: each row names an
        authority, its location (lat/lng) and the districts it covers. Homes in
        a district with exactly one candidate authority get it directly; with
        several candidates the geographically closest authority wins.
        """
        brandenburg_state = State.objects.get(name='Brandenburg')
        excel_file = pd.ExcelFile(options['filename'])
        df = excel_file.parse('Brandenburg')
        # district -> list of authorities responsible for it
        assigned_auths = defaultdict(list)
        # authority -> its point location (WGS84, srid 4326)
        locations = {}
        for _, row in df.iterrows():
            auth = SupervisionAuthority.objects.get(state=brandenburg_state, name=row['name'])
            locations[auth] = GEOSGeometry('POINT(%f %f)' % (row['lng'], row['lat']), srid=4326)
            assigned_districts = row[u'Landkreis-Zuständigkeit'].splitlines()
            for district_name in assigned_districts:
                districts = District.objects.filter(part_of=brandenburg_state, name=district_name)
                if len(districts) != 1:
                    # ambiguous or unknown district name: report and skip
                    print(district_name)
                    print(districts)
                else:
                    assigned_auths[districts[0]].append(auth)

        # only homes that do not have an authority yet
        for nursinghome in NursingHome.objects.filter(supervision_authority__isnull=True,
                state=brandenburg_state):
            district = District.objects.get(geom__covers=nursinghome.geo)
            auths = assigned_auths[district]
            if len(auths) == 1:
                nursinghome.supervision_authority = auths[0]
                nursinghome.save()
            else:
                # several candidates: pick the authority closest to the home
                min_distance = None
                best_auth = None
                for auth, point in locations.items():
                    if auth not in auths:
                        continue
                    dist = NursingHome.objects.filter(pk=nursinghome.pk
                            ).annotate(distance=Distance('geo', point))
                    dist = dist[0].distance.m
                    if min_distance is None or dist < min_distance:
                        min_distance = dist
                        best_auth = auth
                nursinghome.supervision_authority = best_auth
                nursinghome.save()
项目:correctiv-nursinghomes    作者:correctiv    | 项目源码 | 文件源码
def assign_rheinlandpfalz(self, *args, **options):
        """Assign supervision authorities to Rheinland-Pfalz nursing homes.

        Reads the 'Rheinland-Pfalz' sheet of the given workbook; each row names
        an authority and the districts it is responsible for, optionally
        restricted to specific municipalities after a '|'.
        """
        rp_state = State.objects.get(name='Rheinland-Pfalz')
        excel_file = pd.ExcelFile(options['filename'])
        df = excel_file.parse('Rheinland-Pfalz')
        # authority -> list of (district, optional municipality restriction)
        assigned = defaultdict(list)
        for _, row in df.iterrows():
            auth = SupervisionAuthority.objects.get(state=rp_state, name=row['name'])
            district_names = row[u'Landkreis-Zuständigkeit'].splitlines()
            for district_name in district_names:
                only = None
                # 'District|Muni1,Muni2' restricts the authority to those municipalities
                if '|' in district_name:
                    district_name, only = district_name.split('|')
                    only = only.split(',')

                # progressively looser lookups until exactly one district matches
                districts = District.objects.filter(part_of=rp_state, name=district_name)
                if len(districts) == 0:
                    districts = District.objects.filter(part_of=rp_state, name__contains=district_name)
                if len(districts) == 0:
                    districts = District.objects.filter(part_of=rp_state, name__contains=district_name.split()[0])
                if len(districts) == 0:
                    # raw string for the regex: '\W' was an invalid escape sequence
                    districts = District.objects.filter(part_of=rp_state, name__istartswith=re.sub(r'\W', '', district_name))
                if len(districts) > 1:
                    # disambiguate by administrative kind
                    if 'Kreis' in district_name:
                        districts = districts.filter(kind_detail__contains='Landkreis')
                    if 'Stadt' in district_name:
                        districts = districts.filter(kind_detail__contains='Stadt')
                if len(districts) != 1:
                    print(districts)
                    print(u'District not one: %s' % district_name)
                    continue
                assigned[auth].append((districts[0], only))
        for auth, district_list in assigned.items():
            for district, only in district_list:
                if only is None:
                    # no municipality restriction: assign the whole district
                    NursingHome.objects.filter(state=rp_state, district=district, supervision_authority__isnull=True).update(supervision_authority=auth)
                    continue
                for muni_name in only:
                    muni_name = muni_name.strip()
                    munis = Municipality.objects.filter(part_of=district, name__contains=muni_name)
                    if len(munis) > 1:
                        munis = Municipality.objects.filter(part_of=district, name=muni_name)
                    if len(munis) != 1:
                        print('Did not find %s' % muni_name)
                        continue
                    muni = munis[0]
                    NursingHome.objects.filter(state=rp_state, district=district, supervision_authority__isnull=True, geo__coveredby=muni.geom).update(supervision_authority=auth)
项目:prodyn    作者:yabata    | 项目源码 | 文件源码
def read_data(file):
    """Load the system description from an excel workbook.

    Args:
            file: excel file, which stores all data about considered system

    Returns:
            cst: constants, which describe the system
            srs: parameters, which are variable with time
            U: list of possible decisions
            states: values, which set number of states and characterize
            all possible ones
    """

    sheets = pd.ExcelFile(file)
    # every sheet uses its first column as the index
    states = sheets.parse('DP-States', index_col=[0])
    constants = sheets.parse('Constants', index_col=[0])['Value']
    series = sheets.parse('Time-Series', index_col=[0])
    decisions = sheets.parse('DP-Decisions', index_col=[0])['Decisions'].values
    return constants, series, decisions, states



###################################
#for 2 states
###################################
项目:prodyn    作者:yabata    | 项目源码 | 文件源码
def read_data(file):
    """Parse constants, time series, possible decisions and DP states
    from the given excel workbook."""
    book = pd.ExcelFile(file)
    cst = book.parse('Constants', index_col=[0])['Value']
    srs = book.parse('Time-Series', index_col=[0])
    U = book.parse('DP-Decisions', index_col=[0])['Decisions'].values
    states = book.parse('DP-States', index_col=[0])
    return cst, srs, U, states
项目:prodyn    作者:yabata    | 项目源码 | 文件源码
def read_data(file):
    """Read the DP model inputs (constants, time series, decisions, states)."""
    xls = pd.ExcelFile(file)
    # parse each sheet once, first column as index
    tables = {name: xls.parse(name, index_col=[0])
              for name in ('Constants', 'Time-Series', 'DP-Decisions', 'DP-States')}
    return (tables['Constants']['Value'],
            tables['Time-Series'],
            tables['DP-Decisions']['Decisions'].values,
            tables['DP-States'])
项目:prodyn    作者:yabata    | 项目源码 | 文件源码
def read_data(file):
    """Read data about the system from the excel file and assign
    it to different parameters.

    Args:
            file: excel file, which stores all data about considered system

    Returns:
            cst: constants, which describe the system
            srs: parameters, which are variable with time
            U: list of possible decisions
            states: values, which set number of states and characterize
            all possible ones
    """

    workbook = pd.ExcelFile(file)

    def table(sheet):
        # every sheet uses its first column as the index
        return workbook.parse(sheet, index_col=[0])

    states = table('DP-States')
    cst = table('Constants')['Value']
    srs = table('Time-Series')
    U = table('DP-Decisions')['Decisions'].values
    return cst, srs, U, states



###################################
#for 1 state - temperature
###################################
项目:betterself    作者:jeffshek    | 项目源码 | 文件源码
def get_sanitized_dataframe(self, date_column='Date'):
        """Parse ``self.sheet`` from ``self.file_path`` and return a
        sanitized, date-indexed DataFrame."""
        # ExcelFile does not handle file_paths very well, use native Python open.
        # The with-block guarantees the handle is closed (the original leaked it).
        with open(self.file_path, 'rb') as opened_file:
            excel_file = pd.ExcelFile(opened_file)
            dataframe = excel_file.parse(self.sheet)

        # Sanitize so the inputs are correct and remove fluke days
        dataframe = self._sanitize_sheet(dataframe)
        dataframe = self._set_dataframe_index(dataframe, date_column)
        return dataframe
项目:dfViewer    作者:sterry24    | 项目源码 | 文件源码
def loadFile(self, filename):
        """Open *filename* in a new table tab.

        Excel workbooks (.xls/.xlsx) pop up a sheet-selection dialog that
        continues in loadExcel on accept; .csv and .txt files are loaded
        directly into a QTableView backed by a DataFrameTableModel.
        """
        if filename.endswith('.xls') or filename.endswith('.xlsx'):
            df=pd.ExcelFile(filename)
            sheetnames=df.sheet_names
            # let the user pick sheets; actual loading happens in loadExcel
            dialog=ExcelFileDialog(filename,sheetnames,self)
            dialog.accepted.connect(self.loadExcel)
            dialog.show()
        else:
            table = QTableView()
            table.setAlternatingRowColors(True)
            model=DataFrameTableModel(filename=filename)
            model.trackDataChange.connect(self.trackChanges)
            table.setModel(model)
            ### Set some variables ###
            table.headers = table.horizontalHeader()
            table.vHeaders=table.verticalHeader()
            #### Set context menu for table headers ####
            table.headers.setContextMenuPolicy(Qt.CustomContextMenu)
            table.headers.customContextMenuRequested.connect(self.headerMenu)
            table.vHeaders.setContextMenuPolicy(Qt.CustomContextMenu)
            table.vHeaders.customContextMenuRequested.connect(self.vHeaderMenu)
            if model._filename.endswith('.csv'):
                df=pd.read_csv(model._filename)
                model.setDataFrame(df)
                self.tableTabWidget.addTab(table, QFileInfo(model._filename).fileName())
                self.tableTabWidget.setCurrentWidget(table)
            if model._filename.endswith('.txt'):
                # sniff the delimiter; whitespace-separated files need delim_whitespace
                delim = str(self.parseDelimiter(model._filename))
                if delim == ' ':
                    df=pd.read_csv(model._filename,delim_whitespace = True)
                else:
                    df=pd.read_csv(model._filename,sep=delim)
                model.setDataFrame(df)
                self.tableTabWidget.addTab(table, QFileInfo(model._filename).fileName())
                self.tableTabWidget.setCurrentWidget(table)
项目:Nordic44-Nordpool    作者:ALSETLab    | 项目源码 | 文件源码
def read_excel(fname, header=None):
    """Read excel into dict.
    Args:
        fname: name of excel file
        header: The finland files does not have a header
    Output:
        dictionary containing the data
    """
    xls = ExcelFile(fname)
    if header:
        parse_cols = [1]
    else:
        parse_cols = None

    df = xls.parse(xls.sheet_names[0], skiprows=1,
                   parse_cols=parse_cols)

    # Fix keys: " - " in column names becomes "_".
    # Iterate over a snapshot of the keys: popping/adding while iterating the
    # dict itself raises RuntimeError on Python 3 (the original did that).
    temp = df.to_dict()
    for key in list(temp):
        new_key = key.replace(" - ", "_")
        temp[new_key] = temp.pop(key)
    # Stupid hack for Finland: rename the single column to `header`.
    # dict.keys() is not subscriptable on Python 3; use next(iter(...)).
    if header:
        temp[header] = temp.pop(next(iter(temp)))

    return temp
项目:sport_movements_analysis    作者:guillaumeAssogba    | 项目源码 | 文件源码
def importData(file):
    """Load *file* as a pandas object, trying Excel first, then CSV.

    :param file: path (or buffer) of an excel or csv file
    :return: pd.ExcelFile for excel input, DataFrame for csv input
    :raises ValueError: if the file is neither readable excel nor csv
    """
    try:
        return pd.ExcelFile(file)
    except Exception:
        # not an excel workbook — fall through to the CSV reader
        pass
    try:
        return pd.read_csv(file)
    except Exception:
        print("Please provide an excel or csv file")
        # the original fell through and crashed with UnboundLocalError on
        # `return fileData`; raise an explicit error instead
        raise ValueError("unsupported file format: %r" % (file,))

#Load for not writing all the time
项目:pythonwhat    作者:datacamp    | 项目源码 | 文件源码
def setUp(self):
        """Prepare the SCT payload shared by the tests in this case."""
        pec = ("import pandas as pd; from urllib.request import urlretrieve; "
               "urlretrieve('https://s3.amazonaws.com/assets.datacamp.com/production/course_998/datasets/battledeath.xlsx', 'battledeath.xlsx')")
        self.data = {
            "DC_PEC": pec,
            "DC_SOLUTION": "xl = pd.ExcelFile('battledeath.xlsx')",
            "DC_SCT": "test_object('xl')",
        }
项目:pythonwhat    作者:datacamp    | 项目源码 | 文件源码
def test_step_1(self):
        """Submitting the exact solution code is marked correct."""
        self.data["DC_CODE"] = "xl = pd.ExcelFile('battledeath.xlsx')"
        result = helper.run(self.data)
        self.assertTrue(result['correct'])
项目:pythonwhat    作者:datacamp    | 项目源码 | 文件源码
def test_pass_1(self):
        """A custom converter makes an equivalent ExcelFile submission pass."""
        pec = "import pandas as pd; from urllib.request import urlretrieve; urlretrieve('https://s3.amazonaws.com/assets.datacamp.com/production/course_998/datasets/battledeath.xlsx', 'battledeath.xlsx'); from urllib.request import urlretrieve; urlretrieve('https://s3.amazonaws.com/assets.datacamp.com/production/course_998/datasets/battledeath.xlsx', 'battledeath2.xlsx')"
        sct = '''
def my_converter(x):
    return(x.sheet_names)
set_converter(key = "pandas.io.excel.ExcelFile", fundef = my_converter)
test_object('xl')
'''
        self.data = {
            "DC_PEC": pec,
            "DC_SOLUTION": "xl = pd.ExcelFile('battledeath.xlsx')",
            "DC_CODE": "xl = pd.ExcelFile('battledeath2.xlsx')",
            "DC_SCT": sct,
        }
        sct_payload = helper.run(self.data)
        self.assertTrue(sct_payload['correct'])
项目:qtim_ROP    作者:QTIM-Lab    | 项目源码 | 文件源码
def unet_cross_val(data_dir, out_dir, mapping, splits, unet_conf):
    """Run U-Net training/testing over pre-computed cross-validation splits.

    Args:
        data_dir: directory containing 'images', 'manual_segmentations', 'masks'
        out_dir: output directory for the generated split folders
        mapping: path to the Excel sheet mapping image indices to classes
        splits: path to a .mat file holding train/test index arrays
        unet_conf: U-Net configuration passed through to train_and_test
    """
    # Load spreadsheet and normalize the class labels
    with pd.ExcelFile(mapping) as xls:
        df = pd.read_excel(xls, 'Sheet1').set_index('index')
        df['class'] = df['class'].map({'preplus': 'pre-plus', 'normal': 'normal', 'plus': 'plus'})

    img_dir = join(data_dir, 'images')
    seg_dir = join(data_dir, 'manual_segmentations')
    mask_dir = join(data_dir, 'masks')

    # Check whether all images exist
    check_images_exist(df, img_dir, seg_dir, mask_dir)

    # Now split into training and testing
    CVFile = sio.loadmat(splits)

    # Combining No and Pre-Plus
    trainPrePIndex = CVFile['trainPrePIndex'][0]
    testPrePIndex = CVFile['testPrePIndex'][0]

    prep_dir = make_sub_dir(out_dir, 'trainTestPreP')
    # print() call: the original Python-2 `print "..."` statement is a
    # syntax error on Python 3
    print("Generating splits for combined Pre-Plus and Plus")
    generate_splits(trainPrePIndex, testPrePIndex, df, img_dir, mask_dir, seg_dir, prep_dir)

    # Train models
    train_and_test(prep_dir, unet_conf, processes=1)
项目:tmtk    作者:thehyve    | 项目源码 | 文件源码
def read_hd_file_template(self, source_dir, hd_template):
        """Try to read the specified template file and send to sheet loading method."""
        template_path = os.path.join(source_dir, hd_template)
        try:
            # dtype=object keeps cell values unconverted; '#' lines are comments
            workbook = pd.ExcelFile(template_path, comment="#", dtype=object)
        except FileNotFoundError:
            raise Validity.TemplateException(
                "Could not find high-dim template file at: {0}".format(template_path))

        self._load_sheets(workbook)
项目:seetd    作者:stitchfix    | 项目源码 | 文件源码
def load_data(fname='SeatTest_New.xlsx'):
    """
    Open the seating workbook with pandas.
    :param fname: string location of the file to load
    :return: pandas ExcelFile object
    """
    workbook = pd.ExcelFile(fname)
    return workbook
项目:autoupdate_blacklists    作者:elluscinia    | 项目源码 | 文件源码
def parse_statistics(logfile):
    """Sort the 'Sheet' worksheet of *logfile* by 'Line Numbers', write it
    back, and append a scatter chart of upload time vs. line numbers."""
    xl = pd.ExcelFile(logfile)
    df = xl.parse("Sheet")
    df = df.sort_values(by='Line Numbers')

    # Write the sorted data back; the context manager saves and closes the
    # writer (ExcelWriter.save() was removed in pandas 2.0).
    with pd.ExcelWriter(logfile) as writer:
        df.to_excel(writer, sheet_name='Sheet', index=False)

    wb = openpyxl.load_workbook(logfile)
    ws = wb.active

    row_count = ws.max_row
    column_count = ws.max_column

    chart = ScatterChart()
    chart.title = "Time upload domain names"
    chart.style = 13
    chart.x_axis.title = "Line numbers"
    chart.y_axis.title = "Time, sec"

    # column 1 holds the x values (line numbers); rows 2..n are data
    xvalues = Reference(ws, min_col=1, min_row=2, max_row=row_count)
    color_choice = ['3F888F', 'D24D57']
    # one series per remaining column, colored from color_choice
    for i in range(2, column_count + 1):
        values = Reference(ws, min_col=i, min_row=1, max_row=row_count)
        series = Series(values, xvalues, title_from_data=True)
        series.marker.symbol = "diamond"
        series.graphicalProperties.line.solidFill = color_choice[i-2]
        series.marker.graphicalProperties.line.solidFill = color_choice[i-2]
        series.marker.graphicalProperties.solidFill = color_choice[i-2]
        series.graphicalProperties.line.width = 20000
        chart.series.append(series)

    chart.legend.legendPos = 'b'
    ws.add_chart(chart)
    wb.save(logfile)
项目:dfViewer    作者:sterry24    | 项目源码 | 文件源码
def loadExcel(self,options):
        """Load the selected sheets of an Excel workbook into table views.

        Args (via the ``options`` dict):
            sheets: list of sheet names to load
            file: path of the workbook
            openEach: if False, all sheets go into one nested QTabWidget;
                if True, each sheet becomes its own top-level tab.
        """
        names = options['sheets']
        filename = options['file']
        openEach = options['openEach']
        df=pd.ExcelFile(filename)
        if not openEach:
            # one closable inner tab widget holding a tab per sheet
            newTab = QTabWidget()
            newTab.setTabsClosable(True)
            newTab.currentChanged.connect(self.tabChanged)
            self.connect(newTab,SIGNAL("tabCloseRequested(int)"),
                         self.fileCloseInternalTab)
            for i in range(len(names)):
                table = QTableView()
                table.setAlternatingRowColors(True)
                model=DataFrameTableModel(filename=filename)
                model.trackDataChange.connect(self.trackChanges)
                table.setModel(model)
                ### Set some variables ###
                table.headers = table.horizontalHeader()
                table.vHeaders=table.verticalHeader()
                #### Set context menu for table headers ####
                table.headers.setContextMenuPolicy(Qt.CustomContextMenu)
                table.headers.customContextMenuRequested.connect(self.headerMenu)
                table.vHeaders.setContextMenuPolicy(Qt.CustomContextMenu)
                table.vHeaders.customContextMenuRequested.connect(self.vHeaderMenu)

                # NOTE(review): `sheetname=` is the old pandas keyword; newer
                # pandas renamed it — confirm the pinned pandas version
                df2=df.parse(sheetname=names[i])
                model.setDataFrame(df2)
                newTab.addTab(table,names[i])
            newTab.setCurrentIndex(0)
            self.tableTabWidget.addTab(newTab,QFileInfo(filename).fileName())
            self.tableTabWidget.setCurrentWidget(newTab)
        else:
            # one top-level tab per sheet
            for i in range(len(names)):
                table = QTableView()
                table.setAlternatingRowColors(True)
                model=DataFrameTableModel(filename=names[i])
                model.trackDataChange.connect(self.trackChanges)
                table.setModel(model)
                ### Set some variables ###
                table.headers = table.horizontalHeader()
                table.vHeaders=table.verticalHeader()
                #### Set context menu for table headers ####
                table.headers.setContextMenuPolicy(Qt.CustomContextMenu)
                table.headers.customContextMenuRequested.connect(self.headerMenu)
                table.vHeaders.setContextMenuPolicy(Qt.CustomContextMenu)
                table.vHeaders.customContextMenuRequested.connect(self.vHeaderMenu)
                df2=df.parse(sheetname=names[i])
                model.setDataFrame(df2)
                self.tableTabWidget.addTab(table,names[i])
                self.tableTabWidget.setCurrentWidget(table)
项目:autolysis    作者:gramener    | 项目源码 | 文件源码
def metadata_file(path, root, tables=None):
    '''
    Returns the metadata for a file. There are 3 types of file formats:

    1. Archives (7z, zip, rar, tar) / compressed (xz, bzip2, gzip). Decompress and process
    2. Database (sqlite3, hdf5, xls, xlsx). Process each table/sheet as a sub-dataset
    3. Data (csv, json). Process directly
    '''
    tree = Meta()
    format = guess_format(path)
    if format is not None:
        tree.format = format

    if format == 'dir':
        # walk the directory; every file becomes a sub-dataset keyed by its
        # path relative to the directory root
        tree.datasets = Datasets()
        for base, dirs, files in os.walk(path):
            for filename in files:
                source = os.path.join(base, filename)
                name = os.path.relpath(source, path)
                tree.datasets[name] = submeta = Meta(name=name, source=source)
                try:
                    submeta.update(metadata_file(source, root, tables))
                except Exception as e:
                    # best-effort: record the error on the sub-dataset and continue
                    submeta['error'] = str(e)
                    logging.exception('Unable to get metadata for %s', source)
    elif format in {'7z', 'zip', 'rar', 'tar', 'xz', 'gz', 'bz2'}:
        # archive/compressed input: extract, then recurse into each member
        tree.datasets = Datasets()
        for name, source in unzip_files(path, root, format):
            tree.datasets[name] = submeta = Meta(name=name)
            try:
                submeta.update(metadata_file(source, root, tables))
            except Exception as e:
                submeta['error'] = str(e)
                logging.exception('Unable to get metadata for %s', source)
    elif format == 'sqlite3':
        tree.update(metadata_sql('sqlite:///' + path, tables))
    elif format in {'hdf5', 'xls', 'xlsx'}:
        # list the tables/sheets without reading their contents
        if format == 'hdf5':
            store = pd.HDFStore(path)
            table_list = store.keys()
            store.close()
        else:
            xls = pd.ExcelFile(path)
            table_list = xls.sheet_names
            # NOTE: .xls input is reported as 'xlsx' from here on
            format = 'xlsx'
        tree.datasets = Datasets()
        for table in table_list:
            # honor the optional `tables` filter
            if tables is None or table in tables:
                tree.datasets[table] = Meta([
                    ('name', table),
                    ('format', 'table'),
                    ('command', [format, path, table])
                ])
    elif format == 'csv':
        tree.command = ['csv', path]
    elif format == 'json':
        tree.command = ['json', path]
    return tree