Python pandas module: read_excel() example source code

We extracted the following 50 code examples from open-source Python projects to illustrate how to use pandas.read_excel().
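Before the project examples, here is a minimal usage sketch (file and sheet names are placeholders, not taken from any project below):

import pandas as pd

# read the first sheet of a workbook into a DataFrame
df = pd.read_excel('example.xlsx')

# read a named sheet and use the first column as the index
df = pd.read_excel('example.xlsx', sheet_name='Sheet1', index_col=0)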

Project: AutoTrading    Author: curme
def getExcelData(self):
        """
        get data from 'hsi_futures.xlsx'
        Date | Open | High | Low | Close | SMAVG5 | SMAVG10 | SMAVG15 | Volume | VolumeSMAVG5
        :return: data table
        """
        df = pd.DataFrame()
        xl = pd.ExcelFile("../dataManager/hsi_futures.xlsx")
        # print xl.sheet_names
        sheets = xl.sheet_names
        for sheet in sheets:
            df = df.append(pd.read_excel("../dataManager/hsi_futures.xlsx", sheet))
        df['Date'] = pd.to_datetime(df['Date'])
        df.sort_values("Date", ascending=True, inplace=True)
        data = df.set_index([range(df.shape[0])])
        return data
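A note on the example above: newer pandas can read every sheet in one call instead of looping; a hedged sketch of the equivalent (keyword names as of pandas 0.21, same path as the example):

import pandas as pd

# sheet_name=None returns a dict mapping sheet name -> DataFrame
sheets = pd.read_excel("../dataManager/hsi_futures.xlsx", sheet_name=None)
df = pd.concat(sheets.values(), ignore_index=True)
df['Date'] = pd.to_datetime(df['Date'])
df = df.sort_values("Date").reset_index(drop=True)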
Project: TuShare    Author: andyzsf
def get_hs300s():
    """
    ????300??????????
    Return
    --------
    DataFrame
        code :????
        name :????
        date :??
        weight:??
    """
    from tushare.stock.fundamental import get_stock_basics
    try:
        wt = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP%(ct.P_TYPE['ftp'], ct.DOMAINS['idxip'], 
                                                  ct.PAGES['hs300w']), parse_cols=[0, 3, 6])
        wt.columns = ct.FOR_CLASSIFY_W_COLS
        wt['code'] = wt['code'].map(lambda x :str(x).zfill(6))
        df = get_stock_basics()[['name']]
        df = df.reset_index()
        return pd.merge(df,wt)
    except Exception as er:
        print(str(er))
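Note: parse_cols, used here and in several snippets below, was deprecated in pandas 0.21 in favor of usecols; a minimal sketch with a placeholder path:

import pandas as pd

# modern equivalent of parse_cols=[0, 3, 6]
wt = pd.read_excel('hs300_weights.xls', usecols=[0, 3, 6])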
Project: TuShare    Author: andyzsf
def get_sz50s():
    """
    ????50???
    Return
    --------
    DataFrame
        code :????
        name :????
    """
    try:
        df = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP%(ct.P_TYPE['ftp'], ct.DOMAINS['idxip'], 
                                                  ct.PAGES['sz50b']), parse_cols=[0,1])
        df.columns = ct.FOR_CLASSIFY_B_COLS
        df['code'] = df['code'].map(lambda x :str(x).zfill(6))
        return df
    except Exception as er:
        print(str(er))
Project: TuShare    Author: andyzsf
def get_zz500s():
    """
    ????500???
    Return
    --------
    DataFrame
        code :????
        name :????
    """
    from tushare.stock.fundamental import get_stock_basics
    try:
#         df = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP%(ct.P_TYPE['ftp'], ct.DOMAINS['idxip'], 
#                                                   ct.PAGES['zz500b']), parse_cols=[0,1])
#         df.columns = ct.FOR_CLASSIFY_B_COLS
#         df['code'] = df['code'].map(lambda x :str(x).zfill(6))
        wt = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP%(ct.P_TYPE['ftp'], ct.DOMAINS['idxip'], 
                                                   ct.PAGES['zz500wt']), parse_cols=[0, 3, 6])
        wt.columns = ct.FOR_CLASSIFY_W_COLS
        wt['code'] = wt['code'].map(lambda x :str(x).zfill(6))
        df = get_stock_basics()[['name']]
        df = df.reset_index()
        return pd.merge(df,wt)
    except Exception as er:
        print(str(er))
Project: base_function    Author: Rockyzsu
def search():
    #??
    df = pd.read_excel("huatai2.xls")
    input_m = 0.0
    output_m = 0.0
    for index, row in df.iterrows():
        if row[u'??'] == u'??':
            each_input = row[u'?????']
            print u"??",
            print each_input
            input_m = input_m + each_input
            #print type(money)
        if row[u'??'] == u'??':
            each_output = row[u'?????']
            print u"??",
            print each_output
            #print type(money)
            output_m = output_m + each_output

    print "Sumary is %f" % (input_m - output_m)
Project: base_function    Author: Rockyzsu
def replace_test():
    #???

    df = pd.read_excel("huatai2.xls")
    s1 = pd.Series(['a', 'b', 'c', 'd', 'e'])
    #print s1
    s2 = pd.Series(['1', '2', '3', '4', '5'])
    #print s2

    s3 = s1.replace(1, 'k')
    #print s1
    #print s3
    print df
    df.replace(['20160722', u'????', 2431.0, u'????', 13.00, 300.0, 3891.10, 3905.71, u'??'],
               ['20160722', '0', '0', '0', 0, 0, 0, 0, '0'], inplace=True)
    #df.replace(['20160722'],['20160725','0','0','0',0,0,0,0,'0'],inplace=True)
    print df
Project: newsrecommender    Author: Newsrecommender
def load_articles(self):
        """
        Loads the DataFrame with all the articles.
        Return: DataFrame with the title, content, tags and author of all articles.
        """
        #parser = SafeConfigParser()
        #parser.read('Config.ini')
        #file_path = settings['IP_FILE_PATH']
        #file_name = settings['IP_FILE_NAME']

        #logging.debug("Directory Name : {0} and File name is {1} \n".format(file_path,file_name))

        #logging.debug("Directory Name : {0} and File name is {1} \n".format(parser.get('Article_input_dir', 'ip_file_path'),parser.get('Article_input_file', 'ip_file_name'))    
        #file_path = '/Users/shwetanknagar/Downloads/Personal/Project Eventstreet/Boconni Project'
        #file_name = os.path.basename("TestSet300_User_Ratings.xlsx")
        path = os.path.join(self.ip_file_path, self.ip_file_name)
        #commented by shwenag
        #self.df = pd.read_csv('TrainSet700_User_Ratings.xlsx', encoding='utf-8')         # Load articles in a DataFrame
        self.df = pd.read_excel(path,  na_values=['NA'], parse_cols = "A,B,C")
        #self.df = self.df[['Sno', 'title', 'content_text']]  # Slice to remove redundant columns
        #commenting the below by shwenag
        print(self.df)
        logging.debug("Number of articles: {0} and no of columns are {1} \n".format(len(self.df),self.df.shape))
Project: newsrecommender    Author: Newsrecommender
def load_articles(self):
        """
        Loads the DataFrame with all the articles.
        Return: DataFrame with the title, content, tags and author of all articles.
        """
        #parser = SafeConfigParser()
        #parser.read('Config.ini')
        #file_path = settings['IP_FILE_PATH']
        #file_name = settings['IP_FILE_NAME']

        #logging.debug("Directory Name : {0} and File name is {1} \n".format(file_path,file_name))

        #logging.debug("Directory Name : {0} and File name is {1} \n".format(parser.get('Article_input_dir', 'ip_file_path'),parser.get('Article_input_file', 'ip_file_name'))    
        file_path = '/Users/shwetanknagar/Downloads/Personal/Project Eventstreet/Boconni Project'
        file_name = os.path.basename("TestSet300_User_Ratings.xlsx")
        path = os.path.join(file_path, file_name)
        #commented by shwenag
        #self.df = pd.read_csv('TrainSet700_User_Ratings.xlsx', encoding='utf-8')         # Load articles in a DataFrame
        self.df = pd.read_excel(path,  na_values=['NA'], parse_cols = "A,B,C")
        #self.df = self.df[['Sno', 'title', 'content_text']]  # Slice to remove redundant columns
        #commenting the below by shwenag
        print(self.df)
        logging.debug("Number of articles: {0} and no of columns are {1} \n".format(len(self.df),self.df.shape))
Project: cjworkbench    Author: CJWorkbench
def test_load_xlsx(self):
        url = 'http://test.com/the.xlsx'
        self.url_pval.set_value(url)
        self.url_pval.save()

        xlsx_bytes = open(mock_xslx_path, "rb").read()
        xlsx_table = pd.read_excel(mock_xslx_path)

        # success case
        with requests_mock.Mocker() as m:
            m.get(url, content=xlsx_bytes, headers={'content-type': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'})
            self.press_fetch_button()
            response = self.get_render()
            self.assertEqual(response.content, make_render_json(xlsx_table))

        # malformed file should put module in error state
        with requests_mock.Mocker() as m:
            m.get(url, content=b"there's just no way this is xlsx", headers={'content-type': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'})
            self.press_fetch_button()
            self.wfmodule.refresh_from_db()
            self.assertEqual(self.wfmodule.status, WfModule.ERROR)
Project: smart-battery-for-smart-energy-usage    Author: AnatolyPavlov
def main():
    df = pd.read_excel('../data/Tariffs.xlsx')
    df.loc[df['Tariff'] == 'Low', 'Tariff'] = 0.0399
    df.loc[df['Tariff'] == 'Normal', 'Tariff'] = 0.1176
    df.loc[df['Tariff'] == 'High', 'Tariff'] = 0.6720
    #
    ets = ExtractTimeSeries(datetime_col='TariffDateTime', yt_col='Tariff')
    df = ets.transform(df)
    #
    day = pd.to_datetime('2013-12-27').date()
    next_day = day + timedelta(days=1)
    df_out = df.query('index >= @day and index < @next_day')
    df_out.columns=['Tariff (UK Pounds)']
    #
    print_process('Saving Post-Processed Data')
    path_to_price = '../clean_data/price_data_London.csv'
    df_out.to_csv(path_to_price)
    print 'Tariff data saved into: {}'.format(path_to_price)
    print
Project: nuts-ml    Author: maet3608
def _load_table(self, filepath):
        """
        Load table from file system.

        :param str filepath: Path to table in CSV, TSV, XLSX or
                   Pandas pickle format.
        :return: Pandas table
        :rtype: pandas.core.frame.DataFrame
        """
        _, ext = os.path.splitext(filepath.lower())
        if ext == '.tsv':
            return pd.read_table(filepath, **self.kwargs)
        if ext == '.csv':
            return pd.read_csv(filepath, **self.kwargs)
        if ext == '.xlsx':
            return pd.read_excel(filepath, **self.kwargs)
        return pd.read_pickle(filepath, **self.kwargs)
Project: the-magical-csv-merge-machine    Author: entrepreneur-interet-general
def read_excel(self, file):
        # TODO: add iterator and return columns
        excel_tab = pd.read_excel(file, dtype=str)
        columns = excel_tab.columns

        def make_gen(excel_tab, chunksize):
            cursor = 0
            chunk = excel_tab.iloc[:chunksize]
            while chunk.shape[0]:
                yield chunk
                cursor += chunksize
                chunk = excel_tab.iloc[cursor:cursor+chunksize]
        tab = make_gen(excel_tab, self.CHUNKSIZE) 

        tab = (self._clean_header(tab_part) for tab_part in tab)

        return tab, None, None, self._clean_column_names(columns)
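For illustration, the generator returned above can be consumed like any iterator; a sketch assuming a reader instance named `loader` and a hypothetical per-chunk handler:

tab, _, _, columns = loader.read_excel('input.xlsx')
for chunk in tab:
    handle_chunk(chunk)  # hypothetical: process one DataFrame chunk at a time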
Project: stock    Author: Rockyzsu
def GetAllTodayData(self):
        # Fetch today's quotes for all stocks; cache them to an Excel file so
        # the data is only downloaded once per day
        filename=self.today+'_all_.xls'
        # keep the file under the data directory
        filename=os.path.join(self.path,filename)
        if not os.path.exists(filename):
            self.df_today_all=ts.get_today_all()
            # drop rows whose turnover ratio is zero
            self.df_today_all.drop(self.df_today_all[self.df_today_all['turnoverratio']==0].index,inplace=True)
            # alternative, non-inplace version of the same filter:
            #n1=self.df_today_all[self.df_today_all['turnoverratio']==0]
            #n2=self.df_today_all.drop(n1.index)
            #print n2
            print self.df_today_all
            self.df_today_all.to_excel(filename,sheet_name='All')

        else:
            self.df_today_all=pd.read_excel(filename,sheet_name='All')
            print "File existed"
Project: stock    Author: Rockyzsu
def count_up_down(filename):
    total=[]
    df=pd.read_excel(filename)

    count= len(df[(df['changepercent']>=-10.2) & (df['changepercent']<-9)])
    total.append(count)
    for i in range(-9,9,1):
        count= len(df[(df['changepercent']>=i*1.00) & (df['changepercent']<((i+1))*1.00)])
        total.append(count)
    count= len(df[(df['changepercent']>=9)])
    total.append(count)
    print total
    df_figure=pd.Series(total,index=[range(-10,10)])
    print df_figure
    fg=df_figure.plot(kind='bar',table=True)
    plt.show(fg)
Project: Factory_Planning    Author: dimgold
def read_res(file): #read data from xls files
    Size, R, xr, xl, xc, yu, yd, yc = [], [], [], [], [], [], [], []  # lists for areas, Fij's, dept coordinates
    out = pd.read_excel(file, sheetname = "Out") #read model results
    Rout = pd.read_excel(file, sheetname = "R") #read Fij's
    Sizeout = pd.read_excel(file, sheetname = "Size") #read deps wanted sizes
    Wout = pd.read_excel(file, sheetname = "W") # w1 and w2
    w1 = float(Wout['w1'][0])
    w2 = 1.0-w1
    totx = float(out['totx'][0]) #total length in x axis
    toty = float(out['toty'][0]) #total length in y axis
    for d in range(len(Sizeout)): #insert data results into python lists
        R.append([])
        Size.append(float(Sizeout['Area'][d]))
        xr.append(float(out['Xr'][d]))
        xl.append(float(out['Xl'][d]))
        xc.append((float(out['Xl'][d])+float(out['Xr'][d]))/2)
        yu.append(float(out['Yu'][d]))
        yd.append(float(out['Yd'][d]))   
        yc.append((float(out['Yu'][d])+float(out['Yd'][d]))/2)
        for i in range(len(Rout)):
            R[d].append(float(Rout.iloc[d,i]))
    return Size, R, totx, toty, xr,xl,xc,yu,yd,yc, w1, w2
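The sheetname keyword used above (and in several snippets below) was renamed to sheet_name in pandas 0.21; a minimal sketch with a placeholder file:

import pandas as pd

out = pd.read_excel('results.xlsx', sheet_name='Out')   # pandas >= 0.21
# out = pd.read_excel('results.xlsx', sheetname='Out')  # pandas < 0.21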
Project: CUBAC    Author: usnistgov
def groups(ofname):
    df = pandas.read_excel('GC-VTPR.xlsx', sheetname='Groups')
    entries = []
    for i,row in df.iterrows():
        entry = {
          "Q_k": row['Qk'], 
          "R_k": row['Rk'], 
          "maingroup_name": row["main group name"], 
          "mgi": row['main group index'], 
          "sgi": row['sub group index'], 
          "subgroup_name": row["sub group name"]
        }
        entries.append(entry)

    with open(ofname, 'w') as fp:
        json.dump(entries, fp, indent = 2, sort_keys = True)
Project: CUBAC    Author: usnistgov
def interaction_parameters(ofname):
    df = pandas.read_excel('GC-VTPR.xlsx', sheetname='InteractionParameters')
    df = df.fillna(0.0)
    entries = []
    for i,row in df.iterrows():
        entry = {
            "a_ij": row['aij / K'],
            "a_ji": row['aji / K'],
            "b_ij": row['bij'],
            "b_ji": row['bji'],
            "c_ij": row['cij / K-1'], 
            "c_ji": row['cji / K-1'], 
            "mgi1": row['i'], 
            "mgi2": row['j']
        }
        entries.append(entry)

    with open(ofname, 'w') as fp:
        json.dump(entries, fp, indent = 2, sort_keys = True)
Project: ReducedVarianceReparamGradients    Author: andymiller
def _load_powerplant():
    """
    attribute information:

    features consist of hourly average ambient variables 
    - temperature (t) in the range 1.81 c and 37.11 c,
    - ambient pressure (ap) in the range 992.89-1033.30 millibar,
    - relative humidity (rh) in the range 25.56% to 100.16%
    - exhaust vacuum (v) in the range 25.36-81.56 cm hg
    - net hourly electrical energy output (ep) 420.26-495.76 mw
    the averages are taken from various sensors located around the
    plant that record the ambient variables every second.
    the variables are given without normalization.
    """
    data_file = os.path.join(data_dir, 'power-plant/Folds5x2_pp.xlsx')
    data = pd.read_excel(data_file)
    x    = data.values[:, :-1]
    y    = data.values[:,  -1]
    return x, y
Project: statscraper    Author: jplusplus
def _fetch_data(self, dataset, query=None):
        files = [(y, m) for y in query['years'] for m in query['months']]
        frames = []

        # Download and clean every monthly Excel file
        for file in files:
            year, month = file
            url = self.BASE_URL.format(year=year, month=MONTHS[month])
            frame = self._clean_data(pd.read_excel(url), year, month)
            frames.append(frame)

        # Yield individual rows of type Result from the dataframe
        raw_data = pd.concat(frames)
        for i, row in raw_data.iterrows():
            val = row.pop('value')
            yield Result(val, json.loads(row.to_json()))
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia
def get_exceldf(self, basename, *args, **kwds):
        """
        Return test data DataFrame. Test data path is defined by
        pandas.util.testing.get_data_path()

        Parameters
        ----------

        basename : str
            File base name, excluding file extension.

        Returns
        -------

        df : DataFrame
        """
        pth = os.path.join(self.dirpath, basename + self.ext)
        return read_excel(pth, *args, **kwds)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia
def test_read_one_empty_col_no_header(self):
        df = pd.DataFrame(
            [["", 1, 100],
             ["", 2, 200],
             ["", 3, 300],
             ["", 4, 400]]
        )
        with ensure_clean(self.ext) as path:
            df.to_excel(path, 'no_header', index=False, header=False)
            actual_header_none = read_excel(
                path,
                'no_header',
                parse_cols=[0],
                header=None
            )

            actual_header_zero = read_excel(
                path,
                'no_header',
                parse_cols=[0],
                header=0
            )
        expected = DataFrame()
        tm.assert_frame_equal(actual_header_none, expected)
        tm.assert_frame_equal(actual_header_zero, expected)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia
def test_read_from_file_url(self):

        # FILE
        if sys.version_info[:2] < (2, 6):
            raise nose.SkipTest("file:// not supported with Python < 2.6")

        localtable = os.path.join(self.dirpath, 'test1' + self.ext)
        local_table = read_excel(localtable)

        try:
            url_table = read_excel('file://localhost/' + localtable)
        except URLError:
            # fails on some systems
            import platform
            raise nose.SkipTest("failing on %s" %
                                ' '.join(platform.uname()).strip())

        tm.assert_frame_equal(url_table, local_table)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia
def test_read_excel_skiprows_list(self):
        # GH 4903
        actual = pd.read_excel(os.path.join(self.dirpath,
                                            'testskiprows' + self.ext),
                               'skiprows_list', skiprows=[0, 2])
        expected = DataFrame([[1, 2.5, pd.Timestamp('2015-01-01'), True],
                              [2, 3.5, pd.Timestamp('2015-01-02'), False],
                              [3, 4.5, pd.Timestamp('2015-01-03'), False],
                              [4, 5.5, pd.Timestamp('2015-01-04'), True]],
                             columns=['a', 'b', 'c', 'd'])
        tm.assert_frame_equal(actual, expected)

        actual = pd.read_excel(os.path.join(self.dirpath,
                                            'testskiprows' + self.ext),
                               'skiprows_list', skiprows=np.array([0, 2]))
        tm.assert_frame_equal(actual, expected)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia
def test_read_excel_squeeze(self):
        # GH 12157
        f = os.path.join(self.dirpath, 'test_squeeze' + self.ext)

        actual = pd.read_excel(f, 'two_columns', index_col=0, squeeze=True)
        expected = pd.Series([2, 3, 4], [4, 5, 6], name='b')
        expected.index.name = 'a'
        tm.assert_series_equal(actual, expected)

        actual = pd.read_excel(f, 'two_columns', squeeze=True)
        expected = pd.DataFrame({'a': [4, 5, 6],
                                 'b': [2, 3, 4]})
        tm.assert_frame_equal(actual, expected)

        actual = pd.read_excel(f, 'one_column', squeeze=True)
        expected = pd.Series([1, 2, 3], name='a')
        tm.assert_series_equal(actual, expected)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia
def test_int_types(self):
        _skip_if_no_xlrd()

        for np_type in (np.int8, np.int16, np.int32, np.int64):

            with ensure_clean(self.ext) as path:
                # Test np.int values read come back as int (rather than float
                # which is Excel's format).
                frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)),
                                  dtype=np_type)
                frame.to_excel(path, 'test1')
                reader = ExcelFile(path)
                recons = read_excel(reader, 'test1')
                int_frame = frame.astype(np.int64)
                tm.assert_frame_equal(int_frame, recons)
                recons2 = read_excel(path, 'test1')
                tm.assert_frame_equal(int_frame, recons2)

                # test with convert_float=False comes back as float
                float_frame = frame.astype(float)
                recons = read_excel(path, 'test1', convert_float=False)
                tm.assert_frame_equal(recons, float_frame,
                                      check_index_type=False,
                                      check_column_type=False)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia
def test_sheets(self):
        _skip_if_no_xlrd()

        with ensure_clean(self.ext) as path:
            self.frame['A'][:5] = nan

            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', columns=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # Test writing to separate sheets
            writer = ExcelWriter(path)
            self.frame.to_excel(writer, 'test1')
            self.tsframe.to_excel(writer, 'test2')
            writer.save()
            reader = ExcelFile(path)
            recons = read_excel(reader, 'test1', index_col=0)
            tm.assert_frame_equal(self.frame, recons)
            recons = read_excel(reader, 'test2', index_col=0)
            tm.assert_frame_equal(self.tsframe, recons)
            np.testing.assert_equal(2, len(reader.sheet_names))
            np.testing.assert_equal('test1', reader.sheet_names[0])
            np.testing.assert_equal('test2', reader.sheet_names[1])
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia
def test_colaliases(self):
        _skip_if_no_xlrd()

        with ensure_clean(self.ext) as path:
            self.frame['A'][:5] = nan

            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', columns=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # column aliases
            col_aliases = Index(['AA', 'X', 'Y', 'Z'])
            self.frame2.to_excel(path, 'test1', header=col_aliases)
            reader = ExcelFile(path)
            rs = read_excel(reader, 'test1', index_col=0)
            xp = self.frame2.copy()
            xp.columns = col_aliases
            tm.assert_frame_equal(xp, rs)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia
def test_to_excel_multiindex(self):
        _skip_if_no_xlrd()

        frame = self.frame
        arrays = np.arange(len(frame.index) * 2).reshape(2, -1)
        new_index = MultiIndex.from_arrays(arrays,
                                           names=['first', 'second'])
        frame.index = new_index

        with ensure_clean(self.ext) as path:
            frame.to_excel(path, 'test1', header=False)
            frame.to_excel(path, 'test1', columns=['A', 'B'])

            # round trip
            frame.to_excel(path, 'test1', merge_cells=self.merge_cells)
            reader = ExcelFile(path)
            df = read_excel(reader, 'test1', index_col=[0, 1],
                            parse_dates=False)
            tm.assert_frame_equal(frame, df)

    # Test for Issue 11328. If column indices are integers, make
    # sure they are handled correctly for either setting of
    # merge_cells
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia
def test_to_excel_multiindex_dates(self):
        _skip_if_no_xlrd()

        # try multiindex with dates
        tsframe = self.tsframe.copy()
        new_index = [tsframe.index, np.arange(len(tsframe.index))]
        tsframe.index = MultiIndex.from_arrays(new_index)

        with ensure_clean(self.ext) as path:
            tsframe.index.names = ['time', 'foo']
            tsframe.to_excel(path, 'test1', merge_cells=self.merge_cells)
            reader = ExcelFile(path)
            recons = read_excel(reader, 'test1',
                                index_col=[0, 1])

            tm.assert_frame_equal(tsframe, recons)
            self.assertEqual(recons.index.names, ('time', 'foo'))
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia
def test_to_excel_multiindex_no_write_index(self):
        _skip_if_no_xlrd()

        # Test writing and re-reading a MI without the index. GH 5616.

        # Initial non-MI frame.
        frame1 = DataFrame({'a': [10, 20], 'b': [30, 40], 'c': [50, 60]})

        # Add a MI.
        frame2 = frame1.copy()
        multi_index = MultiIndex.from_tuples([(70, 80), (90, 100)])
        frame2.index = multi_index

        with ensure_clean(self.ext) as path:

            # Write out to Excel without the index.
            frame2.to_excel(path, 'test1', index=False)

            # Read it back in.
            reader = ExcelFile(path)
            frame3 = read_excel(reader, 'test1')

            # Test that it is the same as the initial frame.
            tm.assert_frame_equal(frame1, frame3)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia
def test_datetimes(self):

        # Test writing and reading datetimes. For issue #9139. (xref #9185)
        _skip_if_no_xlrd()

        datetimes = [datetime(2013, 1, 13, 1, 2, 3),
                     datetime(2013, 1, 13, 2, 45, 56),
                     datetime(2013, 1, 13, 4, 29, 49),
                     datetime(2013, 1, 13, 6, 13, 42),
                     datetime(2013, 1, 13, 7, 57, 35),
                     datetime(2013, 1, 13, 9, 41, 28),
                     datetime(2013, 1, 13, 11, 25, 21),
                     datetime(2013, 1, 13, 13, 9, 14),
                     datetime(2013, 1, 13, 14, 53, 7),
                     datetime(2013, 1, 13, 16, 37, 0),
                     datetime(2013, 1, 13, 18, 20, 52)]

        with ensure_clean(self.ext) as path:
            write_frame = DataFrame.from_items([('A', datetimes)])
            write_frame.to_excel(path, 'Sheet1')
            read_frame = read_excel(path, 'Sheet1', header=0)

            tm.assert_series_equal(write_frame['A'], read_frame['A'])

    # GH7074
Project: forward    Author: yajun0601
def pingan_trust():
    result = get_total(start_date=START_DATE,end_date='2016-05-10')
#    issuers = pd.read_excel('????-SW??-????.xlsx',sheetname=[0], header = 0)[0]
#    issuers = my_db.getCompanyList()
    issuers = pd.read_excel('../peace/??????.xlsx',sheetname=[0], header = 0)[0]
    issuers.columns= ['name']
    focus = issuers.merge(result, on='name', how='left')

    focus = focus.sort_values('??',axis=0,ascending=False)
    import time
    time_str = time.strftime('%Y%m%d',time.localtime(time.time()))
    focus['rptDate']=time_str

    insert_into_db(focus)     

    report = focus.dropna(axis=0, how='any',thresh=3)
    report.to_excel("shixin_zhixing_bank.xlsx")
Project: forward    Author: yajun0601
def getModle():
    fixedEvaluation = pd.read_excel(MODLE_FILE_NAME,sheetname=[0], header = 0, skiprows = [0])
    industryTbl = pd.read_excel(MODLE_FILE_NAME,sheetname=[1], index_col = 2,parse_cols="B:L",header = 3)
    trendTbl = pd.read_excel(MODLE_FILE_NAME,sheetname=[2], header = 2,skiprows=[0])
    fluctuationTbl = pd.read_excel(MODLE_FILE_NAME,sheetname=[3], header = 2,skiprows=[0])
    fixedScoreTble = pd.read_excel(MODLE_FILE_NAME,sheetname=[4], header = 0,skiprows=[0])

    df=pd.read_excel(DATA_FILE,sheetname=[1], header = 0,index_col=0,verbose=True)
    df[1].head().index
    df[1].head().columns

    df[1].head().describe()
    df[1].head().loc[:,['????','?????']]

    for i in range(df[1].head().iloc[1].count()):
        print(df[1].head().iloc[1][i])

    head = df[1].head()    
    head.values[0][1:40].reshape(13,3)
Project: PythonPackages    Author: wanhanwan
def get_history_bar(field_names, start_date, end_date, **kwargs):
    field_info = pd.read_excel(argInfoWB,sheetname='????',engine='xlrd')
    if not isinstance(field_names,list):
        field_names = [field_names]
    # Look up each factor's Wind field name and query arguments, then fetch it
    _l = []
    w.start()
    for factor_name in field_names:
        field_name = field_info[field_info['FactorName']==factor_name]['FieldName'].iat[0]
        args = field_info[field_info['FactorName']==factor_name]['Args'].iat[0]

        params = _parse_args(args,**kwargs)
        all_days = data_api.tc.get_trade_days(start_date, end_date)
        all_ids = data_api.get_history_ashare(all_days).index.levels[1].unique()

        data = w.wsd(
            list(map(tradecode_to_windcode, all_ids)), field_name, start_date, end_date, params)
        _l.append(_bar_to_dataframe(data))
    data = pd.concat(_l,axis=1)
    w.close()
    return data
Project: spice-hate_speech_detection    Author: futurice
def read_annotated_files(dirname):

    messages = []
    labels = np.zeros(0)

    filenames = glob.glob(os.path.join(dirname, '*.xls*'))

    for filename in filenames:
        print('Reading %s' % filename, end='. ', flush=True)

        df = pd.read_excel(filename)
        print("Found %d new samples" % df[df.LABEL.notnull()].shape[0])
        labels = np.hstack((labels, np.array(df[df.LABEL.notnull()].LABEL.tolist(),
                           dtype=int)))
        messages += df[df.LABEL.notnull()].text.tolist()


    return messages, labels
Project: pecos    Author: sandialabs
def setUp(self):
        trans = {
            'Linear': ['A'],
            'Random': ['B'],
            'Wave': ['C','D']}

        system_name = 'Simple'
        file_name = join(simpleexampledir,'simple.xlsx')

        df = pd.read_excel(file_name)
        self.pm = pecos.monitoring.PerformanceMonitoring()
        self.pm.add_dataframe(df, system_name)
        self.pm.add_translation_dictionary(trans, system_name)
        self.pm.check_timestamp(900)
        clock_time = self.pm.get_clock_time()
        time_filter = (clock_time > 3*3600) & (clock_time < 21*3600)
        self.pm.add_time_filter(time_filter)
Project: qtim_ROP    Author: QTIM-Lab
def unet_cross_val(data_dir, out_dir, mapping, splits, unet_conf):

    # Load spreadsheet
    with pd.ExcelFile(mapping) as xls:
        df = pd.read_excel(xls, 'Sheet1').set_index('index')
        df['class'] = df['class'].map({'preplus': 'pre-plus', 'normal': 'normal', 'plus': 'plus'})

    img_dir = join(data_dir, 'images')
    seg_dir = join(data_dir, 'manual_segmentations')
    mask_dir = join(data_dir, 'masks')

    # Check whether all images exist
    check_images_exist(df, img_dir, seg_dir, mask_dir)

    # Now split into training and testing
    CVFile = sio.loadmat(splits)

    # # Combining Pre-Plus and Plus
    # trainPlusIndex = CVFile['trainPlusIndex'][0]
    # testPlusIndex = CVFile['testPlusIndex'][0]
    #
    # plus_dir = make_sub_dir(out_dir, 'trainTestPlus')
    # print "Generating splits for combined No and Pre-Plus"
    # generate_splits(trainPlusIndex, testPlusIndex, df, img_dir, mask_dir, seg_dir, plus_dir)

    # Combining No and Pre-Plus
    trainPrePIndex = CVFile['trainPrePIndex'][0]
    testPrePIndex = CVFile['testPrePIndex'][0]

    prep_dir = make_sub_dir(out_dir, 'trainTestPreP')
    print "Generating splits for combined Pre-Plus and Plus"
    generate_splits(trainPrePIndex, testPrePIndex, df, img_dir, mask_dir, seg_dir, prep_dir)

    # Train models
    train_and_test(prep_dir, unet_conf, processes=1)
    # train_and_test(plus_dir, unet_conf, processes=2)
Project: qtim_ROP    Author: QTIM-Lab
def pca_augmentation(data_h5, excel_path):

    f = h5py.File(data_h5, 'r')

    df1 = pd.read_excel(excel_path, sheetname=0, header=1)
    df1 = df1.rename(columns=lambda x: x.strip()).set_index('Image')  # strip whitespace

    df2 = pd.read_excel(excel_path, sheetname=1, header=1)
    df2 = df2.rename(columns=lambda x: x.strip()).set_index('Image')  # strip whitespace
    df = pd.concat([df1, df2])

    X = preprocess_data(f)
    X_mean = np.mean(X, axis=0)
    X = X - X_mean

    # PCA
    pca = PCA().fit(X)
Project: qtpandas    Author: draperjames
def setDataFrameFromFile(self, filepath, **kwargs):
        """
        Sets the model's dataFrame by reading a file.
        Accepted file formats:
            - .xlsx (sheet1 is read unless specified in kwargs)
            - .csv (comma separated unless specified in kwargs)
            - .txt (any separator)

        :param filepath: (str)
            The path to the file to be read.
        :param kwargs:
            pandas.read_csv(**kwargs) or pandas.read_excel(**kwargs)
        :return: None
        """
        df = superReadFile(filepath, **kwargs)
        self.setDataFrame(df, filePath=filepath)
Project: smalldata    Author: dannguyen
def get_dataset_rows(meta):
    """

    if the filename is an xlsx file, returns a list of lists of dicts
    else: returns a list of dicts

    relies on get_dataset_local_filename() and meta['slug']

    right now wishing I had made this all OOP...
    """
    srcpath = meta['local_filepath']
    if meta['filetype'] == 'workbook':
        import pandas as pd
        sheetindices = list(range(len(meta['gsheet']['sheets'])))
        dfs = pd.read_excel(str(srcpath), sheetname=sheetindices)
        return [x.to_dict('records') for x in dfs.values()]
    else:
        # assume CSV
        return list(DictReader(srcpath.open('r')))
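The call above relies on passing a list of sheet indices, which makes read_excel return a dict of DataFrames keyed by those indices; a minimal sketch with a placeholder file, shown with the modern sheet_name keyword:

import pandas as pd

dfs = pd.read_excel('workbook.xlsx', sheet_name=[0, 1])  # {0: df0, 1: df1}
records = [df.to_dict('records') for df in dfs.values()]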
Project: Travelling-salesman-problem-using-Ant-colony-optimization    Author: Ahsaan-566
def initGraph(self, name):
        #file reading using pandas
        df = pd.read_excel(name, sheetname='Sheet1')
        dim = df['Dimension']
        x = df['X']
        y = df['Y']
        #print x,y
        self.numCities = len(dim)
        #set and fill the adjMatrix
        self.adjMatrix = [[1.0 for i in range(self.numCities)] for j in range(self.numCities) ]
        for i in range(self.numCities):
            for j in range(self.numCities):
        #fill the adjmatrix with city coordinates and calculate euclidean distances
                self.adjMatrix[i][j] = self.calEdge(x[i], x[j], y[i], y[j])

    #calculating edge weights using euclidean distances
Project: Book_DeepLearning_Practice    Author: wac81
def extract_lstm_test(dictionary, file_name, tag_num=CLASS_NUM, col_tag=0, col_content=1, length=MAX_LENGTH):
    contents = pd.read_excel(file_name, header=None)
    cw = lambda x: [word.encode('utf-8') for word in jieba.cut(x) if word not in stopwords and word.strip() != '' and word.encode('utf-8') in dictionary.index]
    contents['words'] = contents[col_content].apply(cw)
    get_sent = lambda x: list(dictionary['id'][x])
    contents['sent'] = contents['words'].apply(get_sent)  # map each word to its dictionary id
    print("Pad sequences (samples x time)")
    contents['sent'] = list(sequence.pad_sequences(contents['sent'], maxlen=length))
    x = np.array(list(contents['sent']))  # ???
    y = np.zeros((len(list(contents[col_tag])), tag_num))
    for i in range(len(list(contents[col_tag]))):
        for j in range(tag_num):
            if contents[col_tag][i] == j:
                y[i][j] = 1
    return x, y


# dictionary model ????
Project: Book_DeepLearning_Practice    Author: wac81
def extract_dictionary_feature(file_name, col_tag=0, col_content=1):
    # load the sentiment vocabularies (adverbs, inversion words, neg/pos dictionaries)
    adv = codecs.open('./data/vocabulary/adv.txt', 'rb', encoding='utf-8').read().split('\n')
    inverse = codecs.open('./data/vocabulary/inverse.txt', 'rb', encoding='utf-8').read().split('\n')
    negdict = codecs.open('./data/vocabulary/negdict.txt', 'rb', encoding='utf-8').read().split('\n')
    posdict = codecs.open('./data/vocabulary/posdict.txt', 'rb', encoding='utf-8').read().split('\n')

    contents = pd.read_excel(file_name, header=None)

    print 'cut words...'
    cw = lambda x: [pair for pair in psg.lcut(x) if pair.word not in stopwords]
    contents['pairs'] = contents[col_content].apply(cw)
    matrix = reviews2matrix(list(contents['pairs']), posdict, negdict, inverse, adv)
    x = matrix2vec(matrix)
    y = list(contents[col_tag])
    return x, y
Project: spider    Author: devzhan
def read():
    df = pd.read_excel("jjs1.xlsx")
    data = list(df.ix[:, 7])
    prices=[]
    for item in data:
        if not str(item)=="nan":
            prices.append(int(item))
    print(prices[0:100])

    matplotlib.style.use('ggplot')  # use the ggplot style

    ts = pd.Series(prices[0:100], index=pd.date_range('1/1/2000', periods=100))
    plt.figure()
    df.plot.hist(alpha=0.5)
    plt.legend()
    plt.show()
    pass
Project: patriots    Author: wdxtub
def loadfile():
    neg=pd.read_excel(datadir + '/neg.xls',header=None,index=None)
    pos=pd.read_excel(datadir + '/pos.xls',header=None,index=None)

    cw = lambda x: list(jieba.cut(x))
    pos['words'] = pos[0].apply(cw)
    neg['words'] = neg[0].apply(cw)

    #print pos['words']
    #use 1 for positive sentiment, 0 for negative
    y = np.concatenate((np.ones(len(pos)), np.zeros(len(neg))))

    x_train, x_test, y_train, y_test = train_test_split(np.concatenate((pos['words'], neg['words'])), y, test_size=0.2)

    np.save(modeldir + '/y_train.npy',y_train)
    np.save(modeldir + '/y_test.npy',y_test)
    return x_train,x_test



#??????????????
Project: stock    Author: datablood
def get_hs300s():
    """
    ????300??????????
    Return
    --------
    DataFrame
        code :????
        name :????
        date :??
        weight:??
    """
    from tushare.stock.fundamental import get_stock_basics
    try:
        wt = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP%(ct.P_TYPE['ftp'], ct.DOMAINS['idxip'],
                                                  ct.PAGES['hs300w']), parse_cols=[0, 3, 6])
        wt.columns = ct.FOR_CLASSIFY_W_COLS
        wt['code'] = wt['code'].map(lambda x :str(x).zfill(6))
        df = get_stock_basics()[['name']]
        df = df.reset_index()
        return pd.merge(df,wt)
    except Exception as er:
        print(str(er))
Project: stock    Author: datablood
def get_sz50s():
    """
    ????50???
    Return
    --------
    DataFrame
        code :????
        name :????
    """
    try:
        df = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP%(ct.P_TYPE['ftp'], ct.DOMAINS['idxip'],
                                                  ct.PAGES['sz50b']), parse_cols=[0,1])
        df.columns = ct.FOR_CLASSIFY_B_COLS
        df['code'] = df['code'].map(lambda x :str(x).zfill(6))
        return df
    except Exception as er:
        print(str(er))
Project: stock    Author: datablood
def get_zz500s():
    """
    ????500???
    Return
    --------
    DataFrame
        code :????
        name :????
    """
    from tushare.stock.fundamental import get_stock_basics
    try:
#         df = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP%(ct.P_TYPE['ftp'], ct.DOMAINS['idxip'],
#                                                   ct.PAGES['zz500b']), parse_cols=[0,1])
#         df.columns = ct.FOR_CLASSIFY_B_COLS
#         df['code'] = df['code'].map(lambda x :str(x).zfill(6))
        wt = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP%(ct.P_TYPE['ftp'], ct.DOMAINS['idxip'],
                                                   ct.PAGES['zz500wt']), parse_cols=[0, 3, 6])
        wt.columns = ct.FOR_CLASSIFY_W_COLS
        wt['code'] = wt['code'].map(lambda x :str(x).zfill(6))
        df = get_stock_basics()[['name']]
        df = df.reset_index()
        return pd.merge(df,wt)
    except Exception as er:
        print(str(er))
Project: array_storage_benchmark    Author: mverleg
def load(self, pth):
    # Excel files are binary; open in 'rb' so pandas can parse the stream
    with open(pth, 'rb') as fh:
            data = read_excel(fh, sheetname='data')
            print(data.as_matrix(columns=data.columns[1:]))
            return data.as_matrix()



#todo: pandas formats - http://pandas.pydata.org/pandas-docs/stable/io.html
# hdf5
# sql

#todo: hdf5 - http://stackoverflow.com/a/9619713/723090

#todo: bloscpack http://stackoverflow.com/a/22225337/723090

#todo: pytables