AKShare量化金融数据获取从入门到精通

1. AKShare简介与安装

1.1 什么是AKShare

AKShare是一个基于Python的开源财经数据接口库,提供超过2000个金融数据接口,覆盖股票、基金、期货、外汇、宏观经济等全品类数据。它采用统一的接口设计,让数据获取变得简单高效。

1.2 安装与环境配置

# Basic installation
pip install akshare

# Recommended installation (intended to pull in additional dependencies)
# NOTE(review): confirm that the akshare package defines an `all` extra; in
# shells such as zsh the brackets need quoting: pip install "akshare[all]"
pip install akshare[all]

# Upgrade to the latest release
pip install --upgrade akshare

1.3 快速入门示例

import akshare as ak

# Fetch the real-time quote of the SSE Composite Index ("上证指数")
stock_zh_index_spot_df = ak.stock_zh_index_spot(symbol="上证指数")
print(stock_zh_index_spot_df)

# Fetch daily historical K-line data for Kweichow Moutai (600519)
stock_zh_a_hist_df = ak.stock_zh_a_hist(symbol="600519", period="daily", start_date="20200101", end_date="20241231", adjust="")
print(stock_zh_a_hist_df)

2. 基础数据获取

2.1 股票市场数据

2.1.1 实时行情数据

# Fetch real-time quotes for A-share stocks and show the first rows
a_share_spot = ak.stock_zh_a_spot()
print(a_share_spot.head())

# Fetch the real-time quote of a single stock
# NOTE(review): confirm that stock_zh_a_spot_em accepts a `symbol` argument
# in the installed AKShare version
single_stock = ak.stock_zh_a_spot_em(symbol="600519")
print(single_stock)

2.1.2 历史K线数据

# Fetch historical daily bars
hist_data = ak.stock_zh_a_hist(
    symbol="600519",
    period="daily",
    start_date="20200101",
    end_date="20241231",
    adjust="qfq"  # "qfq": forward-adjusted, "hfq": backward-adjusted; other calls in this file pass "" (not None) for unadjusted prices
)
print(hist_data)

2.1.3 分时数据

# Fetch minute-level bars (period="1")
# NOTE(review): confirm whether this API expects an exchange-prefixed symbol
minute_data = ak.stock_zh_a_minute(symbol="600519", period="1", adjust="")
print(minute_data)

2.2 指数与板块数据

2.2.1 指数行情

# Fetch the real-time quote of an index (000300)
index_spot = ak.stock_zh_index_spot(symbol="000300")
print(index_spot)

# Fetch daily historical data for the same index
# NOTE(review): confirm that stock_zh_index_hist exists under this name in
# the installed AKShare version
index_hist = ak.stock_zh_index_hist(symbol="000300", period="daily")
print(index_hist)

2.2.2 指数成分股

# Fetch the constituents of the CSI 300 index (000300)
hs300_cons = ak.index_stock_cons(symbol="000300")
print(hs300_cons)

# Fetch the constituents of the CSI 500 index (000905)
zz500_cons = ak.index_stock_cons(symbol="000905")
print(zz500_cons)

3. 股票数据分析

3.1 基本面数据

3.1.1 财务报表

# Fetch a listed company's financial statement ("资产负债表" = balance sheet)
# NOTE(review): confirm the parameter names against the installed AKShare version
finance_report = ak.stock_financial_report_sina(symbol="600519", report_type="资产负债表")
print(finance_report)

3.1.2 股本结构

# Fetch share-capital structure data
# NOTE(review): confirm that stock_zh_a_gdgs exists in the installed AKShare version
share_structure = ak.stock_zh_a_gdgs(symbol="600519")
print(share_structure)

3.2 技术指标分析

3.2.1 常用技术指标

# 计算MACD指标
def calculate_macd(df, fastperiod=12, slowperiod=26, signalperiod=9):
    """Add MACD indicator columns to ``df`` and return it.

    The frame is modified in place; the returned object is the same ``df``.
    Rows are assumed to be in chronological order.

    Args:
        df: DataFrame containing a ``'收盘'`` (close price) column.
        fastperiod: Span of the fast EMA.
        slowperiod: Span of the slow EMA.
        signalperiod: Span of the EMA applied to DIF to obtain DEA.

    Returns:
        ``df`` with the columns ``EMA_FAST``, ``EMA_SLOW``, ``DIF``, ``DEA``
        and ``MACD`` (``2 * (DIF - DEA)``) added.
    """
    close = df['收盘']
    # adjust=False selects the recursive form y[t] = (1-a)*y[t-1] + a*x[t],
    # which is the EMA that MACD is defined on. The pandas default
    # (adjust=True) is a renormalised weighted average that differs from it
    # on the early rows of the series.
    df['EMA_FAST'] = close.ewm(span=fastperiod, adjust=False).mean()
    df['EMA_SLOW'] = close.ewm(span=slowperiod, adjust=False).mean()
    df['DIF'] = df['EMA_FAST'] - df['EMA_SLOW']
    df['DEA'] = df['DIF'].ewm(span=signalperiod, adjust=False).mean()
    df['MACD'] = (df['DIF'] - df['DEA']) * 2
    return df

# Apply the technical indicator to daily history and show the latest rows
stock_data = ak.stock_zh_a_hist(symbol="600519", period="daily", start_date="20200101", end_date="20241231")
stock_data = calculate_macd(stock_data)
print(stock_data.tail())

3.2.2 均线系统

# 计算移动平均线
def add_moving_averages(df, windows=(5, 10, 20, 60)):
    """Add simple moving-average columns of the close price to ``df``.

    The frame is modified in place; the returned object is the same ``df``.

    Args:
        df: DataFrame containing a ``'收盘'`` (close price) column.
        windows: Iterable of window lengths. One column named ``MA<window>``
            is added per entry. The default is an immutable tuple so that it
            cannot be altered between calls.

    Returns:
        ``df`` with the moving-average columns added. The first
        ``window - 1`` rows of each column are NaN.
    """
    close = df['收盘']
    for window in windows:
        df[f'MA{window}'] = close.rolling(window=window).mean()
    return df

# Add the moving-average columns to stock_data and show the latest rows
stock_data = add_moving_averages(stock_data)
print(stock_data.tail())

4. 基金与期货数据

4.1 基金数据获取

4.1.1 基金基本信息

# Fetch basic information for a fund ("基本信息" = basic information)
# NOTE(review): confirm the function name and parameters against the
# installed AKShare version
fund_info = ak.fund_em_open_fund_info(fund="000001", indicator="基本信息")
print(fund_info)

4.1.2 基金净值数据

# Fetch a fund's historical net asset value
# NOTE(review): confirm the function name and parameters against the
# installed AKShare version
fund_hist = ak.fund_em_open_fund_daily(fund="000001", start_date="20200101", end_date="20241231")
print(fund_hist)

4.1.3 基金持仓

# Fetch a fund's portfolio holdings
# NOTE(review): confirm the function name and parameters against the
# installed AKShare version
fund_portfolio = ak.fund_em_portfolio_fund(fund="000001")
print(fund_portfolio)

4.2 期货数据

4.2.1 期货实时行情

# Fetch real-time futures quotes ("郑州商品交易所" = Zhengzhou Commodity Exchange)
# NOTE(review): confirm the function name and parameters against the
# installed AKShare version
futures_spot = ak.futures_hq_subscribe_exchange_symbol(symbol="CF", market="郑州商品交易所")
print(futures_spot)

4.2.2 期货历史数据

# Fetch historical daily futures bars for symbol "CF0"
futures_hist = ak.futures_zh_daily(symbol="CF0")
print(futures_hist)

4.2.3 期权数据

# Fetch option data
option_data = ak.option_sse_codes()
print(option_data)

5. 宏观经济数据

5.1 国内经济指标

5.1.1 GDP数据

# Fetch China's GDP data
gdp_data = ak.macro_china_gdp()
print(gdp_data)

5.1.2 CPI与PPI

# Fetch CPI data
cpi_data = ak.macro_china_cpi()
print(cpi_data)

# Fetch PPI data
ppi_data = ak.macro_china_ppi()
print(ppi_data)

5.1.3 货币供应量

# Fetch money-supply data
money_supply = ak.macro_china_money_supply()
print(money_supply)

5.2 国际经济数据

5.2.1 全球主要指数

# Fetch a major global stock index ("道琼斯工业平均指数" = Dow Jones Industrial Average)
# NOTE(review): confirm the function name against the installed AKShare version
global_index = ak.index_global_hist(symbol="道琼斯工业平均指数")
print(global_index)

5.2.2 汇率数据

# Fetch quoted foreign-exchange rates
exchange_rate = ak.currency_boc_safe()
print(exchange_rate)

# Fetch real-time exchange rates
# NOTE(review): confirm the function name against the installed AKShare version
realtime_exchange = ak.currency_exchange_market()
print(realtime_exchange)

6. 高级功能与技巧

6.1 批量数据获取

6.1.1 批量获取股票数据

def batch_get_stock_data(stock_list, start_date, end_date):
    """Fetch forward-adjusted daily history for several stocks, one by one.

    A failure for one symbol is reported on stdout and does not stop the
    remaining downloads.

    Args:
        stock_list: Iterable of stock codes passed as ``symbol``.
        start_date: Start date string passed through to AKShare.
        end_date: End date string passed through to AKShare.

    Returns:
        Dict mapping each successfully fetched stock code to its DataFrame.
        Failed symbols are absent from the dict.
    """
    # Imported locally because this snippet calls time.sleep() but the file
    # only imports `time` in a later section. Without it the NameError was
    # swallowed by the handler below and reported as a download failure.
    import time

    all_data = {}
    for stock in stock_list:
        print(f"正在获取 {stock} 数据...")
        try:
            data = ak.stock_zh_a_hist(symbol=stock, period="daily",
                                      start_date=start_date, end_date=end_date, adjust="qfq")
        except Exception as e:
            # Best effort: report the failure and move on to the next symbol.
            print(f"获取 {stock} 数据失败: {e}")
        else:
            all_data[stock] = data
            # Pause after each successful request to avoid hitting the
            # data source too frequently.
            time.sleep(1)
    return all_data

# Usage example
stock_list = ["600519", "000001", "300750"]
batch_data = batch_get_stock_data(stock_list, "20200101", "20241231")

6.1.2 并行数据获取

import concurrent.futures
import time

def get_single_stock_data(symbol, start_date, end_date):
    """Fetch forward-adjusted daily history for one stock.

    Never raises: any failure is reported on stdout and signalled to the
    caller by a ``None`` payload.

    Args:
        symbol: Stock code.
        start_date: Start date string passed through to AKShare.
        end_date: End date string passed through to AKShare.

    Returns:
        ``(symbol, DataFrame)`` on success, ``(symbol, None)`` on failure.
    """
    try:
        data = ak.stock_zh_a_hist(symbol=symbol, period="daily",
                                  start_date=start_date, end_date=end_date, adjust="qfq")
    except Exception as e:
        # Report the reason instead of discarding it, so a failed symbol can
        # be diagnosed; the caller still receives the (symbol, None) marker.
        print(f"获取 {symbol} 数据失败: {e}")
        return symbol, None
    return symbol, data

def parallel_get_stock_data(stock_list, start_date, end_date, max_workers=3):
    """Fetch daily history for several stocks using a thread pool.

    Args:
        stock_list: Sequence of stock codes.
        start_date: Start date string passed through to the fetch helper.
        end_date: End date string passed through to the fetch helper.
        max_workers: Maximum number of concurrent worker threads.

    Returns:
        Dict mapping each successfully fetched stock code to its DataFrame.
        Symbols whose fetch returned ``None`` are omitted.
    """
    results = {}
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        pending = []
        for position, stock in enumerate(stock_list):
            if position:
                # Throttle at submission time. Sleeping while collecting
                # results (as before) cannot limit the request rate because
                # every request has already been queued by then.
                time.sleep(0.5)
            pending.append(
                executor.submit(get_single_stock_data, stock, start_date, end_date)
            )

        for future in concurrent.futures.as_completed(pending):
            stock, data = future.result()
            if data is not None:
                results[stock] = data

    return results

6.2 数据清洗与处理

6.2.1 数据质量检查

def check_data_quality(df):
    """Summarise basic data-quality figures for a DataFrame.

    Args:
        df: The DataFrame to inspect.

    Returns:
        Dict with the keys ``total_rows`` (row count), ``missing_values``
        (null count per column), ``duplicate_rows`` (number of rows flagged
        by ``df.duplicated()``) and ``date_range`` (min and max of the
        ``'日期'`` column, or ``None`` when that column is absent).
    """
    if '日期' in df.columns:
        dates = df['日期']
        date_range = (dates.min(), dates.max())
    else:
        date_range = None

    null_counts = df.isnull().sum()
    duplicate_flags = df.duplicated()

    return {
        'total_rows': len(df),
        'missing_values': null_counts.to_dict(),
        'duplicate_rows': duplicate_flags.sum(),
        'date_range': date_range,
    }

# Usage example
report = check_data_quality(stock_data)
print(report)

6.2.2 数据标准化

def standardize_column_names(df):
    """Return a copy of ``df`` with the Chinese OHLCV column names in English.

    Columns not listed in the mapping keep their names; the input frame is
    left untouched.
    """
    chinese_names = ('日期', '开盘', '最高', '最低', '收盘', '成交量', '成交额')
    english_names = ('date', 'open', 'high', 'low', 'close', 'volume', 'amount')
    return df.rename(columns=dict(zip(chinese_names, english_names)))

# Apply the column-name standardisation and show the resulting columns
standardized_data = standardize_column_names(stock_data)
print(standardized_data.columns)

6.3 防风控策略

6.3.1 会话优化

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

def create_robust_session():
    """Build a ``requests`` session with a browser User-Agent and retries.

    The retry policy allows 3 retries with a backoff factor of 1 and is
    triggered by the HTTP status codes 429, 500, 502, 503 and 504. The same
    adapter is mounted for both the https and http schemes.

    Returns:
        The configured ``requests.Session``.
    """
    retry_policy = Retry(
        total=3,
        backoff_factor=1,
        status_forcelist=[429, 500, 502, 503, 504],
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)

    session = requests.Session()
    session.headers["User-Agent"] = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
    )
    for scheme in ("https://", "http://"):
        session.mount(scheme, retrying_adapter)

    return session

# Inject the session into AKShare
# NOTE(review): nothing in this file shows AKShare reading a
# `_requests_session` attribute. Confirm that the library actually uses it;
# otherwise this assignment only sets an unused module attribute and has no
# effect on how requests are sent.
ak._requests_session = create_robust_session()

6.3.2 智能重试机制

import time
import random

def smart_retry(func, max_retries=3, base_delay=1):
    """Call ``func`` with retries, exponential backoff and random jitter.

    This is a plain helper, not a decorator: pass it a zero-argument callable
    (for example a lambda) and it returns that callable's result.

    Args:
        func: Zero-argument callable to invoke.
        max_retries: Total number of attempts, must be at least 1.
        base_delay: Base delay in seconds. The wait before retry ``k``
            (0-based) is ``base_delay * 2**k`` plus up to one second of
            random jitter.

    Returns:
        Whatever ``func()`` returns on the first successful attempt.

    Raises:
        ValueError: If ``max_retries`` is smaller than 1. Previously this
            silently returned ``None`` without ever calling ``func``.
        Exception: The exception from the final attempt, re-raised with its
            original traceback.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    for attempt in range(max_retries):
        try:
            return func()
        except Exception:
            if attempt == max_retries - 1:
                # Bare raise keeps the original traceback intact.
                raise

            delay = base_delay * (2 ** attempt) + random.uniform(0, 1)
            print(f"第 {attempt + 1} 次尝试失败,{delay:.2f}秒后重试...")
            time.sleep(delay)

# Usage example
# The dates are passed by keyword: in the keyword form used elsewhere in this
# file the second parameter of stock_zh_a_hist is `period`, so positional
# dates would be taken as `period` and `start_date`.
try:
    data = smart_retry(
        lambda: ak.stock_zh_a_hist(
            symbol="600519",
            period="daily",
            start_date="20200101",
            end_date="20241231",
        )
    )
except Exception as e:
    print(f"最终获取数据失败: {e}")

7. 实战案例分析

7.1 沪深300指数分析

7.1.1 数据获取与处理

# Fetch CSI 300 (000300) history and index it by the '日期' (date) column
# NOTE(review): confirm that stock_zh_index_hist exists under this name in
# the installed AKShare version
hs300_data = ak.stock_zh_index_hist(symbol="000300", period="daily")
hs300_data = hs300_data.set_index('日期')

# Compute per-row simple returns and the cumulative growth factor
hs300_data['return'] = hs300_data['收盘'].pct_change()
hs300_data['cum_return'] = (1 + hs300_data['return']).cumprod()

print(hs300_data.tail())

7.1.2 风险指标计算

import numpy as np

def calculate_risk_metrics(returns):
    """Compute summary risk/return statistics from periodic simple returns.

    Annualisation uses a factor of 252 periods per year.

    Args:
        returns: pandas Series of simple returns without NaN values.

    Returns:
        Dict with the keys:
        ``'年化收益率'``: mean return times 252.
        ``'年化波动率'``: standard deviation times sqrt(252).
        ``'夏普比率'``: annualised return divided by annualised volatility
        (no risk-free rate subtracted), or 0 when the volatility is 0.
        ``'最大回撤'``: largest peak-to-trough decline of the compounded
        wealth curve, as a positive fraction (0.25 means a 25% drawdown).
        ``'胜率'``: percentage (0-100) of periods with a positive return.
    """
    annual_return = returns.mean() * 252
    annual_volatility = returns.std() * np.sqrt(252)

    # Drawdown is measured against the running peak of compounded wealth.
    # The peak is floored at 1.0 (the starting capital) so a loss in the very
    # first period counts as a drawdown too. The previous max-minus-min
    # formula ignored ordering and was not a relative decline.
    wealth = (1 + returns).cumprod()
    running_peak = wealth.cummax().clip(lower=1.0)
    max_drawdown = (1 - wealth / running_peak).max()

    metrics = {
        '年化收益率': annual_return,
        '年化波动率': annual_volatility,
        '夏普比率': annual_return / annual_volatility if returns.std() != 0 else 0,
        '最大回撤': max_drawdown,
        '胜率': (returns > 0).mean() * 100
    }
    return metrics

# Compute the metrics on the return column with NaN rows dropped
risk_metrics = calculate_risk_metrics(hs300_data['return'].dropna())
print(risk_metrics)

7.2 多因子选股策略

7.2.1 因子数据获取

def get_factor_data(stock_list, start_date, end_date):
    """Build a table with the most recent factor values of each stock.

    For every stock the forward-adjusted daily history is downloaded and
    three factors are derived from the ``'收盘'`` (close) column: the 5-row
    and 20-row returns and the 20-row rolling standard deviation of daily
    returns. Stocks with fewer than 60 rows of history are skipped, and a
    failure for one stock is reported on stdout without aborting the rest.

    Args:
        stock_list: Iterable of stock codes.
        start_date: Start date string passed through to AKShare.
        end_date: End date string passed through to AKShare.

    Returns:
        DataFrame with one row per processed stock: the last row of its
        history plus ``return_5d``, ``return_20d``, ``volatility`` and
        ``symbol``. Empty when no stock could be processed.
    """
    # Imported locally so the snippet is self-contained: the file never
    # imports pandas as `pd`, which made the return statement fail.
    import time

    import pandas as pd

    factor_data = []

    for stock in stock_list:
        try:
            hist_data = ak.stock_zh_a_hist(symbol=stock, period="daily",
                                           start_date=start_date, end_date=end_date, adjust="qfq")

            # Too little history for the factor windows.
            if len(hist_data) < 60:
                continue

            close = hist_data['收盘']
            hist_data['return_5d'] = close.pct_change(5)
            hist_data['return_20d'] = close.pct_change(20)
            hist_data['volatility'] = close.pct_change().rolling(20).std()

            # Keep only the latest row, tagged with the stock code.
            latest_data = hist_data.iloc[-1].copy()
            latest_data['symbol'] = stock

            factor_data.append(latest_data)
            # Pause between requests to avoid hitting the data source too often.
            time.sleep(0.5)

        except Exception as e:
            # Best effort: report and continue with the next stock.
            print(f"处理 {stock} 时出错: {e}")

    return pd.DataFrame(factor_data)

# Fetch the factor data for four stocks
factor_df = get_factor_data(["600519", "000001", "300750", "601318"], "20200101", "20241231")
print(factor_df)

7.2.2 策略回测框架

class StrategyBacktester:
    """Minimal long-only backtester driven by per-date trading signals.

    Cash and holdings are tracked in memory. Orders are filled at the
    ``'收盘'`` (close) price of the signal date; commissions, slippage and
    lot sizes are not modelled.
    """

    def __init__(self, initial_capital=100000):
        """Start with ``initial_capital`` in cash and no holdings."""
        self.initial_capital = initial_capital
        # Despite its name this holds the uninvested cash balance only: it is
        # reduced by buys and increased by sells. Total equity is obtained
        # from calculate_portfolio_value().
        self.portfolio_value = initial_capital
        # symbol -> {'quantity': shares held, 'avg_cost': average buy price}
        self.positions = {}
        # One dict per executed order: date, symbol, type, price, quantity.
        self.trades = []
        # One dict per processed date: date, value (cash plus holdings).
        self.equity_curve = []

    def add_position(self, symbol, quantity, price):
        """Buy ``quantity`` shares of ``symbol`` at ``price``.

        Updates the position's quantity and average cost and deducts the
        purchase amount from the cash balance. No affordability check is
        performed here.
        """
        held = self.positions.get(symbol, {'quantity': 0, 'avg_cost': 0})

        total_quantity = held['quantity'] + quantity
        total_cost = held['quantity'] * held['avg_cost'] + quantity * price

        self.positions[symbol] = {
            'quantity': total_quantity,
            'avg_cost': total_cost / total_quantity if total_quantity > 0 else 0
        }

        self.portfolio_value -= quantity * price

    def calculate_portfolio_value(self, current_prices):
        """Return cash plus the market value of all priced positions.

        Args:
            current_prices: Dict mapping symbol to its current price.
                Positions whose symbol is missing contribute nothing.
        """
        value = self.portfolio_value

        for symbol, position in self.positions.items():
            if symbol in current_prices:
                value += position['quantity'] * current_prices[symbol]

        return value

    def _record_trade(self, date, symbol, trade_type, price, quantity):
        """Append one executed order to the trade log."""
        self.trades.append({
            'date': date,
            'symbol': symbol,
            'type': trade_type,
            'price': price,
            'quantity': quantity
        })

    def backtest(self, data_dict, signal_func):
        """Replay every date found in ``data_dict`` and trade on the signals.

        Args:
            data_dict: Dict mapping symbol to a DataFrame indexed by date
                with a ``'收盘'`` (close) column.
            signal_func: Callable ``signal_func(date, data_dict)`` returning a
                dict ``{symbol: signal}`` where 1 means buy and -1 means
                sell; other values are ignored.

        Returns:
            DataFrame indexed by ``date`` with a ``value`` column holding the
            total equity after each date. Empty when ``data_dict`` contains
            no dates.
        """
        # Imported locally: the file never imports pandas as `pd`.
        import pandas as pd

        # set().union(...) also works for an empty data_dict, unlike the
        # unbound set.union(*[]) call, which raises TypeError.
        dates = sorted(set().union(*(df.index for df in data_dict.values())))

        for date in dates:
            signals = signal_func(date, data_dict)

            for symbol, signal in signals.items():
                # Skip symbols that are unknown or have no bar on this date.
                if symbol not in data_dict or date not in data_dict[symbol].index:
                    continue

                price = data_dict[symbol].loc[date, '收盘']

                if signal == 1:
                    # Simple sizing rule: invest 10% of the available cash.
                    investment = self.portfolio_value * 0.1
                    quantity = int(investment / price)
                    if quantity > 0:
                        self.add_position(symbol, quantity, price)
                        self._record_trade(date, symbol, 'buy', price, quantity)

                elif signal == -1:
                    # Liquidate the whole position, if there is one.
                    if symbol in self.positions and self.positions[symbol]['quantity'] > 0:
                        quantity = self.positions[symbol]['quantity']
                        self.portfolio_value += quantity * price
                        self._record_trade(date, symbol, 'sell', price, quantity)
                        self.positions[symbol]['quantity'] = 0

            # Mark to market using only the symbols that traded on this date.
            current_prices = {
                symbol: df.loc[date, '收盘']
                for symbol, df in data_dict.items()
                if date in df.index
            }

            self.equity_curve.append({
                'date': date,
                'value': self.calculate_portfolio_value(current_prices)
            })

        # Explicit columns keep set_index working when the curve is empty.
        return pd.DataFrame(self.equity_curve, columns=['date', 'value']).set_index('date')

# 简单动量策略信号函数
def momentum_signal(date, data_dict):
    """Produce buy/sell signals from the momentum of the last 60 rows.

    For every symbol that has a row for ``date`` and at least 60 rows up to
    and including ``date``, the momentum is the ratio of the latest
    ``'收盘'`` (close) to the close at the start of that 60-row window,
    minus 1.

    Returns:
        Dict mapping symbol to 1 when momentum is above 0.1 and to -1 when
        it is below -0.1. Other symbols are left out.
    """
    signals = {}

    for symbol, df in data_dict.items():
        if date in df.index:
            history = df.loc[:date]
            if len(history) >= 60:
                window_close = history.tail(60)['收盘']
                momentum = (window_close.iloc[-1] / window_close.iloc[0]) - 1

                if momentum > 0.1:
                    signals[symbol] = 1
                elif momentum < -0.1:
                    signals[symbol] = -1

    return signals

# Run the backtest
# The dates are passed by keyword: in the keyword form used elsewhere in this
# file the second parameter of stock_zh_a_hist is `period`, so positional
# dates would be taken as `period` and `start_date`.
data_dict = {
    '600519': ak.stock_zh_a_hist(
        symbol="600519", period="daily", start_date="20200101", end_date="20241231"
    ).set_index('日期'),
    '000001': ak.stock_zh_a_hist(
        symbol="000001", period="daily", start_date="20200101", end_date="20241231"
    ).set_index('日期')
}

backtester = StrategyBacktester()
equity_curve = backtester.backtest(data_dict, momentum_signal)
print(equity_curve.tail())

7.3 量化投资组合构建

7.3.1 马科维茨均值方差模型

import numpy as np
from scipy.optimize import minimize

def portfolio_optimization(returns_df, risk_free_rate=0.03):
    """投资组合优化"""
    # 计算期望收益率和协方差矩阵
    mu = returns_df.mean() * 252  # 年化收益率
    Sigma = returns_df.cov() * 2

AKShare官网:https://www.akshare.xyz/
GitHub/GitCode仓库:https://gitcode.com/gh_mirrors/aks/akshare
完整接口文档:https://www.akshare.xyz/tutorial.html

posted @ 2026-05-01 15:42  嗨_放飞梦想  阅读(1610)  评论(2)    收藏  举报