AKShare量化金融数据获取从入门到精通
1. AKShare简介与安装
1.1 什么是AKShare
AKShare是一个基于Python的开源财经数据接口库，提供超过2000个金融数据接口，覆盖股票、基金、期货、外汇、宏观经济等全品类数据。它采用统一的接口设计，让数据获取变得简单高效。
1.2 安装与环境配置
# 基础安装
pip install akshare
# 推荐安装(包含更多依赖)
pip install akshare[all]
# 升级到最新版本
pip install --upgrade akshare
1.3 快速入门示例
import akshare as ak
# 获取上证指数实时行情
stock_zh_index_spot_df = ak.stock_zh_index_spot(symbol="上证指数")
print(stock_zh_index_spot_df)
# 获取贵州茅台历史K线数据
stock_zh_a_hist_df = ak.stock_zh_a_hist(symbol="600519", period="daily", start_date="20200101", end_date="20241231", adjust="")
print(stock_zh_a_hist_df)
2. 基础数据获取
2.1 股票市场数据
2.1.1 实时行情数据
# Fetch the real-time quote table for all A-shares
a_share_spot = ak.stock_zh_a_spot()
print(a_share_spot.head())
# Fetch the real-time quote of one stock.
# NOTE(review): stock_zh_a_spot_em() appears to take no `symbol` argument and to
# return the whole market, so the row is selected by its code column ("代码")
# instead of passing symbol= to the call — confirm against the AKShare docs.
all_spot_em = ak.stock_zh_a_spot_em()
single_stock = all_spot_em[all_spot_em["代码"] == "600519"]
print(single_stock)
2.1.2 历史K线数据
# Fetch daily historical bars
hist_data = ak.stock_zh_a_hist(
symbol="600519",
period="daily",
start_date="20200101",
end_date="20241231",
adjust="qfq" # qfq: forward-adjusted, hfq: backward-adjusted, "" (empty string, as in the quick-start call): unadjusted
)
print(hist_data)
2.1.3 分时数据
# 获取分钟级数据
minute_data = ak.stock_zh_a_minute(symbol="600519", period="1", adjust="")
print(minute_data)
2.2 指数与板块数据
2.2.1 指数行情
# 获取指数实时行情
index_spot = ak.stock_zh_index_spot(symbol="000300")
print(index_spot)
# 获取指数历史数据
index_hist = ak.stock_zh_index_hist(symbol="000300", period="daily")
print(index_hist)
2.2.2 指数成分股
# 获取沪深300成分股
hs300_cons = ak.index_stock_cons(symbol="000300")
print(hs300_cons)
# 获取中证500成分股
zz500_cons = ak.index_stock_cons(symbol="000905")
print(zz500_cons)
3. 股票数据分析
3.1 基本面数据
3.1.1 财务报表
# 获取上市公司财务报表
finance_report = ak.stock_financial_report_sina(symbol="600519", report_type="资产负债表")
print(finance_report)
3.1.2 股本结构
# 获取股本结构数据
share_structure = ak.stock_zh_a_gdgs(symbol="600519")
print(share_structure)
3.2 技术指标分析
3.2.1 常用技术指标
# Compute the MACD indicator
def calculate_macd(df, fastperiod=12, slowperiod=26, signalperiod=9):
    """Add MACD columns to ``df`` in place and return the same object.

    Args:
        df: DataFrame with a '收盘' (close price) column.
        fastperiod: Span of the fast EMA.
        slowperiod: Span of the slow EMA.
        signalperiod: Span of the EMA applied to DIF to obtain DEA.

    Returns:
        ``df`` itself, with the columns 'EMA_FAST', 'EMA_SLOW', 'DIF', 'DEA'
        and 'MACD' added (or overwritten).
    """
    close = df['收盘']
    # adjust=False selects the recursive form EMA_t = a*x_t + (1-a)*EMA_{t-1};
    # pandas' default (adjust=True) re-weights the whole history and yields
    # different values, most visibly near the start of the series.
    df['EMA_FAST'] = close.ewm(span=fastperiod, adjust=False).mean()
    df['EMA_SLOW'] = close.ewm(span=slowperiod, adjust=False).mean()
    df['DIF'] = df['EMA_FAST'] - df['EMA_SLOW']
    df['DEA'] = df['DIF'].ewm(span=signalperiod, adjust=False).mean()
    # Histogram is twice the DIF-DEA gap.
    df['MACD'] = (df['DIF'] - df['DEA']) * 2
    return df
# 应用技术指标
stock_data = ak.stock_zh_a_hist(symbol="600519", period="daily", start_date="20200101", end_date="20241231")
stock_data = calculate_macd(stock_data)
print(stock_data.tail())
3.2.2 均线系统
# Compute simple moving averages
def add_moving_averages(df, windows=(5, 10, 20, 60)):
    """Add one 'MA<window>' column per window to ``df`` in place and return it.

    Args:
        df: DataFrame with a '收盘' (close price) column.
        windows: Iterable of rolling-window lengths. The default is a tuple
            so that no mutable object is shared between calls.

    Returns:
        ``df`` itself. Each added column holds the rolling mean of '收盘';
        the first ``window - 1`` rows of a column are NaN.
    """
    close = df['收盘']
    for window in windows:
        df[f'MA{window}'] = close.rolling(window=window).mean()
    return df
stock_data = add_moving_averages(stock_data)
print(stock_data.tail())
4. 基金与期货数据
4.1 基金数据获取
4.1.1 基金基本信息
# 获取基金基本信息
fund_info = ak.fund_em_open_fund_info(fund="000001", indicator="基本信息")
print(fund_info)
4.1.2 基金净值数据
# 获取基金历史净值
fund_hist = ak.fund_em_open_fund_daily(fund="000001", start_date="20200101", end_date="20241231")
print(fund_hist)
4.1.3 基金持仓
# 获取基金持仓数据
fund_portfolio = ak.fund_em_portfolio_fund(fund="000001")
print(fund_portfolio)
4.2 期货数据
4.2.1 期货实时行情
# 获取期货实时行情
futures_spot = ak.futures_hq_subscribe_exchange_symbol(symbol="CF", market="郑州商品交易所")
print(futures_spot)
4.2.2 期货历史数据
# 获取期货历史日线数据
futures_hist = ak.futures_zh_daily(symbol="CF0")
print(futures_hist)
4.2.3 期权数据
# 获取期权数据
option_data = ak.option_sse_codes()
print(option_data)
5. 宏观经济数据
5.1 国内经济指标
5.1.1 GDP数据
# 获取中国GDP数据
gdp_data = ak.macro_china_gdp()
print(gdp_data)
5.1.2 CPI与PPI
# 获取CPI数据
cpi_data = ak.macro_china_cpi()
print(cpi_data)
# 获取PPI数据
ppi_data = ak.macro_china_ppi()
print(ppi_data)
5.1.3 货币供应量
# 获取货币供应量数据
money_supply = ak.macro_china_money_supply()
print(money_supply)
5.2 国际经济数据
5.2.1 全球主要指数
# 获取全球主要股指
global_index = ak.index_global_hist(symbol="道琼斯工业平均指数")
print(global_index)
5.2.2 汇率数据
# 获取外汇牌价
exchange_rate = ak.currency_boc_safe()
print(exchange_rate)
# 获取实时汇率
realtime_exchange = ak.currency_exchange_market()
print(realtime_exchange)
6. 高级功能与技巧
6.1 批量数据获取
6.1.1 批量获取股票数据
def batch_get_stock_data(stock_list, start_date, end_date):
    """Fetch forward-adjusted daily history for each stock code, one at a time.

    Args:
        stock_list: Iterable of stock codes passed as ``symbol``.
        start_date: Start date string forwarded to ``ak.stock_zh_a_hist``.
        end_date: End date string forwarded to ``ak.stock_zh_a_hist``.

    Returns:
        Dict mapping each successfully fetched code to its result. Codes whose
        fetch raised are reported on stdout and left out.
    """
    # Imported here because this snippet calls time.sleep() but the document
    # only imports `time` in a later snippet.
    import time

    all_data = {}
    for stock in stock_list:
        # Only the network call is guarded, so an unrelated error is not
        # misreported as a download failure.
        try:
            print(f"正在获取 {stock} 数据...")
            data = ak.stock_zh_a_hist(symbol=stock, period="daily",
                                      start_date=start_date, end_date=end_date, adjust="qfq")
        except Exception as e:
            print(f"获取 {stock} 数据失败: {e}")
            continue
        all_data[stock] = data
        # Pause between requests to avoid hitting the data source too often.
        time.sleep(1)
    return all_data
# 使用示例
stock_list = ["600519", "000001", "300750"]
batch_data = batch_get_stock_data(stock_list, "20200101", "20241231")
6.1.2 并行数据获取
import concurrent.futures
import time
def get_single_stock_data(symbol, start_date, end_date):
    """Fetch forward-adjusted daily history for one stock code.

    Returns:
        A ``(symbol, data)`` tuple. ``data`` is ``None`` when the fetch raised
        any ``Exception``; the error itself is discarded.
    """
    try:
        frame = ak.stock_zh_a_hist(
            symbol=symbol,
            period="daily",
            start_date=start_date,
            end_date=end_date,
            adjust="qfq",
        )
    except Exception:
        # Best-effort: the caller treats None as "no data for this symbol".
        frame = None
    return symbol, frame
def parallel_get_stock_data(stock_list, start_date, end_date, max_workers=3):
    """Fetch history for several stock codes using a thread pool.

    Args:
        stock_list: Iterable of stock codes.
        start_date: Start date string forwarded to ``get_single_stock_data``.
        end_date: End date string forwarded to ``get_single_stock_data``.
        max_workers: Size of the thread pool.

    Returns:
        Dict mapping each code to its data, for every fetch that did not
        return ``None``.
    """
    collected = {}
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
        # All tasks are submitted up front; results are consumed as they finish.
        pending = [
            pool.submit(get_single_stock_data, code, start_date, end_date)
            for code in stock_list
        ]
        for finished in concurrent.futures.as_completed(pending):
            code, frame = finished.result()
            if frame is not None:
                collected[code] = frame
            # Pause after handling each result (kept from the original, which
            # labels it as request-rate control).
            time.sleep(0.5)
    return collected
6.2 数据清洗与处理
6.2.1 数据质量检查
def check_data_quality(df):
    """Summarize basic quality facts about ``df``.

    Returns:
        Dict with the keys 'total_rows' (row count), 'missing_values'
        (per-column null counts), 'duplicate_rows' (count of rows flagged by
        ``df.duplicated()``) and 'date_range' (min and max of the '日期'
        column, or ``None`` when that column is absent).
    """
    if '日期' in df.columns:
        dates = df['日期']
        span = (dates.min(), dates.max())
    else:
        span = None

    null_counts = df.isnull().sum()
    repeated = df.duplicated().sum()
    return {
        'total_rows': len(df),
        'missing_values': null_counts.to_dict(),
        'duplicate_rows': repeated,
        'date_range': span,
    }
# 使用示例
report = check_data_quality(stock_data)
print(report)
6.2.2 数据标准化
def standardize_column_names(df):
    """Return a copy of ``df`` with the known Chinese column names renamed to English.

    Columns not listed in the mapping keep their names; ``df`` itself is not
    modified.
    """
    chinese = ('日期', '开盘', '最高', '最低', '收盘', '成交量', '成交额')
    english = ('date', 'open', 'high', 'low', 'close', 'volume', 'amount')
    return df.rename(columns=dict(zip(chinese, english)))
# 应用标准化
standardized_data = standardize_column_names(stock_data)
print(standardized_data.columns)
6.3 防风控策略
6.3.1 会话优化
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
def create_robust_session():
    """Build a ``requests.Session`` with a browser User-Agent and automatic retries.

    The retry policy allows 3 retries with a backoff factor of 1 for the
    status codes 429, 500, 502, 503 and 504, and is mounted for both HTTPS
    and HTTP.
    """
    session = requests.Session()
    session.headers.update({
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
    })

    retrying_adapter = HTTPAdapter(
        max_retries=Retry(
            total=3,
            backoff_factor=1,
            status_forcelist=[429, 500, 502, 503, 504],
        )
    )
    # One adapter instance serves both schemes.
    for scheme in ("https://", "http://"):
        session.mount(scheme, retrying_adapter)
    return session
# Inject into AKShare
# NOTE(review): nothing in this document shows AKShare reading a
# `_requests_session` attribute; if the library never consults it, this
# assignment has no effect on its requests — confirm against the AKShare source.
ak._requests_session = create_robust_session()
6.3.2 智能重试机制
import time
import random
def smart_retry(func, max_retries=3, base_delay=1):
    """Call ``func()`` and retry with exponential backoff plus jitter on failure.

    This is a plain helper, not a decorator: pass a zero-argument callable
    (for example a lambda) and the call's result is returned.

    Args:
        func: Zero-argument callable to invoke.
        max_retries: Total number of attempts; must be at least 1.
        base_delay: Base delay in seconds. The wait before retry ``k``
            (0-based) is ``base_delay * 2**k`` plus a random 0-1 s jitter.

    Returns:
        Whatever ``func()`` returns on the first successful attempt.

    Raises:
        ValueError: If ``max_retries`` is less than 1 (previously ``func``
            was never called and ``None`` was returned silently).
        Exception: The exception of the last attempt, re-raised unchanged.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")
    for attempt in range(max_retries):
        try:
            return func()
        except Exception:
            if attempt == max_retries - 1:
                # Out of attempts: propagate with the original traceback.
                raise
            delay = base_delay * (2 ** attempt) + random.uniform(0, 1)
            print(f"第 {attempt + 1} 次尝试失败,{delay:.2f}秒后重试...")
            time.sleep(delay)
# Usage example
# Keyword arguments are required here: the second positional parameter of
# stock_zh_a_hist is `period`, so passing the dates positionally would send
# "20200101" as the period.
try:
    data = smart_retry(lambda: ak.stock_zh_a_hist(
        symbol="600519", period="daily", start_date="20200101", end_date="20241231"))
except Exception as e:
    print(f"最终获取数据失败: {e}")
7. 实战案例分析
7.1 沪深300指数分析
7.1.1 数据获取与处理
# 获取沪深300历史数据
hs300_data = ak.stock_zh_index_hist(symbol="000300", period="daily")
hs300_data = hs300_data.set_index('日期')
# 计算收益率
hs300_data['return'] = hs300_data['收盘'].pct_change()
hs300_data['cum_return'] = (1 + hs300_data['return']).cumprod()
print(hs300_data.tail())
7.1.2 风险指标计算
import numpy as np
def calculate_risk_metrics(returns):
    """Compute annualized risk/return metrics from periodic simple returns.

    Args:
        returns: Sequence of per-period simple returns with NaNs already
            removed. Annualization uses a factor of 252.

    Returns:
        Dict with the keys '年化收益率' (mean * 252), '年化波动率'
        (std * sqrt(252)), '夏普比率' (their ratio, 0 when volatility is 0; no
        risk-free rate is subtracted), '最大回撤' (largest peak-to-trough
        decline of the compounded curve, as a positive fraction) and '胜率'
        (percentage of periods with a positive return).
    """
    annual_return = returns.mean() * 252
    annual_vol = returns.std() * np.sqrt(252)

    # Max drawdown is the worst decline relative to the running peak, not
    # global max minus global min of the curve (which ignores ordering and is
    # not a ratio). The peak is floored at 1.0, the starting value, so a loss
    # in the first period counts as well.
    equity = np.asarray((returns + 1).cumprod(), dtype=float)
    if equity.size:
        running_peak = np.maximum(np.maximum.accumulate(equity), 1.0)
        max_drawdown = float(np.max(1.0 - equity / running_peak))
    else:
        max_drawdown = float('nan')

    metrics = {
        '年化收益率': annual_return,
        '年化波动率': annual_vol,
        '夏普比率': annual_return / annual_vol if annual_vol != 0 else 0,
        '最大回撤': max_drawdown,
        '胜率': (returns > 0).sum() / len(returns) * 100
    }
    return metrics
risk_metrics = calculate_risk_metrics(hs300_data['return'].dropna())
print(risk_metrics)
7.2 多因子选股策略
7.2.1 因子数据获取
def get_factor_data(stock_list, start_date, end_date):
    """Build a table holding the latest factor values of each stock.

    For every code, forward-adjusted daily history is fetched and three
    factors are derived from the '收盘' column: 5-period return, 20-period
    return and the 20-period rolling standard deviation of 1-period returns.

    Args:
        stock_list: Iterable of stock codes.
        start_date: Start date string forwarded to ``ak.stock_zh_a_hist``.
        end_date: End date string forwarded to ``ak.stock_zh_a_hist``.

    Returns:
        DataFrame with one row per processed stock (the last row of its
        history plus a 'symbol' field). Stocks with fewer than 60 rows are
        skipped; stocks whose processing raised are reported on stdout and
        skipped.
    """
    # Imported locally: this snippet relies on both modules, but the document
    # never imports pandas and only imports `time` in other snippets.
    import time

    import pandas as pd

    factor_data = []
    for stock in stock_list:
        try:
            # Fetch history
            hist_data = ak.stock_zh_a_hist(symbol=stock, period="daily",
                                           start_date=start_date, end_date=end_date, adjust="qfq")
            if len(hist_data) < 60:
                continue
            # Compute factors
            close = hist_data['收盘']
            hist_data['return_5d'] = close.pct_change(5)
            hist_data['return_20d'] = close.pct_change(20)
            hist_data['volatility'] = close.pct_change().rolling(20).std()
            # Keep only the most recent row
            latest_data = hist_data.iloc[-1].copy()
            latest_data['symbol'] = stock
            factor_data.append(latest_data)
            time.sleep(0.5)
        except Exception as e:
            print(f"处理 {stock} 时出错: {e}")
    return pd.DataFrame(factor_data)
# 获取因子数据
factor_df = get_factor_data(["600519", "000001", "300750", "601318"], "20200101", "20241231")
print(factor_df)
7.2.2 策略回测框架
class StrategyBacktester:
    """Minimal long-only backtester driven by a per-date signal function.

    Buy signals invest 10% of the available cash in whole shares at the
    day's close; sell signals liquidate the whole position at the close.
    """

    def __init__(self, initial_capital=100000):
        self.initial_capital = initial_capital
        # Despite its name, this tracks uninvested cash: buys subtract from
        # it and sells add to it.
        self.portfolio_value = initial_capital
        # symbol -> {'quantity': shares held, 'avg_cost': average cost per share}
        self.positions = {}
        # One dict per executed trade.
        self.trades = []
        # One {'date', 'value'} dict per processed date.
        self.equity_curve = []

    def add_position(self, symbol, quantity, price):
        """Add ``quantity`` shares bought at ``price`` and deduct the cost from cash."""
        held = self.positions.setdefault(symbol, {'quantity': 0, 'avg_cost': 0})
        total_quantity = held['quantity'] + quantity
        total_cost = held['quantity'] * held['avg_cost'] + quantity * price
        self.positions[symbol] = {
            'quantity': total_quantity,
            'avg_cost': total_cost / total_quantity if total_quantity > 0 else 0
        }
        self.portfolio_value -= quantity * price

    def calculate_portfolio_value(self, current_prices):
        """Return cash plus the market value of positions that have a price.

        Positions whose symbol is missing from ``current_prices`` contribute
        nothing.
        """
        value = self.portfolio_value
        for symbol, position in self.positions.items():
            if symbol in current_prices:
                value += position['quantity'] * current_prices[symbol]
        return value

    def backtest(self, data_dict, signal_func):
        """Run the backtest over the union of all dates in ``data_dict``.

        Args:
            data_dict: Mapping symbol -> DataFrame indexed by date with a
                '收盘' column.
            signal_func: Callable ``(date, data_dict) -> {symbol: signal}``
                where 1 means buy and -1 means sell; other values are ignored.

        Returns:
            DataFrame indexed by 'date' with a 'value' column (total portfolio
            value per date). Empty when there are no dates to process.
        """
        # Imported locally because the document never imports pandas.
        import pandas as pd

        # set().union(...) also works for an empty data_dict, unlike
        # set.union(*[]) which raises TypeError.
        dates = sorted(set().union(*(df.index for df in data_dict.values())))
        for date in dates:
            # Generate trading signals
            signals = signal_func(date, data_dict)
            # Execute trades
            for symbol, signal in signals.items():
                if symbol not in data_dict or date not in data_dict[symbol].index:
                    continue
                price = data_dict[symbol].loc[date, '收盘']
                if signal == 1:  # buy signal
                    # Simple rule: invest 10% of the available cash.
                    investment = self.portfolio_value * 0.1
                    quantity = int(investment / price)
                    if quantity > 0:
                        self.add_position(symbol, quantity, price)
                        self.trades.append({
                            'date': date,
                            'symbol': symbol,
                            'type': 'buy',
                            'price': price,
                            'quantity': quantity
                        })
                elif signal == -1:  # sell signal
                    if symbol in self.positions and self.positions[symbol]['quantity'] > 0:
                        quantity = self.positions[symbol]['quantity']
                        self.portfolio_value += quantity * price
                        self.trades.append({
                            'date': date,
                            'symbol': symbol,
                            'type': 'sell',
                            'price': price,
                            'quantity': quantity
                        })
                        self.positions[symbol]['quantity'] = 0
            # Record the equity curve using only symbols quoted on this date.
            current_prices = {
                symbol: df.loc[date, '收盘']
                for symbol, df in data_dict.items()
                if date in df.index
            }
            self.equity_curve.append({
                'date': date,
                'value': self.calculate_portfolio_value(current_prices)
            })
        if not self.equity_curve:
            # set_index('date') would raise KeyError on a column-less frame.
            return pd.DataFrame(columns=['value']).rename_axis('date')
        return pd.DataFrame(self.equity_curve).set_index('date')
# Simple momentum strategy signal function
def momentum_signal(date, data_dict):
    """Return buy/sell signals based on the change over the last 60 rows up to ``date``.

    For each symbol whose frame contains ``date`` and has at least 60 rows up
    to and including it, momentum is last close / first close of the 60-row
    window, minus 1. Above 0.1 yields 1 (buy), below -0.1 yields -1 (sell);
    anything else produces no entry.
    """
    signals = {}
    for symbol, df in data_dict.items():
        if date not in df.index:
            continue
        history = df.loc[:date]
        if len(history) < 60:
            continue
        # 60-row momentum factor
        closes = history['收盘'].tail(60)
        momentum = (closes.iloc[-1] / closes.iloc[0]) - 1
        if momentum > 0.1:  # positive momentum
            signals[symbol] = 1
        elif momentum < -0.1:  # negative momentum
            signals[symbol] = -1
    return signals
# Run the backtest
# Keyword arguments are required: the second positional parameter of
# stock_zh_a_hist is `period`, so passing the dates positionally would send
# "20200101" as the period.
data_dict = {
    '600519': ak.stock_zh_a_hist(symbol="600519", period="daily",
                                 start_date="20200101", end_date="20241231").set_index('日期'),
    '000001': ak.stock_zh_a_hist(symbol="000001", period="daily",
                                 start_date="20200101", end_date="20241231").set_index('日期')
}
backtester = StrategyBacktester()
equity_curve = backtester.backtest(data_dict, momentum_signal)
print(equity_curve.tail())
7.3 量化投资组合构建
7.3.1 马科维茨均值方差模型
import numpy as np
from scipy.optimize import minimize
def portfolio_optimization(returns_df, risk_free_rate=0.03):
"""投资组合优化"""
# 计算期望收益率和协方差矩阵
mu = returns_df.mean() * 252 # 年化收益率
Sigma = returns_df.cov() * 2
AKShare官网:https://www.akshare.xyz/
GitHub/GitCode仓库:https://gitcode.com/gh_mirrors/aks/akshare
完整接口文档:https://www.akshare.xyz/tutorial.html

浙公网安备 33010602011771号