简介:本文详细介绍如何通过Python调用天眼查API实现企业信息自动化查询,涵盖API申请、请求封装、数据解析及异常处理全流程,提供可直接复用的代码示例和最佳实践建议。
天眼查作为国内领先的企业信息查询平台,其API接口为开发者提供了结构化的企业数据获取能力。通过API可获取包括但不限于企业基础信息(名称、法人、注册资本)、工商变更记录、司法风险信息、经营状况等核心数据。
天眼查提供两类主要API接口:
所有API调用需通过AppKey+AppSecret的双重认证:
import hashlib
import time


def generate_sign(app_secret, params):
    """Build the request signature for an API call.

    The parameters are sorted by key, rendered as ``key=value`` pairs joined
    with ``&``, the secret is appended after one more ``&``, and the MD5 hex
    digest of the UTF-8 encoded result is returned in upper case.

    Args:
        app_secret: Secret string appended to the parameter string.
        params: Mapping of request parameter names to values; values are
            interpolated with f-string formatting.

    Returns:
        The upper-case hexadecimal MD5 digest of the string to sign.
    """
    sorted_params = sorted(params.items(), key=lambda item: item[0])
    param_str = "&".join(f"{key}={value}" for key, value in sorted_params)
    sign_str = f"{param_str}&{app_secret}"
    return hashlib.md5(sign_str.encode("utf-8")).hexdigest().upper()
pip install requests pandas jsonpath-rw
- requests:处理HTTP请求
- pandas:数据结构化处理
- jsonpath-rw:复杂JSON解析
import json
from datetime import datetime

import requests


class TianYanChaAPI:
    """Small HTTP client for the API described in this article.

    Every call is signed with ``generate_sign`` and carries the app key and a
    millisecond timestamp both as query parameters and as request headers.
    """

    def __init__(self, app_key, app_secret, timeout=10):
        """Store the credentials and connection settings.

        Args:
            app_key: Application key sent with every request.
            app_secret: Secret used to sign request parameters.
            timeout: Seconds to wait for the server before giving up.
        """
        self.app_key = app_key
        self.app_secret = app_secret
        self.base_url = "https://open.api.tianyancha.com"
        self.timeout = timeout

    def _get_headers(self, timestamp=None):
        """Return the request headers.

        Args:
            timestamp: Millisecond timestamp string to send. When omitted, the
                current local time is used.
        """
        if timestamp is None:
            timestamp = str(int(datetime.now().timestamp() * 1000))
        return {
            "X-TYC-APP-KEY": self.app_key,
            "X-TYC-TIMESTAMP": timestamp,
            "Content-Type": "application/json",
        }

    def call_api(self, method, path, params=None):
        """Send a signed request and return the decoded ``data`` payload.

        Args:
            method: HTTP method name, e.g. ``"GET"``.
            path: Path appended to ``base_url``.
            params: Optional mapping of extra query parameters. It is copied,
                never modified.

        Returns:
            Whatever ``_handle_response`` extracts from the response.

        Raises:
            Exception: If the response is not valid JSON or reports an error.
        """
        url = f"{self.base_url}{path}"

        # Build the headers once so the signed timestamp and the header
        # timestamp are guaranteed to be the same value.
        headers = self._get_headers()

        # Work on a copy so the caller's dict is not polluted with
        # appKey/timestamp/sign.
        query = dict(params or {})
        query.update({
            "appKey": self.app_key,
            "timestamp": headers["X-TYC-TIMESTAMP"],
        })
        query["sign"] = generate_sign(self.app_secret, query)

        # NOTE(review): parameters are sent in the query string for every
        # method; confirm against the API docs before using non-GET methods.
        response = requests.request(
            method,
            url,
            params=query,
            headers=headers,
            timeout=self.timeout,
        )
        return self._handle_response(response)

    def _handle_response(self, response):
        """Decode a response and return its ``data`` field.

        Raises:
            Exception: ``"Invalid JSON response"`` when the body cannot be
                decoded, or ``"API Error: ..."`` when ``code`` is not 200.
        """
        try:
            data = response.json()
        except ValueError as exc:
            # ValueError is the common base of the JSON decode errors that
            # ``Response.json()`` can raise.
            raise Exception("Invalid JSON response") from exc

        if data.get("code") != 200:
            raise Exception(f"API Error: {data.get('message')}")
        # A successful reply without a payload yields None instead of KeyError.
        return data.get("data")
def get_company_basic(api_client, company_name):
    """Look up basic information for a company by keyword.

    Args:
        api_client: Object exposing ``call_api(method, path, params)``.
        company_name: Keyword sent as the ``keyword`` search parameter.

    Returns:
        The first entry of the ``businessEntities`` list in the result, or
        ``None`` when the result is empty or has no such entries.
    """
    path = "/services/open/ic/search/v5"
    params = {
        "keyword": company_name,
        "pageSize": 1,
    }
    result = api_client.call_api("GET", path, params)
    if result and result.get("businessEntities"):
        return result["businessEntities"][0]
    return None
def get_change_records(api_client, company_id):
    """Fetch the business-registration change records of a company.

    Args:
        api_client: Object exposing ``call_api(method, path)``.
        company_id: Identifier placed in the request path.

    Returns:
        Whatever ``api_client.call_api`` returns for the request.

    Raises:
        ValueError: If ``company_id`` is ``None`` or an empty string.
    """
    from urllib.parse import quote

    if company_id is None or company_id == "":
        raise ValueError("company_id must not be empty")

    # Escape the id so characters such as "/" or "?" cannot alter the path.
    safe_id = quote(str(company_id), safe="")
    path = f"/services/open/ic/changeinfo/v2/{safe_id}"
    return api_client.call_api("GET", path)
def get_legal_risks(api_client, company_id):
    """Fetch the judicial-risk (lawsuit) information of a company.

    Args:
        api_client: Object exposing ``call_api(method, path)``.
        company_id: Identifier placed in the request path.

    Returns:
        Whatever ``api_client.call_api`` returns for the request.

    Raises:
        ValueError: If ``company_id`` is ``None`` or an empty string.
    """
    from urllib.parse import quote

    if company_id is None or company_id == "":
        raise ValueError("company_id must not be empty")

    # Escape the id so characters such as "/" or "?" cannot alter the path.
    safe_id = quote(str(company_id), safe="")
    path = f"/services/open/ic/lawSuit/v4/{safe_id}"
    return api_client.call_api("GET", path)
import concurrent.futures


def batch_query(api_client, company_names, max_workers=5):
    """Query basic information for several companies concurrently.

    Each name is looked up with ``get_company_basic`` on a thread pool.

    Args:
        api_client: Client passed through to ``get_company_basic``.
        company_names: Iterable of company names to look up.
        max_workers: Size of the thread pool.

    Returns:
        A list of ``(name, result)`` tuples in completion order, which may
        differ from the input order. When a lookup raises, ``result`` is
        ``{"error": "<message>"}`` instead of the company data.
    """
    results = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_name = {
            executor.submit(get_company_basic, api_client, name): name
            for name in company_names
        }
        for future in concurrent.futures.as_completed(future_to_name):
            name = future_to_name[future]
            try:
                results.append((name, future.result()))
            except Exception as exc:
                # Deliberate boundary: one failed lookup must not abort the
                # rest of the batch, so the error is recorded per company.
                results.append((name, {"error": str(exc)}))
    return results
import sqlite3


def save_to_db(company_data, db_path='company_data.db'):
    """Store company records in a SQLite database.

    Creates the ``companies`` table when it does not exist, then inserts or
    replaces one row per usable record.

    Args:
        company_data: Iterable of dict records. Falsy entries and entries
            without an ``"id"`` key are skipped.
        db_path: Path of the SQLite database file.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS companies (
                id TEXT PRIMARY KEY,
                name TEXT,
                legal_person TEXT,
                reg_capital TEXT,
                est_date TEXT,
                status TEXT
            )
        ''')

        rows = [
            (
                data["id"],
                # .get() like the other optional fields, so one record without
                # a name cannot abort the whole batch.
                data.get("name"),
                data.get("legalPersonName"),
                data.get("regCapital"),
                data.get("estiblishTime"),
                data.get("companyStatus"),
            )
            for data in company_data
            if data and "id" in data
        ]
        cursor.executemany(
            'INSERT OR REPLACE INTO companies VALUES (?,?,?,?,?,?)',
            rows,
        )
        conn.commit()
    finally:
        # Always release the connection, even when an insert fails.
        conn.close()
# Per the article, the remaining quota is read from the X-TYC-RATE-LIMIT
# response header.
def retry_api_call(api_func, max_retries=3, base_delay=1.0, max_delay=10):
    """Call ``api_func`` and retry with exponential backoff when it raises.

    The wait before retry ``n`` (0-based) is
    ``min(base_delay * 2 ** n + jitter, max_delay)`` seconds, where ``jitter``
    is drawn uniformly from ``[0, 1]``.

    Args:
        api_func: Zero-argument callable to invoke.
        max_retries: Total number of attempts; must be at least 1.
        base_delay: Multiplier for the exponential part of the wait.
        max_delay: Upper bound for a single wait, in seconds.

    Returns:
        The return value of the first successful ``api_func()`` call.

    Raises:
        ValueError: If ``max_retries`` is smaller than 1.
        Exception: Whatever ``api_func`` raised on the final attempt.
    """
    # Imported locally so the snippet does not depend on file-level imports.
    import random
    import time

    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    for attempt in range(max_retries):
        try:
            return api_func()
        except Exception:
            if attempt == max_retries - 1:
                # Out of attempts: surface the original error unchanged.
                raise
            wait_time = min(
                base_delay * 2 ** attempt + random.uniform(0, 1),
                max_delay,
            )
            time.sleep(wait_time)
# 5.2 Data quality validation
def validate_company_data(data):
    """Check that a company record contains every required field.

    Args:
        data: Company record to check. ``None`` or an empty record is treated
            as having no fields at all.

    Returns:
        ``True`` when ``id``, ``name`` and ``companyStatus`` are all present.

    Raises:
        ValueError: If one or more required fields are missing; the message
            lists them.
    """
    required_fields = ["id", "name", "companyStatus"]
    # Normalise None so the caller gets the documented ValueError rather
    # than a TypeError from the membership test.
    record = data if data else {}
    missing = [field for field in required_fields if field not in record]
    if missing:
        raise ValueError(f"Missing required fields: {missing}")
    return True
class TYCAPIError(Exception):
    """Base class for Tianyancha API errors."""


class RateLimitError(TYCAPIError):
    """Raised when the call-rate limit has been exceeded."""


class AuthError(TYCAPIError):
    """Raised when authentication fails."""


def handle_api_errors(response):
    """Raise the matching exception for a non-successful HTTP response.

    Args:
        response: Object with a ``status_code`` attribute.

    Raises:
        AuthError: For status 401 or 403.
        RateLimitError: For status 429.
        TYCAPIError: For any other status that is not 200.
    """
    status = response.status_code
    # 401 (unauthenticated) is an authentication failure just like 403.
    if status in (401, 403):
        raise AuthError("Invalid API credentials")
    if status == 429:
        raise RateLimitError("API rate limit exceeded")
    if status != 200:
        raise TYCAPIError(f"API request failed with status {status}")
if __name__ == "__main__":
    # Initialise the API client.
    api_client = TianYanChaAPI(
        app_key="your_app_key",
        app_secret="your_app_secret",
    )

    # Look up a single company.
    company = get_company_basic(api_client, "阿里巴巴")
    if company:
        print(f"企业名称: {company['name']}")
        print(f"法人代表: {company.get('legalPersonName')}")
        print(f"注册资本: {company.get('regCapital')}")

        # Fetch its registration change records; tolerate an empty payload.
        changes = get_change_records(api_client, company["id"]) or {}
        print(f"变更记录数: {len(changes.get('changeInfos', []))}")

    # Batch lookup example.
    companies = ["腾讯", "百度", "字节跳动"]
    batch_results = batch_query(api_client, companies)
    for name, data in batch_results:
        # batch_query reports failures as {"error": ...}, which is truthy, so
        # the error marker has to be checked explicitly.
        succeeded = bool(data) and "error" not in data
        print(f"{name}: {'成功' if succeeded else '失败'}")
通过系统化的API调用和数据处理,Python开发者可以高效构建企业信息查询系统,为风险控制、商业分析等场景提供可靠的数据支持。实际开发中应持续关注天眼查API的版本更新和接口调整,确保系统的长期稳定性。