Python SDK
The official Python SDK for SearchHive APIs. Get started quickly with type-safe, async-ready Python bindings for SwiftSearch, ScrapeForge, and DeepDive.
Type Safe
Full type hints and IDE support
Async Ready
Built-in async/await support
Error Handling
Comprehensive error types
LangChain Ready
Pre-built LangChain tools
Installation
Install the Python SDK
# Using pip
pip install searchhive-python
# Using conda
conda install -c conda-forge searchhive-python
# Update to latest version
pip install --upgrade searchhive-python
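You can verify the installation from Python. Note that the __version__ attribute below is an assumption (a common packaging convention, not confirmed for this SDK), so the snippet falls back to a plain import check:
# Confirm the package imports; __version__ is assumed, hence the getattr fallback
import searchhive
print(getattr(searchhive, "__version__", "installed"))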
Quick Start
Get up and running with all three APIs in minutes.
Quick start example covering all APIs
from searchhive import SearchHive

# Initialize client with your API key
client = SearchHive(api_key="sk_live_your_key_here")

# SwiftSearch: Real-time web search
search_results = client.swift_search(
    query="Bitcoin price 2025 predictions",
    max_results=10,
    auto_scrape_top=3
)

print(f"Found {len(search_results.search_results)} results")
print(f"Credits used: {search_results.credits_used}")

# Access results
for result in search_results.search_results:
    print(f"Title: {result.title}")
    print(f"URL: {result.link}")
    print(f"Snippet: {result.snippet}\n")

# ScrapeForge: Extract content from URLs
scraped_data = client.scrape_forge(
    url="https://coindesk.com/price/bitcoin",
    extract_options=["title", "text", "metadata"]
)

print(f"Page title: {scraped_data.primary_content.title}")
print(f"Content length: {len(scraped_data.primary_content.text)}")

# DeepDive: AI-powered research
research = client.deep_dive(
    topic="cryptocurrency market trends 2025",
    max_sources=5,
    generate_summary=True  # Requires Builder plan
)

print(f"Analyzed {research.sources_analyzed} sources")
if research.ai_summary:
    print(f"AI Summary: {research.ai_summary}")
Client Configuration
Client configuration options
from searchhive import SearchHive
# Basic configuration
client = SearchHive(
    api_key="sk_live_your_key_here"
)

# Advanced configuration
client = SearchHive(
    api_key="sk_live_your_key_here",
    base_url="https://www.searchhive.dev/api",  # Custom endpoint
    timeout=30,              # Request timeout in seconds
    max_retries=3,           # Retry failed requests
    retry_delay=1.0,         # Delay between retries
    verify_ssl=True,         # SSL verification
    user_agent="MyApp/1.0",  # Custom user agent
)
# Environment-based configuration (recommended)
import os
client = SearchHive() # Will use SEARCHHIVE_API_KEY env var
# Check client status
status = client.get_status()
print(f"Credits remaining: {status.credits_remaining}")
print(f"Plan: {status.plan}")
print(f"Rate limit: {status.rate_limit_remaining}/hour")
SwiftSearch API
Real-time web search with automatic content extraction and data enrichment.
SwiftSearch examples
# SwiftSearch examples
# Basic search
results = client.swift_search("Python web scraping tutorial")
# Advanced search with all features
results = client.swift_search(
    query="AI startups funding 2025",
    max_results=20,
    auto_scrape_top=5,      # Scrape top 5 results
    include_contacts=True,  # Extract contact info
    include_social=True,    # Find social profiles
    freshness="24h"         # Recent results only
)
# Access different result types
print("Search Results:")
for result in results.search_results:
    print(f"- {result.title} ({result.link})")

print("\nScraped Content:")
for content in results.scraped_content:
    if not content.error:
        print(f"- {content.title}: {len(content.text)} chars")

print("\nContacts Found:")
for contact in results.contacts:
    print(f"- {contact.name} ({contact.email}) at {contact.company}")

print("\nSocial Profiles:")
for profile in results.social_profiles:
    print(f"- {profile.platform}: {profile.url}")
# Handle errors gracefully (catch specific subclasses before the APIError base class)
from searchhive.exceptions import APIError, RateLimitError, InsufficientCreditsError

try:
    results = client.swift_search("test query")
except RateLimitError as e:
    print(f"Rate limited: {e}")
except InsufficientCreditsError as e:
    print(f"Out of credits: {e}")
except APIError as e:
    print(f"API Error: {e}")
ScrapeForge API
Enterprise web scraping that bypasses protections and extracts structured data.
ScrapeForge examples
# ScrapeForge examples
# Basic web scraping
scraped = client.scrape_forge(
    url="https://example.com/article"
)

# Advanced scraping with options
scraped = client.scrape_forge(
    url="https://news.ycombinator.com",
    extract_options=["title", "text", "links", "images", "metadata"],
    follow_internal_links=True,  # Requires Unicorn plan
    max_depth=2,
    max_pages=10,
    include_contacts=True,
    render_js=True,  # Enable JavaScript rendering
    wait_time=3      # Wait 3 seconds after page load
)
# Access extracted content
content = scraped.primary_content
print(f"Title: {content.title}")
print(f"Text: {content.text[:500]}...") # First 500 chars
if content.links:
    print(f"Found {len(content.links)} links:")
    for link in content.links[:5]:  # First 5 links
        print(f"- {link.text}: {link.url}")

if content.images:
    print(f"Found {len(content.images)} images:")
    for img in content.images[:3]:
        print(f"- {img.alt}: {img.src}")

# Handle discovered links (if following enabled)
if scraped.discovered_links:
    print(f"\nDiscovered {len(scraped.discovered_links)} additional pages:")
    for link_content in scraped.discovered_links:
        if not link_content.error:
            print(f"- {link_content.title}")
# Batch scraping multiple URLs
urls = [
    "https://example.com/page1",
    "https://example.com/page2",
    "https://example.com/page3",
]
batch_results = client.scrape_batch(urls)
for i, result in enumerate(batch_results):
    print(f"Page {i+1}: {result.primary_content.title or 'Failed'}")
DeepDive API
AI-powered research across multiple sources with summarization and analysis.
DeepDive research examples
# DeepDive research examples
# Basic research
research = client.deep_dive(
    topic="quantum computing breakthroughs 2025"
)

# Comprehensive research with AI features
research = client.deep_dive(
    topic="sustainable energy investment opportunities",
    max_sources=10,
    generate_summary=True,           # AI-generated summary
    include_social_mentions=True,    # Social media analysis
    research_depth="comprehensive",  # deep, comprehensive, expert
    include_citations=True           # Source citations
)
# Access research results
print(f"Research Topic: {research.topic}")
print(f"Sources Analyzed: {research.sources_analyzed}")
print(f"Research Depth: {research.research_depth}")
# Search results that were analyzed
print("\nKey Sources:")
for result in research.search_results:
    print(f"- {result.title} ({result.link})")
# Scraped and analyzed content
print("\nContent Analysis:")
for content in research.scraped_content[:3]:  # First 3
    if not content.error:
        summary = content.text[:200] + "..." if len(content.text) > 200 else content.text
        print(f"- {content.title}:\n {summary}\n")
# AI-generated insights (Builder plan+)
if research.ai_summary:
    print(f"AI Summary:\n{research.ai_summary}")

# Social media mentions (Builder plan+)
if research.social_mentions:
    print(f"\nSocial Mentions ({len(research.social_mentions)}):")
    for mention in research.social_mentions[:3]:
        print(f"- {mention.platform}: {mention.text[:100]}...")

# Citations for academic/professional use
if research.citations:
    print("\nCitations:")
    for citation in research.citations:
        print(f"- {citation.apa_format}")
Async Support
High-performance async/await support for concurrent requests and better scalability.
Async usage patterns
import asyncio
from searchhive import AsyncSearchHive
async def main():
    # Async client for high-performance applications
    client = AsyncSearchHive(api_key="sk_live_your_key_here")
    try:
        # Async search
        results = await client.swift_search("Python async tutorial")
        print(f"Found {len(results.search_results)} results")

        # Concurrent requests
        search_task = client.swift_search("AI news")
        scrape_task = client.scrape_forge("https://example.com")
        research_task = client.deep_dive("machine learning trends")

        # Wait for all to complete
        search_res, scrape_res, research_res = await asyncio.gather(
            search_task, scrape_task, research_task
        )

        print(f"Search: {len(search_res.search_results)} results")
        print(f"Scrape: {scrape_res.primary_content.title}")
        print(f"Research: {research_res.sources_analyzed} sources")
    finally:
        await client.close()

# Run the async function
asyncio.run(main())

# Or use context manager (recommended)
async def with_context_manager():
    async with AsyncSearchHive(api_key="sk_live_your_key_here") as client:
        results = await client.swift_search("test query")
        return results
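When fanning out many requests at once, it helps to cap how many are in flight so you stay under your plan's rate limit. Below is a minimal sketch building on the AsyncSearchHive client above; the Semaphore bound is an application-side choice, not an SDK setting:
import asyncio
from searchhive import AsyncSearchHive

async def bounded_search(queries, limit=5):
    # Cap in-flight requests; tune the limit to your plan's rate limit
    semaphore = asyncio.Semaphore(limit)
    async with AsyncSearchHive(api_key="sk_live_your_key_here") as client:
        async def one(query):
            async with semaphore:
                return await client.swift_search(query)
        return await asyncio.gather(*(one(q) for q in queries))

results = asyncio.run(bounded_search(["AI news", "Bitcoin price", "quantum computing"]))
print(f"Completed {len(results)} searches")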
Error Handling
Comprehensive error handling
from searchhive import SearchHive
from searchhive.exceptions import (
    APIError,
    AuthenticationError,
    RateLimitError,
    InsufficientCreditsError,
    ValidationError,
    NotFoundError,
)

client = SearchHive(api_key="sk_live_your_key_here")

try:
    results = client.swift_search("test query")
except AuthenticationError:
    print("Invalid API key - check your credentials")
except InsufficientCreditsError as e:
    print(f"Out of credits: {e.credits_needed} needed, {e.credits_available} available")
    print(f"Upgrade at: {e.upgrade_url}")
except RateLimitError as e:
    print(f"Rate limited. Retry after {e.retry_after} seconds")
except ValidationError as e:
    print(f"Invalid parameters: {e.errors}")
except NotFoundError:
    print("Resource not found")
except APIError as e:
    print(f"API error ({e.status_code}): {e.message}")
except Exception as e:
    print(f"Unexpected error: {e}")
# Retry logic with exponential backoff
import time
import random

def search_with_retry(query, max_retries=3):
    for attempt in range(max_retries):
        try:
            return client.swift_search(query)
        except RateLimitError:
            if attempt < max_retries - 1:
                wait_time = (2 ** attempt) + random.uniform(0, 1)
                print(f"Rate limited, retrying in {wait_time:.1f}s...")
                time.sleep(wait_time)
            else:
                raise
        except APIError as e:
            if e.status_code >= 500 and attempt < max_retries - 1:
                wait_time = (2 ** attempt) + random.uniform(0, 1)
                print(f"Server error, retrying in {wait_time:.1f}s...")
                time.sleep(wait_time)
            else:
                raise

# Usage
try:
    results = search_with_retry("Bitcoin price analysis")
    print(f"Success! Found {len(results.search_results)} results")
except Exception as e:
    print(f"All retries failed: {e}")
Data Models
All response objects are fully typed with Pydantic models for better IDE support and validation.
Response objects and data models
# Data models and response objects
# SwiftSearch response
search_results = client.swift_search("test query")
# Individual search result
result = search_results.search_results[0]
print(f"Title: {result.title}") # str
print(f"Link: {result.link}") # str
print(f"Snippet: {result.snippet}") # str
print(f"Position: {result.position}") # int
print(f"Date: {result.date}") # Optional[datetime]
print(f"Source: {result.source}") # str
# Scraped content
if search_results.scraped_content:
    content = search_results.scraped_content[0]
    print(f"URL: {content.url}")        # str
    print(f"Title: {content.title}")    # str
    print(f"Text: {content.text}")      # str
    print(f"Links: {content.links}")    # List[Link]
    print(f"Images: {content.images}")  # List[Image]
    print(f"Error: {content.error}")    # Optional[str]

# Contact information
if search_results.contacts:
    contact = search_results.contacts[0]
    print(f"Name: {contact.name}")          # str
    print(f"Email: {contact.email}")        # str
    print(f"Phone: {contact.phone}")        # Optional[str]
    print(f"Company: {contact.company}")    # Optional[str]
    print(f"Title: {contact.title}")        # Optional[str]
    print(f"Source: {contact.source_url}")  # str

# Social profiles
if search_results.social_profiles:
    profile = search_results.social_profiles[0]
    print(f"Platform: {profile.platform}")    # str (twitter, linkedin, etc.)
    print(f"URL: {profile.url}")              # str
    print(f"Username: {profile.username}")    # str
    print(f"Followers: {profile.followers}")  # Optional[int]
# Response metadata
print(f"Credits used: {search_results.credits_used}")
print(f"Remaining: {search_results.remaining_credits}")
print(f"Request ID: {search_results.request_id}")
print(f"Timestamp: {search_results.timestamp}")
# ScrapeForge response
scrape_result = client.scrape_forge("https://example.com")
content = scrape_result.primary_content
# Links found on page
if content.links:
    link = content.links[0]
    print(f"Link text: {link.text}")             # str
    print(f"URL: {link.url}")                    # str
    print(f"Absolute URL: {link.absolute_url}")  # str

# Images found on page
if content.images:
    image = content.images[0]
    print(f"Alt text: {image.alt}")   # str
    print(f"Source: {image.src}")     # str
    print(f"Width: {image.width}")    # Optional[int]
    print(f"Height: {image.height}")  # Optional[int]
# DeepDive response
research = client.deep_dive("AI trends")
print(f"Topic: {research.topic}")
print(f"Summary: {research.ai_summary}")
print(f"Confidence: {research.confidence_score}") # 0-100
# Convert to dict/JSON for storage
data_dict = search_results.to_dict()
json_str = search_results.to_json()
Advanced Features
Request caching to save credits
from searchhive import SearchHive
from searchhive.cache import RedisCache, FileCache
# Redis caching (recommended for production)
cache = RedisCache(host='localhost', port=6379, db=0)
client = SearchHive(api_key="sk_live_your_key_here", cache=cache)
# File-based caching (for development)
cache = FileCache(cache_dir="./searchhive_cache", ttl=3600)
client = SearchHive(api_key="sk_live_your_key_here", cache=cache)
# Cached requests won't consume credits if within TTL
results1 = client.swift_search("Bitcoin news") # Uses API
results2 = client.swift_search("Bitcoin news") # Uses cache
Pre-built LangChain tools
from searchhive.langchain import SearchHiveTools
from langchain.agents import initialize_agent, AgentType
from langchain.llms import OpenAI
# Get pre-built LangChain tools
tools = SearchHiveTools(api_key="sk_live_your_key_here")
# Initialize agent with web capabilities
llm = OpenAI(temperature=0)
agent = initialize_agent(
    tools.get_all_tools(),  # [swift_search, scrape_forge, deep_dive]
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True
)
# Agent can now search and scrape the web
result = agent.run("Find the latest Bitcoin price and analyze market sentiment")
print(result)
Configuration Reference
Parameter | Type | Default | Description |
---|---|---|---|
api_key | str | None | Your SearchHive API key |
base_url | str | https://www.searchhive.dev/api | API base URL |
timeout | int | 30 | Request timeout in seconds |
max_retries | int | 3 | Number of retry attempts |
verify_ssl | bool | True | Verify SSL certificates |
Best Practices
Use environment variables for API keys
Store your API key in the SEARCHHIVE_API_KEY environment variable instead of hardcoding it.
Implement caching for repeated requests
Use Redis or file-based caching to avoid duplicate API calls and save credits
Monitor credit usage
Check remaining_credits in responses and set up alerts before running out (see the sketch after this list).
Use async for high-throughput applications
AsyncSearchHive client provides better performance for concurrent requests
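As referenced above, here is a minimal sketch of the credit-alert pattern, using the remaining_credits field and get_status() call documented earlier; the threshold and the print-based alert are placeholders for your own monitoring setup:
from searchhive import SearchHive

LOW_CREDIT_THRESHOLD = 100  # Placeholder buffer; size it to your usage

client = SearchHive()  # Reads SEARCHHIVE_API_KEY from the environment

results = client.swift_search("Bitcoin news")
if results.remaining_credits < LOW_CREDIT_THRESHOLD:
    # Placeholder alert: swap in logging, email, or paging as needed
    status = client.get_status()
    print(f"WARNING: {status.credits_remaining} credits left on the {status.plan} plan")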