OpenAI Assistants Integration
Supercharge your OpenAI Assistants with real-time web search and scraping, so they can access current information and perform research tasks automatically.
Function Calling Ready
Key Features
Seamlessly integrate SearchHive APIs as OpenAI Assistant functions
Enable assistants to search the web and access current information
Extract detailed content from any webpage for comprehensive analysis
AI automatically chooses the right SearchHive API for each task
Setup Guide
1. Create Assistant with SearchHive Functions
Create OpenAI Assistant with SearchHive Integration
import openai
import requests
import json

# Initialize OpenAI client
client = openai.OpenAI(api_key="your-openai-api-key")

# Define SearchHive functions for the assistant
def search_web(query, max_results=10):
    """Search the web using SearchHive API"""
    response = requests.post(
        "https://www.searchhive.dev/api/v1/swiftsearch",
        headers={"Authorization": "Bearer your-searchhive-key"},
        json={
            "query": query,
            "max_results": max_results
        }
    )
    return response.json()

def scrape_webpage(url):
    """Scrape webpage content using SearchHive"""
    response = requests.post(
        "https://www.searchhive.dev/api/v1/scrapeforge",
        headers={"Authorization": "Bearer your-searchhive-key"},
        json={
            "url": url,
            "extract_options": ["title", "text", "metadata"]
        }
    )
    return response.json()

# Create assistant with SearchHive functions
assistant = client.beta.assistants.create(
    name="Web Research Assistant",
    instructions="""You are a helpful research assistant with access to real-time web search and webpage scraping capabilities.

Use the search_web function to find current information on any topic.
Use the scrape_webpage function to extract detailed content from specific URLs.

Always provide sources and cite where information came from. Be thorough in your research and cross-reference multiple sources when possible.""",
    model="gpt-4-turbo-preview",
    tools=[
        {
            "type": "function",
            "function": {
                "name": "search_web",
                "description": "Search the web for current information on any topic",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "The search query to execute"
                        },
                        "max_results": {
                            "type": "integer",
                            "description": "Maximum number of results to return (default: 10)",
                            "default": 10
                        }
                    },
                    "required": ["query"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "scrape_webpage",
                "description": "Extract content from a specific webpage URL",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "url": {
                            "type": "string",
                            "description": "The URL to scrape content from"
                        }
                    },
                    "required": ["url"]
                }
            }
        }
    ]
)

print(f"Assistant created with ID: {assistant.id}")
2. Handle Function Calls
Function Call Handler for SearchHive APIs
import json
import time

def run_conversation(assistant_id, user_message):
    # Create a thread
    thread = client.beta.threads.create()

    # Add user message to thread
    client.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content=user_message
    )

    # Run the assistant
    run = client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant_id
    )

    # Wait for completion and handle function calls
    while run.status in ['queued', 'in_progress', 'requires_action']:
        if run.status == 'requires_action':
            # Handle function calls
            tool_calls = run.required_action.submit_tool_outputs.tool_calls
            tool_outputs = []

            for tool_call in tool_calls:
                function_name = tool_call.function.name
                function_args = json.loads(tool_call.function.arguments)

                if function_name == "search_web":
                    result = search_web(
                        query=function_args["query"],
                        max_results=function_args.get("max_results", 10)
                    )
                elif function_name == "scrape_webpage":
                    result = scrape_webpage(url=function_args["url"])
                else:
                    result = {"error": f"Unknown function: {function_name}"}

                tool_outputs.append({
                    "tool_call_id": tool_call.id,
                    "output": json.dumps(result)
                })

            # Submit function outputs
            run = client.beta.threads.runs.submit_tool_outputs(
                thread_id=thread.id,
                run_id=run.id,
                tool_outputs=tool_outputs
            )

        # Wait before checking again
        time.sleep(1)
        run = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)

    # Get the assistant's response (messages are returned newest first)
    messages = client.beta.threads.messages.list(thread_id=thread.id)
    return messages.data[0].content[0].text.value

# Example usage
response = run_conversation(
    assistant_id=assistant.id,
    user_message="What are the latest developments in AI technology this week? Please search for recent news and provide a summary."
)
print(response)
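The loop above assumes the run eventually completes. In practice a run can also end in a terminal state such as failed, cancelled, or expired, and a stuck run would otherwise poll forever. Here is a minimal sketch of a more defensive polling helper; the status names come from the Assistants API, while the 120-second timeout is an arbitrary choice you should tune to your workload.

Defensive Run Polling
import time

def wait_for_run(thread_id, run_id, timeout_seconds=120):
    """Poll a run until it needs tool outputs or finishes, raising on failure or timeout."""
    deadline = time.time() + timeout_seconds  # arbitrary cutoff
    while time.time() < deadline:
        run = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
        if run.status in ('completed', 'requires_action'):
            return run
        if run.status in ('failed', 'cancelled', 'expired'):
            raise RuntimeError(f"Run ended with status {run.status}: {run.last_error}")
        time.sleep(1)
    raise TimeoutError(f"Run {run_id} did not finish within {timeout_seconds}s")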
Complete Streamlit Chatbot
Here's a complete example of a Streamlit chatbot powered by OpenAI Assistants and SearchHive:
Complete Research Chatbot with Streamlit
import streamlit as st
import openai
import json
import time

# The search_web and scrape_webpage helpers from step 1 must also be
# defined (or imported) in this script.

st.title("🔍 AI Research Assistant")
st.caption("Powered by OpenAI GPT-4 + SearchHive APIs")

# Initialize session state
if "messages" not in st.session_state:
    st.session_state.messages = []
if "thread_id" not in st.session_state:
    st.session_state.thread_id = None

# Initialize clients
client = openai.OpenAI(api_key=st.secrets["OPENAI_API_KEY"])
ASSISTANT_ID = "asst_your_assistant_id"

def create_thread():
    """Create a new conversation thread"""
    thread = client.beta.threads.create()
    return thread.id

def run_assistant(thread_id, user_message):
    """Run the assistant with SearchHive integration"""
    # Add user message
    client.beta.threads.messages.create(
        thread_id=thread_id,
        role="user",
        content=user_message
    )

    # Start the run
    run = client.beta.threads.runs.create(
        thread_id=thread_id,
        assistant_id=ASSISTANT_ID
    )

    # Handle the conversation with function calls
    while run.status in ['queued', 'in_progress', 'requires_action']:
        if run.status == 'requires_action':
            # Handle SearchHive function calls
            tool_calls = run.required_action.submit_tool_outputs.tool_calls
            tool_outputs = []

            for tool_call in tool_calls:
                function_name = tool_call.function.name
                function_args = json.loads(tool_call.function.arguments)

                with st.spinner(f"🔍 {function_name.replace('_', ' ').title()}..."):
                    if function_name == "search_web":
                        result = search_web(
                            query=function_args["query"],
                            max_results=function_args.get("max_results", 10)
                        )
                        st.sidebar.success(f"Found {len(result.get('search_results', []))} search results")
                    elif function_name == "scrape_webpage":
                        result = scrape_webpage(url=function_args["url"])
                        st.sidebar.success(f"Scraped webpage: {function_args['url']}")
                    else:
                        result = {"error": f"Unknown function: {function_name}"}

                tool_outputs.append({
                    "tool_call_id": tool_call.id,
                    "output": json.dumps(result)
                })

            # Submit the function outputs
            run = client.beta.threads.runs.submit_tool_outputs(
                thread_id=thread_id,
                run_id=run.id,
                tool_outputs=tool_outputs
            )

        time.sleep(1)
        run = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)

    # Get the assistant's response
    messages = client.beta.threads.messages.list(thread_id=thread_id)
    return messages.data[0].content[0].text.value

# Create thread if needed
if st.session_state.thread_id is None:
    st.session_state.thread_id = create_thread()

# Display chat history
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Chat input
if prompt := st.chat_input("Ask me to research anything..."):
    # Add user message to history
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Get assistant response
    with st.chat_message("assistant"):
        with st.spinner("Researching..."):
            response = run_assistant(st.session_state.thread_id, prompt)
            st.markdown(response)

    # Add assistant response to history
    st.session_state.messages.append({"role": "assistant", "content": response})

# Sidebar with examples
st.sidebar.header("💡 Example Queries")
st.sidebar.markdown("""
- "What are the latest AI breakthroughs this month?"
- "Research the current state of renewable energy adoption"
- "Find recent news about cryptocurrency regulations"
- "Analyze the latest trends in remote work"
- "What are experts saying about climate change solutions?"
""")

st.sidebar.header("🔧 Functions Available")
st.sidebar.markdown("""
- **Web Search**: Real-time search across the internet
- **Webpage Scraping**: Extract content from any URL
- **AI Analysis**: GPT-4 powered insights and summaries
""")
Node.js Implementation
The same integration implemented with Node.js and the OpenAI JavaScript SDK:
Node.js OpenAI Assistant with SearchHive
const OpenAI = require('openai');
const axios = require('axios');

const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY
});

const SEARCHHIVE_API_KEY = process.env.SEARCHHIVE_API_KEY;

// SearchHive function implementations
async function searchWeb(query, maxResults = 10) {
  try {
    const response = await axios.post(
      'https://www.searchhive.dev/api/v1/swiftsearch',
      {
        query: query,
        max_results: maxResults
      },
      {
        headers: {
          'Authorization': `Bearer ${SEARCHHIVE_API_KEY}`
        }
      }
    );
    return response.data;
  } catch (error) {
    return { error: error.message };
  }
}

async function scrapeWebpage(url) {
  try {
    const response = await axios.post(
      'https://www.searchhive.dev/api/v1/scrapeforge',
      {
        url: url,
        extract_options: ['title', 'text', 'metadata']
      },
      {
        headers: {
          'Authorization': `Bearer ${SEARCHHIVE_API_KEY}`
        }
      }
    );
    return response.data;
  } catch (error) {
    return { error: error.message };
  }
}

// Create assistant
async function createResearchAssistant() {
  const assistant = await openai.beta.assistants.create({
    name: "Web Research Assistant",
    instructions: `You are a research assistant with real-time web access.
Use search_web to find current information and scrape_webpage to get detailed content from URLs.
Always cite your sources and provide accurate, up-to-date information.`,
    model: "gpt-4-turbo-preview",
    tools: [
      {
        type: "function",
        function: {
          name: "search_web",
          description: "Search the web for current information",
          parameters: {
            type: "object",
            properties: {
              query: { type: "string", description: "Search query" },
              maxResults: { type: "integer", description: "Max results (default: 10)" }
            },
            required: ["query"]
          }
        }
      },
      {
        type: "function",
        function: {
          name: "scrape_webpage",
          description: "Extract content from a webpage URL",
          parameters: {
            type: "object",
            properties: {
              url: { type: "string", description: "URL to scrape" }
            },
            required: ["url"]
          }
        }
      }
    ]
  });
  return assistant;
}

// Handle conversation with function calls
async function runConversation(assistantId, userMessage) {
  // Create thread
  const thread = await openai.beta.threads.create();

  // Add message
  await openai.beta.threads.messages.create(thread.id, {
    role: "user",
    content: userMessage
  });

  // Run assistant
  let run = await openai.beta.threads.runs.create(thread.id, {
    assistant_id: assistantId
  });

  // Handle function calls
  while (run.status === 'queued' || run.status === 'in_progress' || run.status === 'requires_action') {
    if (run.status === 'requires_action') {
      const toolCalls = run.required_action.submit_tool_outputs.tool_calls;
      const toolOutputs = [];

      for (const toolCall of toolCalls) {
        const functionName = toolCall.function.name;
        const args = JSON.parse(toolCall.function.arguments);

        let result;
        if (functionName === 'search_web') {
          result = await searchWeb(args.query, args.maxResults);
        } else if (functionName === 'scrape_webpage') {
          result = await scrapeWebpage(args.url);
        } else {
          result = { error: `Unknown function: ${functionName}` };
        }

        toolOutputs.push({
          tool_call_id: toolCall.id,
          output: JSON.stringify(result)
        });
      }

      run = await openai.beta.threads.runs.submitToolOutputs(
        thread.id,
        run.id,
        { tool_outputs: toolOutputs }
      );
    }

    await new Promise(resolve => setTimeout(resolve, 1000));
    run = await openai.beta.threads.runs.retrieve(thread.id, run.id);
  }

  // Get response (messages are returned newest first)
  const messages = await openai.beta.threads.messages.list(thread.id);
  return messages.data[0].content[0].text.value;
}

// Example usage
async function main() {
  const assistant = await createResearchAssistant();
  console.log(`Assistant created: ${assistant.id}`);

  const response = await runConversation(
    assistant.id,
    "What are the latest developments in quantum computing? Please search for recent news and provide a comprehensive summary."
  );

  console.log("Assistant Response:", response);
}

main().catch(console.error);
Use Cases
Create AI assistants that can research any topic with real-time web access
Build support bots that can search knowledge bases and documentation
Build assistants that gather information for content writing and fact-checking
Deploy AI agents that gather and analyze data from multiple web sources