IntelliHire / main.py
amartyasaran's picture
Readme Updated
b2647bd
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from openai import AzureOpenAI as moa
from serpapi import GoogleSearch
import settings
import os
import re
import faiss
from llama_index.core import Document, VectorStoreIndex, StorageContext
from llama_index.vector_stores.faiss import FaissVectorStore
from dotenv import load_dotenv
# Populate the process environment (python-dotenv) before the os.getenv()
# calls below read their values; this must stay ahead of the client setup.
load_dotenv()
# FastAPI application object; the /search route in this file is registered on it.
app = FastAPI(title="IntelliHire")
# Azure OpenAI client (AzureOpenAI is imported under the alias `moa`).
# API version, endpoint and key are all read from environment variables;
# os.getenv returns None for any variable that is not set.
client = moa(
api_version=os.getenv('AZURE_VERSION'),
azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT'),
api_key=os.getenv('AZURE_API_KEY'),
)
class QueryRequest(BaseModel):
    """Request body accepted by the ``/search`` endpoint."""

    # Free-text search request supplied by the caller.
    query: str
def generate_search_query(user_query: str) -> str:
    """Ask the chat model to rewrite a free-text request as a Google search query.

    The prompt instructs the model to wrap its answer in ``<query>...</query>``
    tags; the contents of the first such tag are returned.

    Args:
        user_query: Natural-language request to convert.

    Returns:
        The text inside the first ``<query>`` tag, with leading/trailing
        whitespace removed and internal runs of whitespace (including line
        breaks) collapsed to single spaces.

    Raises:
        ValueError: If the model response has no content, contains no
            ``<query>`` tag, or the tag is empty.
    """
    response = client.chat.completions.create(
        model='agile4',
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant.",
            },
            {
                "role": "user",
                "content": f"""Convert the given query to google search for linked site ex: 'Find devops engineer in Berlin' → site:linkedin.com/in/ 'devops engineer' Berlin". Query: {user_query}, Response: <query>...</query>""",
            },
        ],
        max_completion_tokens=800,
        temperature=0.7,
    )
    # The content field can be None; fall back to an empty string so the
    # failure surfaces as the ValueError below instead of a TypeError in re.
    text = response.choices[0].message.content or ""
    # DOTALL lets the tag body span several lines.
    match = re.search(r'<query>(.*?)</query>', text, re.DOTALL)
    query = " ".join(match.group(1).split()) if match else ""
    if not query:
        raise ValueError("Failed to parse search query from model response")
    return query
def search_google(query: str) -> list[Document]:
    """Run a Google search through SerpApi and wrap each organic result as a Document.

    Args:
        query: Search string sent as the ``q`` parameter.

    Returns:
        One ``Document`` per entry in the response's ``organic_results`` list.
        The document text is the result snippet; metadata holds ``name`` (the
        first two whitespace-separated words of the result title) and ``link``.
        An empty list is returned when the response has no organic results.

    Raises:
        RuntimeError: If the ``SERPAPI_API_KEY`` environment variable is not set.
    """
    # Read the credential from the environment, like the Azure settings in this
    # file, instead of embedding it in source. NOTE(review): the key that was
    # previously hard-coded here should be treated as exposed and rotated.
    api_key = os.getenv("SERPAPI_API_KEY")
    if not api_key:
        raise RuntimeError("SERPAPI_API_KEY environment variable is not set")

    params = {
        "api_key": api_key,
        "engine": "google",
        "q": query,
        "google_domain": "google.co.in",
        "gl": "in",
        "hl": "en",
        "location": "India",
        "num": "50",
    }
    results = GoogleSearch(params).get_dict()
    return [
        Document(
            # `or ''` also covers a key that is present but null.
            text=item.get('snippet') or '',
            metadata={
                'name': ' '.join((item.get('title') or '').split()[:2]),
                'link': item.get('link'),
            },
        )
        for item in results.get('organic_results', [])
    ]
def create_index(docs: list[Document], embed_dim: int = 1536) -> VectorStoreIndex:
    """Build a FAISS-backed vector index over the given documents.

    Args:
        docs: Documents to embed and index.
        embed_dim: Dimensionality of the flat L2 FAISS index. It has to match
            the size of the vectors produced by whichever embedding model
            ``VectorStoreIndex.from_documents`` uses. Defaults to 1536, the
            value that was previously hard-coded (presumably the output size
            of the configured embedding model; confirm if the model changes).

    Returns:
        A ``VectorStoreIndex`` whose vectors are stored in a new in-memory
        ``faiss.IndexFlatL2``.
    """
    faiss_index = faiss.IndexFlatL2(embed_dim)
    vector_store = FaissVectorStore(faiss_index=faiss_index)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    return VectorStoreIndex.from_documents(
        docs,
        storage_context=storage_context,
        show_progress=True,
    )
@app.post("/search")
def search(query_request: QueryRequest):
    """Handle ``POST /search``: find and rank web results for a free-text request.

    Steps: convert the request into a Google query, fetch results, index the
    snippets in a fresh vector index, and return the ten nearest matches to the
    original request text.

    Args:
        query_request: Request body carrying the user's ``query`` string.

    Returns:
        ``{"results": [...]}`` where each item has the node's ``metadata`` and
        ``text``, or ``{"message": "No documents found"}`` when the search
        produced no documents.

    Raises:
        HTTPException: Status 500 with the underlying error message as detail
            when any step fails.
    """
    try:
        search_query = generate_search_query(query_request.query)
        docs = search_google(search_query)
        if not docs:
            return {"message": "No documents found"}
        index = create_index(docs)
        # Only the retrieved nodes are returned to the caller, so query the
        # retriever directly rather than a query engine, which would also run
        # response synthesis whose output was never used.
        retriever = index.as_retriever(similarity_top_k=10)
        nodes = retriever.retrieve(query_request.query)
        return {
            "results": [
                {"metadata": node.metadata, "text": node.text}
                for node in nodes
            ]
        }
    except Exception as e:
        # Endpoint boundary: report the failure as a 500 and keep the original
        # exception chained for tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e