"""IntelliHire API.

Exposes a single ``POST /search`` endpoint that:

1. asks an Azure OpenAI chat model to rewrite the user's request as a Google
   search query,
2. runs that query through SerpAPI,
3. indexes the returned result snippets in an in-memory FAISS vector store, and
4. returns the snippets most similar to the original request.
"""
import logging
import os
import re
from typing import Optional

import faiss
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from llama_index.core import Document, StorageContext, VectorStoreIndex
from llama_index.vector_stores.faiss import FaissVectorStore
from openai import AzureOpenAI as moa
from pydantic import BaseModel
from serpapi import GoogleSearch

# NOTE(review): not referenced by name in this module; presumably imported for
# its side effects (e.g. configuring llama_index) -- confirm before removing.
import settings  # noqa: F401

load_dotenv()

logger = logging.getLogger(__name__)

# Chat deployment used to rewrite user requests into search queries.
CHAT_MODEL = "agile4"

# Dimension of the vectors stored in the FAISS index.
# NOTE(review): must equal the output size of whichever embedding model
# llama_index is configured with (1536 matches OpenAI's ada-002 family) --
# confirm against the embedding configuration.
EMBEDDING_DIM = 1536

# Number of nearest snippets returned by the /search endpoint.
SIMILARITY_TOP_K = 10

# Environment variable holding the SerpAPI key.
SERPAPI_KEY_ENV = "SERPAPI_API_KEY"

# Strips a leading "Response:" label that the model may echo from the prompt.
_RESPONSE_LABEL = re.compile(r"^response\s*:\s*", re.IGNORECASE)

app = FastAPI(title="IntelliHire")

client = moa(
    api_version=os.getenv('AZURE_VERSION'),
    azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT'),
    api_key=os.getenv('AZURE_API_KEY'),
)


class QueryRequest(BaseModel):
    """Request body for ``POST /search``."""

    # Free-text description of what the user is looking for.
    query: str


def _extract_search_query(text: Optional[str]) -> str:
    """Pull the Google search query out of the model's raw reply.

    Blank lines and Markdown code-fence lines are ignored; a leading
    ``Response:`` label and surrounding backticks are stripped. If any
    remaining line contains ``site:`` (the form the prompt asks for) the first
    such line is returned, otherwise the first remaining line.

    Raises:
        ValueError: if the reply is empty or contains no usable line.
    """
    candidates = []
    for raw_line in (text or "").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("```"):
            continue
        line = _RESPONSE_LABEL.sub("", line).strip("` \t")
        if line:
            candidates.append(line)

    if not candidates:
        raise ValueError("Failed to parse search query from model response")

    # Prefer the line that actually looks like the requested site-restricted
    # query, so an introductory sentence from the model is not used instead.
    return next((c for c in candidates if "site:" in c), candidates[0])


def generate_search_query(user_query: str) -> str:
    """Ask the chat model to convert ``user_query`` into a Google search query.

    Args:
        user_query: The user's free-text request.

    Returns:
        The search query extracted from the model's reply.

    Raises:
        ValueError: if no query can be extracted from the reply.
    """
    response = client.chat.completions.create(
        model=CHAT_MODEL,
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant.",
            },
            {
                "role": "user",
                "content": (
                    "Convert the given query to google search for linked site ex: "
                    "'Find devops engineer in Berlin' → site:linkedin.com/in/ "
                    "'devops engineer' Berlin\". "
                    f"Query: {user_query}, Response: ..."
                ),
            },
        ],
        max_completion_tokens=800,
        temperature=0.7,
    )
    # The previous parser used the regex '(.*?)', which matches the empty
    # string at every position, so the function always returned ''.
    return _extract_search_query(response.choices[0].message.content)


def search_google(query: str) -> list[Document]:
    """Run ``query`` through SerpAPI and wrap each organic result as a Document.

    Each document's text is the result snippet; its metadata carries ``name``
    (the first two whitespace-separated words of the result title) and ``link``.

    Args:
        query: The Google search query string.

    Returns:
        One ``Document`` per organic result; empty if there are none.

    Raises:
        RuntimeError: if the SerpAPI key environment variable is not set.
    """
    api_key = os.getenv(SERPAPI_KEY_ENV)
    if not api_key:
        # The key used to be hard-coded here; it must come from the
        # environment (.env is loaded at import time) and the previously
        # committed key should be rotated.
        raise RuntimeError(
            f"{SERPAPI_KEY_ENV} environment variable is not set"
        )

    params = {
        "api_key": api_key,
        "engine": "google",
        "q": query,
        "google_domain": "google.co.in",
        "gl": "in",
        "hl": "en",
        "location": "India",
        "num": "50",
    }
    results = GoogleSearch(params).get_dict()

    return [
        Document(
            # `or ''` guards against keys that are present but null.
            text=item.get('snippet') or '',
            metadata={
                'name': ' '.join((item.get('title') or '').split()[:2]),
                'link': item.get('link'),
            },
        )
        for item in results.get('organic_results', [])
    ]


def create_index(docs: list[Document]) -> VectorStoreIndex:
    """Build an in-memory FAISS-backed vector index over ``docs``.

    Args:
        docs: Documents to embed and index.

    Returns:
        A ``VectorStoreIndex`` whose vectors live in a flat L2 FAISS index of
        dimension ``EMBEDDING_DIM``.
    """
    faiss_index = faiss.IndexFlatL2(EMBEDDING_DIM)
    vector_store = FaissVectorStore(faiss_index=faiss_index)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    return VectorStoreIndex.from_documents(
        docs,
        storage_context=storage_context,
        show_progress=True,
    )


@app.post("/search")
def search(query_request: QueryRequest):
    """Search the web for the request and return the most relevant snippets.

    Returns:
        ``{"results": [...]}`` where each entry has the source node's
        ``metadata`` and ``text``, or ``{"message": "No documents found"}``
        when the web search yields nothing.

    Raises:
        HTTPException: status 500 with the underlying error message when any
            step of the pipeline fails.
    """
    try:
        search_query = generate_search_query(query_request.query)
        docs = search_google(search_query)
        if not docs:
            return {"message": "No documents found"}

        index = create_index(docs)
        query_engine = index.as_query_engine(similarity_top_k=SIMILARITY_TOP_K)
        response = query_engine.query(query_request.query)

        results = [
            {"metadata": node.metadata, "text": node.text}
            for node in response.source_nodes
        ]
        logger.debug("search results: %s", results)
        return {"results": results}
    except HTTPException:
        # Already a well-formed HTTP error; do not rewrap it as a 500.
        raise
    except Exception as e:
        # Top-level boundary: record the traceback, then surface a 500.
        logger.exception("search failed")
        raise HTTPException(status_code=500, detail=str(e)) from e