Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, HTTPException | |
| from pydantic import BaseModel | |
| from openai import AzureOpenAI as moa | |
| from serpapi import GoogleSearch | |
| import settings | |
| import os | |
| import re | |
| import faiss | |
| from llama_index.core import Document, VectorStoreIndex, StorageContext | |
| from llama_index.vector_stores.faiss import FaissVectorStore | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| app = FastAPI(title="IntelliHire") | |
| client = moa( | |
| api_version=os.getenv('AZURE_VERSION'), | |
| azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT'), | |
| api_key=os.getenv('AZURE_API_KEY'), | |
| ) | |
class QueryRequest(BaseModel):
    """Request body for the search endpoint: a free-text hiring query."""
    query: str
def generate_search_query(user_query: str) -> str:
    """Turn a natural-language hiring query into a Google search string.

    Asks the Azure OpenAI deployment to rewrite *user_query* as a
    ``site:linkedin.com/in/`` Google query, wrapped in ``<query>...</query>``
    tags, then extracts and returns the query text.

    Args:
        user_query: Free-text request, e.g. "Find devops engineer in Berlin".

    Returns:
        The extracted Google search string (surrounding whitespace stripped).

    Raises:
        ValueError: If no ``<query>...</query>`` tag is found in the model reply.
    """
    response = client.chat.completions.create(
        model='agile4',
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant.",
            },
            {
                "role": "user",
                "content": f"""Convert the given query to google search for linked site ex: 'Find devops engineer in Berlin' → site:linkedin.com/in/ 'devops engineer' Berlin". Query: {user_query}, Response: <query>...</query>""",
            }
        ],
        max_completion_tokens=800,
        temperature=0.7,
    )
    text = response.choices[0].message.content
    # re.DOTALL so the match survives a model reply that wraps the query
    # across multiple lines (the default '.' does not match newlines).
    matches = re.findall(r'<query>(.*?)</query>', text, flags=re.DOTALL)
    if not matches:
        raise ValueError("Failed to parse search query from model response")
    return matches[0].strip()
def search_google(query: str) -> list[Document]:
    """Run a SerpAPI Google search and wrap organic results as Documents.

    Each organic result becomes a ``Document`` whose text is the result
    snippet, with the first two words of the title (presumably the person's
    name for LinkedIn profile results) and the result link kept as metadata.

    Args:
        query: A fully-formed Google search string.

    Returns:
        A list of Documents, one per organic result (possibly empty).

    Raises:
        RuntimeError: If the SERPAPI_API_KEY environment variable is unset.
    """
    # SECURITY: the API key was previously hard-coded in source. Read it from
    # the environment instead so the secret never lives in version control.
    # (The old committed key should be revoked.)
    api_key = os.getenv("SERPAPI_API_KEY")
    if not api_key:
        raise RuntimeError("SERPAPI_API_KEY environment variable is not set")
    params = {
        "api_key": api_key,
        "engine": "google",
        "q": query,
        "google_domain": "google.co.in",
        "gl": "in",
        "hl": "en",
        "location": "India",
        "num": "50"
    }
    search = GoogleSearch(params)
    results = search.get_dict()
    docs = []
    for data in results.get('organic_results', []):
        docs.append(Document(
            text=data.get('snippet', ''),
            metadata={
                # First two title words — assumed to be the candidate's name
                # for LinkedIn results; TODO confirm against real responses.
                'name': ' '.join(data.get('title', '').split()[:2]),
                'link': data.get('link')
            }
        ))
    return docs
def create_index(docs: list[Document], dim: int = 1536) -> VectorStoreIndex:
    """Build an in-memory FAISS-backed vector index over *docs*.

    Args:
        docs: Documents to embed and index.
        dim: Embedding dimensionality for the FAISS flat-L2 index. Defaults
            to 1536 (the size produced by OpenAI text-embedding-ada-002 /
            text-embedding-3-small — TODO confirm it matches the configured
            embedding model).

    Returns:
        A VectorStoreIndex ready for querying.
    """
    faiss_index = faiss.IndexFlatL2(dim)
    vector_store = FaissVectorStore(faiss_index=faiss_index)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    return VectorStoreIndex.from_documents(
        docs,
        storage_context=storage_context,
        show_progress=True,
    )
def search(query_request: QueryRequest):
    """End-to-end candidate search: query rewrite → Google → RAG answer.

    Rewrites the user's query into a Google search, fetches results via
    SerpAPI, indexes the snippets, and returns the top matching source nodes.

    NOTE(review): no route decorator is visible here — confirm whether an
    ``@app.post(...)`` line was lost, otherwise this handler is never exposed.

    Args:
        query_request: Request body carrying the free-text query.

    Returns:
        ``{"results": [...]}`` with node metadata and text, or
        ``{"message": "No documents found"}`` when the search is empty.

    Raises:
        HTTPException: 500 wrapping any underlying failure.
    """
    try:
        search_query = generate_search_query(query_request.query)
        docs = search_google(search_query)
        if not docs:
            return {"message": "No documents found"}
        index = create_index(docs)
        query_engine = index.as_query_engine(similarity_top_k=10)
        response = query_engine.query(query_request.query)
        results = [
            {"metadata": node.metadata, "text": node.text}
            for node in response.source_nodes
        ]
        return {"results": results}
    except Exception as e:
        # Boundary handler: surface any failure as a clean JSON 500 response,
        # chaining the original exception for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e