File size: 3,481 Bytes
b997da5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2647bd
b997da5
 
 
 
 
 
 
 
 
 
 
 
97a5f0e
 
 
 
 
 
 
 
 
 
b997da5
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from openai import AzureOpenAI as moa
from serpapi import GoogleSearch
import settings

import os
import re
import faiss
from llama_index.core import Document, VectorStoreIndex, StorageContext
from llama_index.vector_stores.faiss import FaissVectorStore

from dotenv import load_dotenv

load_dotenv()

app = FastAPI(title="IntelliHire")

# Azure OpenAI client; all connection details come from the environment
# loaded via load_dotenv() above (AZURE_VERSION, AZURE_OPENAI_ENDPOINT,
# AZURE_API_KEY). NOTE(review): os.getenv returns None when a variable is
# missing — the client constructor will then fail at first use, not here.
client = moa(
    api_version=os.getenv('AZURE_VERSION'),
    azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT'),
    api_key=os.getenv('AZURE_API_KEY'),
)

class QueryRequest(BaseModel):
    """Request body for POST /search: a free-form natural-language query."""

    query: str

def generate_search_query(user_query: str) -> str:
    """Convert a natural-language query into a Google search string
    targeting LinkedIn profile pages, using the LLM.

    The model is prompted to wrap its answer in ``<query>...</query>``
    tags, which are then extracted from the response text.

    Args:
        user_query: Free-form query, e.g. "Find devops engineer in Berlin".

    Returns:
        The extracted search string (first match), stripped of
        surrounding whitespace.

    Raises:
        ValueError: If the model response contains no ``<query>`` tags.
    """
    response = client.chat.completions.create(
        model='agile4',
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant.",
            },
            {
                "role": "user",
                "content": f"""Convert the given query to google search for linked site ex: 'Find devops engineer in Berlin' → site:linkedin.com/in/ 'devops engineer' Berlin". Query: {user_query}, Response: <query>...</query>""",
            }
        ],
        max_completion_tokens=800,
        temperature=0.7,
    )
    text = response.choices[0].message.content
    # re.DOTALL: without it, a query the model splits across lines would
    # silently fail to match (. does not cross newlines by default).
    matches = re.findall(r'<query>(.*?)</query>', text, re.DOTALL)
    if not matches:
        raise ValueError("Failed to parse search query from model response")
    return matches[0].strip()

def search_google(query: str) -> list[Document]:
    """Run the query through SerpApi's Google engine and wrap each
    organic result as a llama_index Document.

    Args:
        query: A Google search string (typically produced by
            generate_search_query).

    Returns:
        One Document per organic result: the snippet as text, with the
        first two words of the title (presumably the person's name —
        TODO confirm) and the result link as metadata. Empty list when
        SerpApi returns no organic results.
    """
    params = {
        # Security fix: the SerpApi key was previously hardcoded here.
        # Read it from the environment (loaded via load_dotenv) instead.
        "api_key": os.getenv("SERPAPI_API_KEY"),
        "engine": "google",
        "q": query,
        "google_domain": "google.co.in",
        "gl": "in",
        "hl": "en",
        "location": "India",
        "num": "50"
    }
    search = GoogleSearch(params)
    results = search.get_dict()
    docs = []
    for data in results.get('organic_results', []):
        docs.append(Document(
            text=data.get('snippet', ''),
            metadata={
                'name': ' '.join(data.get('title', '').split()[:2]),
                'link': data.get('link')
            }
        ))
    return docs

def create_index(docs: list[Document], dim: int = 1536) -> VectorStoreIndex:
    """Build an in-memory FAISS-backed vector index over the documents.

    Args:
        docs: Documents to embed and index.
        dim: Embedding dimensionality for the FAISS flat-L2 index.
            Defaults to 1536 (the original hardcoded value; matches
            OpenAI text-embedding-ada-002 / 3-small — TODO confirm the
            embedding model actually configured for llama_index).

    Returns:
        A VectorStoreIndex ready for querying.
    """
    faiss_index = faiss.IndexFlatL2(dim)
    vector_store = FaissVectorStore(faiss_index=faiss_index)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    return VectorStoreIndex.from_documents(
        docs,
        storage_context=storage_context,
        show_progress=True,
    )

@app.post("/search")
def search(query_request: QueryRequest):
    """End-to-end search endpoint.

    Pipeline: rewrite the user's query into a LinkedIn-targeted Google
    search, fetch results via SerpApi, index the snippets in FAISS, and
    answer the original query over that index.

    Returns:
        {"results": [{"metadata": ..., "text": ...}, ...]} on success,
        or {"message": "No documents found"} when the web search yields
        nothing.

    Raises:
        HTTPException: 500 with the underlying error message on any
            failure in the pipeline.
    """
    try:
        search_query = generate_search_query(query_request.query)
        docs = search_google(search_query)
        if not docs:
            return {"message": "No documents found"}
        index = create_index(docs)
        query_engine = index.as_query_engine(similarity_top_k=10)
        response = query_engine.query(query_request.query)
        results = [
            {"metadata": node.metadata, "text": node.text}
            for node in response.source_nodes
        ]
        return {"results": results}
    except HTTPException:
        # Don't re-wrap an intentional HTTP error in a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))