yash bhaskar committed on
Commit
5f1a181
·
1 Parent(s): 386f2a5

Adding DocumentCollection

Browse files
Agents/rankerAgent.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from together import Together
3
+
4
+ def rerank_best_answer(json_files, config_file='config.json', model="meta-llama/Llama-3-8b-chat-hf"):
5
+ # Load API key from configuration file
6
+ with open(config_file, 'r') as file:
7
+ config = json.load(file)
8
+
9
+ together_ai_key = config.get("TOGETHER_AI")
10
+ if not together_ai_key:
11
+ raise ValueError("TOGETHER_AI key not found in the config file.")
12
+
13
+ # Initialize Together client
14
+ client = Together(api_key=together_ai_key)
15
+
16
+ # Combine all JSON files into a single structure
17
+ combined_prompts = {}
18
+ for json_file in json_files:
19
+ with open(json_file, 'r') as file:
20
+ data = json.load(file)
21
+
22
+ # Format the input for the prompt
23
+ for item in data:
24
+ query_id = item['query_id']
25
+ if query_id not in combined_prompts:
26
+ combined_prompts[query_id] = {
27
+ "question": item['input'],
28
+ "answers": {}
29
+ }
30
+ combined_prompts[query_id]["answers"][json_file] = item['response']
31
+
32
+ responses = []
33
+
34
+ for query_id, prompt in combined_prompts.items():
35
+ # Generate the prompt text
36
+ prompt_text = f"""Input JSON:
37
+ {json.dumps(prompt, indent=4)}
38
+
39
+ For the above question, identify which model gave the best response based on accuracy. Ensure the chosen response is an answer and not a follow-up question. Provide the output in the format:
40
+ {{
41
+ "best_model": "<model_name>",
42
+ "best_answer": "<answer>"
43
+ }}
44
+ Just output this JSON and nothing else.
45
+ """
46
+
47
+ # Generate response from Together API
48
+ response = client.chat.completions.create(
49
+ model=model,
50
+ messages=[{"role": "user", "content": prompt_text}],
51
+ )
52
+ response_content = response.choices[0].message.content
53
+ # print(response_content)
54
+
55
+ prompt_text_extract_bestModel = f"""Input JSON:
56
+ {json.dumps(response_content, indent=4)}
57
+
58
+ Just Output the best_model from above JSON and nothing else.
59
+ """
60
+ prompt_text_extract_bestAnswer = f"""Input JSON:
61
+ {json.dumps(response_content, indent=4)}
62
+
63
+ Just Output the best_answer from above JSON and nothing else.
64
+ """
65
+ response_bestModel = client.chat.completions.create(
66
+ model=model,
67
+ messages=[{"role": "user", "content": prompt_text_extract_bestModel}],
68
+ )
69
+ response_bestAnswer = client.chat.completions.create(
70
+ model=model,
71
+ messages=[{"role": "user", "content": prompt_text_extract_bestAnswer}],
72
+ )
73
+
74
+ # print({"query_id": query_id, "question": prompt["question"], "Ranker_Output": response.choices[0].message.content})
75
+ responses.append({"query_id": query_id, "question": prompt["question"], "best_model": response_bestModel.choices[0].message.content, "best_answer": response_bestAnswer.choices[0].message.content})
76
+
77
+ print(response_bestModel.choices[0].message.content)
78
+
79
+ return responses
80
+
81
+
82
def rankerAgent(prompt, config_file='config.json', model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"):
    """Ask an LLM judge to pick the best model answer for a single query.

    Parameters:
    - prompt (dict): JSON-serializable structure holding the question and the
      candidate answers keyed by model name (same shape as the per-query
      entries built in rerank_best_answer).
    - config_file (str): JSON file holding the TOGETHER_AI API key.
    - model (str): Together AI model used as the judge.

    Returns:
    - tuple[str, str]: (best_model, best_answer) as reported by the judge.

    Raises:
    - ValueError: If the config file has no TOGETHER_AI key.
    """
    # Load API key from configuration file
    with open(config_file, 'r', encoding='utf-8') as file:
        config = json.load(file)

    together_ai_key = config.get("TOGETHER_AI")
    if not together_ai_key:
        raise ValueError("TOGETHER_AI key not found in the config file.")

    # Initialize Together client
    client = Together(api_key=together_ai_key)

    prompt_text = f"""Input JSON:
{json.dumps(prompt, indent=4)}

For the above question, identify which model gave the best response based on accuracy. Ensure the chosen response is an answer and not a follow-up question. Provide the output in the format:
{{
    "best_model": "<model_name>",
    "best_answer": "<answer>"
}}
Just output this JSON and nothing else.
"""

    # Ask the judge model to choose the best answer
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt_text}],
    )
    response_content = response.choices[0].message.content

    # BUG FIX: response_content is already JSON text; re-encoding it with
    # json.dumps() wrapped it in quotes and escaped every character,
    # confusing the extraction prompts below. Insert the raw reply instead.
    prompt_text_extract_bestModel = f"""Input JSON:
{response_content}

Just Output the best_model from above JSON and nothing else.
"""
    prompt_text_extract_bestAnswer = f"""Input JSON:
{response_content}

Just Output the best_answer from above JSON and nothing else.
"""
    response_bestModel = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt_text_extract_bestModel}],
    )
    response_bestAnswer = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt_text_extract_bestAnswer}],
    )

    return response_bestModel.choices[0].message.content, response_bestAnswer.choices[0].message.content
133
+
134
+
135
+ # # Usage example
136
+ # json_files = ["../QnA_Eval/Responses/BOW_1_2_top_100_response.json",
137
+ # "../QnA_Eval/Responses/BOW_1_2_top_100_modified_response.json",
138
+ # "../QnA_Eval/Responses/tf-idf_1_2_top_100_response.json",
139
+ # "../QnA_Eval/Responses/tf-idf_1_2_top_100_modified_response.json",
140
+ # "../QnA_Eval/Responses/bm25_1_2_top_100_response.json",
141
+ # "../QnA_Eval/Responses/bm25_1_2_top_100_modified_response.json",
142
+ # "../QnA_Eval/Responses/open_source_1_2_top_100_response.json",
143
+ # "../QnA_Eval/Responses/open_source_1_2_top_100_modified_response.json",
144
+ # "../QnA_Eval/Responses/vision_1_2_top_100_response.json",
145
+ # "../QnA_Eval/Responses/vision_1_2_top_100_modified_response.json",
146
+ # "../QnA_Eval/Responses/ZeroShot_response.json",
147
+ # "../QnA_Eval/Responses/WikiAgent_response.json",
148
+ # "../QnA_Eval/Responses/WikiAgent_response_modified.json",
149
+ # "../QnA_Eval/Responses/LlamaAgent_response.json",
150
+ # "../QnA_Eval/Responses/LlamaAgent_response_modified.json",
151
+ # "../QnA_Eval/Responses/tf_idf_bm25_open_1_2_top_100_combined_response.json", "../QnA_Eval/Responses/tf_idf_bm25_open_1_2_top_100_combined_modified_response.json", "../QnA_Eval/Responses/tf_idf_bm25_open_1_2_top_100_combined_both_response.json"]
152
+
153
+ # config_file = "../config.json"
154
+
155
+ # result = rerank_best_answer(json_files, config_file)
156
+
157
+ # with open("reranked_best_answers_1_2.json", 'w') as file:
158
+ # json.dump(result, file, indent=4, ensure_ascii=False)
Agents/togetherAIAgent.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from together import Together
3
+
4
def generate_article_from_query(query, config_file='config.json', model="meta-llama/Llama-3-8b-chat-hf"):
    """
    Generates an article based on the given query using the Together API.

    Parameters:
    - query (str): The input query for generating the article.
    - config_file (str): Path to the JSON file containing the API key. Default is 'config.json'.
    - model (str): The Together AI model to use. Default is "meta-llama/Llama-3-8b-chat-hf".

    Returns:
    - str: The generated article content.

    Raises:
    - ValueError: If the config file does not contain a TOGETHER_AI key.
    """
    # Read the API credentials from the configuration file.
    with open(config_file, 'r') as cfg:
        settings = json.load(cfg)

    api_key = settings.get("TOGETHER_AI")
    if not api_key:
        raise ValueError("TOGETHER_AI key not found in the config file.")

    # Set up the Together client with the loaded key.
    llm_client = Together(api_key=api_key)

    # Build the instruction prompt around the user's query.
    article_prompt = f"""Using the query provided, generate a well-researched and informative short article. The article should be detailed, accurate, and structured to cover various aspects of the topic in an engaging way. Focus on presenting key facts, historical context, notable insights, and any relevant background information that adds value to the reader’s understanding. Ensure the tone is neutral and informative. Keep the article short. Here’s the query:

Query: {query}"""

    # Request the completion and hand back the generated text.
    completion = llm_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": article_prompt}],
    )
    return completion.choices[0].message.content
39
+
40
+ # # Example usage
41
+ # if __name__ == "__main__":
42
+ # query = "I feel anxious about my health and stressed at work."
43
+ # article = generate_article_from_query(query)
44
+ # print(article)
Agents/wikiAgent.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
from dataclasses import dataclass
from datetime import datetime
from typing import Dict, List, Optional

import wikipediaapi
6
+
7
@dataclass
class WikiSearchResult:
    """Data class to store Wikipedia article information"""
    title: str                  # Article title as returned by the API
    summary: str                # Lead-section summary text
    full_text: str              # Complete plain-text article body
    url: str                    # Canonical article URL (page.fullurl)
    last_modified: datetime     # Parsed from the page's 'touched' timestamp
    categories: List[str]       # Titles of the categories the article belongs to
16
+
17
def initialize_wikipedia_client(language: str = 'en', user_agent: str = 'WikipediaSearcher/1.0') -> wikipediaapi.Wikipedia:
    """
    Initialize Wikipedia API client

    Args:
        language: Language code (e.g., 'en' for English)
        user_agent: User agent string for API requests

    Returns:
        Wikipedia API client instance
    """
    # Plain-wiki extract format keeps the text free of HTML markup.
    client = wikipediaapi.Wikipedia(
        user_agent=user_agent,
        language=language,
        extract_format=wikipediaapi.ExtractFormat.WIKI,
    )
    return client
33
+
34
def process_page(page: wikipediaapi.WikipediaPage) -> WikiSearchResult:
    """Convert a Wikipedia page object into a WikiSearchResult record."""
    return WikiSearchResult(
        title=page.title,
        summary=page.summary,
        full_text=page.text,
        url=page.fullurl,
        # 'touched' arrives as an ISO-8601 UTC string, e.g. '2021-01-01T00:00:00Z'.
        last_modified=datetime.strptime(page.touched, '%Y-%m-%dT%H:%M:%SZ'),
        categories=[category.title for category in page.categories.values()],
    )
46
+
47
def search_wikipedia(client: wikipediaapi.Wikipedia, query: str, results_limit: int = 3) -> List[WikiSearchResult]:
    """
    Search Wikipedia and get detailed information for matching articles

    The exact-title page is fetched first; when more results are requested,
    pages linked from it fill the remaining slots.

    Args:
        client: Wikipedia API client instance
        query: Search query string
        results_limit: Maximum number of results to return

    Returns:
        List of WikiSearchResult objects containing article information
        (empty when nothing matched or an error occurred)
    """
    try:
        main_page = client.page(query)

        if not main_page.exists():
            logging.warning(f"No exact match found for: {query}")
            return []

        found = [process_page(main_page)]

        # Pad the result list with linked pages when more than one result
        # was requested.
        if results_limit > 1:
            for linked_title in list(main_page.links.keys())[:results_limit - 1]:
                linked_page = client.page(linked_title)
                if linked_page.exists():
                    found.append(process_page(linked_page))

        return found

    except Exception as err:
        logging.error(f"Error searching Wikipedia: {err}")
        return []
80
+
81
def format_result(result: WikiSearchResult, include_full_text: bool = False) -> str:
    """
    Format a search result for display

    Args:
        result: WikiSearchResult object to format
        include_full_text: Whether to include the full article text

    Returns:
        Formatted string containing article information
    """
    # Show at most five categories; flag truncation with an ellipsis.
    category_list = ', '.join(result.categories[:5])
    overflow_marker = '...' if len(result.categories) > 5 else ''

    text = f"""
Title: {result.title}
URL: {result.url}
Last Modified: {result.last_modified}
Categories: {category_list}{overflow_marker}

Summary:
{result.summary}
"""
    if include_full_text:
        text += f"\nFull Text:\n{result.full_text}"

    return text
105
+
106
def get_wiki_data(query: str, results_limit: int = 3) -> Optional[List[str]]:
    """
    Get Wikipedia data for a given query. If the full query returns no
    results, progressively shorter word-prefixes of the query are tried
    until a result is found or all attempts fail.

    Args:
        query: Search query string
        results_limit: Maximum number of results requested from the
            underlying search (only the first result's summary is returned)

    Returns:
        A single-element list containing the first matching article's
        summary, or None if no variation of the query matched.
    """
    # NOTE(review): configuring the root logger inside a library function
    # affects the whole process; kept as-is for backward compatibility.
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    client = initialize_wikipedia_client()

    def get_search_result(search_query):
        """Return the first result's summary for search_query, or None."""
        matches = search_wikipedia(client, search_query, results_limit)
        if matches:
            return matches[0].summary
        return None

    # First attempt: the query exactly as given.
    summary = get_search_result(query)
    if summary:
        return [summary]

    # Fall back to shorter word-prefixes of the query.
    # BUG FIX: start at n-1 words — the original loop began at i = n, which
    # re-ran the full query already tried above, wasting one search call.
    words = query.split()
    for i in range(len(words) - 1, 1, -1):  # prefixes of n-1 down to 2 words
        prefix_query = ' '.join(words[:i])
        logging.info(f"Trying n-gram query: {prefix_query}")
        summary = get_search_result(prefix_query)
        if summary:
            return [summary]

    # No variation of the query produced a result.
    logging.info("No results found for any query variations.")
    return None
146
+
147
+ # # Example usage
148
+ # if __name__ == "__main__":
149
+ # query = "Clash of Clans"
150
+ # results = get_wiki_data(query, results_limit=3)
151
+
152
+ # if not results:
153
+ # print(f"No results found for query: {query}")
154
+ # else:
155
+ # for idx, result in enumerate(results, 1):
156
+ # print(f"\nResult {idx}:")
157
+ # print("-" * 60)
158
+ # print(format_result(result))