# Hugging Face Space (status: Sleeping)
| import gradio as gr | |
| from sentence_transformers import SentenceTransformer | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline | |
| import faiss | |
| import numpy as np | |
# Load the models once at import time: a sentence encoder used for
# retrieval, and a seq2seq model wrapped in a text2text-generation pipeline
# used for answering.
model_name = "google/flan-t5-base"
embedder = SentenceTransformer("all-MiniLM-L6-v2")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
llm = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,
)

# Hardcoded sample transcript that the demo answers questions about.
transcript = """
The meeting started at 10 AM. The team discussed the new project timeline.
John mentioned that the deadline is tight but achievable. Sarah suggested adding more resources.
The team agreed to meet again tomorrow to finalize the plan.
"""
# Transcript preprocessing
def preprocess_transcript(text):
    """Collapse every run of whitespace in ``text`` into a single space.

    Leading and trailing whitespace disappears as well, because the text is
    split on whitespace and the resulting pieces are re-joined.
    """
    tokens = text.split()
    return " ".join(tokens)
def chunk_text(text, chunk_size=300, overlap=50):
    """Split ``text`` into overlapping chunks of whitespace-separated words.

    Args:
        text: Text to split.
        chunk_size: Maximum number of words per chunk; must be positive.
        overlap: Number of words shared by consecutive chunks; must satisfy
            ``0 <= overlap < chunk_size``.

    Returns:
        A list of chunk strings (words joined by single spaces). The list is
        empty when ``text`` contains no words.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    words = text.split()
    stride = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), stride):
        chunks.append(" ".join(words[start:start + chunk_size]))
        # Once a window reaches the end of the text, any further window
        # would only repeat words already in this chunk, so stop here.
        if start + chunk_size >= len(words):
            break
    return chunks
# Clean and chunk the transcript, embed every chunk, and store the vectors
# in a flat L2 FAISS index sized to the embedding dimension.
_cleaned_transcript = preprocess_transcript(transcript)
chunks = chunk_text(_cleaned_transcript)
embeddings = embedder.encode(chunks)
_embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatL2(_embedding_dim)
index.add(np.array(embeddings))
# Query the FAISS index
def query_faiss(query, index, embedder, chunks, top_k=2):
    """Return the chunks most similar to ``query``, joined by blank lines.

    Args:
        query: Question text to embed and look up.
        index: Vector index exposing ``search(vectors, k)`` that returns a
            ``(distances, ids)`` pair of arrays, as a FAISS index does.
        embedder: Object whose ``encode(list_of_str)`` returns one embedding
            per input string.
        chunks: Text chunks; position ``i`` must correspond to id ``i`` in
            ``index``.
        top_k: Maximum number of chunks to retrieve.

    Returns:
        The retrieved chunks in the order the index returned them, separated
        by a blank line. An empty string when there is nothing to retrieve.
    """
    # Never ask for more neighbours than there are chunks.
    k = min(top_k, len(chunks))
    if k <= 0:
        return ""

    query_vector = np.asarray(embedder.encode([query]), dtype="float32")
    _, ids = index.search(query_vector, k)
    # FAISS pads missing results with -1; used as a list index that would
    # silently select the last chunk, so drop any id outside the valid range.
    hits = [chunks[i] for i in ids[0] if 0 <= i < len(chunks)]
    return "\n\n".join(hits)
# Build prompt and generate answer
def chat_with_transcript(query):
    """Answer ``query`` from the transcript using retrieval plus generation.

    The question is used to retrieve context with ``query_faiss``; context
    and question are placed into a prompt that is passed to the ``llm``
    pipeline.

    Args:
        query: The user's question. ``None``, empty, or whitespace-only
            input is treated as "no question".

    Returns:
        The generated answer with surrounding whitespace stripped, or a
        short message asking for a question when none was provided.
    """
    question = query.strip() if query else ""
    if not question:
        # Nothing to retrieve or answer, so skip the model call entirely.
        return "Please enter a question about the transcript."

    context = query_faiss(question, index, embedder, chunks)
    prompt = (
        "You are an AI assistant. Use the following context to answer the question.\n"
        "Context:\n"
        f"{context}\n"
        f"Question: {question}\n"
        "Provide your answer below:\n"
    )
    response = llm(prompt)[0]["generated_text"]
    print("Raw model response:", response)  # Debug statement
    return response.strip()
# Gradio UI: a question box whose submit event runs chat_with_transcript
# and shows the result in the answer box.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 Chat with a Transcript")
    query_input = gr.Textbox(label="Ask a question about the transcript")
    answer_output = gr.Textbox(label="Answer")
    query_input.submit(
        fn=chat_with_transcript,
        inputs=[query_input],
        outputs=[answer_output],
    )

demo.launch()